TFLite GPU: Replace tflite::gpu::Status with absl::Status.

PiperOrigin-RevId: 302551400
Change-Id: Ib36038b364fda986c12543576471c07eef87db14
Author: Juhyun Lee <2020-03-23 17:05:30 -07:00>
Committed by: TensorFlower Gardener
parent b5c7256118
commit f18fa5b6b0
286 changed files with 3877 additions and 3922 deletions
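For context, the status-handling pattern shared by all of the files below looks roughly like this. This is a minimal sketch, not part of the diff; it assumes the unqualified Status/OkStatus/RETURN_IF_ERROR spellings from tensorflow/lite/delegates/gpu/common/status.h, and CheckSize/Prepare are hypothetical helpers invented for illustration:

// Illustrative sketch only -- hypothetical helpers, not from this commit.
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {

Status CheckSize(int size) {  // hypothetical
  if (size <= 0) {
    return InvalidArgumentError("size must be positive");
  }
  return OkStatus();
}

Status Prepare(int size) {  // hypothetical
  // RETURN_IF_ERROR propagates any non-OK status to the caller.
  RETURN_IF_ERROR(CheckSize(size));
  return OkStatus();
}

}  // namespace gpu
}  // namespace tflite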


@@ -12,6 +12,12 @@ exports_files([
     "metal_delegate.h",
 ])
 
+# Primary purpose of this config is to replace ::util::Status with our custom
+# light implementation ::tflite::gpu::StatusLite to reduce binary size. Besides
+# that, certain features that were hard to communicate without full open source
+# were hidden away too such as compiled models, serialization, and metadata.
+# While the latter will be fully available with the open source release, the
+# former will have to stay until absl::Status is released.
 config_setting(
     name = "tflite_gpu_binary_release",
     values = {"copt": "-DTFLITE_GPU_BINARY_RELEASE"},


@@ -220,8 +220,7 @@ class InferenceBuilder {
   // Sets new shape for the input if underlying implementation and graph
   // structure allows dynamic tensors.
-  virtual absl::Status SetInputShape(int index,
-                                     const Dimensions& dimensions) = 0;
+  virtual Status SetInputShape(int index, const Dimensions& dimensions) = 0;
 
   // Updates object definitions for the given index. Implementation may allow
   // to use different layouts and/or data type conversions between objects
@@ -230,21 +229,21 @@ class InferenceBuilder {
   // A user, however, has an input in DataType::FLOAT16, DataLayout::PHWC4.
   // An implementation may allow this transformation to happen automatically
   // under the hood.
-  virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0;
-  virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0;
-  virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def) {
+  virtual Status SetInputObjectDef(int index, ObjectDef def) = 0;
+  virtual Status SetOutputObjectDef(int index, ObjectDef def) = 0;
+  virtual Status SetAllInputObjectDefsTo(ObjectDef def) {
     auto input_defs = inputs();
     for (int i = 0; i < input_defs.size(); ++i) {
       RETURN_IF_ERROR(SetInputObjectDef(i, def));
     }
-    return absl::OkStatus();
+    return OkStatus();
   }
-  virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def) {
+  virtual Status SetAllOutputObjectDefsTo(ObjectDef def) {
     auto output_defs = outputs();
     for (int i = 0; i < output_defs.size(); ++i) {
       RETURN_IF_ERROR(SetOutputObjectDef(i, def));
     }
-    return absl::OkStatus();
+    return OkStatus();
   }
 
   // Creates new instance of the inference runner. InferenceBuilder stays valid
@@ -252,7 +251,7 @@ class InferenceBuilder {
   //
   // This method may take significant time to prepare new inference runner. For
   // example, it may require to compile OpenGL shaders.
-  virtual absl::Status Build(std::unique_ptr<InferenceRunner>* runner) = 0;
+  virtual Status Build(std::unique_ptr<InferenceRunner>* runner) = 0;
 };
 
 // Runs prepared inference. Every object marked as external needs to be set
@@ -269,12 +268,12 @@ class InferenceRunner {
   // Setters allow to set or change external object for the given index. Note,
   // object need to match object definition set before in InferenceBuilder.
-  virtual absl::Status GetInputObject(int index, TensorObject* object) = 0;
-  virtual absl::Status GetOutputObject(int index, TensorObject* object) = 0;
-  virtual absl::Status SetInputObject(int index, TensorObject object) = 0;
-  virtual absl::Status SetOutputObject(int index, TensorObject object) = 0;
+  virtual Status GetInputObject(int index, TensorObject* object) = 0;
+  virtual Status GetOutputObject(int index, TensorObject* object) = 0;
+  virtual Status SetInputObject(int index, TensorObject object) = 0;
+  virtual Status SetOutputObject(int index, TensorObject object) = 0;
 
-  virtual absl::Status Run() = 0;
+  virtual Status Run() = 0;
 };
 
 // Encapsulated compilation/runtime tradeoffs.
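A rough sketch of how callers drive this builder/runner pair; this is illustrative only, `builder`, `gpu_object_def`, and `input_object` are assumed to exist, and the unqualified Status spelling follows the new side of this diff:

Status RunOnce(InferenceBuilder* builder, ObjectDef gpu_object_def,
               TensorObject input_object) {
  RETURN_IF_ERROR(builder->SetAllInputObjectDefsTo(gpu_object_def));
  std::unique_ptr<InferenceRunner> runner;
  RETURN_IF_ERROR(builder->Build(&runner));
  // Every object marked as external must be set before Run().
  RETURN_IF_ERROR(runner->SetInputObject(0, input_object));
  return runner->Run();
}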


@@ -54,22 +54,22 @@ class NoopTensorTie : public TensorTie {
     return def.external_def == def.internal_def;
   }
 
-  absl::Status SetExternalObject(TensorObject obj) final {
+  Status SetExternalObject(TensorObject obj) final {
     if (!def().external_def.object_def.user_provided) {
-      return absl::InvalidArgumentError("Tensor object is readonly.");
+      return InvalidArgumentError("Tensor object is readonly.");
     }
     if (!IsValid(def().external_def, obj)) {
-      return absl::InvalidArgumentError("Given object is not valid");
+      return InvalidArgumentError("Given object is not valid");
     }
     obj_ = obj;
-    return absl::OkStatus();
+    return OkStatus();
   }
 
   TensorObject GetExternalObject() final { return obj_; }
 
-  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }
+  Status CopyToExternalObject() final { return OkStatus(); }
 
-  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }
+  Status CopyFromExternalObject() final { return OkStatus(); }
 
  private:
  TensorObject obj_;
@@ -93,45 +93,45 @@ class DefaultTensorTie : public TensorTie {
            converter_builder.IsSupported(def.external_def, def.internal_def);
   }
 
-  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
-                          TensorObjectConverterBuilder* converter_builder,
-                          Environment* env, std::unique_ptr<TensorTie>* tie) {
+  static Status New(const TensorTieDef& def, TensorObject internal_object,
+                    TensorObjectConverterBuilder* converter_builder,
+                    Environment* env, std::unique_ptr<TensorTie>* tie) {
     auto tie_impl = absl::make_unique<DefaultTensorTie>(def, internal_object);
     RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
     *tie = std::move(tie_impl);
-    return absl::OkStatus();
+    return OkStatus();
   }
 
-  absl::Status CopyToExternalObject() final {
+  Status CopyToExternalObject() final {
     if (!converter_to_) {
-      return absl::UnavailableError("Conversion is not available");
+      return UnavailableError("Conversion is not available");
     }
     return converter_to_->Convert(internal_obj_, GetExternalObject());
   }
 
-  absl::Status CopyFromExternalObject() final {
+  Status CopyFromExternalObject() final {
     if (!converter_from_) {
-      return absl::UnavailableError("Conversion is not available");
+      return UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }
 
-  absl::Status SetExternalObject(TensorObject obj) final {
+  Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
-      return absl::InvalidArgumentError("External object is read-only");
+      return InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
-      return absl::InvalidArgumentError("Given object is not valid");
+      return InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
-    return absl::OkStatus();
+    return OkStatus();
  }
 
  TensorObject GetExternalObject() final { return external_obj_; }
 
 private:
-  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
-                    Environment* env) {
+  Status Init(TensorObjectConverterBuilder* converter_builder,
+              Environment* env) {
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
@@ -139,10 +139,10 @@ class DefaultTensorTie : public TensorTie {
     return MaybeAllocateExternalObject(env);
   }
 
-  absl::Status MaybeAllocateExternalObject(Environment* env) {
+  Status MaybeAllocateExternalObject(Environment* env) {
     const TensorObjectDef& d = def().external_def;
     if (d.object_def.user_provided) {
-      return absl::OkStatus();
+      return OkStatus();
     }
     switch (d.object_def.object_type) {
       case ObjectType::CPU_MEMORY: {
@@ -170,9 +170,9 @@ class DefaultTensorTie : public TensorTie {
         break;
       }
       default:
-        return absl::InternalError("Unexpected object type");
+        return InternalError("Unexpected object type");
     }
-    return absl::OkStatus();
+    return OkStatus();
   }
 
   const TensorObject internal_obj_;
@@ -198,26 +198,26 @@ class TwoStepTensorTie : public TensorTie {
            DefaultTensorTie::IsSupported(defs.second, converter_builder);
   }
 
-  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
-                          TensorObjectConverterBuilder* converter_builder,
-                          Environment* env, std::unique_ptr<TensorTie>* tie) {
+  static Status New(const TensorTieDef& def, TensorObject internal_object,
+                    TensorObjectConverterBuilder* converter_builder,
+                    Environment* env, std::unique_ptr<TensorTie>* tie) {
     auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
     RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
     *tie = std::move(tie_impl);
-    return absl::OkStatus();
+    return OkStatus();
   }
 
-  absl::Status CopyToExternalObject() final {
+  Status CopyToExternalObject() final {
     RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
     return outer_tie_->CopyToExternalObject();
   }
 
-  absl::Status CopyFromExternalObject() final {
+  Status CopyFromExternalObject() final {
     RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
     return inner_tie_->CopyFromExternalObject();
   }
 
-  absl::Status SetExternalObject(TensorObject obj) final {
+  Status SetExternalObject(TensorObject obj) final {
     return outer_tie_->SetExternalObject(obj);
   }
 
@@ -241,9 +241,9 @@ class TwoStepTensorTie : public TensorTie {
     return std::make_pair(outer_def, inner_def);
   }
 
-  absl::Status Init(TensorObject internal_object,
-                    TensorObjectConverterBuilder* converter_builder,
-                    Environment* env) {
+  Status Init(TensorObject internal_object,
+              TensorObjectConverterBuilder* converter_builder,
+              Environment* env) {
     auto defs = MakeOuterInnerDefs(def());
     RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                           converter_builder, env, &inner_tie_));
@@ -274,27 +274,27 @@ class GlBufferHolder : public TensorTie {
     return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
   }
 
-  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
-                          TensorObjectConverterBuilder* converter_builder,
-                          GlInteropFabric* gl_interop_fabric, Environment* env,
-                          std::unique_ptr<TensorTie>* tie) {
+  static Status New(const TensorTieDef& def, TensorObject internal_object,
+                    TensorObjectConverterBuilder* converter_builder,
+                    GlInteropFabric* gl_interop_fabric, Environment* env,
+                    std::unique_ptr<TensorTie>* tie) {
     auto tie_impl =
         absl::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
     RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                           converter_builder, env,
                                           &tie_impl->tie_));
     *tie = std::move(tie_impl);
-    return absl::OkStatus();
+    return OkStatus();
   }
 
-  absl::Status SetExternalObject(TensorObject obj) final {
+  Status SetExternalObject(TensorObject obj) final {
     auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
     if (!ssbo) {
-      return absl::InvalidArgumentError("Missing OpenGL SSBO");
+      return InvalidArgumentError("Missing OpenGL SSBO");
     }
     auto old_ssbo = absl::get_if<OpenGlBuffer>(&external_obj_);
     if (old_ssbo && ssbo->id == old_ssbo->id) {
-      return absl::OkStatus();
+      return OkStatus();
     }
     if (cl_object_.memory()) {
       gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
@@ -304,18 +304,16 @@ class GlBufferHolder : public TensorTie {
     external_obj_ = obj;
     RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
     gl_interop_fabric_->RegisterMemory(cl_object_.memory());
-    return absl::OkStatus();
+    return OkStatus();
   }
 
   TensorObject GetExternalObject() final { return external_obj_; }
 
-  absl::Status CopyFromExternalObject() final {
+  Status CopyFromExternalObject() final {
     return tie_->CopyFromExternalObject();
   }
 
-  absl::Status CopyToExternalObject() final {
-    return tie_->CopyToExternalObject();
-  }
+  Status CopyToExternalObject() final { return tie_->CopyToExternalObject(); }
 
 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
@@ -360,20 +358,20 @@ class TensorTieFactory {
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
   }
 
-  absl::Status NewTensorTie(const TensorTieDef& def,
-                            std::unique_ptr<TensorTie>* tie) {
+  Status NewTensorTie(const TensorTieDef& def,
+                      std::unique_ptr<TensorTie>* tie) {
     TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
     auto converter = converter_builder_.get();
     if (NoopTensorTie::IsSupported(def)) {
       *tie = absl::make_unique<NoopTensorTie>(def, internal_object);
-      return absl::OkStatus();
+      return OkStatus();
     }
     if (DefaultTensorTie::IsSupported(def, *converter)) {
       return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
     }
     if (GlBufferHolder::IsSupported(def, *converter)) {
       if (!gl_interop_fabric_) {
-        return absl::InvalidArgumentError(
+        return InvalidArgumentError(
             "GL object is used but InferenceEnvironmentOptions does not have "
             "EGL display and context set.");
       }
@@ -383,7 +381,7 @@ class TensorTieFactory {
     if (TwoStepTensorTie::IsSupported(def, *converter)) {
       return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
     }
-    return absl::UnimplementedError("Unsupported tensor tie definition.");
+    return UnimplementedError("Unsupported tensor tie definition.");
  }
 
 private:
@@ -402,9 +400,9 @@ class InferenceRunnerImpl : public InferenceRunner {
         context_(std::move(context)),
         gl_interop_fabric_(std::move(gl_interop_fabric)) {}
 
-  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
-                          const std::vector<TensorTieDef>& outputs,
-                          TensorTieFactory* factory) {
+  Status Initialize(const std::vector<TensorTieDef>& inputs,
+                    const std::vector<TensorTieDef>& outputs,
+                    TensorTieFactory* factory) {
     RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
     return LinkTensors(outputs, factory, &outputs_);
   }
@@ -417,37 +415,37 @@ class InferenceRunnerImpl : public InferenceRunner {
     return GetExternalDefinitions(outputs_);
   }
 
-  absl::Status GetInputObject(int index, TensorObject* object) override {
+  Status GetInputObject(int index, TensorObject* object) override {
     if (index < 0 || index >= inputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
     }
     *object = inputs_[index]->GetExternalObject();
-    return absl::OkStatus();
+    return OkStatus();
   }
 
-  absl::Status GetOutputObject(int index, TensorObject* object) override {
+  Status GetOutputObject(int index, TensorObject* object) override {
     if (index < 0 || index >= outputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
     }
     *object = outputs_[index]->GetExternalObject();
-    return absl::OkStatus();
+    return OkStatus();
   }
 
-  absl::Status SetInputObject(int index, TensorObject object) override {
+  Status SetInputObject(int index, TensorObject object) override {
     if (index < 0 || index >= inputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
     }
     return inputs_[index]->SetExternalObject(object);
   }
 
-  absl::Status SetOutputObject(int index, TensorObject object) override {
+  Status SetOutputObject(int index, TensorObject object) override {
     if (index < 0 || index >= outputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
     }
     return outputs_[index]->SetExternalObject(object);
   }
 
-  absl::Status Run() override {
+  Status Run() override {
     if (gl_interop_fabric_) {
       RETURN_IF_ERROR(gl_interop_fabric_->Start());
     }
@@ -462,20 +460,20 @@ class InferenceRunnerImpl : public InferenceRunner {
     if (gl_interop_fabric_) {
       RETURN_IF_ERROR(gl_interop_fabric_->Finish());
     }
-    return absl::OkStatus();
+    return OkStatus();
   }
 
 private:
-  static absl::Status LinkTensors(
-      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
-      std::vector<std::unique_ptr<TensorTie>>* objects) {
+  static Status LinkTensors(const std::vector<TensorTieDef>& defs,
+                            TensorTieFactory* factory,
+                            std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
-    return absl::OkStatus();
+    return OkStatus();
  }
 
  static std::vector<TensorObjectDef> GetExternalDefinitions(
@@ -513,9 +511,9 @@ class InferenceBuilderImpl : public InferenceBuilder {
   explicit InferenceBuilderImpl(Environment* environment)
       : environment_(environment) {}
 
-  absl::Status Initialize(const InferenceOptions& options,
-                          const InferenceEnvironmentOptions& env_options,
-                          const GraphFloat32& graph) {
+  Status Initialize(const InferenceOptions& options,
+                    const InferenceEnvironmentOptions& env_options,
+                    const GraphFloat32& graph) {
     context_ = absl::make_unique<InferenceContext>();
     InferenceContext::CreateInferenceInfo create_info;
     create_info.precision = GetPrecision(options);
@@ -535,7 +533,7 @@ class InferenceBuilderImpl : public InferenceBuilder {
     inputs_ = LinkTensors(graph, graph.inputs());
     outputs_ = LinkTensors(graph, graph.outputs());
-    return absl::OkStatus();
+    return OkStatus();
   }
 
   std::vector<TensorObjectDef> inputs() const override {
@@ -546,42 +544,40 @@ class InferenceBuilderImpl : public InferenceBuilder {
     return GetExternalDefinitions(outputs_);
   }
 
-  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
+  Status SetInputShape(int index, const Dimensions& dimensions) override {
     if (index < 0 || index >= inputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
     }
-    return absl::UnimplementedError("Changing input shapes is not supported");
+    return UnimplementedError("Changing input shapes is not supported");
   }
 
-  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
+  Status SetInputObjectDef(int index, ObjectDef new_def) override {
     if (index < 0 || index >= inputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
-      return absl::InvalidArgumentError(
-          "New object definition is not supported.");
+      return InvalidArgumentError("New object definition is not supported.");
    }
    inputs_[index] = def;
-    return absl::OkStatus();
+    return OkStatus();
  }
 
-  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
+  Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
-      return absl::OutOfRangeError("Index is out of range");
+      return OutOfRangeError("Index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
-      return absl::InvalidArgumentError(
-          "New object definition is not supported.");
+      return InvalidArgumentError("New object definition is not supported.");
    }
    outputs_[index] = def;
-    return absl::OkStatus();
+    return OkStatus();
  }
 
-  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
+  Status Build(std::unique_ptr<InferenceRunner>* runner) override {
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // destroy interop layer when there are no GL objects to avoid
      // extra synchronization cost.
@@ -592,7 +588,7 @@ class InferenceBuilderImpl : public InferenceBuilder {
     RETURN_IF_ERROR(
         runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
     *runner = std::move(runner_impl);
-    return absl::OkStatus();
+    return OkStatus();
   }
 
 private:
@@ -700,7 +696,7 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
   explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
       : options_(options) {}
 
-  absl::Status Init() {
+  Status Init() {
     RETURN_IF_ERROR(LoadOpenCL());
     properties_.is_opencl_available = true;
@@ -720,13 +716,13 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
     properties_.is_cl_to_gl_fast_sync_supported =
         IsEglSyncFromClEventSupported();
     if (options_.IsGlAware() && !properties_.is_gl_sharing_supported) {
-      return absl::UnavailableError("GL sharing is not supported");
+      return UnavailableError("GL sharing is not supported");
     }
     CLContext context;
     if (options_.context) {
       if (options_.IsGlAware()) {
-        return absl::InvalidArgumentError(
+        return InvalidArgumentError(
             "OpenCL context and EGL parameters are set in the same time.");
       }
       context = CLContext(options_.context, /* has_ownership = */ false);
@@ -758,11 +754,11 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
     return environment_.Init();
   }
 
-  absl::Status NewInferenceBuilder(
-      const InferenceOptions& options, GraphFloat32 model,
-      std::unique_ptr<InferenceBuilder>* builder) final {
+  Status NewInferenceBuilder(const InferenceOptions& options,
+                             GraphFloat32 model,
+                             std::unique_ptr<InferenceBuilder>* builder) final {
     if (!IsValid(options)) {
-      return absl::InvalidArgumentError("InferenceOptions are invalid.");
+      return InvalidArgumentError("InferenceOptions are invalid.");
     }
     InferenceOptions resolved_options = options;
     ResolveAutoPriority(&resolved_options);
@@ -780,7 +776,7 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
     RETURN_IF_ERROR(
         builder_impl->Initialize(resolved_options, options_, model));
     *builder = std::move(builder_impl);
-    return absl::OkStatus();
+    return OkStatus();
   }
 
   std::vector<uint8_t> GetSerializedBinaryCache() const final {
@@ -804,18 +800,18 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
 }  // namespace
 
-absl::Status NewInferenceEnvironment(
+Status NewInferenceEnvironment(
     const InferenceEnvironmentOptions& options,
     std::unique_ptr<InferenceEnvironment>* environment,
     InferenceEnvironmentProperties* properties) {
   auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
-  absl::Status status = env_impl->Init();
+  Status status = env_impl->Init();
   if (properties) {
     *properties = env_impl->properties();
   }
   RETURN_IF_ERROR(status);
   *environment = std::move(env_impl);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 }  // namespace cl


@@ -70,7 +70,7 @@ class InferenceEnvironment {
  public:
   virtual ~InferenceEnvironment() {}
 
-  virtual absl::Status NewInferenceBuilder(
+  virtual Status NewInferenceBuilder(
       const InferenceOptions& options, GraphFloat32 model,
       std::unique_ptr<InferenceBuilder>* builder) = 0;
 
@@ -112,7 +112,7 @@ struct InferenceEnvironmentOptions {
 
 // Creates new OpenCL environment that needs to stay around until all inference
 // runners are destroyed.
-absl::Status NewInferenceEnvironment(
+Status NewInferenceEnvironment(
     const InferenceEnvironmentOptions& options,
     std::unique_ptr<InferenceEnvironment>* environment,
     InferenceEnvironmentProperties* properties /* optional */);
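A sketch of the intended call sequence for this factory (illustrative only; default-constructed options are assumed to be acceptable for the example):

InferenceEnvironmentOptions env_options;
std::unique_ptr<InferenceEnvironment> environment;
InferenceEnvironmentProperties properties;
Status status = NewInferenceEnvironment(env_options, &environment, &properties);
if (!status.ok()) {
  // properties is filled even on failure, so callers can see what was missing.
  return status;
}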


@@ -21,10 +21,8 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 namespace {
 
-absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only,
-                          const void* data, CLContext* context,
-                          Buffer* result) {
+Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const void* data,
+                    CLContext* context, Buffer* result) {
   cl_mem_flags flags = gpu_read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
   if (data != nullptr) {
     flags |= CL_MEM_COPY_HOST_PTR;
@@ -33,14 +31,14 @@ absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only,
   cl_mem buffer = clCreateBuffer(context->context(), flags, size_in_bytes,
                                  const_cast<void*>(data), &error_code);
   if (!buffer) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to allocate device memory with clCreateBuffer",
                      CLErrorCodeToString(error_code)));
   }
 
   *result = Buffer(buffer, size_in_bytes);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 }  // namespace
@@ -71,18 +69,18 @@ void Buffer::Release() {
   }
 }
 
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context,
-                                  Buffer* result) {
+Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context,
+                            Buffer* result) {
   return CreateBuffer(size_in_bytes, true, nullptr, context, result);
 }
 
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void* data,
-                                  CLContext* context, Buffer* result) {
+Status CreateReadOnlyBuffer(size_t size_in_bytes, const void* data,
+                            CLContext* context, Buffer* result) {
   return CreateBuffer(size_in_bytes, true, data, context, result);
 }
 
-absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext* context,
-                                   Buffer* result) {
+Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext* context,
+                             Buffer* result) {
   return CreateBuffer(size_in_bytes, false, nullptr, context, result);
 }
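These factories pair with the templated WriteData/ReadData declared in the matching header; a minimal round-trip sketch (illustrative only; `context` and `queue` are assumed to be an initialized CLContext and CLCommandQueue):

std::vector<float> src(64, 1.0f);
Buffer buffer;
RETURN_IF_ERROR(
    CreateReadWriteBuffer(sizeof(float) * src.size(), &context, &buffer));
RETURN_IF_ERROR(buffer.WriteData(&queue, absl::MakeSpan(src)));
std::vector<float> dst;
RETURN_IF_ERROR(buffer.ReadData(&queue, &dst));  // buffer size must be a multiple of sizeof(float)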


@@ -51,11 +51,11 @@ class Buffer {
   // Writes data to a buffer. Data should point to a region that
   // has exact size in bytes as size_in_bytes(constructor parameter).
   template <typename T>
-  absl::Status WriteData(CLCommandQueue* queue, const absl::Span<T> data);
+  Status WriteData(CLCommandQueue* queue, const absl::Span<T> data);
 
   // Reads data from Buffer into CPU memory.
   template <typename T>
-  absl::Status ReadData(CLCommandQueue* queue, std::vector<T>* result) const;
+  Status ReadData(CLCommandQueue* queue, std::vector<T>* result) const;
 
  private:
  void Release();
@@ -64,31 +64,29 @@ class Buffer {
   size_t size_;
 };
 
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context,
-                                  Buffer* result);
+Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context,
+                            Buffer* result);
 
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void* data,
-                                  CLContext* context, Buffer* result);
+Status CreateReadOnlyBuffer(size_t size_in_bytes, const void* data,
+                            CLContext* context, Buffer* result);
 
-absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext* context,
-                                   Buffer* result);
+Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext* context,
+                             Buffer* result);
 
 template <typename T>
-absl::Status Buffer::WriteData(CLCommandQueue* queue,
-                               const absl::Span<T> data) {
+Status Buffer::WriteData(CLCommandQueue* queue, const absl::Span<T> data) {
   if (size_ != sizeof(T) * data.size()) {
-    return absl::InvalidArgumentError(
+    return InvalidArgumentError(
         "absl::Span<T> data size is different from buffer allocated size.");
   }
   RETURN_IF_ERROR(queue->EnqueueWriteBuffer(buffer_, size_, data.data()));
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 template <typename T>
-absl::Status Buffer::ReadData(CLCommandQueue* queue,
-                              std::vector<T>* result) const {
+Status Buffer::ReadData(CLCommandQueue* queue, std::vector<T>* result) const {
   if (size_ % sizeof(T) != 0) {
-    return absl::UnknownError("Wrong element size(typename T is not correct?");
+    return UnknownError("Wrong element size(typename T is not correct?");
   }
 
   const int elements_count = size_ / sizeof(T);


@@ -56,9 +56,8 @@ void CLCommandQueue::Release() {
   }
 }
 
-absl::Status CLCommandQueue::DispatchImplicit(const CLKernel& kernel, int3 grid,
-                                              int3 work_group_size,
-                                              CLEvent* event) {
+Status CLCommandQueue::DispatchImplicit(const CLKernel& kernel, int3 grid,
+                                        int3 work_group_size, CLEvent* event) {
   std::vector<size_t> local(3);
   std::vector<size_t> global(3);
   for (int i = 0; i < 3; ++i) {
@@ -73,31 +72,30 @@ absl::Status CLCommandQueue::DispatchImplicit(const CLKernel& kernel, int3 grid,
     *event = CLEvent(resulting_event);
   }
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
-        absl::StrCat("Failed to clEnqueueNDRangeKernel - ",
-                     CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to clEnqueueNDRangeKernel - ",
+                                     CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLCommandQueue::DispatchImplicit(const CLKernel& kernel, int3 grid,
-                                              int3 work_group_size) {
+Status CLCommandQueue::DispatchImplicit(const CLKernel& kernel, int3 grid,
+                                        int3 work_group_size) {
   return DispatchImplicit(kernel, grid, work_group_size, nullptr);
 }
 
-absl::Status CLCommandQueue::EnqueueEvent(CLEvent* event) {
+Status CLCommandQueue::EnqueueEvent(CLEvent* event) {
   cl_event resulting_event;
   const int error_code = clEnqueueMarker(queue_, &resulting_event);
   *event = CLEvent(resulting_event);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat("Failed to clEnqueueMarker - ",
-                                           CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to clEnqueueMarker - ",
+                                     CLErrorCodeToString(error_code)));
  }
-  return absl::OkStatus();
+  return OkStatus();
}

-absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region,
-                                               const void* data) {
+Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region,
+                                         const void* data) {
  const size_t origin[] = {0, 0, 0};
  const size_t r[] = {static_cast<size_t>(region.x),
                      static_cast<size_t>(region.y),
@@ -105,16 +103,16 @@ absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region,
   auto error_code = clEnqueueWriteImage(queue_, memory, CL_TRUE, origin, r, 0,
                                         0, data, 0, nullptr, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to upload data to GPU (clEnqueueWriteImage) - ",
                      CLErrorCodeToString(error_code)));
   }
 
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region,
-                                              void* data) {
+Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region,
+                                        void* data) {
   const size_t origin[] = {0, 0, 0};
   const size_t r[] = {static_cast<size_t>(region.x),
                       static_cast<size_t>(region.y),
@@ -122,47 +120,45 @@ absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region,
   auto error_code = clEnqueueReadImage(queue_, memory, CL_TRUE, origin, r, 0, 0,
                                        data, 0, nullptr, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to read data from GPU (clEnqueueReadImage) - ",
                      CLErrorCodeToString(error_code)));
   }
 
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory,
-                                                size_t size_in_bytes,
-                                                const void* data) {
+Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes,
+                                          const void* data) {
   auto error_code = clEnqueueWriteBuffer(
       queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ",
                      CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory,
-                                               size_t size_in_bytes,
-                                               void* data) {
+Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes,
+                                         void* data) {
   auto error_code = clEnqueueReadBuffer(
       queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ",
                      CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLCommandQueue::WaitForCompletion() {
+Status CLCommandQueue::WaitForCompletion() {
   auto error_code = clFinish(queue_);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to clFinish - ", CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 ProfilingCommandQueue::ProfilingCommandQueue(cl_command_queue queue)
@@ -191,14 +187,14 @@ void ProfilingCommandQueue::SetEventsLabel(const std::string& name) {
 
 void ProfilingCommandQueue::ResetMeasurements() { events_.clear(); }
 
-absl::Status ProfilingCommandQueue::DispatchImplicit(const CLKernel& kernel,
-                                                     int3 grid,
-                                                     int3 work_group_size) {
+Status ProfilingCommandQueue::DispatchImplicit(const CLKernel& kernel,
+                                               int3 grid,
+                                               int3 work_group_size) {
   events_.push_back(CLEvent());
   RETURN_IF_ERROR(CLCommandQueue::DispatchImplicit(
       kernel, grid, work_group_size, &events_[events_.size() - 1]));
   events_.back().SetName(current_label_);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 ProfilingInfo ProfilingCommandQueue::GetProfilingInfo() const {
@@ -212,7 +208,7 @@ ProfilingInfo ProfilingCommandQueue::GetProfilingInfo() const {
   return result;
 }
 
-absl::Status ProfilingCommandQueue::GetBestWorkGroupIndex(
+Status ProfilingCommandQueue::GetBestWorkGroupIndex(
     const CLKernel& kernel, const DeviceInfo& device_info, const int3& grid,
     const std::vector<int3>& work_group_sizes, int* index) {
   // Some Adreno 3xx can have wrong numbers for some events
@@ -272,22 +268,20 @@ absl::Status ProfilingCommandQueue::GetBestWorkGroupIndex(
   *index = minimum_index;
 
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CreateCLCommandQueue(const CLDevice& device,
-                                  const CLContext& context,
-                                  CLCommandQueue* result) {
+Status CreateCLCommandQueue(const CLDevice& device, const CLContext& context,
+                            CLCommandQueue* result) {
   int error_code;
   cl_command_queue queue =
       clCreateCommandQueue(context.context(), device.id(), 0, &error_code);
   if (!queue) {
-    return absl::UnknownError(
-        absl::StrCat("Failed to create a command queue - ",
-                     CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create a command queue - ",
+                                     CLErrorCodeToString(error_code)));
   }
   *result = CLCommandQueue(queue, true);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 double ProfilingCommandQueue::GetQueueExecutionTimeMs() const {
@@ -306,20 +300,19 @@ double ProfilingCommandQueue::GetSumOfEventsTimeMs() const {
   return sum;
 }
 
-absl::Status CreateProfilingCommandQueue(const CLDevice& device,
-                                         const CLContext& context,
-                                         ProfilingCommandQueue* result) {
+Status CreateProfilingCommandQueue(const CLDevice& device,
+                                   const CLContext& context,
+                                   ProfilingCommandQueue* result) {
   int error_code;
   cl_command_queue queue = clCreateCommandQueue(
       context.context(), device.id(), CL_QUEUE_PROFILING_ENABLE, &error_code);
   if (!queue) {
-    return absl::UnknownError(
-        absl::StrCat("Failed to create a command queue - ",
-                     CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create a command queue - ",
+                                     CLErrorCodeToString(error_code)));
  }
 
  *result = ProfilingCommandQueue(queue);
-  return absl::OkStatus();
+  return OkStatus();
}

absl::Duration ProfilingInfo::GetTotalTime() const {


@@ -74,23 +74,22 @@ class CLCommandQueue {
   cl_command_queue queue() const { return queue_; }
 
-  virtual absl::Status DispatchImplicit(const CLKernel& kernel, int3 grid,
-                                        int3 work_group_size);
+  virtual Status DispatchImplicit(const CLKernel& kernel, int3 grid,
+                                  int3 work_group_size);
 
-  absl::Status EnqueueEvent(CLEvent* event);
+  Status EnqueueEvent(CLEvent* event);
 
-  absl::Status DispatchImplicit(const CLKernel& kernel, int3 grid,
-                                int3 work_group_size, CLEvent* event);
+  Status DispatchImplicit(const CLKernel& kernel, int3 grid,
+                          int3 work_group_size, CLEvent* event);
 
-  absl::Status EnqueueWriteImage(cl_mem memory, int3 region, const void* data);
-  absl::Status EnqueueReadImage(cl_mem memory, int3 region, void* data);
+  Status EnqueueWriteImage(cl_mem memory, int3 region, const void* data);
+  Status EnqueueReadImage(cl_mem memory, int3 region, void* data);
 
-  absl::Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes,
-                                  const void* data);
-  absl::Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes,
-                                 void* data);
+  Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes,
+                            const void* data);
+  Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void* data);
 
-  absl::Status WaitForCompletion();
+  Status WaitForCompletion();
 
  protected:
  void Release();
@@ -110,15 +109,14 @@ class ProfilingCommandQueue : public CLCommandQueue {
   ProfilingCommandQueue(const ProfilingCommandQueue&) = delete;
   ProfilingCommandQueue& operator=(const ProfilingCommandQueue&) = delete;
 
-  absl::Status DispatchImplicit(const CLKernel& kernel, int3 grid,
-                                int3 work_group_size) override;
+  Status DispatchImplicit(const CLKernel& kernel, int3 grid,
+                          int3 work_group_size) override;
 
  // will write index for fastest work_group among work_group_sizes
-  absl::Status GetBestWorkGroupIndex(const CLKernel& kernel,
-                                     const DeviceInfo& device_info,
-                                     const int3& grid,
-                                     const std::vector<int3>& work_group_sizes,
-                                     int* index);
+  Status GetBestWorkGroupIndex(const CLKernel& kernel,
+                               const DeviceInfo& device_info, const int3& grid,
+                               const std::vector<int3>& work_group_sizes,
+                               int* index);
 
  // call ResetMeasurements() to start new seriese of measurements
  void ResetMeasurements();
@@ -141,13 +139,12 @@ class ProfilingCommandQueue : public CLCommandQueue {
   std::string current_label_;
 };
 
-absl::Status CreateCLCommandQueue(const CLDevice& device,
-                                  const CLContext& context,
-                                  CLCommandQueue* result);
+Status CreateCLCommandQueue(const CLDevice& device, const CLContext& context,
+                            CLCommandQueue* result);
 
-absl::Status CreateProfilingCommandQueue(const CLDevice& device,
-                                         const CLContext& context,
-                                         ProfilingCommandQueue* result);
+Status CreateProfilingCommandQueue(const CLDevice& device,
+                                   const CLContext& context,
+                                   ProfilingCommandQueue* result);
 
 }  // namespace cl
 }  // namespace gpu
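A sketch of how the profiling queue is meant to be used for work-group tuning (illustrative only; `kernel`, `device`, `context`, `device_info`, `grid`, and `queue` are assumed to exist, the candidate sizes are made up, and the int3 constructor syntax is an assumption):

ProfilingCommandQueue profiling_queue;
RETURN_IF_ERROR(CreateProfilingCommandQueue(device, context, &profiling_queue));
const std::vector<int3> candidates = {int3(8, 8, 1), int3(16, 8, 1)};
int best_index = 0;
RETURN_IF_ERROR(profiling_queue.GetBestWorkGroupIndex(
    kernel, device_info, grid, candidates, &best_index));
// Dispatch with the winning size on the regular queue.
RETURN_IF_ERROR(queue.DispatchImplicit(kernel, grid, candidates[best_index]));
RETURN_IF_ERROR(queue.WaitForCompletion());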


@@ -43,21 +43,19 @@ std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context,
   return result;
 }
 
-absl::Status CreateCLContext(const CLDevice& device,
-                             cl_context_properties* properties,
-                             CLContext* result) {
+Status CreateCLContext(const CLDevice& device,
+                       cl_context_properties* properties, CLContext* result) {
   int error_code;
   cl_device_id device_id = device.id();
   cl_context context =
       clCreateContext(properties, 1, &device_id, nullptr, nullptr, &error_code);
   if (!context) {
-    return absl::UnknownError(
-        absl::StrCat("Failed to create a compute context - ",
-                     CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create a compute context - ",
+                                     CLErrorCodeToString(error_code)));
   }
 
   *result = CLContext(context, true);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 }  // namespace
@@ -101,16 +99,15 @@ bool CLContext::IsFloatTexture2DSupported(int num_channels, DataType data_type,
   return false;
 }
 
-absl::Status CreateCLContext(const CLDevice& device, CLContext* result) {
+Status CreateCLContext(const CLDevice& device, CLContext* result) {
   return CreateCLContext(device, nullptr, result);
 }
 
-absl::Status CreateCLGLContext(const CLDevice& device,
-                               cl_context_properties egl_context,
-                               cl_context_properties egl_display,
-                               CLContext* result) {
+Status CreateCLGLContext(const CLDevice& device,
+                         cl_context_properties egl_context,
+                         cl_context_properties egl_display, CLContext* result) {
   if (!device.SupportsExtension("cl_khr_gl_sharing")) {
-    return absl::UnavailableError("Device doesn't support CL-GL sharing.");
+    return UnavailableError("Device doesn't support CL-GL sharing.");
   }
   cl_context_properties platform =
       reinterpret_cast<cl_context_properties>(device.platform());


@@ -51,11 +51,10 @@ class CLContext {
   bool has_ownership_ = false;
 };
 
-absl::Status CreateCLContext(const CLDevice& device, CLContext* result);
-absl::Status CreateCLGLContext(const CLDevice& device,
-                               cl_context_properties egl_context,
-                               cl_context_properties egl_display,
-                               CLContext* result);
+Status CreateCLContext(const CLDevice& device, CLContext* result);
+Status CreateCLGLContext(const CLDevice& device,
+                         cl_context_properties egl_context,
+                         cl_context_properties egl_display, CLContext* result);
 
 }  // namespace cl
 }  // namespace gpu


@@ -516,11 +516,11 @@ void CLDevice::DisableOneLayerTextureArray() {
   info_.adreno_info.support_one_layer_texture_array = false;
 }
 
-absl::Status CreateDefaultGPUDevice(CLDevice* result) {
+Status CreateDefaultGPUDevice(CLDevice* result) {
   cl_uint num_platforms;
   clGetPlatformIDs(0, nullptr, &num_platforms);
   if (num_platforms == 0) {
-    return absl::UnknownError("No supported OpenCL platform.");
+    return UnknownError("No supported OpenCL platform.");
   }
   std::vector<cl_platform_id> platforms(num_platforms);
   clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
@@ -529,7 +529,7 @@ absl::Status CreateDefaultGPUDevice(CLDevice* result) {
   cl_uint num_devices;
   clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
   if (num_devices == 0) {
-    return absl::UnknownError("No GPU on current platform.");
+    return UnknownError("No GPU on current platform.");
   }
 
   std::vector<cl_device_id> devices(num_devices);
@@ -537,7 +537,7 @@ absl::Status CreateDefaultGPUDevice(CLDevice* result) {
                  nullptr);
 
   *result = CLDevice(devices[0], platform_id);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 }  // namespace cl


@@ -191,7 +191,7 @@ class CLDevice {
   DeviceInfo info_;
 };
 
-absl::Status CreateDefaultGPUDevice(CLDevice* result);
+Status CreateDefaultGPUDevice(CLDevice* result);
 
 template <typename T>
 T GetDeviceInfo(cl_device_id id, cl_device_info info) {
@@ -204,12 +204,12 @@ T GetDeviceInfo(cl_device_id id, cl_device_info info) {
 }
 
 template <typename T>
-absl::Status GetDeviceInfo(cl_device_id id, cl_device_info info, T* result) {
+Status GetDeviceInfo(cl_device_id id, cl_device_info info, T* result) {
   cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr);
   if (error != CL_SUCCESS) {
-    return absl::InvalidArgumentError(CLErrorCodeToString(error));
+    return InvalidArgumentError(CLErrorCodeToString(error));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 }  // namespace cl
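The typed overload keeps device queries to one line at the call site; for example (illustrative only, using a standard OpenCL query and an assumed `device`):

cl_uint compute_units = 0;
RETURN_IF_ERROR(
    GetDeviceInfo(device.id(), CL_DEVICE_MAX_COMPUTE_UNITS, &compute_units));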


@@ -27,12 +27,11 @@ namespace cl {
 
 // @return if error_code is success, then return OK status. Otherwise translates
 // error code into a message.
-inline absl::Status GetOpenCLError(cl_int error_code) {
+inline Status GetOpenCLError(cl_int error_code) {
   if (error_code == CL_SUCCESS) {
-    return absl::OkStatus();
+    return OkStatus();
   }
-  return absl::InternalError("OpenCL error: " +
-                             CLErrorCodeToString(error_code));
+  return InternalError("OpenCL error: " + CLErrorCodeToString(error_code));
 }
 
 }  // namespace cl
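This keeps raw OpenCL calls to one line at the call site, e.g. (illustrative only; `queue` is an assumed, valid cl_command_queue):

RETURN_IF_ERROR(GetOpenCLError(clFinish(queue)));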


@@ -25,34 +25,34 @@ namespace gpu {
 namespace cl {
 namespace {
 
-absl::Status GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id,
-                                       int* result) {
+Status GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id,
+                                 int* result) {
   size_t max_work_group_size;
   cl_int error_code =
       clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,
                                sizeof(size_t), &max_work_group_size, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to get info CL_KERNEL_WORK_GROUP_SIZE ",
                      CLErrorCodeToString(error_code)));
   }
   *result = static_cast<int>(max_work_group_size);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status GetKernelPrivateMemorySize(cl_kernel kernel,
-                                        cl_device_id device_id, int* result) {
+Status GetKernelPrivateMemorySize(cl_kernel kernel, cl_device_id device_id,
+                                  int* result) {
   cl_ulong private_mem_size;
   cl_int error_code =
       clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PRIVATE_MEM_SIZE,
                                sizeof(cl_ulong), &private_mem_size, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
+    return UnknownError(
         absl::StrCat("Failed to get info CL_KERNEL_PRIVATE_MEM_SIZE ",
                      CLErrorCodeToString(error_code)));
   }
   *result = static_cast<int>(private_mem_size);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 }  // namespace
@@ -82,17 +82,17 @@ CLKernel& CLKernel::operator=(CLKernel&& kernel) {
 
 CLKernel::~CLKernel() { Release(); }
 
-absl::Status CLKernel::ReInit() const {
+Status CLKernel::ReInit() const {
   clReleaseKernel(kernel_);
   cl_kernel* kern_ptr = const_cast<cl_kernel*>(&kernel_);
   int error_code;
   *kern_ptr = clCreateKernel(program_, function_name_.c_str(), &error_code);
   if (!kernel_ || error_code != CL_SUCCESS) {
     *kern_ptr = nullptr;
-    return absl::UnknownError(absl::StrCat("Failed to create ", function_name_,
-                                           CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create ", function_name_,
+                                     CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 void CLKernel::Release() {
@@ -103,16 +103,16 @@ void CLKernel::Release() {
   }
 }
 
-absl::Status CLKernel::CreateFromProgram(const CLProgram& program,
-                                         const std::string& function_name) {
+Status CLKernel::CreateFromProgram(const CLProgram& program,
+                                   const std::string& function_name) {
   int error_code;
   function_name_ = function_name;
   kernel_ =
       clCreateKernel(program.program(), function_name.c_str(), &error_code);
   if (!kernel_ || error_code != CL_SUCCESS) {
     kernel_ = nullptr;
-    return absl::UnknownError(absl::StrCat("Failed to create ", function_name,
-                                           CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create ", function_name,
+                                     CLErrorCodeToString(error_code)));
   }
   program_ = program.program();
@@ -122,64 +122,64 @@ absl::Status CLKernel::CreateFromProgram(const CLProgram& program,
                                            &private_memory_size_));
   RETURN_IF_ERROR(GetKernelMaxWorkGroupSize(kernel_, program.GetDeviceId(),
                                             &max_work_group_size_));
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLKernel::SetMemory(int index, cl_mem memory) {
+Status CLKernel::SetMemory(int index, cl_mem memory) {
   return SetBytes(index, &memory, sizeof(cl_mem));
 }
 
-absl::Status CLKernel::SetMemoryAuto(cl_mem memory) {
+Status CLKernel::SetMemoryAuto(cl_mem memory) {
   return SetBytesAuto(&memory, sizeof(cl_mem));
 }
 
-absl::Status CLKernel::SetBytes(int index, const void* ptr, int length) const {
+Status CLKernel::SetBytes(int index, const void* ptr, int length) const {
   const int error_code = clSetKernelArg(kernel_, index, length, ptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                           CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to set kernel arguments - ",
+                                     CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CLKernel::SetBytesAuto(const void* ptr, int length) {
+Status CLKernel::SetBytesAuto(const void* ptr, int length) {
   const int error_code = clSetKernelArg(kernel_, binding_counter_, length, ptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat(
-        "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
-        "(at index - ", binding_counter_, ")"));
+    return UnknownError(absl::StrCat("Failed to set kernel arguments - ",
+                                     CLErrorCodeToString(error_code),
+                                     "(at index - ", binding_counter_, ")"));
   }
   binding_counter_++;
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 template <>
-absl::Status CLKernel::SetBytes<FLT>(int index, const FLT& value) const {
+Status CLKernel::SetBytes<FLT>(int index, const FLT& value) const {
   return SetBytes(index, value.GetData(), value.GetSize());
 }
 
 template <>
-absl::Status CLKernel::SetBytes<FLT2>(int index, const FLT2& value) const {
+Status CLKernel::SetBytes<FLT2>(int index, const FLT2& value) const {
   return SetBytes(index, value.GetData(), value.GetSize());
 }
 
 template <>
-absl::Status CLKernel::SetBytes<FLT4>(int index, const FLT4& value) const {
+Status CLKernel::SetBytes<FLT4>(int index, const FLT4& value) const {
   return SetBytes(index, value.GetData(), value.GetSize());
 }
 
 template <>
-absl::Status CLKernel::SetBytesAuto<FLT>(const FLT& value) {
+Status CLKernel::SetBytesAuto<FLT>(const FLT& value) {
   return SetBytesAuto(value.GetData(), value.GetSize());
 }
 
 template <>
-absl::Status CLKernel::SetBytesAuto<FLT2>(const FLT2& value) {
+Status CLKernel::SetBytesAuto<FLT2>(const FLT2& value) {
   return SetBytesAuto(value.GetData(), value.GetSize());
 }
 
 template <>
-absl::Status CLKernel::SetBytesAuto<FLT4>(const FLT4& value) {
+Status CLKernel::SetBytesAuto<FLT4>(const FLT4& value) {
   return SetBytesAuto(value.GetData(), value.GetSize());
 }


@@ -48,17 +48,17 @@ class CLKernel {
   cl_kernel kernel() const { return kernel_; }
 
-  absl::Status CreateFromProgram(const CLProgram& program,
-                                 const std::string& function_name);
+  Status CreateFromProgram(const CLProgram& program,
+                           const std::string& function_name);
 
-  absl::Status SetMemory(int index, cl_mem memory);
-  absl::Status SetMemoryAuto(cl_mem memory);
+  Status SetMemory(int index, cl_mem memory);
+  Status SetMemoryAuto(cl_mem memory);
   template <typename T>
-  absl::Status SetBytes(int index, const T& value) const {
+  Status SetBytes(int index, const T& value) const {
     return SetBytes(index, static_cast<const void*>(&value), sizeof(T));
   }
   template <typename T>
-  absl::Status SetBytesAuto(const T& value) {
+  Status SetBytesAuto(const T& value) {
     return SetBytesAuto(static_cast<const void*>(&value), sizeof(T));
   }
 
@@ -69,12 +69,12 @@ class CLKernel {
 
   // Do not use this function
   // workaround for Mali memory leak
-  absl::Status ReInit() const;
+  Status ReInit() const;
 
 private:
  void Release();
-  absl::Status SetBytes(int index, const void* ptr, int length) const;
-  absl::Status SetBytesAuto(const void* ptr, int length);
+  Status SetBytes(int index, const void* ptr, int length) const;
+  Status SetBytesAuto(const void* ptr, int length);
 
  int private_memory_size_;
  int max_work_group_size_;
@@ -87,22 +87,22 @@ class CLKernel {
 };
 
 template <>
-absl::Status CLKernel::SetBytes<FLT>(int index, const FLT& value) const;
+Status CLKernel::SetBytes<FLT>(int index, const FLT& value) const;
 
 template <>
-absl::Status CLKernel::SetBytes<FLT2>(int index, const FLT2& value) const;
+Status CLKernel::SetBytes<FLT2>(int index, const FLT2& value) const;
 
 template <>
-absl::Status CLKernel::SetBytes<FLT4>(int index, const FLT4& value) const;
+Status CLKernel::SetBytes<FLT4>(int index, const FLT4& value) const;
 
 template <>
-absl::Status CLKernel::SetBytesAuto<FLT>(const FLT& value);
+Status CLKernel::SetBytesAuto<FLT>(const FLT& value);
 
 template <>
-absl::Status CLKernel::SetBytesAuto<FLT2>(const FLT2& value);
+Status CLKernel::SetBytesAuto<FLT2>(const FLT2& value);
 
 template <>
-absl::Status CLKernel::SetBytesAuto<FLT4>(const FLT4& value);
+Status CLKernel::SetBytesAuto<FLT4>(const FLT4& value);
 
 }  // namespace cl
 }  // namespace gpu
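A sketch of the usual argument-binding sequence for this class (illustrative only; `program`, `buffer_mem`, and `grid_size` are assumed to exist, and "main_function" follows the kernel name used elsewhere in this commit):

CLKernel kernel;
RETURN_IF_ERROR(kernel.CreateFromProgram(program, "main_function"));
RETURN_IF_ERROR(kernel.SetMemoryAuto(buffer_mem));  // binds arg 0
RETURN_IF_ERROR(kernel.SetBytesAuto(grid_size));    // binds arg 1 (POD by value)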


@@ -49,29 +49,28 @@ std::string GetProgramBuildInfo(cl_program program, cl_device_id id,
   return result;
 }
 
-absl::Status GetBinarySize(cl_program program, size_t* binary_size) {
+Status GetBinarySize(cl_program program, size_t* binary_size) {
   cl_int error_code = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
                                        sizeof(size_t), binary_size, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(
-        absl::StrCat("Failed to get program binary size - ",
-                     CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to get program binary size - ",
+                                     CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status BuildProgram(cl_program program, const CLDevice& device,
-                          const std::string& compiler_options) {
+Status BuildProgram(cl_program program, const CLDevice& device,
+                    const std::string& compiler_options) {
   const int error_code = clBuildProgram(
       program, 0, nullptr, compiler_options.c_str(), nullptr, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat(
+    return UnknownError(absl::StrCat(
         "Failed to build program executable - ",
         CLErrorCodeToString(error_code),
         GetProgramBuildInfo(program, device.id(), CL_PROGRAM_BUILD_LOG)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 std::string CompilerOptionToString(const CLDevice& device,
@@ -134,7 +133,7 @@ void CLProgram::Release() {
   }
 }
 
-absl::Status CLProgram::GetBinary(std::vector<uint8_t>* result) const {
+Status CLProgram::GetBinary(std::vector<uint8_t>* result) const {
   size_t binary_size;
   RETURN_IF_ERROR(GetBinarySize(program_, &binary_size));
   result->resize(result->size() + binary_size);
@@ -142,36 +141,35 @@ absl::Status CLProgram::GetBinary(std::vector<uint8_t>* result) const {
   cl_int error_code = clGetProgramInfo(program_, CL_PROGRAM_BINARIES,
                                        binary_size, &binary_ptr, nullptr);
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat("Failed to get program binary - ",
-                                           CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to get program binary - ",
+                                     CLErrorCodeToString(error_code)));
   }
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CreateCLProgram(const std::string& code,
-                             const std::string& compiler_options,
-                             const CLContext& context, const CLDevice& device,
-                             CLProgram* result) {
+Status CreateCLProgram(const std::string& code,
+                       const std::string& compiler_options,
+                       const CLContext& context, const CLDevice& device,
+                       CLProgram* result) {
   int error_code;
   const char* source = code.c_str();
   cl_program program = clCreateProgramWithSource(context.context(), 1, &source,
                                                  nullptr, &error_code);
   if (!program || error_code != CL_SUCCESS) {
-    return absl::UnknownError(
-        absl::StrCat("Failed to create compute program - ",
-                     CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create compute program - ",
+                                     CLErrorCodeToString(error_code)));
   }
 
   *result = CLProgram(program, device.id());
   RETURN_IF_ERROR(BuildProgram(program, device, compiler_options));
-  return absl::OkStatus();
+  return OkStatus();
 }
 
-absl::Status CreateCLProgramFromBinary(const CLContext& context,
-                                       const CLDevice& device,
-                                       absl::Span<const uint8_t> binary,
-                                       CLProgram* result) {
+Status CreateCLProgramFromBinary(const CLContext& context,
+                                 const CLDevice& device,
+                                 absl::Span<const uint8_t> binary,
+                                 CLProgram* result) {
   cl_int binary_status;
   cl_int error_code;
   cl_device_id devices_list[] = {device.id()};
@@ -181,13 +179,13 @@ absl::Status CreateCLProgramFromBinary(const CLContext& context,
       context.context(), 1, devices_list, &binary_size, &binary_pointer,
       &binary_status, &error_code);
   if (binary_status != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat(
+    return UnknownError(absl::StrCat(
         "Something wrong with binary after clCreateProgramWithBinary - ",
         binary_status));
   }
   if (error_code != CL_SUCCESS) {
-    return absl::UnknownError(absl::StrCat("Failed to create program - ",
-                                           CLErrorCodeToString(error_code)));
+    return UnknownError(absl::StrCat("Failed to create program - ",
+                                     CLErrorCodeToString(error_code)));
   }
   *result = CLProgram(program, device.id());
   return BuildProgram(program, device, "");


@@ -68,7 +68,7 @@ class CLProgram {
   // was created using clCreateProgramWithBinary.
   cl_device_id GetDeviceId() const { return device_id_; }
 
-  absl::Status GetBinary(std::vector<uint8_t>* result) const;
+  Status GetBinary(std::vector<uint8_t>* result) const;
 
  private:
  void Release();
@@ -79,15 +79,15 @@ class CLProgram {
   cl_device_id device_id_ = nullptr;
 };
 
-absl::Status CreateCLProgram(const std::string& code,
-                             const std::string& compiler_options,
-                             const CLContext& context, const CLDevice& device,
-                             CLProgram* result);
+Status CreateCLProgram(const std::string& code,
+                       const std::string& compiler_options,
+                       const CLContext& context, const CLDevice& device,
+                       CLProgram* result);
 
-absl::Status CreateCLProgramFromBinary(const CLContext& context,
-                                       const CLDevice& device,
-                                       absl::Span<const uint8_t> binary,
-                                       CLProgram* result);
+Status CreateCLProgramFromBinary(const CLContext& context,
+                                 const CLDevice& device,
+                                 absl::Span<const uint8_t> binary,
+                                 CLProgram* result);
 
 }  // namespace cl
 }  // namespace gpu
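GetBinary and CreateCLProgramFromBinary together enable a compile-once binary cache; a sketch (illustrative only; `code`, `context`, and `device` are assumed to exist):

CLProgram program;
RETURN_IF_ERROR(CreateCLProgram(code, /*compiler_options=*/"", context, device,
                                &program));
std::vector<uint8_t> binary;
RETURN_IF_ERROR(program.GetBinary(&binary));
// A later run can skip compilation entirely:
CLProgram restored;
RETURN_IF_ERROR(CreateCLProgramFromBinary(context, device,
                                          absl::MakeConstSpan(binary),
                                          &restored));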


@@ -21,15 +21,15 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 
-absl::Status EglSync::NewFence(EGLDisplay display, EglSync* sync) {
+Status EglSync::NewFence(EGLDisplay display, EglSync* sync) {
   EGLSyncKHR egl_sync;
   RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(eglCreateSyncKHR, &egl_sync, display,
                                       EGL_SYNC_FENCE_KHR, nullptr));
   if (egl_sync == EGL_NO_SYNC_KHR) {
-    return absl::InternalError("Returned empty KHR EGL sync");
+    return InternalError("Returned empty KHR EGL sync");
   }
   *sync = EglSync(display, egl_sync);
-  return absl::OkStatus();
+  return OkStatus();
 }
 
 EglSync& EglSync::operator=(EglSync&& sync) {
@@ -48,23 +48,22 @@ void EglSync::Invalidate() {
   }
 }
 
-absl::Status EglSync::ServerWait() {
+Status EglSync::ServerWait() {
   EGLint result;
   RETURN_IF_ERROR(
       TFLITE_GPU_CALL_EGL(eglWaitSyncKHR, &result, display_, sync_, 0));
-  return result == EGL_TRUE ? absl::OkStatus()
-                            : absl::InternalError("eglWaitSync failed");
+  return result == EGL_TRUE ? OkStatus() : InternalError("eglWaitSync failed");
 }
 
-absl::Status EglSync::ClientWait() {
+Status EglSync::ClientWait() {
   EGLint result;
   // TODO(akulik): make it active wait for better performance
   RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(eglClientWaitSyncKHR, &result, display_,
                                       sync_, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
                                       EGL_FOREVER_KHR));
   return result == EGL_CONDITION_SATISFIED_KHR
-             ? absl::OkStatus()
-             : absl::InternalError("eglClientWaitSync failed");
+             ? OkStatus()
+             : InternalError("eglClientWaitSync failed");
 }
 
 }  // namespace cl


@@ -32,7 +32,7 @@ class EglSync {
   // flushed.
   //
   // Depends on EGL_KHR_fence_sync extension.
-  static absl::Status NewFence(EGLDisplay display, EglSync* sync);
+  static Status NewFence(EGLDisplay display, EglSync* sync);
 
   // Creates invalid object.
   EglSync() : EglSync(EGL_NO_DISPLAY, EGL_NO_SYNC_KHR) {}
@@ -50,10 +50,10 @@ class EglSync {
 
   // Causes GPU to block and wait until this sync has been signaled.
   // This call does not block and returns immediately.
-  absl::Status ServerWait();
+  Status ServerWait();
 
   // Causes CPU to block and wait until this sync has been signaled.
-  absl::Status ClientWait();
+  Status ClientWait();
 
   // Returns the EGLDisplay on which this instance was created.
   EGLDisplay display() const { return display_; }

@ -26,7 +26,6 @@ namespace tflite {
namespace gpu {
namespace cl {
namespace {
std::string GetKernelOneLayerTextureArray() {
return R"(
@ -44,12 +43,12 @@ __kernel void main_function(__write_only image2d_array_t dst) {
// texture, we will get zeroes instead of actual values.
// The same kernel will work, if we use texture array with more than one layer.
// With help of this code we can detect this bug.
absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
bool* result) {
Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
bool* result) {
// No bug on Adreno 6xx
if (env->device().GetInfo().adreno_info.gpu_version >= 600) {
*result = true;
return absl::OkStatus();
return OkStatus();
}
CLKernel kernel;
RETURN_IF_ERROR(env->program_cache()->GetOrCreateCLKernel(
@ -76,12 +75,12 @@ absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
break;
}
}
return absl::OkStatus();
return OkStatus();
}
absl::Status CreateEnvironment(Environment* result, bool shared,
cl_context_properties egl_context,
cl_context_properties egl_display) {
Status CreateEnvironment(Environment* result, bool shared,
cl_context_properties egl_context,
cl_context_properties egl_display) {
CLDevice gpu;
RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
@ -108,9 +107,8 @@ absl::Status CreateEnvironment(Environment* result, bool shared,
}
}
return absl::OkStatus();
return OkStatus();
}
} // namespace
Environment::Environment(CLDevice&& device, CLContext&& context,
@ -139,7 +137,7 @@ Environment& Environment::operator=(Environment&& environment) {
return *this;
}
absl::Status Environment::Init() {
Status Environment::Init() {
if (device().IsAdreno() && device().SupportsTextureArray()) {
bool supports_one_layer;
RETURN_IF_ERROR(
@ -148,7 +146,7 @@ absl::Status Environment::Init() {
GetDevicePtr()->DisableOneLayerTextureArray();
}
}
return absl::OkStatus();
return OkStatus();
}
void Environment::SetHighPerformance() const {
@ -268,7 +266,7 @@ TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
return TensorStorageType::BUFFER;
}
absl::Status CreateEnvironment(Environment* result) {
Status CreateEnvironment(Environment* result) {
CLDevice gpu;
RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

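A sketch of how a client might bring up the environment using the public CreateEnvironment overload above; the storage-type check and absl::UnavailableError are illustrative choices, not something this file mandates:

absl::Status InitGpuEnvironment(Environment* env) {
  RETURN_IF_ERROR(CreateEnvironment(env));  // finds and wraps the default GPU
  if (!env->IsSupported(TensorStorageType::TEXTURE_ARRAY)) {
    return absl::UnavailableError("Texture arrays are not supported");
  }
  return absl::OkStatus();
}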
View File

@ -57,7 +57,7 @@ class Environment {
std::vector<TensorStorageType> GetSupportedStorages() const;
bool IsSupported(TensorStorageType storage_type) const;
absl::Status Init();
Status Init();
void SetHighPerformance() const;
void SetDefaultPerformance() const;
@ -75,7 +75,7 @@ TensorStorageType GetFastestStorageType(const CLDevice& gpu);
TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
const CLDevice& gpu);
absl::Status CreateEnvironment(Environment* result);
Status CreateEnvironment(Environment* result);
} // namespace cl
} // namespace gpu

View File

@ -41,11 +41,10 @@ PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;
} // namespace
absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
EglSync* sync) {
Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
EglSync* sync) {
if (!IsEglSyncFromClEventSupported()) {
return absl::UnimplementedError(
"CreateEglSyncFromClEvent is not supported");
return UnimplementedError("CreateEglSyncFromClEvent is not supported");
}
EGLSync egl_sync;
const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
@ -53,10 +52,10 @@ absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
EGL_SYNC_CL_EVENT, attributes));
if (egl_sync == EGL_NO_SYNC) {
return absl::InternalError("Returned empty EGL sync");
return InternalError("Returned empty EGL sync");
}
*sync = EglSync(display, egl_sync);
return absl::OkStatus();
return OkStatus();
}
bool IsEglSyncFromClEventSupported() {
@ -74,54 +73,52 @@ bool IsEglSyncFromClEventSupported() {
return supported;
}
absl::Status CreateClEventFromEglSync(cl_context context,
const EglSync& egl_sync, CLEvent* event) {
Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
CLEvent* event) {
cl_int error_code;
cl_event new_event = clCreateEventFromEGLSyncKHR(
context, egl_sync.sync(), egl_sync.display(), &error_code);
if (error_code != CL_SUCCESS) {
return absl::InternalError(
return InternalError(
absl::StrCat("Unable to create CL sync from EGL sync. ",
CLErrorCodeToString(error_code)));
}
*event = CLEvent(new_event);
return absl::OkStatus();
return OkStatus();
}
bool IsClEventFromEglSyncSupported(const CLDevice& device) {
return device.SupportsExtension("cl_khr_egl_event");
}
absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
AccessType access_type,
CLContext* context, CLMemory* memory) {
Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id, AccessType access_type,
CLContext* context, CLMemory* memory) {
cl_int error_code;
auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
gl_ssbo_id, &error_code);
if (error_code != CL_SUCCESS) {
return absl::InternalError(
return InternalError(
absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
CLErrorCodeToString(error_code)));
}
*memory = CLMemory(mem, true);
return absl::OkStatus();
return OkStatus();
}
absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
GLuint texture_id,
AccessType access_type,
CLContext* context, CLMemory* memory) {
Status CreateClMemoryFromGlTexture(GLenum texture_target, GLuint texture_id,
AccessType access_type, CLContext* context,
CLMemory* memory) {
cl_int error_code;
auto mem =
clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
texture_target, 0, texture_id, &error_code);
if (error_code != CL_SUCCESS) {
return absl::InternalError(
return InternalError(
absl::StrCat("Unable to create CL buffer from GL texture. ",
CLErrorCodeToString(error_code)));
}
*memory = CLMemory(mem, true);
return absl::OkStatus();
return OkStatus();
}
bool IsGlSharingSupported(const CLDevice& device) {
@ -131,18 +128,19 @@ bool IsGlSharingSupported(const CLDevice& device) {
AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
absl::Status AcquiredGlObjects::Acquire(
const std::vector<cl_mem>& memory, cl_command_queue queue,
const std::vector<cl_event>& wait_events, CLEvent* acquire_event,
AcquiredGlObjects* objects) {
Status AcquiredGlObjects::Acquire(const std::vector<cl_mem>& memory,
cl_command_queue queue,
const std::vector<cl_event>& wait_events,
CLEvent* acquire_event,
AcquiredGlObjects* objects) {
if (!memory.empty()) {
cl_event new_event;
cl_int error_code = clEnqueueAcquireGLObjects(
queue, memory.size(), memory.data(), wait_events.size(),
wait_events.data(), acquire_event ? &new_event : nullptr);
if (error_code != CL_SUCCESS) {
return absl::InternalError(absl::StrCat("Unable to acquire GL object. ",
CLErrorCodeToString(error_code)));
return InternalError(absl::StrCat("Unable to acquire GL object. ",
CLErrorCodeToString(error_code)));
}
if (acquire_event) {
*acquire_event = CLEvent(new_event);
@ -150,19 +148,19 @@ absl::Status AcquiredGlObjects::Acquire(
clFlush(queue);
}
*objects = AcquiredGlObjects(memory, queue);
return absl::OkStatus();
return OkStatus();
}
absl::Status AcquiredGlObjects::Release(
const std::vector<cl_event>& wait_events, CLEvent* release_event) {
Status AcquiredGlObjects::Release(const std::vector<cl_event>& wait_events,
CLEvent* release_event) {
if (queue_ && !memory_.empty()) {
cl_event new_event;
cl_int error_code = clEnqueueReleaseGLObjects(
queue_, memory_.size(), memory_.data(), wait_events.size(),
wait_events.data(), release_event ? &new_event : nullptr);
if (error_code != CL_SUCCESS) {
return absl::InternalError(absl::StrCat("Unable to release GL object. ",
CLErrorCodeToString(error_code)));
return InternalError(absl::StrCat("Unable to release GL object. ",
CLErrorCodeToString(error_code)));
}
if (release_event) {
*release_event = CLEvent(new_event);
@ -170,7 +168,7 @@ absl::Status AcquiredGlObjects::Release(
clFlush(queue_);
queue_ = nullptr;
}
return absl::OkStatus();
return OkStatus();
}
GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
@ -194,9 +192,9 @@ void GlInteropFabric::UnregisterMemory(cl_mem memory) {
}
}
absl::Status GlInteropFabric::Start() {
Status GlInteropFabric::Start() {
if (!is_enabled()) {
return absl::OkStatus();
return OkStatus();
}
// In GL-CL interoperability, we need to make sure GL finished processing of
@ -237,9 +235,9 @@ absl::Status GlInteropFabric::Start() {
nullptr, &gl_objects_);
}
absl::Status GlInteropFabric::Finish() {
Status GlInteropFabric::Finish() {
if (!is_enabled()) {
return absl::OkStatus();
return OkStatus();
}
RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event_));
@ -260,7 +258,7 @@ absl::Status GlInteropFabric::Finish() {
// This slow sync is the only working solution right now. We have to debug why
// the above version is not fast and reliable.
outbound_event_.Wait();
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

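Start() and Finish() are meant to bracket the CL work that touches GL-shared objects. A sketch, assuming 'fabric' is wired to the EGL display and context that produced the shared buffers and that the inference context and queue are already set up:

absl::Status RunWithGlInterop(GlInteropFabric* fabric,
                              InferenceContext* context,
                              CLCommandQueue* queue) {
  RETURN_IF_ERROR(fabric->Start());   // GL -> CL sync before shared objects are used
  RETURN_IF_ERROR(context->AddToQueue(queue));
  return fabric->Finish();            // CL -> GL sync once all CL work is queued
}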
View File

@ -39,8 +39,8 @@ namespace cl {
// returned sync and could be safely destroyed.
//
// Depends on EGL 1.5.
absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
EglSync* sync);
Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
EglSync* sync);
// Returns true if 'CreateEglSyncFromClEvent' is supported.
bool IsEglSyncFromClEventSupported();
@ -48,22 +48,20 @@ bool IsEglSyncFromClEventSupported();
// Creates CL event from EGL sync.
// The created event can only be consumed by an AcquiredGlObjects::Acquire call
// a 'wait_event'.
absl::Status CreateClEventFromEglSync(cl_context context,
const EglSync& egl_sync, CLEvent* event);
Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
CLEvent* event);
// Returns true if 'CreateClEventFromEglSync' is supported.
bool IsClEventFromEglSyncSupported(const CLDevice& device);
// Creates new CL memory object from OpenGL buffer.
absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
AccessType access_type,
CLContext* context, CLMemory* memory);
Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id, AccessType access_type,
CLContext* context, CLMemory* memory);
// Creates new CL memory object from OpenGL texture.
absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
GLuint texture_id,
AccessType access_type,
CLContext* context, CLMemory* memory);
Status CreateClMemoryFromGlTexture(GLenum texture_target, GLuint texture_id,
AccessType access_type, CLContext* context,
CLMemory* memory);
// Returns true if GL objects could be shared with OpenCL context.
bool IsGlSharingSupported(const CLDevice& device);
@ -83,16 +81,16 @@ class AcquiredGlObjects {
// CreateClMemoryFromGlBuffer or CreateClMemoryFromGlTexture calls.
// If 'acquire_event' is not nullptr, it will be signalled once acquisition is
// complete.
static absl::Status Acquire(const std::vector<cl_mem>& memory,
cl_command_queue queue,
const std::vector<cl_event>& wait_events,
CLEvent* acquire_event /* optional */,
AcquiredGlObjects* objects);
static Status Acquire(const std::vector<cl_mem>& memory,
cl_command_queue queue,
const std::vector<cl_event>& wait_events,
CLEvent* acquire_event /* optional */,
AcquiredGlObjects* objects);
// Releases OpenCL memory back to OpenGL context. If 'release_event' is not
// nullptr, it will be signalled once release is complete.
absl::Status Release(const std::vector<cl_event>& wait_events,
CLEvent* release_event /* optional */);
Status Release(const std::vector<cl_event>& wait_events,
CLEvent* release_event /* optional */);
private:
AcquiredGlObjects(const std::vector<cl_mem>& memory, cl_command_queue queue)
@ -110,10 +108,10 @@ class GlInteropFabric {
// Ensures proper GL->CL synchronization is in place before
// GL objects that are mapped to CL objects are used.
absl::Status Start();
Status Start();
// Puts appropriate CL->GL synchronization after all work is complete.
absl::Status Finish();
Status Finish();
// Registers memory to be used from GL context. Such CL memory object must
// be created with CreateClMemoryFromGlBuffer or CreateClMemoryFromGlTexture

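A usage sketch for the lower-level acquire/release API above, assuming 'shared_mem' came from CreateClMemoryFromGlBuffer and 'queue' belongs to the same CL context:

absl::Status UseSharedBuffer(cl_mem shared_mem, cl_command_queue queue) {
  AcquiredGlObjects objects;
  RETURN_IF_ERROR(AcquiredGlObjects::Acquire({shared_mem}, queue,
                                             /*wait_events=*/{},
                                             /*acquire_event=*/nullptr,
                                             &objects));
  // ... enqueue CL kernels that read or write 'shared_mem' here ...
  return objects.Release(/*wait_events=*/{}, /*release_event=*/nullptr);
}

Note that the destructor also calls Release({}, nullptr).IgnoreError(), so the objects are handed back to GL even on early exit.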
View File

@ -87,8 +87,8 @@ class Delegate {
}
}
absl::Status Prepare(TfLiteContext* context,
const TfLiteDelegateParams* delegate_params) {
Status Prepare(TfLiteContext* context,
const TfLiteDelegateParams* delegate_params) {
// Extract TFLite delegate execution plan from the context and convert it
// into GraphFloat32.
GraphFloat32 graph;
@ -98,7 +98,7 @@ class Delegate {
NullTransformationReporter reporter;
ModelTransformer transformer(&graph, &reporter);
if (!ApplyGeneralTransformations(&transformer)) {
return absl::InternalError("Graph general transformations failed");
return InternalError("Graph general transformations failed");
}
InferenceEnvironmentOptions env_options;
@ -108,7 +108,7 @@ class Delegate {
options_.serialized_binary_cache_data,
options_.serialized_binary_cache_size};
InferenceEnvironmentProperties properties;
absl::Status status =
Status status =
NewInferenceEnvironment(env_options, &environment_, &properties);
if (!properties.is_opencl_available) {
context->ReportError(context,
@ -200,7 +200,7 @@ class Delegate {
return builder->Build(&runner_);
}
absl::Status SetInputsAndOutputs(TfLiteContext* context) {
Status SetInputsAndOutputs(TfLiteContext* context) {
int i = 0;
for (auto index : input_indices_) {
RETURN_IF_ERROR(
@ -211,10 +211,10 @@ class Delegate {
RETURN_IF_ERROR(
runner_->SetOutputObject(i++, GetTensorObject(index, context)));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status Invoke(TfLiteContext* context) {
Status Invoke(TfLiteContext* context) {
RETURN_IF_ERROR(SetInputsAndOutputs(context));
return runner_->Run();
}
@ -310,7 +310,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
const auto status = gpu_delegate->Prepare(context, params);
if (!status.ok()) {
context->ReportError(context, "TfLiteGpuDelegate Init: %s",
std::string(status.message()).c_str());
status.error_message().c_str());
return nullptr;
}
return gpu_delegate;
@ -335,7 +335,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
const auto status = GetDelegate(node)->Invoke(context);
if (!status.ok()) {
context->ReportError(context, "TfLiteGpuDelegate Invoke: %s",
std::string(status.message()).c_str());
status.error_message().c_str());
return kTfLiteError;
}
return kTfLiteOk;

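The status.message() change above is the one place where the migration is more than renaming: absl::Status::message() returns an absl::string_view, which is not guaranteed to be NUL-terminated, so it must be copied before being handed to the printf-style ReportError. In isolation:

if (!status.ok()) {
  // std::string(...) makes a NUL-terminated copy of the string_view.
  context->ReportError(context, "TfLiteGpuDelegate: %s",
                       std::string(status.message()).c_str());
  return kTfLiteError;
}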
View File

@ -169,9 +169,9 @@ CLNode& CLNode::operator=(CLNode&& node) {
return *this;
}
absl::Status InferenceContext::InitFromGraph(
const CreateInferenceInfo& create_info, const GraphFloat32& graph,
Environment* env) {
Status InferenceContext::InitFromGraph(const CreateInferenceInfo& create_info,
const GraphFloat32& graph,
Environment* env) {
CreationContext creation_context;
creation_context.device = env->GetDevicePtr();
creation_context.context = &env->context();
@ -206,15 +206,15 @@ absl::Status InferenceContext::InitFromGraph(
tuning_parameters.tuning_type = TuningType::FAST;
}
RETURN_IF_ERROR(Tune(tuning_parameters));
return absl::OkStatus();
return OkStatus();
}
absl::Status InferenceContext::InitFromGraphWithTransforms(
Status InferenceContext::InitFromGraphWithTransforms(
const CreateInferenceInfo& create_info, GraphFloat32* graph,
Environment* env) {
RETURN_IF_ERROR(RunGraphTransforms(graph));
RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env));
return absl::OkStatus();
return OkStatus();
}
void InferenceContext::CopyInAndOutIds(const GraphFloat32& graph) {
@ -258,7 +258,7 @@ void InferenceContext::ReserveGraphTensors(
tensor_reserver_.SetNext(max_id + 1);
}
absl::Status InferenceContext::ConvertOperations(
Status InferenceContext::ConvertOperations(
const CreationContext& creation_context, const GraphFloat32& graph,
ModelHints hints) {
std::vector<Node*> graph_nodes = graph.nodes();
@ -343,7 +343,7 @@ absl::Status InferenceContext::ConvertOperations(
}
}
return absl::OkStatus();
return OkStatus();
}
void InferenceContext::Merge() {
@ -424,15 +424,15 @@ void InferenceContext::GetUsages(
}
}
absl::Status InferenceContext::AllocateMemory(const CLDevice& device,
CLContext* context) {
Status InferenceContext::AllocateMemory(const CLDevice& device,
CLContext* context) {
RETURN_IF_ERROR(AllocateMemoryForBuffers(device, context));
RETURN_IF_ERROR(AllocateMemoryForStrongShapes(device, context));
return absl::OkStatus();
return OkStatus();
}
absl::Status InferenceContext::AllocateMemoryForBuffers(const CLDevice& device,
CLContext* context) {
Status InferenceContext::AllocateMemoryForBuffers(const CLDevice& device,
CLContext* context) {
std::map<ValueId, int2> buffer_usages;
GetUsages(
[](const TensorDescriptor& t) { return IsBufferBased(t.storage_type); },
@ -480,11 +480,11 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(const CLDevice& device,
created_tensors[tensor_index] = true;
}
}
return absl::OkStatus();
return OkStatus();
}
absl::Status InferenceContext::AllocateMemoryForStrongShapes(
const CLDevice& device, CLContext* context) {
Status InferenceContext::AllocateMemoryForStrongShapes(const CLDevice& device,
CLContext* context) {
std::map<ValueId, int2> usages;
GetUsages(
[](const TensorDescriptor& t) { return !IsBufferBased(t.storage_type); },
@ -517,7 +517,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
}
}
}
return absl::OkStatus();
return OkStatus();
}
void InferenceContext::BindMemoryToOperations() {
@ -539,22 +539,21 @@ void InferenceContext::BindMemoryToOperations() {
}
}
absl::Status InferenceContext::Compile(
const CreationContext& creation_context) {
Status InferenceContext::Compile(const CreationContext& creation_context) {
for (auto& node : nodes_) {
RETURN_IF_ERROR(node.operations[0]->Compile(creation_context));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status InferenceContext::Tune(const TuningParameters& tuning_parameters) {
Status InferenceContext::Tune(const TuningParameters& tuning_parameters) {
for (auto& node : nodes_) {
RETURN_IF_ERROR(node.operations[0]->Tune(tuning_parameters));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status InferenceContext::AddToQueue(CLCommandQueue* queue) {
Status InferenceContext::AddToQueue(CLCommandQueue* queue) {
if (need_manual_release_) {
if (prev_enqueue_start_point_.is_valid()) {
prev_enqueue_start_point_.Wait();
@ -572,11 +571,11 @@ absl::Status InferenceContext::AddToQueue(CLCommandQueue* queue) {
if (need_flush_) {
clFlush(queue->queue());
}
return absl::OkStatus();
return OkStatus();
}
absl::Status InferenceContext::Profile(ProfilingCommandQueue* queue,
ProfilingInfo* result) {
Status InferenceContext::Profile(ProfilingCommandQueue* queue,
ProfilingInfo* result) {
queue->ResetMeasurements();
for (auto& node : nodes_) {
queue->SetEventsLabel(node.name);
@ -584,7 +583,7 @@ absl::Status InferenceContext::Profile(ProfilingCommandQueue* queue,
}
RETURN_IF_ERROR(queue->WaitForCompletion());
*result = queue->GetProfilingInfo();
return absl::OkStatus();
return OkStatus();
}
uint64_t InferenceContext::GetSizeOfMemoryAllocatedForIntermediateTensors()
@ -609,15 +608,13 @@ Tensor* InferenceContext::GetTensor(ValueId id) {
}
}
absl::Status InferenceContext::SetInputTensor(ValueId id,
const TensorFloat32& tensor,
CLCommandQueue* queue) {
Status InferenceContext::SetInputTensor(ValueId id, const TensorFloat32& tensor,
CLCommandQueue* queue) {
return GetTensor(id)->WriteData(queue, tensor);
}
absl::Status InferenceContext::GetOutputTensor(ValueId id,
CLCommandQueue* queue,
TensorFloat32* result) {
Status InferenceContext::GetOutputTensor(ValueId id, CLCommandQueue* queue,
TensorFloat32* result) {
const auto& gpu_tensor = *GetTensor(id);
const auto dst_shape = BHWC(gpu_tensor.Batch(), gpu_tensor.Height(),
gpu_tensor.Width(), gpu_tensor.Channels());
@ -627,17 +624,17 @@ absl::Status InferenceContext::GetOutputTensor(ValueId id,
return gpu_tensor.ReadData(queue, result);
}
absl::Status RunGraphTransforms(GraphFloat32* graph) {
Status RunGraphTransforms(GraphFloat32* graph) {
auto merge_padding_transform = NewMergePaddingWithAdd();
auto add_bias_transform = NewAddBias();
ModelTransformer transformer(graph, /*reporter=*/nullptr);
if (!transformer.Apply("add_bias", add_bias_transform.get())) {
return absl::InternalError("Invalid add_bias transform");
return InternalError("Invalid add_bias transform");
}
if (!transformer.Apply("merge_padding", merge_padding_transform.get())) {
return absl::InternalError("Invalid merge_padding transform");
return InternalError("Invalid merge_padding transform");
}
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

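The methods above compose into one call sequence. A sketch, assuming an initialized Environment, a GraphFloat32, and valid input/output ValueIds; the precision field on CreateInferenceInfo is assumed from the header:

absl::Status RunOnce(Environment* env, GraphFloat32* graph,
                     ValueId input_id, const TensorFloat32& input,
                     ValueId output_id, TensorFloat32* output) {
  InferenceContext::CreateInferenceInfo create_info;
  create_info.precision = CalculationsPrecision::F16;  // assumed field
  create_info.storage_type = GetFastestStorageType(env->device());

  InferenceContext context;
  RETURN_IF_ERROR(context.InitFromGraphWithTransforms(create_info, graph, env));
  RETURN_IF_ERROR(context.SetInputTensor(input_id, input, env->queue()));
  RETURN_IF_ERROR(context.AddToQueue(env->queue()));
  return context.GetOutputTensor(output_id, env->queue(), output);
}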
View File

@ -65,55 +65,53 @@ class InferenceContext {
TensorStorageType storage_type;
ModelHints hints;
};
absl::Status InitFromGraph(const CreateInferenceInfo& create_info,
const GraphFloat32& graph, Environment* env);
Status InitFromGraph(const CreateInferenceInfo& create_info,
const GraphFloat32& graph, Environment* env);
// Applies OpenCL-specific transformations to the graph before the
// initialization. These transformations are either impossible or useless in
// other backends.
absl::Status InitFromGraphWithTransforms(
const CreateInferenceInfo& create_info, GraphFloat32* graph,
Environment* env);
Status InitFromGraphWithTransforms(const CreateInferenceInfo& create_info,
GraphFloat32* graph, Environment* env);
absl::Status AddToQueue(CLCommandQueue* queue);
absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
Status AddToQueue(CLCommandQueue* queue);
Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
// for profiling and memory statistics
uint64_t GetSizeOfMemoryAllocatedForIntermediateTensors() const;
absl::Status SetInputTensor(ValueId id, const TensorFloat32& tensor,
CLCommandQueue* queue);
Status SetInputTensor(ValueId id, const TensorFloat32& tensor,
CLCommandQueue* queue);
// It will work only with input/output tensor ids. For all other ids we don't
// have any guarantees.
Tensor* GetTensor(ValueId id);
absl::Status GetOutputTensor(ValueId id, CLCommandQueue* queue,
TensorFloat32* result);
Status GetOutputTensor(ValueId id, CLCommandQueue* queue,
TensorFloat32* result);
private:
void CopyInAndOutIds(const GraphFloat32& graph);
absl::Status ConvertOperations(const CreationContext& creation_context,
const GraphFloat32& graph, ModelHints hints);
Status ConvertOperations(const CreationContext& creation_context,
const GraphFloat32& graph, ModelHints hints);
void CreateLinks();
void ReserveGraphTensors(const CreateInferenceInfo& create_info,
const CreationContext& creation_context,
const GraphFloat32& graph);
void Merge();
absl::Status AllocateMemory(const CLDevice& device, CLContext* context);
Status AllocateMemory(const CLDevice& device, CLContext* context);
absl::Status AllocateMemoryForBuffers(const CLDevice& device,
CLContext* context);
Status AllocateMemoryForBuffers(const CLDevice& device, CLContext* context);
absl::Status AllocateMemoryForStrongShapes(const CLDevice& device,
CLContext* context);
Status AllocateMemoryForStrongShapes(const CLDevice& device,
CLContext* context);
// utility function
void GetUsages(const std::function<bool(const TensorDescriptor&)>& functor,
std::map<ValueId, int2>* usages);
void BindMemoryToOperations();
absl::Status Compile(const CreationContext& creation_context);
absl::Status Tune(const TuningParameters& tuning_parameters);
Status Compile(const CreationContext& creation_context);
Status Tune(const TuningParameters& tuning_parameters);
// performance hacks
bool need_flush_ = false;
@ -177,7 +175,7 @@ class InferenceContext {
};
// Runs OpenCL specific transforms for the graph.
absl::Status RunGraphTransforms(GraphFloat32* graph);
Status RunGraphTransforms(GraphFloat32* graph);
} // namespace cl
} // namespace gpu

View File

@ -143,17 +143,17 @@ std::string Add::GetArgsDeclaration() const {
return args;
}
absl::Status Add::BindArguments(CLKernel* kernel) {
Status Add::BindArguments(CLKernel* kernel) {
for (int i = 1; i < src_depthes_.size(); ++i) {
RETURN_IF_ERROR(kernel->SetMemoryAuto(src_[i]->GetMemoryPtr()));
}
for (int i = 1; i < src_depthes_.size(); ++i) {
RETURN_IF_ERROR(kernel->SetBytesAuto(src_[i]->GetWBatchedHSB()));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status Add::Compile(const CreationContext& creation_context) {
Status Add::Compile(const CreationContext& creation_context) {
const auto code = GetElementWiseCode(definition_, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,

View File

@ -36,7 +36,7 @@ class Add : public ElementwiseOperation {
Add(const OperationDef& definition, const std::vector<int>& channels,
int dst_channels);
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Add(Add&& operation);
@ -47,7 +47,7 @@ class Add : public ElementwiseOperation {
void SetLinkIndex(int index) override;
std::string GetCoreCode(const LinkingContext& context) const override;
std::string GetArgsDeclaration() const override;
absl::Status BindArguments(CLKernel* kernel) override;
Status BindArguments(CLKernel* kernel) override;
private:
std::string GetElementWiseCode(

View File

@ -21,17 +21,17 @@ namespace tflite {
namespace gpu {
namespace cl {
absl::Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation,
const std::vector<BHWC>& dst_sizes,
const std::vector<TensorFloat32*>& dst_cpu) {
Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation,
const std::vector<BHWC>& dst_sizes,
const std::vector<TensorFloat32*>& dst_cpu) {
const OperationDef& op_def = operation->GetDefinition();
std::vector<Tensor> src(src_cpu.size());
for (int i = 0; i < src_cpu.size(); ++i) {
auto src_shape = src_cpu[i].shape;
if (src_shape.b != 1 && !op_def.IsBatchSupported()) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"Layout doesn't have Batch dimension, but shape.b != 1");
}
RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@ -45,7 +45,7 @@ absl::Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
for (int i = 0; i < dst_cpu.size(); ++i) {
auto dst_shape = dst_sizes[i];
if (dst_shape.b != 1 && !op_def.IsBatchSupported()) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"Layout doesn't have Batch dimension, but shape.b != 1");
}
RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@ -64,22 +64,22 @@ absl::Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
dst_cpu[i]->data = std::vector<float>(dst_sizes[i].DimensionsProduct(), 0);
RETURN_IF_ERROR(dst[i].ReadData(creation_context.queue, dst_cpu[i]));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result) {
Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result) {
return ExecuteGPUOperation(
std::vector<TensorFloat32>{src_cpu}, creation_context, operation,
std::vector<BHWC>{dst_size}, std::vector<TensorFloat32*>{result});
}
absl::Status ExecuteGPUOperation(const TensorFloat32& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result) {
Status ExecuteGPUOperation(const TensorFloat32& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result) {
return ExecuteGPUOperation(std::vector<TensorFloat32>{src_cpu},
creation_context, operation, dst_size, result);
}

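A hedged sketch of how a kernel test can drive the helper above; the 2x2 shape, the data, and the size check are arbitrary, and the operation is assumed to come from one of the Create* factories:

absl::Status RunElementwiseCheck(const CreationContext& creation_context,
                                 GPUOperation* operation) {
  TensorFloat32 src;
  src.shape = BHWC(1, 1, 2, 2);
  src.data = {-1.0f, 0.0f, 2.0f, -3.0f};

  TensorFloat32 dst;
  RETURN_IF_ERROR(ExecuteGPUOperation(src, creation_context, operation,
                                      BHWC(1, 1, 2, 2), &dst));
  return dst.data.size() == 4 ? absl::OkStatus()
                              : absl::InternalError("Unexpected output size");
}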
View File

@ -51,21 +51,21 @@ class OpenCLOperationTest : public ::testing::Test {
CreationContext creation_context_;
};
absl::Status ExecuteGPUOperation(const TensorFloat32& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result);
Status ExecuteGPUOperation(const TensorFloat32& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result);
absl::Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result);
Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation, const BHWC& dst_size,
TensorFloat32* result);
absl::Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation,
const std::vector<BHWC>& dst_sizes,
const std::vector<TensorFloat32*>& dst_cpu);
Status ExecuteGPUOperation(const std::vector<TensorFloat32>& src_cpu,
const CreationContext& creation_context,
GPUOperation* operation,
const std::vector<BHWC>& dst_sizes,
const std::vector<TensorFloat32*>& dst_cpu);
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -96,7 +96,7 @@ ConcatXY& ConcatXY::operator=(ConcatXY&& operation) {
return *this;
}
absl::Status ConcatXY::Compile(const CreationContext& creation_context) {
Status ConcatXY::Compile(const CreationContext& creation_context) {
const auto code =
GetConcatKernelCode(definition_, tensors_count_, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
@ -104,7 +104,7 @@ absl::Status ConcatXY::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status ConcatXY::BindArguments() {
Status ConcatXY::BindArguments() {
kernel_.ResetBindingCounter();
for (int i = 0; i < tensors_count_; ++i) {
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[i]->GetMemoryPtr()));
@ -122,7 +122,7 @@ absl::Status ConcatXY::BindArguments() {
y_offset += attr_.axis == Axis::HEIGHT ? height : 0;
}
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConcatXY::GetGridSize() const {
@ -140,12 +140,12 @@ int3 ConcatXY::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConcatXY::Tune(const TuningParameters& params) {
Status ConcatXY::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status ConcatXY::AddToQueue(CLCommandQueue* queue) {
Status ConcatXY::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -31,10 +31,10 @@ class ConcatXY : public GPUOperation {
ConcatXY(const OperationDef& definition, const ConcatAttributes& attr,
int tensors_count)
: GPUOperation(definition), attr_(attr), tensors_count_(tensors_count) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConcatXY(ConcatXY&& operation);
@ -43,7 +43,7 @@ class ConcatXY : public GPUOperation {
ConcatXY& operator=(const ConcatXY&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
ConcatAttributes attr_;

View File

@ -25,8 +25,8 @@ limitations under the License.
namespace tflite {
namespace gpu {
namespace cl {
namespace {
namespace {
bool IsAllChannelsX4(const std::vector<int>& channels) {
for (int channel : channels) {
if (channel % 4 != 0) {
@ -146,7 +146,6 @@ std::string GetConcatKernelCode(
c += "}\n";
return c;
}
} // namespace
ConcatZ::ConcatZ(ConcatZ&& kernel)
@ -165,7 +164,7 @@ ConcatZ& ConcatZ::operator=(ConcatZ&& kernel) {
return *this;
}
absl::Status ConcatZ::Compile(const CreationContext& creation_context) {
Status ConcatZ::Compile(const CreationContext& creation_context) {
const auto code =
GetConcatKernelCode(definition_, channels_, linked_operations_);
std::vector<CompilerOptions> options;
@ -187,7 +186,7 @@ absl::Status ConcatZ::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status ConcatZ::BindArguments() {
Status ConcatZ::BindArguments() {
kernel_.ResetBindingCounter();
for (int i = 0; i < channels_.size(); ++i) {
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[i]->GetMemoryPtr()));
@ -198,7 +197,7 @@ absl::Status ConcatZ::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[i]->Slices()));
}
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConcatZ::GetGridSize() const {
@ -208,12 +207,12 @@ int3 ConcatZ::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConcatZ::Tune(const TuningParameters& params) {
Status ConcatZ::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status ConcatZ::AddToQueue(CLCommandQueue* queue) {
Status ConcatZ::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -32,10 +32,10 @@ class ConcatZ : public GPUOperation {
public:
ConcatZ(const OperationDef& definition, const std::vector<int>& channels)
: GPUOperation(definition), channels_(channels) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConcatZ(ConcatZ&& kernel);
@ -44,7 +44,7 @@ class ConcatZ : public GPUOperation {
ConcatZ& operator=(const ConcatZ&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
std::vector<int> channels_;

View File

@ -76,7 +76,7 @@ Conv3D& Conv3D::operator=(Conv3D&& operation) {
return *this;
}
absl::Status Conv3D::Compile(const CreationContext& creation_context) {
Status Conv3D::Compile(const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
const std::string code =
@ -92,7 +92,7 @@ absl::Status Conv3D::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Conv3D::BindArguments() {
Status Conv3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
if (conv_params_.AreWeightsBuffer()) {
@ -131,7 +131,7 @@ absl::Status Conv3D::BindArguments() {
IntegralDivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w)));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHDS()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHDS()));
return absl::OkStatus();
return OkStatus();
}
int3 Conv3D::GetGridSize() const {
@ -154,12 +154,12 @@ int3 Conv3D::GetGridSize() const {
conv_params_.work_group_size.z);
}
absl::Status Conv3D::Tune(const TuningParameters& params) {
Status Conv3D::Tune(const TuningParameters& params) {
if (conv_params_.weights_upload_type ==
WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP ||
conv_params_.weights_upload_type ==
WeightsUploadType::LOCAL_MEM_BY_THREADS) {
return absl::OkStatus();
return OkStatus();
}
if (conv_params_.work_group_launch_order[0] == 0 &&
conv_params_.work_group_launch_order[1] == 1 &&
@ -168,10 +168,10 @@ absl::Status Conv3D::Tune(const TuningParameters& params) {
return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
&conv_params_.work_group_size);
}
return absl::OkStatus();
return OkStatus();
}
absl::Status Conv3D::AddToQueue(CLCommandQueue* queue) {
Status Conv3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(),
conv_params_.work_group_size);
@ -903,9 +903,9 @@ Conv3D::ConvParams Conv3D::GuessBestParams(
x_kernel_is_1, y_kernel_is_1, z_kernel_is_1);
}
absl::Status CreateConv3D(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution3DAttributes& attr, Conv3D* result) {
Status CreateConv3D(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution3DAttributes& attr, Conv3D* result) {
*result = Conv3D(definition, attr, *creation_context.device);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

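Together with the base-class interface, the factory above yields the usual four-step lifecycle. A sketch, assuming the input and output tensors have already been attached to the operation via the GPUOperation setters:

absl::Status BuildAndRunConv3D(const CreationContext& creation_context,
                               const OperationDef& op_def,
                               const Convolution3DAttributes& attr,
                               const TuningParameters& tuning_params,
                               CLCommandQueue* queue) {
  Conv3D conv;
  RETURN_IF_ERROR(CreateConv3D(creation_context, op_def, attr, &conv));  // uploads weights
  RETURN_IF_ERROR(conv.Compile(creation_context));  // builds the OpenCL kernel
  RETURN_IF_ERROR(conv.Tune(tuning_params));        // picks work-group sizes
  return conv.AddToQueue(queue);                    // enqueues the dispatch
}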
View File

@ -39,9 +39,9 @@ namespace cl {
class Conv3D : public GPUOperation {
public:
Conv3D() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Conv3D(Conv3D&& operation);
@ -75,21 +75,21 @@ class Conv3D : public GPUOperation {
const CLDevice& device);
template <DataType T>
absl::Status UploadData(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
Status UploadData(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst);
friend absl::Status CreateConv3D(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution3DAttributes& attr,
Conv3D* result);
friend Status CreateConv3D(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution3DAttributes& attr,
Conv3D* result);
friend std::string GenerateConv3D(
const OperationDef& op_def, const LinearStorage& biases,
@ -105,7 +105,7 @@ class Conv3D : public GPUOperation {
int dst_slices, bool x_kernel_is_1,
bool y_kernel_is_1, bool z_kernel_is_1) const;
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Texture2D weights_0_;
@ -125,9 +125,9 @@ class Conv3D : public GPUOperation {
};
template <DataType T>
absl::Status Conv3D::UploadData(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context) {
Status Conv3D::UploadData(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context) {
RETURN_IF_ERROR(UploadWeights(weights, context));
LinearStorageCreateInfo create_info;
create_info.storage_type = conv_params_.AreWeightsBuffer()
@ -139,12 +139,12 @@ absl::Status Conv3D::UploadData(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
create_info.name = "biases";
create_info.aligned_size = weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(create_info, biases, context, &biases_));
return absl::OkStatus();
return OkStatus();
}
template <DataType T>
absl::Status Conv3D::UploadWeights(
const ::tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
Status Conv3D::UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context) {
const int block_size = conv_params_.block_size.w;
const int dst_slices =
AlignByN(IntegralDivideRoundUp(weights.shape.o, 4), block_size);
@ -211,7 +211,7 @@ absl::Status Conv3D::UploadWeights(
}
}
return absl::OkStatus();
return OkStatus();
}
template <DataType S, typename T>
@ -271,9 +271,9 @@ void Conv3D::RearrangeWeightsData(
}
}
absl::Status CreateConv3D(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution3DAttributes& attr, Conv3D* result);
Status CreateConv3D(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution3DAttributes& attr, Conv3D* result);
} // namespace cl
} // namespace gpu

View File

@ -291,16 +291,16 @@ ConvBuffer1x1& ConvBuffer1x1::operator=(ConvBuffer1x1&& operation) {
return *this;
}
absl::Status ConvBuffer1x1::Compile(const CreationContext& creation_context) {
Status ConvBuffer1x1::Compile(const CreationContext& creation_context) {
std::string code =
GenerateConvBuffer1x1(definition_, conv_params_, linked_operations_);
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_));
return absl::OkStatus();
return OkStatus();
}
absl::Status ConvBuffer1x1::BindArguments() {
Status ConvBuffer1x1::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -313,7 +313,7 @@ absl::Status ConvBuffer1x1::BindArguments() {
src_width_elements * src_[0]->Height());
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_size));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConvBuffer1x1::GetGridSize() const {
@ -328,13 +328,13 @@ int3 ConvBuffer1x1::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvBuffer1x1::Tune(const TuningParameters& params) {
Status ConvBuffer1x1::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
&conv_params_.work_group_size);
}
absl::Status ConvBuffer1x1::AddToQueue(CLCommandQueue* queue) {
Status ConvBuffer1x1::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(),
conv_params_.work_group_size);
@ -351,12 +351,12 @@ bool IsConvBuffer1x1Supported(const OperationDef& definition,
attr.padding.appended.w == 0 && attr.padding.appended.h == 0;
}
absl::Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape) {
Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape) {
if (!IsConvBuffer1x1Supported(definition, attr)) {
return absl::InvalidArgumentError("ConvBuffer1x1 is not supported");
return InvalidArgumentError("ConvBuffer1x1 is not supported");
}
const int dst_depth = IntegralDivideRoundUp(attr.weights.shape.o, 4);
const int src_depth = IntegralDivideRoundUp(attr.weights.shape.i, 4);
@ -372,10 +372,10 @@ absl::Status CreateConvBuffer1x1(const CreationContext& creation_context,
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
absl::Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape) {
Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape) {
const int dst_depth = IntegralDivideRoundUp(attr.weights.shape.o, 4);
const int src_depth = IntegralDivideRoundUp(attr.weights.shape.i, 4);
ConvBuffer1x1::ConvParams conv_params;
@ -392,10 +392,11 @@ absl::Status CreateConvBuffer1x1(const CreationContext& creation_context,
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
absl::Status CreateConvBuffer1x1Wino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvBuffer1x1* result,
const BHWC* shape) {
Status CreateConvBuffer1x1Wino4x4To6x6(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result,
const BHWC* shape) {
const int dst_depth = IntegralDivideRoundUp(attr.weights.shape.o, 4);
const int src_depth = IntegralDivideRoundUp(attr.weights.shape.i, 4);
ConvBuffer1x1::ConvParams conv_params;

View File
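Since CreateConvBuffer1x1 rejects unsupported configurations with absl::InvalidArgumentError, callers are expected to probe IsConvBuffer1x1Supported first. A sketch with ConvPowerVR as one possible fallback; the fallback choice is an assumption, not something this file prescribes:

absl::Status PickConv(const CreationContext& creation_context,
                      const OperationDef& op_def,
                      const Convolution2DAttributes& attr,
                      ConvBuffer1x1* fast_conv, ConvPowerVR* generic_conv,
                      bool* picked_fast) {
  *picked_fast = IsConvBuffer1x1Supported(op_def, attr);
  if (*picked_fast) {
    return CreateConvBuffer1x1(creation_context, op_def, attr, fast_conv);
  }
  return CreateConvPowerVR(creation_context, op_def, attr, generic_conv,
                           /*dst_shape=*/nullptr);
}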

@ -45,10 +45,10 @@ class ConvBuffer1x1 : public GPUOperation {
ConvBuffer1x1(const ConvBuffer1x1&) = delete;
ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
struct ConvParams {
int3 block_size = int3(1, 1, 1);
@ -64,33 +64,33 @@ class ConvBuffer1x1 : public GPUOperation {
private:
ConvBuffer1x1(const OperationDef& definition, const ConvParams& conv_params);
friend absl::Status CreateConvBuffer1x1(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvBuffer1x1* result,
const BHWC* shape);
friend absl::Status CreateConvBuffer1x1(
const CreationContext& creation_context, const OperationDef& definition,
const FullyConnectedAttributes& attr, ConvBuffer1x1* result,
const BHWC* shape);
friend absl::Status CreateConvBuffer1x1Wino4x4To6x6(
friend Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape);
friend Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape);
friend Status CreateConvBuffer1x1Wino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvBuffer1x1* result,
const BHWC* shape);
template <DataType T>
absl::Status UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
Status UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
template <DataType T>
absl::Status UploadDataForWinograd4x4To6x6(
Status UploadDataForWinograd4x4To6x6(
const ::tflite::gpu::Tensor<OHWI, T>& weights, const CLDevice& device,
CLContext* context);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Buffer weights_;
@ -101,20 +101,20 @@ class ConvBuffer1x1 : public GPUOperation {
};
template <DataType T>
absl::Status ConvBuffer1x1::UploadData(
const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
Status ConvBuffer1x1::UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context) {
RETURN_IF_ERROR(UploadWeights(weights, context));
LinearStorageCreateInfo create_info;
create_info.storage_type = LinearStorageType::BUFFER;
create_info.data_type = definition_.GetDataType();
create_info.aligned_size = weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(create_info, biases, context, &biases_));
return absl::OkStatus();
return OkStatus();
}
template <DataType T>
absl::Status ConvBuffer1x1::UploadDataForWinograd4x4To6x6(
Status ConvBuffer1x1::UploadDataForWinograd4x4To6x6(
const ::tflite::gpu::Tensor<OHWI, T>& weights, const CLDevice& device,
CLContext* context) {
::tflite::gpu::Tensor<OHWI, T> wino_weights;
@ -132,7 +132,7 @@ absl::Status ConvBuffer1x1::UploadDataForWinograd4x4To6x6(
}
template <DataType T>
absl::Status ConvBuffer1x1::UploadWeights(
Status ConvBuffer1x1::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
@ -162,22 +162,21 @@ absl::Status ConvBuffer1x1::UploadWeights(
bool IsConvBuffer1x1Supported(const OperationDef& definition,
const Convolution2DAttributes& attr);
absl::Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result,
const BHWC* shape = nullptr);
Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape = nullptr);
absl::Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvBuffer1x1* result,
const BHWC* shape = nullptr);
Status CreateConvBuffer1x1(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvBuffer1x1* result, const BHWC* shape = nullptr);
absl::Status CreateConvBuffer1x1Wino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvBuffer1x1* result,
const BHWC* shape = nullptr);
Status CreateConvBuffer1x1Wino4x4To6x6(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvBuffer1x1* result,
const BHWC* shape = nullptr);
} // namespace cl
} // namespace gpu

View File

@ -219,7 +219,7 @@ ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) {
return *this;
}
absl::Status ConvConstants::Compile(const CreationContext& creation_context) {
Status ConvConstants::Compile(const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
const auto code = GenerateConvolutionConstantCode(
@ -240,7 +240,7 @@ absl::Status ConvConstants::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status ConvConstants::BindArguments() {
Status ConvConstants::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -254,7 +254,7 @@ absl::Status ConvConstants::BindArguments() {
kernel_.SetBytesAuto(int2(dilation_.x * src_[0]->Batch(), dilation_.y)));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConvConstants::GetGridSize() const {
@ -263,12 +263,12 @@ int3 ConvConstants::GetGridSize() const {
return int3(grid_x, grid_y, 1);
}
absl::Status ConvConstants::Tune(const TuningParameters& params) {
Status ConvConstants::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status ConvConstants::AddToQueue(CLCommandQueue* queue) {
Status ConvConstants::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -294,12 +294,12 @@ bool IsConvConstantsSupported(const CLDevice& device,
return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8;
}
absl::Status CreateConvConstants(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvConstants* result) {
Status CreateConvConstants(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvConstants* result) {
if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) {
return absl::InvalidArgumentError("ConvConstants is not supported");
return InvalidArgumentError("ConvConstants is not supported");
}
*result = ConvConstants(definition, attr);
RETURN_IF_ERROR(
@ -310,7 +310,8 @@ absl::Status CreateConvConstants(const CreationContext& creation_context,
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -35,10 +35,10 @@ namespace cl {
class ConvConstants : public GPUOperation {
public:
ConvConstants() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvConstants(ConvConstants&& kernel);
@ -47,9 +47,10 @@ class ConvConstants : public GPUOperation {
ConvConstants& operator=(const ConvConstants&) = delete;
private:
friend absl::Status CreateConvConstants(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvConstants* result);
friend Status CreateConvConstants(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvConstants* result);
explicit ConvConstants(const OperationDef& definition,
const Convolution2DAttributes& attr)
: GPUOperation(definition),
@ -61,14 +62,14 @@ class ConvConstants : public GPUOperation {
dst_channels_(attr.weights.shape.o) {}
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Buffer weights_;
@ -86,7 +87,7 @@ class ConvConstants : public GPUOperation {
};
template <DataType T>
absl::Status ConvConstants::UploadWeights(
Status ConvConstants::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
const int kernel_x = weights.shape.w;
@ -156,10 +157,10 @@ bool IsConvConstantsSupported(const CLDevice& device,
const OperationDef& definition,
const Convolution2DAttributes& attr);
absl::Status CreateConvConstants(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvConstants* result);
Status CreateConvConstants(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvConstants* result);
} // namespace cl
} // namespace gpu

View File

@ -173,7 +173,7 @@ ConvPowerVR& ConvPowerVR::operator=(ConvPowerVR&& operation) {
return *this;
}
absl::Status ConvPowerVR::Compile(const CreationContext& creation_context) {
Status ConvPowerVR::Compile(const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_padding_.x != 1;
const std::string code =
@ -189,7 +189,7 @@ absl::Status ConvPowerVR::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status ConvPowerVR::BindArguments() {
Status ConvPowerVR::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -211,7 +211,7 @@ absl::Status ConvPowerVR::BindArguments() {
}
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConvPowerVR::GetGridSize() const {
@ -245,13 +245,13 @@ int3 ConvPowerVR::GetGridSize() const {
}
}
absl::Status ConvPowerVR::Tune(const TuningParameters& params) {
Status ConvPowerVR::Tune(const TuningParameters& params) {
if (conv_params_.weights_upload_type ==
WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP ||
conv_params_.weights_upload_type ==
WeightsUploadType::LOCAL_MEM_BY_THREADS ||
conv_params_.fixed_work_group_size) {
return absl::OkStatus();
return OkStatus();
}
if (conv_params_.work_group_launch_order[0] == 0 &&
conv_params_.work_group_launch_order[1] == 1 &&
@ -260,10 +260,10 @@ absl::Status ConvPowerVR::Tune(const TuningParameters& params) {
return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
&conv_params_.work_group_size);
}
return absl::OkStatus();
return OkStatus();
}
absl::Status ConvPowerVR::AddToQueue(CLCommandQueue* queue) {
Status ConvPowerVR::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(),
conv_params_.work_group_size);
@ -848,26 +848,27 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd(
return params;
}
absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape) {
Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape) {
*result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape) {
Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape) {
*result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
absl::Status CreateConvPowerVRWino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvPowerVR* result,
const BHWC* dst_shape) {
Status CreateConvPowerVRWino4x4To6x6(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result,
const BHWC* dst_shape) {
*result = ConvPowerVR(definition);
result->conv_params_ = result->GuessBestParamsWinograd(
*creation_context.device, definition, attr, dst_shape);
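
BindArguments above chains many Set* calls through RETURN_IF_ERROR and only reaches the final absl::OkStatus() when every call succeeded. A self-contained sketch of that control flow; the macro below is a simplified stand-in, not the project's actual definition:

#include "absl/status/status.h"

// Simplified stand-in for the RETURN_IF_ERROR macro used throughout the
// diff: evaluate the expression once, early-return on any non-OK status.
#define RETURN_IF_ERROR(expr)              \
  do {                                     \
    const absl::Status _status = (expr);   \
    if (!_status.ok()) return _status;     \
  } while (false)

absl::Status BindOne(int slot) {
  return slot >= 0 ? absl::OkStatus()
                   : absl::InternalError("negative binding slot");
}

absl::Status BindAll() {
  RETURN_IF_ERROR(BindOne(0));  // stops here if the first binding fails
  RETURN_IF_ERROR(BindOne(1));
  return absl::OkStatus();      // reached only if every binding succeeded
}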


@ -39,9 +39,9 @@ namespace cl {
class ConvPowerVR : public GPUOperation {
public:
ConvPowerVR() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvPowerVR(ConvPowerVR&& operation);
@ -87,31 +87,29 @@ class ConvPowerVR : public GPUOperation {
explicit ConvPowerVR(const OperationDef& definition);
template <DataType T>
absl::Status UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
Status UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
template <DataType T>
absl::Status UploadDataForWinograd4x4To6x6(
Status UploadDataForWinograd4x4To6x6(
const ::tflite::gpu::Tensor<OHWI, T>& weights, const CLDevice& device,
CLContext* context);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
friend absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result,
const BHWC* dst_shape);
friend Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape);
friend absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvPowerVR* result,
const BHWC* dst_shape);
friend Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape);
friend absl::Status CreateConvPowerVRWino4x4To6x6(
friend Status CreateConvPowerVRWino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvPowerVR* result,
const BHWC* dst_shape);
@ -140,7 +138,7 @@ class ConvPowerVR : public GPUOperation {
bool different_weights_for_height,
const BHWC* dst_shape = nullptr) const;
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Buffer weights_;
@ -154,20 +152,20 @@ class ConvPowerVR : public GPUOperation {
};
template <DataType T>
absl::Status ConvPowerVR::UploadData(
const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
Status ConvPowerVR::UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context) {
RETURN_IF_ERROR(UploadWeights(weights, context));
LinearStorageCreateInfo create_info;
create_info.storage_type = LinearStorageType::BUFFER;
create_info.data_type = conv_params_.weights_data_type;
create_info.aligned_size = weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(create_info, biases, context, &biases_));
return absl::OkStatus();
return OkStatus();
}
template <DataType T>
absl::Status ConvPowerVR::UploadDataForWinograd4x4To6x6(
Status ConvPowerVR::UploadDataForWinograd4x4To6x6(
const ::tflite::gpu::Tensor<OHWI, T>& weights, const CLDevice& device,
CLContext* context) {
::tflite::gpu::Tensor<OHWI, T> wino_weights;
@ -181,12 +179,12 @@ absl::Status ConvPowerVR::UploadDataForWinograd4x4To6x6(
bias.shape = Linear(weights.shape.o);
bias.data.resize(weights.shape.o, 0.0f);
RETURN_IF_ERROR(CreateLinearStorage(create_info, bias, context, &biases_));
return absl::OkStatus();
return OkStatus();
}
template <DataType T>
absl::Status ConvPowerVR::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
Status ConvPowerVR::UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context) {
const int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
@ -212,22 +210,21 @@ absl::Status ConvPowerVR::UploadWeights(
}
}
absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result,
const BHWC* dst_shape = nullptr);
Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape = nullptr);
absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvPowerVR* result,
const BHWC* dst_shape = nullptr);
Status CreateConvPowerVR(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape = nullptr);
absl::Status CreateConvPowerVRWino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvPowerVR* result,
const BHWC* dst_shape = nullptr);
Status CreateConvPowerVRWino4x4To6x6(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvPowerVR* result,
const BHWC* dst_shape = nullptr);
} // namespace cl
} // namespace gpu
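
Each kernel header in this change overrides the same GPUOperation hooks (AddToQueue, Tune, Compile), so the type swap ripples through every override. A reduced sketch of that interface shape, not the delegate's actual class:

#include "absl/status/status.h"

// Reduced sketch of the GPUOperation-style interface: every lifecycle hook
// reports through the same status type, so overrides change in lockstep.
class Operation {
 public:
  virtual ~Operation() = default;
  virtual absl::Status Compile() = 0;
  virtual absl::Status AddToQueue() = 0;
};

class NoOpOperation : public Operation {
 public:
  absl::Status Compile() override { return absl::OkStatus(); }
  absl::Status AddToQueue() override { return absl::OkStatus(); }
};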


@ -30,7 +30,6 @@ namespace tflite {
namespace gpu {
namespace cl {
namespace {
std::string GenerateConvCode(
const OperationDef& op_def, const int3& block_size, bool is1x1,
bool adreno4xx_optimization, bool stride_correction,
@ -385,7 +384,7 @@ ConvTexture& ConvTexture::operator=(ConvTexture&& operation) {
return *this;
}
absl::Status ConvTexture::Compile(const CreationContext& creation_context) {
Status ConvTexture::Compile(const CreationContext& creation_context) {
auto storage_type = definition_.GetPrimaryStorageType();
bool is1x1 = kernel_size_.x == 1 && kernel_size_.y == 1;
bool adreno4xx_optimization =
@ -408,7 +407,7 @@ absl::Status ConvTexture::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status ConvTexture::BindArguments() {
Status ConvTexture::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_0_.GetMemoryPtr()));
@ -428,7 +427,7 @@ absl::Status ConvTexture::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(stride_));
RETURN_IF_ERROR(
kernel_.SetBytesAuto(int2(padding_.x * src_[0]->Batch(), padding_.y)));
return absl::OkStatus();
return OkStatus();
}
int3 ConvTexture::GetGridSize() const {
@ -439,36 +438,37 @@ int3 ConvTexture::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvTexture::Tune(const TuningParameters& params) {
Status ConvTexture::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
&work_group_size_);
}
absl::Status ConvTexture::AddToQueue(CLCommandQueue* queue) {
Status ConvTexture::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result) {
Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result) {
*result = ConvTexture(definition, attr);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
absl::Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvTexture* result) {
Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvTexture* result) {
*result = ConvTexture(definition);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
absl::Status CreateConvTextureWino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvTexture* result) {
Status CreateConvTextureWino4x4To6x6(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result) {
*result = ConvTexture(definition);
result->different_weights_for_height_ = true;
result->block_size_ = {4, 1, 2};


@ -41,10 +41,10 @@ namespace cl {
class ConvTexture : public GPUOperation {
public:
ConvTexture() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvTexture(ConvTexture&& operation);
@ -53,16 +53,16 @@ class ConvTexture : public GPUOperation {
ConvTexture& operator=(const ConvTexture&) = delete;
private:
friend absl::Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result);
friend absl::Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvTexture* result);
friend Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result);
friend Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvTexture* result);
friend absl::Status CreateConvTextureWino4x4To6x6(
friend Status CreateConvTextureWino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvTexture* result);
@ -70,25 +70,25 @@ class ConvTexture : public GPUOperation {
const Convolution2DAttributes& attr);
explicit ConvTexture(const OperationDef& definition);
template <DataType T>
absl::Status UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
Status UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
template <DataType T>
absl::Status UploadDataForWinograd4x4To6x6(
Status UploadDataForWinograd4x4To6x6(
const ::tflite::gpu::Tensor<OHWI, T>& weights, const CLDevice& device,
CLContext* context);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst_0, absl::Span<T> dst_1,
absl::Span<T> dst_2, absl::Span<T> dst_3);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Texture2D weights_0_;
@ -114,20 +114,20 @@ class ConvTexture : public GPUOperation {
};
template <DataType T>
absl::Status ConvTexture::UploadData(
const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
Status ConvTexture::UploadData(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context) {
RETURN_IF_ERROR(UploadWeights(weights, context));
LinearStorageCreateInfo create_info;
create_info.storage_type = LinearStorageType::TEXTURE_2D;
create_info.data_type = definition_.GetDataType();
create_info.aligned_size = weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(create_info, biases, context, &biases_));
return absl::OkStatus();
return OkStatus();
}
template <DataType T>
absl::Status ConvTexture::UploadDataForWinograd4x4To6x6(
Status ConvTexture::UploadDataForWinograd4x4To6x6(
const ::tflite::gpu::Tensor<OHWI, T>& weights, const CLDevice& device,
CLContext* context) {
::tflite::gpu::Tensor<OHWI, T> wino_weights;
@ -145,8 +145,8 @@ absl::Status ConvTexture::UploadDataForWinograd4x4To6x6(
}
template <DataType T>
absl::Status ConvTexture::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
Status ConvTexture::UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context) {
int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
dst_depth = AlignByN(dst_depth, block_size_.z);
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
@ -246,19 +246,20 @@ void ConvTexture::RearrangeWeightsData(
}
}
absl::Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result);
Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result);
absl::Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvTexture* result);
Status CreateConvTexture(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
ConvTexture* result);
absl::Status CreateConvTextureWino4x4To6x6(
const CreationContext& creation_context, const OperationDef& definition,
const Convolution2DAttributes& attr, ConvTexture* result);
Status CreateConvTextureWino4x4To6x6(const CreationContext& creation_context,
const OperationDef& definition,
const Convolution2DAttributes& attr,
ConvTexture* result);
} // namespace cl
} // namespace gpu
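
RearrangeWeightsData above writes into absl::Span destinations: a span is a non-owning view, so the helper fills caller-provided storage while errors still travel through the status channel. A tiny sketch with a hypothetical helper, not the file's actual function:

#include "absl/status/status.h"
#include "absl/types/span.h"

// Hypothetical helper in the spirit of RearrangeWeightsData: fill a
// caller-owned destination viewed through a non-owning absl::Span.
absl::Status FillWithZeros(absl::Span<float> dst) {
  if (dst.empty()) {
    return absl::InvalidArgumentError("destination span is empty");
  }
  for (float& value : dst) value = 0.0f;
  return absl::OkStatus();
}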


@ -35,12 +35,12 @@ namespace {
class OpenClConverterImpl : public TensorObjectConverter {
public:
virtual absl::Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) = 0;
virtual Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) = 0;
protected:
absl::Status DispatchKernel(cl_mem input, cl_mem output) {
Status DispatchKernel(cl_mem input, cl_mem output) {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(input));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(output));
@ -119,9 +119,9 @@ class FromTensorConverter : public OpenClConverterImpl {
})");
}
absl::Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
auto params_kernel = output_def.object_def.data_layout == DataLayout::BHWC
? GetToBhwcKernel(input_def, output_def)
: GetToDhwc4Kernel(input_def, output_def);
@ -157,12 +157,11 @@ __kernel void from_tensor()" +
environment->device(), &kernel_);
}
absl::Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
auto output = absl::get_if<OpenClBuffer>(&output_obj);
if (!output || !output->memobj) {
return absl::InvalidArgumentError(
"Missing output in from_tensor converter");
return InvalidArgumentError("Missing output in from_tensor converter");
}
auto input_texture = absl::get_if<OpenClTexture>(&input_obj);
if (input_texture && input_texture->memobj) {
@ -172,7 +171,7 @@ __kernel void from_tensor()" +
if (input_buffer && input_buffer->memobj) {
return DispatchKernel(input_buffer->memobj, output->memobj);
}
return absl::InvalidArgumentError("Missing input in from_tensor converter");
return InvalidArgumentError("Missing input in from_tensor converter");
}
};
@ -226,9 +225,9 @@ class ToTensorConverter : public OpenClConverterImpl {
)");
}
absl::Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
auto params_kernel = input_def.object_def.data_layout == DataLayout::BHWC
? GetFromBhwcKernel(input_def, output_def)
: GetFromDhwc4Kernel(input_def, output_def);
@ -262,11 +261,11 @@ __kernel void to_tensor()" +
&kernel_);
}
absl::Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
auto input = absl::get_if<OpenClBuffer>(&input_obj);
if (!input || !input->memobj) {
return absl::InvalidArgumentError("Missing input in to_tensor converter");
return InvalidArgumentError("Missing input in to_tensor converter");
}
auto output_texture = absl::get_if<OpenClTexture>(&output_obj);
if (output_texture && output_texture->memobj) {
@ -276,7 +275,7 @@ __kernel void to_tensor()" +
if (output_buffer && output_buffer->memobj) {
return DispatchKernel(input->memobj, output_buffer->memobj);
}
return absl::InvalidArgumentError("Missing output in to_tensor converter");
return InvalidArgumentError("Missing output in to_tensor converter");
}
};
@ -319,18 +318,18 @@ class TrivialCopier : public OpenClConverterImpl {
input.data_layout == output.data_layout;
}
absl::Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
dims_ = input_def.dimensions;
data_type_ = input_def.object_def.data_type;
queue_ = environment->queue();
region_ = CalculateTextureRegion(output_def);
return absl::OkStatus();
return OkStatus();
}
absl::Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
auto texture_input = absl::get_if<OpenClTexture>(&input_obj);
auto texture_output = absl::get_if<OpenClTexture>(&output_obj);
if (texture_input && texture_output) {
@ -341,12 +340,12 @@ class TrivialCopier : public OpenClConverterImpl {
if (buffer_input && buffer_output) {
return Copy(*buffer_input, *buffer_output);
}
return absl::InternalError("Unexpected object");
return InternalError("Unexpected object");
}
absl::Status Copy(const OpenClBuffer& input, const OpenClBuffer& output) {
Status Copy(const OpenClBuffer& input, const OpenClBuffer& output) {
if (input.memobj == output.memobj) {
return absl::OkStatus();
return OkStatus();
}
return GetOpenCLError(clEnqueueCopyBuffer(
queue_->queue(), input.memobj, output.memobj, 0, 0,
@ -354,9 +353,9 @@ class TrivialCopier : public OpenClConverterImpl {
nullptr));
}
absl::Status Copy(const OpenClTexture& input, const OpenClTexture& output) {
Status Copy(const OpenClTexture& input, const OpenClTexture& output) {
if (input.memobj == output.memobj) {
return absl::OkStatus();
return OkStatus();
}
size_t origin[3] = {0, 0, 0};
return GetOpenCLError(
@ -381,18 +380,18 @@ class CpuCopier : public OpenClConverterImpl {
IsOpenClTextureOrBuffer(input.object_type)));
}
absl::Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
Status Init(const TensorObjectDef& input_def,
const TensorObjectDef& output_def,
Environment* environment) final {
region_ = CalculateTextureRegion(
input_def.object_def.object_type == ObjectType::CPU_MEMORY ? output_def
: input_def);
queue_ = environment->queue();
return absl::OkStatus();
return OkStatus();
}
absl::Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
Status Convert(const TensorObject& input_obj,
const TensorObject& output_obj) override {
auto cpu_input = absl::get_if<CpuMemory>(&input_obj);
auto cpu_output = absl::get_if<CpuMemory>(&output_obj);
if (cpu_input) {
@ -420,7 +419,7 @@ class CpuCopier : public OpenClConverterImpl {
buffer_input->memobj, cpu_output->size_bytes, cpu_output->data);
}
}
return absl::InternalError("Unexpected object");
return InternalError("Unexpected object");
}
private:
@ -443,7 +442,7 @@ class OpenClTensorConverterBuilder : public TensorObjectConverterBuilder {
ToTensorConverter::IsSupported(input_def, output_def));
}
absl::Status MakeConverter(
Status MakeConverter(
const TensorObjectDef& input, const TensorObjectDef& output,
std::unique_ptr<TensorObjectConverter>* converter) final {
std::unique_ptr<OpenClConverterImpl> impl;
@ -458,11 +457,11 @@ class OpenClTensorConverterBuilder : public TensorObjectConverterBuilder {
} else if (ToTensorConverter::IsSupported(input_def, output_def)) {
impl = absl::make_unique<ToTensorConverter>();
} else {
return absl::UnimplementedError("Unsupported conversion");
return UnimplementedError("Unsupported conversion");
}
RETURN_IF_ERROR(impl->Init(input, output, environment_));
*converter = std::move(impl);
return absl::OkStatus();
return OkStatus();
}
Environment* environment_;
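
The converters above branch on which alternative of the tensor-object variant is populated and fall through to an InvalidArgumentError when the expected one is absent. The same dispatch in self-contained form, with placeholder object types instead of CpuMemory/OpenClBuffer/OpenClTexture:

#include <variant>

#include "absl/status/status.h"

// Placeholder object types; the real converters hold cl_mem handles.
struct HostMemory {
  const void* data = nullptr;
};
struct DeviceBuffer {
  unsigned handle = 0;
};
using Object = std::variant<HostMemory, DeviceBuffer>;

absl::Status Convert(const Object& input) {
  if (auto* host = std::get_if<HostMemory>(&input); host && host->data) {
    return absl::OkStatus();  // host-side path would run here
  }
  if (auto* dev = std::get_if<DeviceBuffer>(&input); dev && dev->handle) {
    return absl::OkStatus();  // device-side path would run here
  }
  return absl::InvalidArgumentError("Missing input in converter");
}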


@ -368,8 +368,7 @@ ConvolutionTransposed& ConvolutionTransposed::operator=(
return *this;
}
absl::Status ConvolutionTransposed::Compile(
const CreationContext& creation_context) {
Status ConvolutionTransposed::Compile(const CreationContext& creation_context) {
const auto code = GenerateConvolutionTransposedCode(
definition_, biases_, *creation_context.device, weights_are_buffer_,
block_size_, linked_operations_);
@ -381,7 +380,7 @@ absl::Status ConvolutionTransposed::Compile(
*creation_context.device, &kernel_);
}
absl::Status ConvolutionTransposed::BindArguments() {
Status ConvolutionTransposed::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
if (weights_are_buffer_) {
@ -400,7 +399,7 @@ absl::Status ConvolutionTransposed::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(padding_));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConvolutionTransposed::GetGridSize() const {
@ -413,21 +412,21 @@ int3 ConvolutionTransposed::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvolutionTransposed::Tune(const TuningParameters& params) {
Status ConvolutionTransposed::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
&work_group_size_);
}
absl::Status ConvolutionTransposed::AddToQueue(CLCommandQueue* queue) {
Status ConvolutionTransposed::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateConvolutionTransposed(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed* result) {
Status CreateConvolutionTransposed(const CreationContext& creation_context,
const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed* result) {
*result = ConvolutionTransposed(definition, attr, *creation_context.device);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));
@ -439,7 +438,8 @@ absl::Status CreateConvolutionTransposed(
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl
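
AddToQueue above shows the other propagation idiom in this change: intermediate steps go through RETURN_IF_ERROR, while the status of the final call is returned directly rather than unwrapped and replaced with OkStatus(). A sketch with stub steps and the macro expanded by hand:

#include "absl/status/status.h"

// Stub steps standing in for BindArguments() and DispatchImplicit().
absl::Status BindArguments() { return absl::OkStatus(); }
absl::Status Dispatch() { return absl::OkStatus(); }

absl::Status AddToQueue() {
  // Hand-expanded RETURN_IF_ERROR: bail out early on failure.
  if (absl::Status s = BindArguments(); !s.ok()) return s;
  // The last status-returning call is forwarded as-is; nothing is gained by
  // checking it and re-returning absl::OkStatus().
  return Dispatch();
}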


@ -38,10 +38,10 @@ namespace cl {
class ConvolutionTransposed : public GPUOperation {
public:
ConvolutionTransposed() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvolutionTransposed(ConvolutionTransposed&& operation);
@ -50,7 +50,7 @@ class ConvolutionTransposed : public GPUOperation {
ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
private:
friend absl::Status CreateConvolutionTransposed(
friend Status CreateConvolutionTransposed(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed* result);
@ -58,14 +58,14 @@ class ConvolutionTransposed : public GPUOperation {
const ConvolutionTransposedAttributes& attr,
const CLDevice& device);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
LinearStorage biases_;
@ -88,7 +88,7 @@ class ConvolutionTransposed : public GPUOperation {
};
template <DataType T>
absl::Status ConvolutionTransposed::UploadWeights(
Status ConvolutionTransposed::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int dst_depth =
AlignByN(IntegralDivideRoundUp(weights.shape.o, 4), block_size_.z);
@ -153,7 +153,7 @@ absl::Status ConvolutionTransposed::UploadWeights(
}
}
return absl::OkStatus();
return OkStatus();
}
template <DataType S, typename T>
@ -208,9 +208,10 @@ void ConvolutionTransposed::RearrangeWeightsData(
}
}
absl::Status CreateConvolutionTransposed(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr, ConvolutionTransposed* result);
Status CreateConvolutionTransposed(const CreationContext& creation_context,
const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed* result);
} // namespace cl
} // namespace gpu
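
The templated UploadWeights helpers above are instantiated per DataType yet share one error channel: any failure propagates out as a status. A stripped-down sketch, with a std::vector standing in for ::tflite::gpu::Tensor and the device copy elided:

#include <vector>

#include "absl/status/status.h"

// Stripped-down stand-in for the templated UploadWeights above.
template <typename T>
absl::Status UploadWeights(const std::vector<T>& weights) {
  if (weights.empty()) {
    return absl::InvalidArgumentError("no weights to upload");
  }
  // ... rearrange and copy to device memory here ...
  return absl::OkStatus();
}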


@ -396,7 +396,7 @@ ConvolutionTransposed3D& ConvolutionTransposed3D::operator=(
return *this;
}
absl::Status ConvolutionTransposed3D::Compile(
Status ConvolutionTransposed3D::Compile(
const CreationContext& creation_context) {
const auto code = GenerateConvolutionTransposed3DCode(
definition_, biases_, *creation_context.device, weights_are_buffer_,
@ -417,7 +417,7 @@ absl::Status ConvolutionTransposed3D::Compile(
*creation_context.device, &kernel_);
}
absl::Status ConvolutionTransposed3D::BindArguments() {
Status ConvolutionTransposed3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
if (weights_are_buffer_) {
@ -444,7 +444,7 @@ absl::Status ConvolutionTransposed3D::BindArguments() {
IntegralDivideRoundUp(dst_[0]->Slices(), block_size_.w)));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHDS()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHDS()));
return absl::OkStatus();
return OkStatus();
}
int3 ConvolutionTransposed3D::GetGridSize() const {
@ -459,18 +459,18 @@ int3 ConvolutionTransposed3D::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvolutionTransposed3D::Tune(const TuningParameters& params) {
Status ConvolutionTransposed3D::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
&work_group_size_);
}
absl::Status ConvolutionTransposed3D::AddToQueue(CLCommandQueue* queue) {
Status ConvolutionTransposed3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateConvolutionTransposed3D(
Status CreateConvolutionTransposed3D(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposed3DAttributes& attr,
ConvolutionTransposed3D* result) {
@ -485,7 +485,8 @@ absl::Status CreateConvolutionTransposed3D(
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl


@ -38,10 +38,10 @@ namespace cl {
class ConvolutionTransposed3D : public GPUOperation {
public:
ConvolutionTransposed3D() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvolutionTransposed3D(ConvolutionTransposed3D&& operation);
@ -50,7 +50,7 @@ class ConvolutionTransposed3D : public GPUOperation {
ConvolutionTransposed3D& operator=(const ConvolutionTransposed3D&) = delete;
private:
friend absl::Status CreateConvolutionTransposed3D(
friend Status CreateConvolutionTransposed3D(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposed3DAttributes& attr,
ConvolutionTransposed3D* result);
@ -58,14 +58,14 @@ class ConvolutionTransposed3D : public GPUOperation {
const ConvolutionTransposed3DAttributes& attr,
const CLDevice& device);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
LinearStorage biases_;
@ -88,7 +88,7 @@ class ConvolutionTransposed3D : public GPUOperation {
};
template <DataType T>
absl::Status ConvolutionTransposed3D::UploadWeights(
Status ConvolutionTransposed3D::UploadWeights(
const ::tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
const int dst_depth =
AlignByN(IntegralDivideRoundUp(weights.shape.o, 4), block_size_.z);
@ -155,7 +155,7 @@ absl::Status ConvolutionTransposed3D::UploadWeights(
}
}
return absl::OkStatus();
return OkStatus();
}
template <DataType S, typename T>
@ -214,7 +214,7 @@ void ConvolutionTransposed3D::RearrangeWeightsData(
}
}
absl::Status CreateConvolutionTransposed3D(
Status CreateConvolutionTransposed3D(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposed3DAttributes& attr,
ConvolutionTransposed3D* result);


@ -304,11 +304,12 @@ ConvolutionTransposed3x3& ConvolutionTransposed3x3::operator=(
return *this;
}
absl::Status ConvolutionTransposed3x3::Compile(
Status ConvolutionTransposed3x3::Compile(
const CreationContext& creation_context) {
const auto code = GenerateConvolutionTransposedCode(
definition_, biases_, linked_operations_, weights_upload_type_, padding_,
work_group_launch_order_);
std::vector<CompilerOptions> options;
if (definition_.precision == CalculationsPrecision::F16 &&
creation_context.device->IsPowerVR()) {
@ -317,10 +318,11 @@ absl::Status ConvolutionTransposed3x3::Compile(
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
code, "main_function", options, *creation_context.context,
*creation_context.device, &kernel_));
return absl::OkStatus();
return OkStatus();
}
absl::Status ConvolutionTransposed3x3::BindArguments() {
Status ConvolutionTransposed3x3::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -335,7 +337,10 @@ absl::Status ConvolutionTransposed3x3::BindArguments() {
padding_.x >= 1 ? (padding_.x - 1) / 2 : (padding_.x - 2) / 2;
const int padding_y =
padding_.y >= 1 ? (padding_.y - 1) / 2 : (padding_.y - 2) / 2;
return kernel_.SetBytesAuto(int2(padding_x * src_[0]->Batch(), padding_y));
RETURN_IF_ERROR(
kernel_.SetBytesAuto(int2(padding_x * src_[0]->Batch(), padding_y)));
return OkStatus();
}
int3 ConvolutionTransposed3x3::GetGridSize() const {
@ -353,7 +358,7 @@ int3 ConvolutionTransposed3x3::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvolutionTransposed3x3::AddToQueue(CLCommandQueue* queue) {
Status ConvolutionTransposed3x3::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -365,13 +370,13 @@ bool IsConvolutionTransposed3x3Supported(
attr.stride.w == 2 && attr.stride.h == 2;
}
absl::Status CreateConvolutionTransposed3x3(
Status CreateConvolutionTransposed3x3(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed3x3* result) {
if (!IsConvolutionTransposed3x3Supported(*creation_context.device, definition,
attr)) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"ConvolutionTransposed3x3 doesn't support these attributes");
}
const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h);
@ -386,7 +391,7 @@ absl::Status CreateConvolutionTransposed3x3(
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl


@ -37,8 +37,8 @@ namespace cl {
class ConvolutionTransposed3x3 : public GPUOperation {
public:
ConvolutionTransposed3x3() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvolutionTransposed3x3(ConvolutionTransposed3x3&& operation);
@ -56,19 +56,19 @@ class ConvolutionTransposed3x3 : public GPUOperation {
private:
ConvolutionTransposed3x3(const OperationDef& definition,
const CLDevice& device, int2 padding);
friend absl::Status CreateConvolutionTransposed3x3(
friend Status CreateConvolutionTransposed3x3(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed3x3* result);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
int2 padding_;
@ -82,7 +82,7 @@ class ConvolutionTransposed3x3 : public GPUOperation {
};
template <DataType T>
absl::Status ConvolutionTransposed3x3::UploadWeights(
Status ConvolutionTransposed3x3::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
const int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
@ -165,7 +165,7 @@ bool IsConvolutionTransposed3x3Supported(
const CLDevice& device, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
absl::Status CreateConvolutionTransposed3x3(
Status CreateConvolutionTransposed3x3(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed3x3* result);


@ -221,18 +221,19 @@ ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=(
return *this;
}
absl::Status ConvolutionTransposed3x3Thin::Compile(
Status ConvolutionTransposed3x3Thin::Compile(
const CreationContext& creation_context) {
const auto code = GenerateConvolutionTransposedCode(
definition_, biases_, IntegralDivideRoundUp(src_channels_, 4),
IntegralDivideRoundUp(dst_channels_, 4), *creation_context.device,
linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_);
}
absl::Status ConvolutionTransposed3x3Thin::BindArguments() {
Status ConvolutionTransposed3x3Thin::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -241,7 +242,7 @@ absl::Status ConvolutionTransposed3x3Thin::BindArguments() {
RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ConvolutionTransposed3x3Thin::GetGridSize() const {
@ -251,13 +252,12 @@ int3 ConvolutionTransposed3x3Thin::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvolutionTransposed3x3Thin::Tune(
const TuningParameters& params) {
Status ConvolutionTransposed3x3Thin::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status ConvolutionTransposed3x3Thin::AddToQueue(CLCommandQueue* queue) {
Status ConvolutionTransposed3x3Thin::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -271,13 +271,13 @@ bool IsConvolutionTransposed3x3ThinSupported(
attr.padding.appended.h == 1;
}
absl::Status CreateConvolutionTransposed3x3Thin(
Status CreateConvolutionTransposed3x3Thin(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed3x3Thin* result) {
if (!IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
attr)) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"ConvolutionTransposed3x3Thin doesn't support this attributes");
}
*result = ConvolutionTransposed3x3Thin(definition, attr);
@ -291,7 +291,8 @@ absl::Status CreateConvolutionTransposed3x3Thin(
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl


@ -37,10 +37,10 @@ namespace cl {
class ConvolutionTransposed3x3Thin : public GPUOperation {
public:
ConvolutionTransposed3x3Thin() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvolutionTransposed3x3Thin(ConvolutionTransposed3x3Thin&& operation);
@ -51,7 +51,7 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
delete;
private:
friend absl::Status CreateConvolutionTransposed3x3Thin(
friend Status CreateConvolutionTransposed3x3Thin(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed3x3Thin* result);
@ -59,14 +59,14 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Buffer weights_;
@ -80,7 +80,7 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
};
template <DataType T>
absl::Status ConvolutionTransposed3x3Thin::UploadWeights(
Status ConvolutionTransposed3x3Thin::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(src_channels_, 4);
const int dst_depth = IntegralDivideRoundUp(dst_channels_, 4);
@ -150,7 +150,7 @@ void ConvolutionTransposed3x3Thin::RearrangeWeightsData(
bool IsConvolutionTransposed3x3ThinSupported(
const CLDevice& device, const ConvolutionTransposedAttributes& attr);
absl::Status CreateConvolutionTransposed3x3Thin(
Status CreateConvolutionTransposed3x3Thin(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed3x3Thin* result);


@ -301,7 +301,7 @@ ConvolutionTransposed4x4& ConvolutionTransposed4x4::operator=(
return *this;
}
absl::Status ConvolutionTransposed4x4::Compile(
Status ConvolutionTransposed4x4::Compile(
const CreationContext& creation_context) {
const auto code = GenerateConvolutionTransposedCode(
definition_, biases_, linked_operations_, weights_upload_type_);
@ -314,10 +314,11 @@ absl::Status ConvolutionTransposed4x4::Compile(
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
code, "main_function", options, *creation_context.context,
*creation_context.device, &kernel_));
return absl::OkStatus();
return OkStatus();
}
absl::Status ConvolutionTransposed4x4::BindArguments() {
Status ConvolutionTransposed4x4::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -328,7 +329,8 @@ absl::Status ConvolutionTransposed4x4::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
const int32_t filters_offset = 4 * 16 * src_[0]->Slices();
RETURN_IF_ERROR(kernel_.SetBytesAuto(filters_offset));
return absl::OkStatus();
return OkStatus();
}
int3 ConvolutionTransposed4x4::GetGridSize() const {
@ -339,7 +341,7 @@ int3 ConvolutionTransposed4x4::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvolutionTransposed4x4::AddToQueue(CLCommandQueue* queue) {
Status ConvolutionTransposed4x4::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -352,13 +354,13 @@ bool IsConvolutionTransposed4x4Supported(
attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1;
}
absl::Status CreateConvolutionTransposed4x4(
Status CreateConvolutionTransposed4x4(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed4x4* result) {
if (!IsConvolutionTransposed4x4Supported(*creation_context.device, definition,
attr)) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"ConvolutionTransposed4x4 doesn't support these attributes");
}
*result = ConvolutionTransposed4x4(definition, *creation_context.device);
@ -371,7 +373,7 @@ absl::Status CreateConvolutionTransposed4x4(
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl


@ -37,8 +37,8 @@ namespace cl {
class ConvolutionTransposed4x4 : public GPUOperation {
public:
ConvolutionTransposed4x4() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvolutionTransposed4x4(ConvolutionTransposed4x4&& operation);
@ -56,19 +56,19 @@ class ConvolutionTransposed4x4 : public GPUOperation {
private:
ConvolutionTransposed4x4(const OperationDef& definition,
const CLDevice& device);
friend absl::Status CreateConvolutionTransposed4x4(
friend Status CreateConvolutionTransposed4x4(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed4x4* result);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Buffer weights_;
@ -80,7 +80,7 @@ class ConvolutionTransposed4x4 : public GPUOperation {
};
template <DataType T>
absl::Status ConvolutionTransposed4x4::UploadWeights(
Status ConvolutionTransposed4x4::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
const int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
@ -150,7 +150,7 @@ bool IsConvolutionTransposed4x4Supported(
const CLDevice& device, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
absl::Status CreateConvolutionTransposed4x4(
Status CreateConvolutionTransposed4x4(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed4x4* result);


@ -184,7 +184,7 @@ ConvolutionTransposedThin& ConvolutionTransposedThin::operator=(
return *this;
}
absl::Status ConvolutionTransposedThin::Compile(
Status ConvolutionTransposedThin::Compile(
const CreationContext& creation_context) {
const auto code = GenerateConvolutionTransposedCode(
definition_, IntegralDivideRoundUp(src_channels_, 4), dst_channels_,
@ -201,7 +201,7 @@ absl::Status ConvolutionTransposedThin::Compile(
*creation_context.device, &kernel_);
}
absl::Status ConvolutionTransposedThin::BindArguments() {
Status ConvolutionTransposedThin::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_buf_.GetMemoryPtr()));
@ -210,7 +210,7 @@ absl::Status ConvolutionTransposedThin::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(bias_value_));
return absl::OkStatus();
return OkStatus();
}
int3 ConvolutionTransposedThin::GetGridSize() const {
@ -220,12 +220,12 @@ int3 ConvolutionTransposedThin::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ConvolutionTransposedThin::Tune(const TuningParameters& params) {
Status ConvolutionTransposedThin::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status ConvolutionTransposedThin::AddToQueue(CLCommandQueue* queue) {
Status ConvolutionTransposedThin::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -238,18 +238,18 @@ bool IsConvolutionTransposedThinSupported(
attr.padding.appended.w == 0 && attr.padding.appended.h == 0;
}
absl::Status CreateConvolutionTransposedThin(
Status CreateConvolutionTransposedThin(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposedThin* result) {
if (!IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"ConvolutionTransposedThin doesn't support these attributes");
}
*result = ConvolutionTransposedThin(definition, attr);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl
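
From the caller's side the migration is invisible apart from the type name: code still tests ok() and reads message(). A usage sketch against a hypothetical factory that echoes the unsupported-attributes errors above:

#include <iostream>

#include "absl/status/status.h"

// Hypothetical factory, not one of the Create* functions in this diff.
absl::Status CreateOperation(bool supported) {
  return supported ? absl::OkStatus()
                   : absl::InvalidArgumentError(
                         "operation doesn't support these attributes");
}

int main() {
  const absl::Status status = CreateOperation(/*supported=*/false);
  if (!status.ok()) {
    // message() exposes the constructor's text; streaming the status also
    // prints its code, e.g. "INVALID_ARGUMENT: operation doesn't ...".
    std::cout << status << "\n";
  }
  return 0;
}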


@ -38,10 +38,10 @@ namespace cl {
class ConvolutionTransposedThin : public GPUOperation {
public:
ConvolutionTransposedThin() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ConvolutionTransposedThin(ConvolutionTransposedThin&& operation);
@ -51,21 +51,21 @@ class ConvolutionTransposedThin : public GPUOperation {
delete;
private:
friend absl::Status CreateConvolutionTransposedThin(
friend Status CreateConvolutionTransposedThin(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposedThin* result);
ConvolutionTransposedThin(const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Buffer weights_buf_;
@ -80,7 +80,7 @@ class ConvolutionTransposedThin : public GPUOperation {
};
template <DataType T>
absl::Status ConvolutionTransposedThin::UploadWeights(
Status ConvolutionTransposedThin::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(src_channels_, 4);
const int elements_count =
@ -136,7 +136,7 @@ void ConvolutionTransposedThin::RearrangeWeightsData(
bool IsConvolutionTransposedThinSupported(
const CLDevice& device, const ConvolutionTransposedAttributes& attr);
absl::Status CreateConvolutionTransposedThin(
Status CreateConvolutionTransposedThin(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposedThin* result);


@ -226,8 +226,7 @@ DepthWiseConvolution& DepthWiseConvolution::operator=(
return *this;
}
absl::Status DepthWiseConvolution::Compile(
const CreationContext& creation_context) {
Status DepthWiseConvolution::Compile(const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
const auto code = GenerateDepthWiseConvolutionCode(
@ -238,7 +237,7 @@ absl::Status DepthWiseConvolution::Compile(
*creation_context.device, &kernel_);
}
absl::Status DepthWiseConvolution::BindArguments() {
Status DepthWiseConvolution::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_));
@ -256,7 +255,7 @@ absl::Status DepthWiseConvolution::BindArguments() {
}
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 DepthWiseConvolution::GetGridSize() const {
@ -266,20 +265,20 @@ int3 DepthWiseConvolution::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status DepthWiseConvolution::Tune(const TuningParameters& params) {
Status DepthWiseConvolution::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status DepthWiseConvolution::AddToQueue(CLCommandQueue* queue) {
Status DepthWiseConvolution::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateDepthWiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConvolution* result) {
Status CreateDepthWiseConvolution(const CreationContext& creation_context,
const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConvolution* result) {
bool weights_are_buffer = creation_context.device->IsMali();
*result = DepthWiseConvolution(definition, attr, weights_are_buffer);
RETURN_IF_ERROR(
@ -292,7 +291,7 @@ absl::Status CreateDepthWiseConvolution(
create_info.aligned_size = attr.weights.shape.o * attr.weights.shape.i;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl


@ -38,10 +38,10 @@ namespace cl {
class DepthWiseConvolution : public GPUOperation {
public:
DepthWiseConvolution() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
DepthWiseConvolution(DepthWiseConvolution&& operation);
@ -50,7 +50,7 @@ class DepthWiseConvolution : public GPUOperation {
DepthWiseConvolution& operator=(const DepthWiseConvolution&) = delete;
private:
friend absl::Status CreateDepthWiseConvolution(
friend Status CreateDepthWiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConvolution* result);
@ -58,14 +58,14 @@ class DepthWiseConvolution : public GPUOperation {
const DepthwiseConvolution2DAttributes& attr,
bool weights_are_buffer);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
bool weights_are_buffer_;
@ -86,7 +86,7 @@ class DepthWiseConvolution : public GPUOperation {
};
template <DataType T>
absl::Status DepthWiseConvolution::UploadWeights(
Status DepthWiseConvolution::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_depth = IntegralDivideRoundUp(dst_channels, 4);
@ -130,7 +130,7 @@ absl::Status DepthWiseConvolution::UploadWeights(
weights_ = weights_tex2d_.GetMemoryPtr();
}
return absl::OkStatus();
return OkStatus();
}
template <DataType S, typename T>
@ -162,9 +162,10 @@ void DepthWiseConvolution::RearrangeWeightsData(
}
}
absl::Status CreateDepthWiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConvolution* result);
Status CreateDepthWiseConvolution(const CreationContext& creation_context,
const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConvolution* result);
} // namespace cl
} // namespace gpu


@ -256,7 +256,7 @@ DepthWiseConvolution3D& DepthWiseConvolution3D::operator=(
return *this;
}
absl::Status DepthWiseConvolution3D::Compile(
Status DepthWiseConvolution3D::Compile(
const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
@ -268,7 +268,7 @@ absl::Status DepthWiseConvolution3D::Compile(
*creation_context.device, &kernel_);
}
absl::Status DepthWiseConvolution3D::BindArguments() {
Status DepthWiseConvolution3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
if (weights_are_buffer_) {
@ -295,7 +295,7 @@ absl::Status DepthWiseConvolution3D::BindArguments() {
}
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHDS()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHDS()));
return absl::OkStatus();
return OkStatus();
}
int3 DepthWiseConvolution3D::GetGridSize() const {
@ -305,17 +305,17 @@ int3 DepthWiseConvolution3D::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status DepthWiseConvolution3D::Tune(const TuningParameters& params) {
Status DepthWiseConvolution3D::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status DepthWiseConvolution3D::AddToQueue(CLCommandQueue* queue) {
Status DepthWiseConvolution3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateDepthWiseConvolution3D(
Status CreateDepthWiseConvolution3D(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
DepthWiseConvolution3D* result) {
@ -330,7 +330,7 @@ absl::Status CreateDepthWiseConvolution3D(
create_info.aligned_size = attr.weights.shape.o * attr.weights.shape.i;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -38,10 +38,10 @@ namespace cl {
class DepthWiseConvolution3D : public GPUOperation {
public:
DepthWiseConvolution3D() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
DepthWiseConvolution3D(DepthWiseConvolution3D&& operation);
@ -50,7 +50,7 @@ class DepthWiseConvolution3D : public GPUOperation {
DepthWiseConvolution3D& operator=(const DepthWiseConvolution3D&) = delete;
private:
friend absl::Status CreateDepthWiseConvolution3D(
friend Status CreateDepthWiseConvolution3D(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
DepthWiseConvolution3D* result);
@ -58,14 +58,14 @@ class DepthWiseConvolution3D : public GPUOperation {
const DepthwiseConvolution3DAttributes& attr,
const CLDevice& device);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWDI, T>& weights,
CLContext* context);
template <DataType S, typename T>
void RearrangeWeightsData(const ::tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Texture2D weights_tex2d_;
@ -85,7 +85,7 @@ class DepthWiseConvolution3D : public GPUOperation {
};
template <DataType T>
absl::Status DepthWiseConvolution3D::UploadWeights(
Status DepthWiseConvolution3D::UploadWeights(
const ::tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = IntegralDivideRoundUp(dst_channels, 4);
@ -123,7 +123,7 @@ absl::Status DepthWiseConvolution3D::UploadWeights(
gpu_data.data(), context, &weights_tex2d_));
}
}
return absl::OkStatus();
return OkStatus();
}
template <DataType S, typename T>
@ -158,7 +158,7 @@ void DepthWiseConvolution3D::RearrangeWeightsData(
}
}
absl::Status CreateDepthWiseConvolution3D(
Status CreateDepthWiseConvolution3D(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
DepthWiseConvolution3D* result);

View File

@ -297,8 +297,7 @@ DepthWiseConv3x3& DepthWiseConv3x3::operator=(DepthWiseConv3x3&& operation) {
return *this;
}
absl::Status DepthWiseConv3x3::Compile(
const CreationContext& creation_context) {
Status DepthWiseConv3x3::Compile(const CreationContext& creation_context) {
std::string code = GenerateDepthWiseConvCode(
definition_, linked_operations_, *creation_context.device,
weights_are_buffer_, local_mem_uploads_);
@ -312,14 +311,15 @@ absl::Status DepthWiseConv3x3::Compile(
*creation_context.device, &kernel_);
}
absl::Status DepthWiseConv3x3::BindArguments() {
Status DepthWiseConv3x3::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 DepthWiseConv3x3::GetGridSize() const {
@ -329,15 +329,15 @@ int3 DepthWiseConv3x3::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status DepthWiseConv3x3::Tune(const TuningParameters& params) {
Status DepthWiseConv3x3::Tune(const TuningParameters& params) {
if (local_mem_uploads_) {
return absl::OkStatus();
return OkStatus();
}
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status DepthWiseConv3x3::AddToQueue(CLCommandQueue* queue) {
Status DepthWiseConv3x3::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -351,11 +351,12 @@ bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
attr.padding.appended.h == 1;
}
absl::Status CreateDepthWiseConv3x3(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result) {
Status CreateDepthWiseConv3x3(const CreationContext& creation_context,
const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConv3x3* result) {
if (!IsDepthWiseConv3x3Supported(attr)) {
return absl::InvalidArgumentError(
return InvalidArgumentError(
"DepthWiseConv3x3 doesn't support this attributes");
}
bool weights_are_buffer =
@ -363,8 +364,9 @@ absl::Status CreateDepthWiseConv3x3(
bool local_mem_uploads =
weights_are_buffer && creation_context.device->IsPowerVR();
*result = DepthWiseConv3x3(definition, weights_are_buffer, local_mem_uploads);
return result->UploadWeightsAndBiases(attr.weights, attr.bias,
creation_context.context);
RETURN_IF_ERROR(result->UploadWeightsAndBiases(attr.weights, attr.bias,
creation_context.context));
return OkStatus();
}
} // namespace cl
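
The hunk above swaps a tail call for RETURN_IF_ERROR plus an explicit OkStatus(). A sketch of how such a macro is commonly written over absl::Status; the macro used here is tflite's own, so treat this as an assumption rather than its actual definition:

#include "absl/status/status.h"

// Evaluate `expr` once; early-return its status if it is not ok.
#define RETURN_IF_ERROR(expr)            \
  do {                                   \
    const absl::Status _status = (expr); \
    if (!_status.ok()) return _status;   \
  } while (0)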

View File

@ -38,10 +38,10 @@ namespace cl {
class DepthWiseConv3x3 : public GPUOperation {
public:
DepthWiseConv3x3() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
DepthWiseConv3x3(DepthWiseConv3x3&& operation);
@ -53,11 +53,11 @@ class DepthWiseConv3x3 : public GPUOperation {
explicit DepthWiseConv3x3(const OperationDef& definition,
bool weights_are_buffer, bool local_mem_uploads);
template <DataType T>
absl::Status UploadWeightsAndBiases(
const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases, CLContext* context);
Status UploadWeightsAndBiases(const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
friend absl::Status CreateDepthWiseConv3x3(
friend Status CreateDepthWiseConv3x3(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result);
@ -66,7 +66,7 @@ class DepthWiseConv3x3 : public GPUOperation {
const ::tflite::gpu::Tensor<OHWI, S>& weights,
const ::tflite::gpu::Tensor<Linear, S>& biases, absl::Span<T> dst);
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
bool weights_are_buffer_;
@ -80,7 +80,7 @@ class DepthWiseConv3x3 : public GPUOperation {
};
template <DataType T>
absl::Status DepthWiseConv3x3::UploadWeightsAndBiases(
Status DepthWiseConv3x3::UploadWeightsAndBiases(
const ::tflite::gpu::Tensor<OHWI, T>& weights,
const ::tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
@ -122,7 +122,7 @@ absl::Status DepthWiseConv3x3::UploadWeightsAndBiases(
weights_ = weights_tex2d_.GetMemoryPtr();
}
return absl::OkStatus();
return OkStatus();
}
template <DataType S, typename T>
@ -160,9 +160,10 @@ void DepthWiseConv3x3::RearrangeWeightsAndBiasesData(
bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr);
absl::Status CreateDepthWiseConv3x3(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result);
Status CreateDepthWiseConv3x3(const CreationContext& creation_context,
const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConv3x3* result);
} // namespace cl
} // namespace gpu

View File

@ -203,14 +203,14 @@ std::string ElementwiseTwoInput::GetArgsDeclaration() const {
return args;
}
absl::Status ElementwiseTwoInput::BindArguments(CLKernel* kernel) {
Status ElementwiseTwoInput::BindArguments(CLKernel* kernel) {
if (use_scalar_para_) {
RETURN_IF_ERROR(kernel->SetBytesAuto(scalar_para_));
} else {
RETURN_IF_ERROR(kernel->SetMemoryAuto(src_[1]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel->SetBytesAuto(src_[1]->GetWBatchedHSB()));
}
return absl::OkStatus();
return OkStatus();
}
ElementwiseTwoInput CreateElementwiseTwoInput(

View File

@ -75,7 +75,7 @@ class ElementwiseTwoInput : public ElementwiseOperation {
void SetLinkIndex(int index) override;
std::string GetCoreCode(const LinkingContext& context) const override;
std::string GetArgsDeclaration() const override;
absl::Status BindArguments(CLKernel* kernel) override;
Status BindArguments(CLKernel* kernel) override;
inline void SetScalarPara(FLT scalar) {
scalar_para_ = scalar;
use_scalar_para_ = true;

View File

@ -113,7 +113,7 @@ FullyConnected& FullyConnected::operator=(FullyConnected&& kernel) {
return *this;
}
absl::Status FullyConnected::Compile(const CreationContext& creation_context) {
Status FullyConnected::Compile(const CreationContext& creation_context) {
int wg_width = 32;
int wg_height = 4;
int work_items;
@ -134,10 +134,10 @@ absl::Status FullyConnected::Compile(const CreationContext& creation_context) {
}
work_items = work_group_size_.x * work_group_size_.y * work_group_size_.z;
} while (work_items > kernel_.GetMaxWorkGroupSize());
return absl::OkStatus();
return OkStatus();
}
absl::Status FullyConnected::AddToQueue(CLCommandQueue* queue) {
Status FullyConnected::AddToQueue(CLCommandQueue* queue) {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr()));
@ -146,14 +146,15 @@ absl::Status FullyConnected::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
RETURN_IF_ERROR(
kernel_.SetBytesAuto(int2(src_[0]->Slices(), dst_[0]->Slices())));
return queue->DispatchImplicit(kernel_, {dst_[0]->Slices(), 1, 1},
work_group_size_);
}
absl::Status CreateFullyConnected(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
FullyConnected* result) {
Status CreateFullyConnected(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
FullyConnected* result) {
*result = FullyConnected(definition);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));
@ -164,7 +165,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context,
create_info.aligned_size = attr.weights.shape.o;
RETURN_IF_ERROR(CreateLinearStorage(
create_info, attr.bias, creation_context.context, &result->biases_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -37,9 +37,9 @@ namespace cl {
class FullyConnected : public GPUOperation {
public:
FullyConnected() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
FullyConnected(FullyConnected&& kernel);
@ -49,13 +49,14 @@ class FullyConnected : public GPUOperation {
private:
explicit FullyConnected(const OperationDef& definition);
friend absl::Status CreateFullyConnected(
const CreationContext& creation_context, const OperationDef& definition,
const FullyConnectedAttributes& attr, FullyConnected* result);
friend Status CreateFullyConnected(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
FullyConnected* result);
template <DataType T>
absl::Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
Status UploadWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
CLContext* context);
template <DataType T, typename S>
void RearrangeWeights(const ::tflite::gpu::Tensor<OHWI, T>& weights,
@ -68,7 +69,7 @@ class FullyConnected : public GPUOperation {
};
template <DataType T>
absl::Status FullyConnected::UploadWeights(
Status FullyConnected::UploadWeights(
const ::tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
const int dst_depth = IntegralDivideRoundUp(weights.shape.o, 4);
@ -122,10 +123,10 @@ void FullyConnected::RearrangeWeights(
}
}
absl::Status CreateFullyConnected(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
FullyConnected* result);
Status CreateFullyConnected(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
FullyConnected* result);
} // namespace cl
} // namespace gpu
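
Every Create* entry point in these kernels shares the out-parameter-plus-status shape declared above. A hypothetical call site, assuming creation_context, definition, and attr are set up elsewhere:

FullyConnected fc;
absl::Status status =
    CreateFullyConnected(creation_context, definition, attr, &fc);
if (!status.ok()) {
  // e.g. log status.message() and fall back to a different kernel.
}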

View File

@ -154,7 +154,7 @@ ElementwiseOperation& ElementwiseOperation::operator=(
return *this;
}
absl::Status ElementwiseOperation::BindArguments() {
Status ElementwiseOperation::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArguments(&kernel_));
@ -162,7 +162,7 @@ absl::Status ElementwiseOperation::BindArguments() {
RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 ElementwiseOperation::GetGridSize() const {
@ -172,20 +172,19 @@ int3 ElementwiseOperation::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status ElementwiseOperation::Compile(
const CreationContext& creation_context) {
Status ElementwiseOperation::Compile(const CreationContext& creation_context) {
const auto code = GetElementWiseCode(definition_, *this, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_);
}
absl::Status ElementwiseOperation::AddToQueue(CLCommandQueue* queue) {
Status ElementwiseOperation::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status ElementwiseOperation::Tune(const TuningParameters& params) {
Status ElementwiseOperation::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
@ -210,12 +209,12 @@ std::string PostProcess(const std::vector<ElementwiseOperation*>& linked_ops,
return code;
}
absl::Status BindArgs(CLKernel* kernel,
const std::vector<ElementwiseOperation*>& linked_ops) {
Status BindArgs(CLKernel* kernel,
const std::vector<ElementwiseOperation*>& linked_ops) {
for (auto linked_op : linked_ops) {
RETURN_IF_ERROR(linked_op->BindArguments(kernel));
}
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -96,15 +96,11 @@ class GPUOperation {
void SetSrc(Tensor* ptr, int index = 0);
void SetDst(Tensor* ptr, int index = 0);
virtual absl::Status AddToQueue(CLCommandQueue* queue) {
return absl::OkStatus();
}
virtual absl::Status Tune(const TuningParameters& params) {
return absl::OkStatus();
}
virtual Status AddToQueue(CLCommandQueue* queue) { return OkStatus(); }
virtual Status Tune(const TuningParameters& params) { return OkStatus(); }
virtual absl::Status Compile(const CreationContext& creation_context) {
return absl::OkStatus();
virtual Status Compile(const CreationContext& creation_context) {
return OkStatus();
}
const OperationDef& GetDefinition() const { return definition_; }
@ -131,10 +127,10 @@ class ElementwiseOperation : public GPUOperation {
: GPUOperation(definition) {}
virtual ~ElementwiseOperation() {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
ElementwiseOperation(ElementwiseOperation&& operation);
@ -154,12 +150,10 @@ class ElementwiseOperation : public GPUOperation {
virtual std::string GetCoreCode(const LinkingContext& context) const = 0;
virtual std::string GetArgsDeclaration() const { return ""; }
virtual absl::Status BindArguments(CLKernel* kernel) {
return absl::OkStatus();
}
virtual Status BindArguments(CLKernel* kernel) { return OkStatus(); }
protected:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
CLKernel kernel_;
int3 work_group_size_ = int3(8, 4, 1);
@ -177,8 +171,8 @@ std::string PostProcess(const std::vector<ElementwiseOperation*>& linked_ops,
// Binds arguments to the given kernel for elementwise operations in
// linked_ops.
// Every ElementwiseOperation can bind its arguments.
absl::Status BindArgs(CLKernel* kernel,
const std::vector<ElementwiseOperation*>& linked_ops);
Status BindArgs(CLKernel* kernel,
const std::vector<ElementwiseOperation*>& linked_ops);
} // namespace cl
} // namespace gpu
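
Taken together, the virtuals above imply a three-step lifecycle for every GPUOperation. A sketch of a typical driver sequence, with op, creation_context, tuning_params, and queue assumed to exist:

// Compile once, optionally tune the work-group size, then enqueue per run.
RETURN_IF_ERROR(op->Compile(creation_context));
RETURN_IF_ERROR(op->Tune(tuning_params));  // picks work_group_size_
RETURN_IF_ERROR(op->AddToQueue(queue));    // binds arguments and dispatches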

View File

@ -121,14 +121,14 @@ LSTM& LSTM::operator=(LSTM&& kernel) {
return *this;
}
absl::Status LSTM::Compile(const CreationContext& creation_context) {
Status LSTM::Compile(const CreationContext& creation_context) {
const auto code = GetLSTMCode(definition_, *creation_context.device);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_);
}
absl::Status LSTM::BindArguments() {
Status LSTM::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[1]->GetMemoryPtr()));
@ -137,7 +137,8 @@ absl::Status LSTM::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->Batch()));
return absl::OkStatus();
return OkStatus();
}
int3 LSTM::GetGridSize() const {
@ -147,12 +148,12 @@ int3 LSTM::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status LSTM::Tune(const TuningParameters& params) {
Status LSTM::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status LSTM::AddToQueue(CLCommandQueue* queue) {
Status LSTM::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -28,9 +28,9 @@ namespace cl {
class LSTM : public GPUOperation {
public:
explicit LSTM(const OperationDef& definition);
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
LSTM(LSTM&& kernel);
@ -39,7 +39,7 @@ class LSTM : public GPUOperation {
LSTM& operator=(const LSTM&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
CLKernel kernel_;

View File

@ -218,7 +218,7 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
return *this;
}
absl::Status MaxUnpooling::Compile(const CreationContext& creation_context) {
Status MaxUnpooling::Compile(const CreationContext& creation_context) {
const auto code = GetMaxUnpoolingKernelCode(
definition_, *creation_context.device, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
@ -226,7 +226,7 @@ absl::Status MaxUnpooling::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status MaxUnpooling::BindArguments() {
Status MaxUnpooling::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[1]->GetMemoryPtr()));
@ -237,7 +237,8 @@ absl::Status MaxUnpooling::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(kernel_size_));
RETURN_IF_ERROR(kernel_.SetBytesAuto(padding_));
RETURN_IF_ERROR(kernel_.SetBytesAuto(stride_));
return absl::OkStatus();
return OkStatus();
}
int3 MaxUnpooling::GetGridSize() const {
@ -247,12 +248,12 @@ int3 MaxUnpooling::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status MaxUnpooling::Tune(const TuningParameters& params) {
Status MaxUnpooling::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status MaxUnpooling::AddToQueue(CLCommandQueue* queue) {
Status MaxUnpooling::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -290,7 +291,7 @@ MaxUnpooling3D& MaxUnpooling3D::operator=(MaxUnpooling3D&& kernel) {
return *this;
}
absl::Status MaxUnpooling3D::Compile(const CreationContext& creation_context) {
Status MaxUnpooling3D::Compile(const CreationContext& creation_context) {
const auto code = GetMaxUnpooling3DKernelCode(
definition_, *creation_context.device, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
@ -298,7 +299,7 @@ absl::Status MaxUnpooling3D::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status MaxUnpooling3D::BindArguments() {
Status MaxUnpooling3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[1]->GetMemoryPtr()));
@ -315,7 +316,8 @@ absl::Status MaxUnpooling3D::BindArguments() {
kernel_.SetBytesAuto(int4(padding_.x, padding_.y, padding_.z, 1)));
RETURN_IF_ERROR(
kernel_.SetBytesAuto(int4(stride_.x, stride_.y, stride_.z, 1)));
return absl::OkStatus();
return OkStatus();
}
int3 MaxUnpooling3D::GetGridSize() const {
@ -325,12 +327,12 @@ int3 MaxUnpooling3D::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status MaxUnpooling3D::Tune(const TuningParameters& params) {
Status MaxUnpooling3D::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status MaxUnpooling3D::AddToQueue(CLCommandQueue* queue) {
Status MaxUnpooling3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -29,10 +29,10 @@ class MaxUnpooling : public GPUOperation {
public:
MaxUnpooling(const OperationDef& definition,
const MaxUnpooling2DAttributes& attr);
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
MaxUnpooling(MaxUnpooling&& kernel);
@ -41,7 +41,7 @@ class MaxUnpooling : public GPUOperation {
MaxUnpooling& operator=(const MaxUnpooling&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
int2 stride_;
@ -59,10 +59,10 @@ class MaxUnpooling3D : public GPUOperation {
public:
MaxUnpooling3D(const OperationDef& definition,
const MaxUnpooling3DAttributes& attr);
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
MaxUnpooling3D(MaxUnpooling3D&& kernel);
@ -71,7 +71,7 @@ class MaxUnpooling3D : public GPUOperation {
MaxUnpooling3D& operator=(const MaxUnpooling3D&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
int3 stride_;

View File

@ -103,7 +103,7 @@ Mean& Mean::operator=(Mean&& operation) {
return *this;
}
absl::Status Mean::Compile(const CreationContext& creation_context) {
Status Mean::Compile(const CreationContext& creation_context) {
if (creation_context.device->IsAdreno3xx()) {
work_group_size_ = int3(16, 8, 1);
}
@ -114,7 +114,7 @@ absl::Status Mean::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Mean::BindArguments() {
Status Mean::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@ -124,7 +124,7 @@ absl::Status Mean::BindArguments() {
const double size_0 = work_group_size_.x * work_group_size_.y;
const double size_1 = total_size / size_0;
RETURN_IF_ERROR(kernel_.SetBytesAuto(float2(1.0 / size_1, 1.0 / size_0)));
return absl::OkStatus();
return OkStatus();
}
int3 Mean::GetGridSize() const {
@ -134,7 +134,7 @@ int3 Mean::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Mean::AddToQueue(CLCommandQueue* queue) {
Status Mean::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -30,9 +30,9 @@ class Mean : public GPUOperation {
public:
Mean() = default;
explicit Mean(const OperationDef& definition) : GPUOperation(definition) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Mean(Mean&& operation);
@ -41,7 +41,7 @@ class Mean : public GPUOperation {
Mean& operator=(const Mean&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
CLKernel kernel_;

View File

@ -89,7 +89,7 @@ std::string MultiplyAdd::GetArgsDeclaration() const {
return args;
}
absl::Status MultiplyAdd::BindArguments(CLKernel* kernel) {
Status MultiplyAdd::BindArguments(CLKernel* kernel) {
if (use_mul_vec_) {
RETURN_IF_ERROR(kernel->SetMemoryAuto(mul_vec_.GetMemoryPtr()));
}
@ -102,12 +102,12 @@ absl::Status MultiplyAdd::BindArguments(CLKernel* kernel) {
if (scalar_add_.Active()) {
RETURN_IF_ERROR(kernel->SetBytesAuto(scalar_add_));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status MultiplyAdd::UploadMul(const MultiplyAttributes& attr,
CalculationsPrecision scalar_precision,
CLContext* context) {
Status MultiplyAdd::UploadMul(const MultiplyAttributes& attr,
CalculationsPrecision scalar_precision,
CLContext* context) {
auto mul = absl::get_if<::tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(
&attr.param);
auto mul_scalar = absl::get_if<float>(&attr.param);
@ -116,12 +116,12 @@ absl::Status MultiplyAdd::UploadMul(const MultiplyAttributes& attr,
} else {
scalar_mul_ = FLT(scalar_precision, *mul_scalar);
}
return absl::OkStatus();
return OkStatus();
}
absl::Status MultiplyAdd::UploadAdd(const AddAttributes& attr,
CalculationsPrecision scalar_precision,
CLContext* context) {
Status MultiplyAdd::UploadAdd(const AddAttributes& attr,
CalculationsPrecision scalar_precision,
CLContext* context) {
auto add = absl::get_if<::tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(
&attr.param);
auto add_scalar = absl::get_if<float>(&attr.param);
@ -130,13 +130,12 @@ absl::Status MultiplyAdd::UploadAdd(const AddAttributes& attr,
} else {
scalar_add_ = FLT(scalar_precision, *add_scalar);
}
return absl::OkStatus();
return OkStatus();
}
absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& attr,
MultiplyAdd* result) {
Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& attr, MultiplyAdd* result) {
const auto scalar_precision = creation_context.device->IsPowerVR()
? CalculationsPrecision::F32
: definition.precision;
@ -144,12 +143,12 @@ absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
RETURN_IF_ERROR(
result->UploadMul(attr, scalar_precision, creation_context.context));
result->SetLinkIndex(0);
return absl::OkStatus();
return OkStatus();
}
absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const AddAttributes& attr, MultiplyAdd* result) {
Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const AddAttributes& attr, MultiplyAdd* result) {
const auto scalar_precision = creation_context.device->IsPowerVR()
? CalculationsPrecision::F32
: definition.precision;
@ -157,14 +156,13 @@ absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
RETURN_IF_ERROR(
result->UploadAdd(attr, scalar_precision, creation_context.context));
result->SetLinkIndex(0);
return absl::OkStatus();
return OkStatus();
}
absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& mul_attr,
const AddAttributes& add_attr,
MultiplyAdd* result) {
Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& mul_attr,
const AddAttributes& add_attr, MultiplyAdd* result) {
const auto scalar_precision = creation_context.device->IsPowerVR()
? CalculationsPrecision::F32
: definition.precision;
@ -174,7 +172,7 @@ absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
RETURN_IF_ERROR(
result->UploadAdd(add_attr, scalar_precision, creation_context.context));
result->SetLinkIndex(0);
return absl::OkStatus();
return OkStatus();
}
} // namespace cl
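
UploadMul and UploadAdd above dispatch on a variant attribute: a per-channel tensor is uploaded as a LinearStorage, while a bare float is inlined as an FLT scalar. The same absl::get_if pattern with simplified types, purely as illustration:

#include <vector>
#include "absl/types/variant.h"

void HandleParam(const absl::variant<std::vector<float>, float>& param) {
  if (const auto* per_channel = absl::get_if<std::vector<float>>(&param)) {
    // Per-channel case: upload *per_channel into a GPU buffer.
  } else {
    // Scalar case: embed the value at the chosen calculation precision.
    const float scalar = absl::get<float>(param);
    (void)scalar;
  }
}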

View File

@ -40,42 +40,40 @@ class MultiplyAdd : public ElementwiseOperation {
MultiplyAdd(const MultiplyAdd&) = delete;
MultiplyAdd& operator=(const MultiplyAdd&) = delete;
absl::Status UploadMul(const MultiplyAttributes& attr,
CalculationsPrecision scalar_precision,
CLContext* context);
absl::Status UploadAdd(const AddAttributes& attr,
CalculationsPrecision scalar_precision,
CLContext* context);
Status UploadMul(const MultiplyAttributes& attr,
CalculationsPrecision scalar_precision, CLContext* context);
Status UploadAdd(const AddAttributes& attr,
CalculationsPrecision scalar_precision, CLContext* context);
template <DataType T>
absl::Status UploadMul(const ::tflite::gpu::Tensor<Linear, T>& mul,
CLContext* context);
Status UploadMul(const ::tflite::gpu::Tensor<Linear, T>& mul,
CLContext* context);
template <DataType T>
absl::Status UploadAdd(const ::tflite::gpu::Tensor<Linear, T>& add,
CLContext* context);
Status UploadAdd(const ::tflite::gpu::Tensor<Linear, T>& add,
CLContext* context);
void SetLinkIndex(int index) override;
std::string GetCoreCode(const LinkingContext& context) const override;
std::string GetArgsDeclaration() const override;
absl::Status BindArguments(CLKernel* kernel) override;
Status BindArguments(CLKernel* kernel) override;
friend absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& attr,
MultiplyAdd* result);
friend Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& attr,
MultiplyAdd* result);
friend absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const AddAttributes& attr,
MultiplyAdd* result);
friend Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const AddAttributes& attr,
MultiplyAdd* result);
friend absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& mul_attr,
const AddAttributes& add_attr,
MultiplyAdd* result);
friend Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& mul_attr,
const AddAttributes& add_attr,
MultiplyAdd* result);
private:
explicit MultiplyAdd(const OperationDef& definition)
@ -91,43 +89,41 @@ class MultiplyAdd : public ElementwiseOperation {
FLT scalar_add_;
};
absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& attr,
MultiplyAdd* result);
Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& attr, MultiplyAdd* result);
absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const AddAttributes& attr, MultiplyAdd* result);
Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const AddAttributes& attr, MultiplyAdd* result);
absl::Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& mul_attr,
const AddAttributes& add_attr,
MultiplyAdd* result);
Status CreateMultiplyAdd(const CreationContext& creation_context,
const OperationDef& definition,
const MultiplyAttributes& mul_attr,
const AddAttributes& add_attr, MultiplyAdd* result);
template <DataType T>
absl::Status MultiplyAdd::UploadMul(const ::tflite::gpu::Tensor<Linear, T>& mul,
CLContext* context) {
Status MultiplyAdd::UploadMul(const ::tflite::gpu::Tensor<Linear, T>& mul,
CLContext* context) {
LinearStorageCreateInfo create_info;
create_info.storage_type =
DeduceLinearStorageType(definition_.GetPrimaryStorageType());
create_info.data_type = definition_.GetDataType();
RETURN_IF_ERROR(CreateLinearStorage(create_info, mul, context, &mul_vec_));
use_mul_vec_ = true;
return absl::OkStatus();
return OkStatus();
}
template <DataType T>
absl::Status MultiplyAdd::UploadAdd(const ::tflite::gpu::Tensor<Linear, T>& add,
CLContext* context) {
Status MultiplyAdd::UploadAdd(const ::tflite::gpu::Tensor<Linear, T>& add,
CLContext* context) {
LinearStorageCreateInfo create_info;
create_info.storage_type =
DeduceLinearStorageType(definition_.GetPrimaryStorageType());
create_info.data_type = definition_.GetDataType();
RETURN_IF_ERROR(CreateLinearStorage(create_info, add, context, &add_vec_));
use_add_vec_ = true;
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -169,7 +169,7 @@ Padding& Padding::operator=(Padding&& kernel) {
return *this;
}
absl::Status Padding::Compile(const CreationContext& creation_context) {
Status Padding::Compile(const CreationContext& creation_context) {
const auto code =
GetPaddingCode(definition_, linked_operations_, attributes_);
return creation_context.cache->GetOrCreateCLKernel(
@ -177,7 +177,7 @@ absl::Status Padding::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Padding::BindArguments() {
Status Padding::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@ -187,7 +187,7 @@ absl::Status Padding::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
const auto& prep = attributes_.prepended;
RETURN_IF_ERROR(kernel_.SetBytesAuto(int4(prep.w, prep.h, prep.c, prep.b)));
return absl::OkStatus();
return OkStatus();
}
int3 Padding::GetGridSize() const {
@ -197,12 +197,12 @@ int3 Padding::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Padding::Tune(const TuningParameters& params) {
Status Padding::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status Padding::AddToQueue(CLCommandQueue* queue) {
Status Padding::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -28,10 +28,10 @@ namespace cl {
class Padding : public GPUOperation {
public:
Padding(const OperationDef& definition, const PadAttributes& attr);
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Padding(Padding&& kernel);
@ -40,7 +40,7 @@ class Padding : public GPUOperation {
Padding& operator=(const Padding&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
PadAttributes attributes_;

View File

@ -408,7 +408,7 @@ Pooling& Pooling::operator=(Pooling&& kernel) {
return *this;
}
absl::Status Pooling::Compile(const CreationContext& creation_context) {
Status Pooling::Compile(const CreationContext& creation_context) {
std::string code;
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
@ -423,7 +423,7 @@ absl::Status Pooling::Compile(const CreationContext& creation_context) {
linked_operations_, output_indices_);
break;
default:
return absl::InvalidArgumentError(
return InvalidArgumentError(
"You should create another kernel with this params");
break;
}
@ -432,7 +432,7 @@ absl::Status Pooling::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Pooling::BindArguments() {
Status Pooling::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@ -447,7 +447,7 @@ absl::Status Pooling::BindArguments() {
kernel_.SetBytesAuto(int2(padding_.x * src_[0]->Batch(), padding_.y)));
RETURN_IF_ERROR(kernel_.SetBytesAuto(stride_));
return absl::OkStatus();
return OkStatus();
}
int3 Pooling::GetGridSize() const {
@ -457,12 +457,12 @@ int3 Pooling::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Pooling::Tune(const TuningParameters& params) {
Status Pooling::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status Pooling::AddToQueue(CLCommandQueue* queue) {
Status Pooling::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
@ -506,7 +506,7 @@ Pooling3D& Pooling3D::operator=(Pooling3D&& kernel) {
return *this;
}
absl::Status Pooling3D::Compile(const CreationContext& creation_context) {
Status Pooling3D::Compile(const CreationContext& creation_context) {
std::string code;
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
@ -521,7 +521,7 @@ absl::Status Pooling3D::Compile(const CreationContext& creation_context) {
linked_operations_, output_indices_);
break;
default:
return absl::InvalidArgumentError(
return InvalidArgumentError(
"You should create another kernel with this params");
break;
}
@ -530,7 +530,7 @@ absl::Status Pooling3D::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Pooling3D::BindArguments() {
Status Pooling3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@ -550,7 +550,7 @@ absl::Status Pooling3D::BindArguments() {
RETURN_IF_ERROR(
kernel_.SetBytesAuto(int4(stride_.x, stride_.y, stride_.z, 1)));
return absl::OkStatus();
return OkStatus();
}
int3 Pooling3D::GetGridSize() const {
@ -560,12 +560,12 @@ int3 Pooling3D::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Pooling3D::Tune(const TuningParameters& params) {
Status Pooling3D::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status Pooling3D::AddToQueue(CLCommandQueue* queue) {
Status Pooling3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -30,10 +30,10 @@ namespace cl {
class Pooling : public GPUOperation {
public:
Pooling(const OperationDef& definition, const Pooling2DAttributes& attr);
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Pooling(Pooling&& kernel);
@ -42,7 +42,7 @@ class Pooling : public GPUOperation {
Pooling& operator=(const Pooling&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
int2 stride_;
@ -62,10 +62,10 @@ Pooling CreatePooling(const OperationDef& definition,
class Pooling3D : public GPUOperation {
public:
Pooling3D(const OperationDef& definition, const Pooling3DAttributes& attr);
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Pooling3D(Pooling3D&& kernel);
@ -74,7 +74,7 @@ class Pooling3D : public GPUOperation {
Pooling3D& operator=(const Pooling3D&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
int3 stride_;

View File

@ -73,21 +73,21 @@ std::string PReLU::GetArgsDeclaration() const {
return args;
}
absl::Status PReLU::BindArguments(CLKernel* kernel) {
Status PReLU::BindArguments(CLKernel* kernel) {
RETURN_IF_ERROR(kernel->SetMemoryAuto(alpha_.GetMemoryPtr()));
if (clip_.Active()) {
RETURN_IF_ERROR(kernel->SetBytesAuto(clip_));
}
return absl::OkStatus();
return OkStatus();
}
absl::Status CreatePReLU(const CreationContext& creation_context,
const OperationDef& definition,
const PReLUAttributes& attr, PReLU* result) {
Status CreatePReLU(const CreationContext& creation_context,
const OperationDef& definition, const PReLUAttributes& attr,
PReLU* result) {
auto alpha = absl::get_if<::tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(
&attr.alpha);
if (!alpha) {
return absl::InvalidArgumentError("Alpha is missing");
return InvalidArgumentError("Alpha is missing");
}
const auto scalar_precision = creation_context.device->IsPowerVR()
? CalculationsPrecision::F32
@ -95,7 +95,7 @@ absl::Status CreatePReLU(const CreationContext& creation_context,
*result = PReLU(definition, attr, scalar_precision);
RETURN_IF_ERROR(result->UploadParameters(*alpha, creation_context.context));
result->SetLinkIndex(0);
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -44,30 +44,30 @@ class PReLU : public ElementwiseOperation {
void SetLinkIndex(int index) override;
std::string GetCoreCode(const LinkingContext& context) const override;
std::string GetArgsDeclaration() const override;
absl::Status BindArguments(CLKernel* kernel) override;
Status BindArguments(CLKernel* kernel) override;
friend absl::Status CreatePReLU(const CreationContext& creation_context,
const OperationDef& definition,
const PReLUAttributes& attr, PReLU* result);
friend Status CreatePReLU(const CreationContext& creation_context,
const OperationDef& definition,
const PReLUAttributes& attr, PReLU* result);
private:
PReLU(const OperationDef& definition, const PReLUAttributes& attr,
CalculationsPrecision scalar_precision);
template <DataType T>
absl::Status UploadParameters(
const ::tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context);
Status UploadParameters(const ::tflite::gpu::Tensor<Linear, T>& parameters,
CLContext* context);
FLT clip_;
LinearStorage alpha_;
};
absl::Status CreatePReLU(const CreationContext& creation_context,
const OperationDef& definition,
const PReLUAttributes& attr, PReLU* result);
Status CreatePReLU(const CreationContext& creation_context,
const OperationDef& definition, const PReLUAttributes& attr,
PReLU* result);
template <DataType T>
absl::Status PReLU::UploadParameters(
Status PReLU::UploadParameters(
const ::tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context) {
LinearStorageCreateInfo create_info;
create_info.storage_type =
@ -75,7 +75,7 @@ absl::Status PReLU::UploadParameters(
create_info.data_type = definition_.GetPrimaryDataType();
RETURN_IF_ERROR(
CreateLinearStorage(create_info, parameters, context, &alpha_));
return absl::OkStatus();
return OkStatus();
}
} // namespace cl
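
For reference, the per-element math PReLU implements under the usual definition (the clip_ handling above is an additional upper bound and is omitted here):

// f(x) = x for x > 0, alpha * x otherwise.
float PReLUScalar(float x, float alpha) {
  return x > 0.0f ? x : alpha * x;
}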

View File

@ -92,17 +92,17 @@ std::string QuantizeAndDequantize::GetArgsDeclaration() const {
scale_.GetDeclaration());
}
absl::Status QuantizeAndDequantize::BindArguments(CLKernel* kernel) {
Status QuantizeAndDequantize::BindArguments(CLKernel* kernel) {
RETURN_IF_ERROR(kernel->SetBytesAuto(min_));
RETURN_IF_ERROR(kernel->SetBytesAuto(max_));
RETURN_IF_ERROR(kernel->SetBytesAuto(scale_));
return absl::OkStatus();
return OkStatus();
}
absl::Status CreateQuantizeAndDequantize(
const CreationContext& creation_context, const OperationDef& definition,
const QuantizeAndDequantizeAttributes& attr,
QuantizeAndDequantize* result) {
Status CreateQuantizeAndDequantize(const CreationContext& creation_context,
const OperationDef& definition,
const QuantizeAndDequantizeAttributes& attr,
QuantizeAndDequantize* result) {
const auto scalar_precision = creation_context.device->IsPowerVR()
? CalculationsPrecision::F32
: definition.precision;
@ -120,7 +120,7 @@ absl::Status CreateQuantizeAndDequantize(
*result = QuantizeAndDequantize(definition, attr, scalar_precision);
}
result->SetLinkIndex(0);
return absl::OkStatus();
return OkStatus();
}
} // namespace cl

View File

@ -57,9 +57,9 @@ class QuantizeAndDequantize : public ElementwiseOperation {
void SetLinkIndex(int index) override;
std::string GetCoreCode(const LinkingContext& context) const override;
std::string GetArgsDeclaration() const override;
absl::Status BindArguments(CLKernel* kernel) override;
Status BindArguments(CLKernel* kernel) override;
friend absl::Status CreateQuantizeAndDequantize(
friend Status CreateQuantizeAndDequantize(
const CreationContext& creation_context, const OperationDef& definition,
const QuantizeAndDequantizeAttributes& attr,
QuantizeAndDequantize* result);
@ -70,26 +70,27 @@ class QuantizeAndDequantize : public ElementwiseOperation {
CalculationsPrecision scalar_precision);
template <DataType T>
absl::Status UploadParameters(
const ::tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context);
Status UploadParameters(const ::tflite::gpu::Tensor<Linear, T>& parameters,
CLContext* context);
FLT min_;
FLT max_;
FLT scale_;
};
absl::Status CreateQuantizeAndDequantize(
const CreationContext& creation_context, const OperationDef& definition,
const QuantizeAndDequantizeAttributes& attr, QuantizeAndDequantize* result);
Status CreateQuantizeAndDequantize(const CreationContext& creation_context,
const OperationDef& definition,
const QuantizeAndDequantizeAttributes& attr,
QuantizeAndDequantize* result);
template <DataType T>
absl::Status QuantizeAndDequantize::UploadParameters(
Status QuantizeAndDequantize::UploadParameters(
const ::tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context) {
LinearStorageCreateInfo create_info;
create_info.storage_type =
DeduceLinearStorageType(definition_.GetPrimaryStorageType());
create_info.data_type = definition_.GetPrimaryDataType();
return absl::OkStatus();
return OkStatus();
}
} // namespace cl
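
The min_, max_, and scale_ uniforms above feed the standard fake-quant round trip. A scalar sketch of that math, assuming the conventional definition rather than this kernel's exact code:

#include <algorithm>
#include <cmath>

float QuantizeAndDequantizeScalar(float x, float min, float max, float scale) {
  const float clamped = std::min(std::max(x, min), max);
  return std::round((clamped - min) / scale) * scale + min;
}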

View File

@ -80,14 +80,14 @@ std::string ReLU::GetArgsDeclaration() const {
return args;
}
absl::Status ReLU::BindArguments(CLKernel* kernel) {
Status ReLU::BindArguments(CLKernel* kernel) {
if (alpha_.Active()) {
RETURN_IF_ERROR(kernel->SetBytesAuto(alpha_));
}
if (clip_.Active()) {
RETURN_IF_ERROR(kernel->SetBytesAuto(clip_));
}
return absl::OkStatus();
return OkStatus();
}
ReLU CreateReLU(const CreationContext& creation_context,

View File

@ -37,7 +37,7 @@ class ReLU : public ElementwiseOperation {
void SetLinkIndex(int index) override;
std::string GetCoreCode(const LinkingContext& context) const override;
std::string GetArgsDeclaration() const override;
absl::Status BindArguments(CLKernel* kernel) override;
Status BindArguments(CLKernel* kernel) override;
friend ReLU CreateReLU(const CreationContext& creation_context,
const OperationDef& definition,

View File

@ -156,7 +156,7 @@ Reshape& Reshape::operator=(Reshape&& operation) {
return *this;
}
absl::Status Reshape::Compile(const CreationContext& creation_context) {
Status Reshape::Compile(const CreationContext& creation_context) {
const auto code = definition_.IsBatchSupported()
? GetReshapeBatchedCode(definition_, linked_operations_)
: GetReshapeCode(definition_, linked_operations_);
@ -165,7 +165,7 @@ absl::Status Reshape::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Reshape::BindArguments() {
Status Reshape::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@ -174,7 +174,8 @@ absl::Status Reshape::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->Channels()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->Channels()));
return absl::OkStatus();
return OkStatus();
}
int3 Reshape::GetGridSize() const {
@ -184,12 +185,12 @@ int3 Reshape::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Reshape::Tune(const TuningParameters& params) {
Status Reshape::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status Reshape::AddToQueue(CLCommandQueue* queue) {
Status Reshape::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -29,10 +29,10 @@ class Reshape : public GPUOperation {
public:
explicit Reshape(const OperationDef& definition)
: GPUOperation(definition), work_group_size_(8, 4, 1) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Reshape(Reshape&& operation);
@ -41,7 +41,7 @@ class Reshape : public GPUOperation {
Reshape& operator=(const Reshape&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
CLKernel kernel_;

View File

@ -120,7 +120,7 @@ Reshapex4& Reshapex4::operator=(Reshapex4&& operation) {
return *this;
}
absl::Status Reshapex4::Compile(const CreationContext& creation_context) {
Status Reshapex4::Compile(const CreationContext& creation_context) {
const auto code = definition_.IsBatchSupported()
? GetReshapeBatchedCode(definition_, linked_operations_)
: GetReshapeCode(definition_, linked_operations_);
@ -129,14 +129,15 @@ absl::Status Reshapex4::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Reshapex4::BindArguments() {
Status Reshapex4::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 Reshapex4::GetGridSize() const {
@ -146,12 +147,12 @@ int3 Reshapex4::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Reshapex4::Tune(const TuningParameters& params) {
Status Reshapex4::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status Reshapex4::AddToQueue(CLCommandQueue* queue) {
Status Reshapex4::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@ -30,10 +30,10 @@ class Reshapex4 : public GPUOperation {
public:
explicit Reshapex4(const OperationDef& definition)
: GPUOperation(definition), work_group_size_(8, 4, 1) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Reshapex4(Reshapex4&& operation);
@ -42,7 +42,7 @@ class Reshapex4 : public GPUOperation {
Reshapex4& operator=(const Reshapex4&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
CLKernel kernel_;

View File

@ -209,7 +209,7 @@ Resize& Resize::operator=(Resize&& operation) {
return *this;
}
absl::Status Resize::Compile(const CreationContext& creation_context) {
Status Resize::Compile(const CreationContext& creation_context) {
const auto code = GetResizeCode(definition_, attr_.type,
attr_.half_pixel_centers, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
@ -217,7 +217,7 @@ absl::Status Resize::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Resize::BindArguments() {
Status Resize::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@ -230,7 +230,7 @@ absl::Status Resize::BindArguments() {
float2(CalculateResizeScale(src_[0]->Width(), dst_[0]->Width(), attr_),
CalculateResizeScale(src_[0]->Height(), dst_[0]->Height(), attr_));
RETURN_IF_ERROR(kernel_.SetBytesAuto(scale_factor));
return absl::OkStatus();
return OkStatus();
}
int3 Resize::GetGridSize() const {
@ -240,12 +240,12 @@ int3 Resize::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Resize::AddToQueue(CLCommandQueue* queue) {
Status Resize::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status Resize::Tune(const TuningParameters& params) {
Status Resize::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
@@ -271,7 +271,7 @@ Resize3D& Resize3D::operator=(Resize3D&& operation) {
return *this;
}
absl::Status Resize3D::Compile(const CreationContext& creation_context) {
Status Resize3D::Compile(const CreationContext& creation_context) {
const auto code =
GetResize3DCode(definition_, attr_.type, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
@@ -279,7 +279,7 @@ absl::Status Resize3D::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status Resize3D::BindArguments() {
Status Resize3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@@ -296,7 +296,7 @@ absl::Status Resize3D::BindArguments() {
CalculateResizeScale(src_[0]->Height(), dst_[0]->Height(), attr_),
CalculateResizeScale(src_[0]->Depth(), dst_[0]->Depth(), attr_), 1.0f);
RETURN_IF_ERROR(kernel_.SetBytesAuto(scale_factor));
return absl::OkStatus();
return OkStatus();
}
int3 Resize3D::GetGridSize() const {
@@ -306,12 +306,12 @@ int3 Resize3D::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Resize3D::AddToQueue(CLCommandQueue* queue) {
Status Resize3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status Resize3D::Tune(const TuningParameters& params) {
Status Resize3D::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}

View File

@@ -27,10 +27,10 @@ namespace cl {
class Resize : public GPUOperation {
public:
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Resize(Resize&& operation);
@@ -45,7 +45,7 @@ class Resize : public GPUOperation {
Resize(const OperationDef& definition, const Resize2DAttributes& attr)
: GPUOperation(definition), attr_(attr) {}
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Resize2DAttributes attr_;
@@ -58,10 +58,10 @@ Resize CreateResize(const OperationDef& definition,
class Resize3D : public GPUOperation {
public:
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Resize3D(Resize3D&& operation);
@@ -76,7 +76,7 @@ class Resize3D : public GPUOperation {
Resize3D(const OperationDef& definition, const Resize3DAttributes& attr)
: GPUOperation(definition), attr_(attr) {}
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
Resize3DAttributes attr_;
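
The header edits must land in the same change as the base class: for virtuals such as AddToQueue, Tune, and Compile, the return type is part of the override contract, so no subclass can keep the old alias once GPUOperation switches. A hedged sketch of why (GPUOperationLike and the trivial bodies are invented for illustration):

#include "absl/status/status.h"

class GPUOperationLike {
 public:
  virtual ~GPUOperationLike() = default;
  // Once the base declares absl::Status here...
  virtual absl::Status Compile() = 0;
};

class Resize3DLike : public GPUOperationLike {
 public:
  // ...each override must use exactly the same return type. Declaring a
  // different status type would fail to compile as an override, because
  // C++ only allows covariant returns for class pointers and references.
  absl::Status Compile() override { return absl::OkStatus(); }
};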

View File

@@ -79,14 +79,14 @@ Softmax& Softmax::operator=(Softmax&& kernel) {
return *this;
}
absl::Status Softmax::Compile(const CreationContext& creation_context) {
Status Softmax::Compile(const CreationContext& creation_context) {
const auto code = GetSoftmaxKernelCode(definition_, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_);
}
absl::Status Softmax::BindArguments() {
Status Softmax::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@@ -94,7 +94,7 @@ absl::Status Softmax::BindArguments() {
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHSB()));
RETURN_IF_ERROR(
kernel_.SetBytesAuto(GetMaskForLastPlane(src_[0]->Channels())));
return absl::OkStatus();
return OkStatus();
}
int3 Softmax::GetGridSize() const {
@@ -104,12 +104,12 @@ int3 Softmax::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status Softmax::Tune(const TuningParameters& params) {
Status Softmax::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status Softmax::AddToQueue(CLCommandQueue* queue) {
Status Softmax::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
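
Tune has the same two-step shape in every kernel in this diff: bind the arguments, then let the tuner pick a work-group size for the kernel's grid. A sketch of that control flow, with a stub standing in for the real GetBestWorkGroup (int3 here is a simplified stand-in as well):

#include "absl/status/status.h"

struct int3 { int x = 0, y = 0, z = 0; };

// Stub tuner: a real implementation would time several candidate
// work-group sizes on the device and keep the fastest.
absl::Status PickBestWorkGroup(const int3& grid, int3* best) {
  *best = int3{8, 4, 1};  // Placeholder choice.
  return absl::OkStatus();
}

class SoftmaxLike {
 public:
  absl::Status Tune() {
    if (absl::Status s = BindArguments(); !s.ok()) return s;
    return PickBestWorkGroup(GetGridSize(), &work_group_size_);
  }

 private:
  absl::Status BindArguments() { return absl::OkStatus(); }
  int3 GetGridSize() const { return int3{256, 256, 4}; }
  int3 work_group_size_{8, 4, 1};
};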

View File

@@ -30,10 +30,10 @@ class Softmax : public GPUOperation {
public:
Softmax() = default;
explicit Softmax(const OperationDef& definition) : GPUOperation(definition) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Softmax(Softmax&& kernel);
@@ -44,7 +44,7 @@ class Softmax : public GPUOperation {
friend Softmax CreateSoftmax();
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
CLKernel kernel_;
int3 work_group_size_ = int3(8, 4, 1);

View File

@@ -115,14 +115,14 @@ Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) {
return *this;
}
absl::Status Softmax1x1::Compile(const CreationContext& creation_context) {
Status Softmax1x1::Compile(const CreationContext& creation_context) {
const auto code = GetSoftmaxKernelCode(definition_, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_);
}
absl::Status Softmax1x1::AddToQueue(CLCommandQueue* queue) {
Status Softmax1x1::AddToQueue(CLCommandQueue* queue) {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));

View File

@@ -30,9 +30,9 @@ class Softmax1x1 : public GPUOperation {
Softmax1x1() = default;
explicit Softmax1x1(const OperationDef& definition)
: GPUOperation(definition) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status Compile(const CreationContext& creation_context) override;
// Move only
Softmax1x1(Softmax1x1&& kernel);

View File

@@ -96,14 +96,14 @@ SpaceToDepth& SpaceToDepth::operator=(SpaceToDepth&& operation) {
return *this;
}
absl::Status SpaceToDepth::Compile(const CreationContext& creation_context) {
Status SpaceToDepth::Compile(const CreationContext& creation_context) {
const auto code = GetSpaceToDepthCode(definition_, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
*creation_context.device, &kernel_);
}
absl::Status SpaceToDepth::BindArguments() {
Status SpaceToDepth::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@@ -121,12 +121,12 @@ int3 SpaceToDepth::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status SpaceToDepth::Tune(const TuningParameters& params) {
Status SpaceToDepth::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status SpaceToDepth::AddToQueue(CLCommandQueue* queue) {
Status SpaceToDepth::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}

View File

@@ -30,9 +30,9 @@ class SpaceToDepth : public GPUOperation {
public:
SpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr)
: GPUOperation(op_def), attr_(attr), work_group_size_(8, 4, 1) {}
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
Status AddToQueue(CLCommandQueue* queue) override;
Status Tune(const TuningParameters& params) override;
Status Compile(const CreationContext& creation_context) override;
SpaceToDepth(SpaceToDepth&& operation);
SpaceToDepth& operator=(SpaceToDepth&& operation);
@@ -40,7 +40,7 @@ class SpaceToDepth : public GPUOperation {
SpaceToDepth& operator=(const SpaceToDepth&) = delete;
private:
absl::Status BindArguments();
Status BindArguments();
int3 GetGridSize() const;
SpaceToDepthAttributes attr_;

View File

@@ -166,7 +166,7 @@ StridedSlice& StridedSlice::operator=(StridedSlice&& operation) {
return *this;
}
absl::Status StridedSlice::Compile(const CreationContext& creation_context) {
Status StridedSlice::Compile(const CreationContext& creation_context) {
const auto code = GetStridedSliceCode(definition_, Is4Aligned(attributes_),
linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
@@ -174,7 +174,7 @@ absl::Status StridedSlice::Compile(const CreationContext& creation_context) {
*creation_context.device, &kernel_);
}
absl::Status StridedSlice::BindArguments() {
Status StridedSlice::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
@@ -187,7 +187,7 @@ absl::Status StridedSlice::BindArguments() {
attributes_.strides.c, attributes_.strides.b)));
RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
return absl::OkStatus();
return OkStatus();
}
int3 StridedSlice::GetGridSize() const {
@@ -197,12 +197,12 @@ int3 StridedSlice::GetGridSize() const {
return int3(grid_x, grid_y, grid_z);
}
absl::Status StridedSlice::Tune(const TuningParameters& params) {
Status StridedSlice::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status StridedSlice::AddToQueue(CLCommandQueue* queue) {
Status StridedSlice::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
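
Call sites are insulated from the swap as long as they only test, propagate, or print the result, which is all the code above does. A small usage sketch of the absl::Status surface these kernels now return (CheckStride and its message are invented for illustration):

#include <iostream>

#include "absl/status/status.h"

absl::Status CheckStride(int stride) {
  if (stride == 0) {
    return absl::InvalidArgumentError("stride must be non-zero");
  }
  return absl::OkStatus();
}

int main() {
  const absl::Status status = CheckStride(0);
  if (!status.ok()) {
    // Prints something like "INVALID_ARGUMENT: stride must be non-zero".
    std::cout << status.ToString() << std::endl;
  }
  return 0;
}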

Some files were not shown because too many files have changed in this diff.