Removed ElementwiseOperation.
Simplified ex-ElementwiseOperations, ReLU, PReLU, etc.

PiperOrigin-RevId: 324715510
Change-Id: I3d98cdbcc8075bb91f20e065b0aca2ab16a4e8e5
This commit is contained in:
parent
a73b5ce940
commit
dab856a93f
@ -390,9 +390,7 @@ void InferenceContext::Merge() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto& linkable_node = nodes_[next_nodes[0]];
|
auto& linkable_node = nodes_[next_nodes[0]];
|
||||||
auto* elementwise =
|
if (!linkable_node.operations[0]->IsLinkable() ||
|
||||||
dynamic_cast<ElementwiseOperation*>(linkable_node.operations[0].get());
|
|
||||||
if (!elementwise || !elementwise->IsLinkable() ||
|
|
||||||
linkable_node.outputs.size() != 1 ||
|
linkable_node.outputs.size() != 1 ||
|
||||||
!IsReady(ready_tensors, linkable_node)) {
|
!IsReady(ready_tensors, linkable_node)) {
|
||||||
continue;
|
continue;
|
||||||
@ -410,9 +408,7 @@ void InferenceContext::Merge() {
|
|||||||
}
|
}
|
||||||
for (auto& node : nodes_) {
|
for (auto& node : nodes_) {
|
||||||
for (int j = 1; j < node.operations.size(); ++j) {
|
for (int j = 1; j < node.operations.size(); ++j) {
|
||||||
auto* elementwise =
|
node.operations[0]->AddOperation(node.operations[j].get());
|
||||||
dynamic_cast<ElementwiseOperation*>(node.operations[j].get());
|
|
||||||
node.operations[0]->AddOperation(elementwise);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,42 +25,29 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
|
||||||
Add::Add(const OperationDef& definition, const std::vector<int>& channels,
|
GPUOperation CreateAdd(const OperationDef& definition,
|
||||||
int dst_channels)
|
const std::vector<int>& channels, int dst_channels) {
|
||||||
: ElementwiseOperation(definition) {
|
GPUOperation add(definition);
|
||||||
int dst_depth = DivideRoundUp(dst_channels, 4);
|
int dst_depth = DivideRoundUp(dst_channels, 4);
|
||||||
int src0_depth = DivideRoundUp(channels[0], 4);
|
int src0_depth = DivideRoundUp(channels[0], 4);
|
||||||
linkable_ = dst_depth == src0_depth;
|
add.elementwise_ = true;
|
||||||
|
add.linkable_ = dst_depth == src0_depth;
|
||||||
if (src0_depth < dst_depth) {
|
if (src0_depth < dst_depth) {
|
||||||
check_src_channels_size_ = true;
|
add.check_src_channels_size_ = true;
|
||||||
}
|
}
|
||||||
for (int i = 1; i < definition_.src_tensors.size(); ++i) {
|
for (int i = 1; i < definition.src_tensors.size(); ++i) {
|
||||||
const std::string tensor_name = absl::StrCat("src_data_", i);
|
const std::string tensor_name = absl::StrCat("src_data_", i);
|
||||||
auto src_desc = definition_.src_tensors[i];
|
auto src_desc = definition.src_tensors[i];
|
||||||
if (definition_.IsBatchSupported()) {
|
if (definition.IsBatchSupported()) {
|
||||||
src_desc.SetStateVar("BatchedWidth", "true");
|
src_desc.SetStateVar("BatchedWidth", "true");
|
||||||
}
|
}
|
||||||
AddSrcTensor(tensor_name, src_desc);
|
add.AddSrcTensor(tensor_name, src_desc);
|
||||||
code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n";
|
add.code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n";
|
||||||
code_ += " in_out_value += args." + tensor_name +
|
add.code_ += " in_out_value += args." + tensor_name +
|
||||||
".Read(X_COORD, Y_COORD, S_COORD);\n";
|
".Read(X_COORD, Y_COORD, S_COORD);\n";
|
||||||
code_ += "}\n";
|
add.code_ += "}\n";
|
||||||
}
|
}
|
||||||
}
|
return add;
|
||||||
|
|
||||||
Add::Add(Add&& operation) : ElementwiseOperation(std::move(operation)) {}
|
|
||||||
|
|
||||||
Add& Add::operator=(Add&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
Add CreateAdd(const OperationDef& definition, const std::vector<int>& channels,
|
|
||||||
int dst_channels) {
|
|
||||||
Add operation(definition, channels, dst_channels);
|
|
||||||
return operation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
@ -27,24 +27,10 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
|
||||||
// Add operation inherited from ElementwiseOperation, but it is more
|
// Add operation supports not equal tensors on input (for possibility to
|
||||||
// complicated than usual elementwise, that is why it has own versions for
|
// remove Padding operation with zeroes in channels dimension)
|
||||||
// Compile. Add operation support not equal tensors on input (for possibility to
|
GPUOperation CreateAdd(const OperationDef& definition,
|
||||||
// remove Padding operation with zeroes in Z dimension)
|
const std::vector<int>& channels, int dst_channels);
|
||||||
class Add : public ElementwiseOperation {
|
|
||||||
public:
|
|
||||||
Add(const OperationDef& definition, const std::vector<int>& channels,
|
|
||||||
int dst_channels);
|
|
||||||
|
|
||||||
// Move only
|
|
||||||
Add(Add&& operation);
|
|
||||||
Add& operator=(Add&& operation);
|
|
||||||
Add(const Add&) = delete;
|
|
||||||
Add& operator=(const Add&) = delete;
|
|
||||||
};
|
|
||||||
|
|
||||||
Add CreateAdd(const OperationDef& definition, const std::vector<int>& channels,
|
|
||||||
int dst_channels);
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -49,7 +49,7 @@ TEST_F(OpenCLOperationTest, AddTwoEqualTensors) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
Add operation = CreateAdd(op_def, channels, channels[0]);
|
GPUOperation operation = CreateAdd(op_def, channels, channels[0]);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -77,7 +77,7 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasMoreChannelsThanSecond) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
Add operation = CreateAdd(op_def, channels, channels[0]);
|
GPUOperation operation = CreateAdd(op_def, channels, channels[0]);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 6), &dst_tensor));
|
BHWC(1, 2, 1, 6), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -107,7 +107,7 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasLessChannelsThanSecond) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
Add operation = CreateAdd(op_def, channels, 6);
|
GPUOperation operation = CreateAdd(op_def, channels, 6);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 6), &dst_tensor));
|
BHWC(1, 2, 1, 6), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
|
@ -134,128 +134,33 @@ std::string GetTwoInputCode(const OperationType& op_type,
|
|||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
ElementwiseOneInput::ElementwiseOneInput(const OperationDef& definition,
|
GPUOperation CreateElementwiseOneInput(const OperationDef& definition,
|
||||||
const OperationType& op_type)
|
const OperationType& op_type) {
|
||||||
: ElementwiseOperation(definition) {
|
GPUOperation op(definition);
|
||||||
code_ = GetOneInputCode(op_type, definition.precision, "in_out_value");
|
op.elementwise_ = true;
|
||||||
|
op.code_ = GetOneInputCode(op_type, definition.precision, "in_out_value");
|
||||||
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
ElementwiseOneInput::ElementwiseOneInput(ElementwiseOneInput&& operation)
|
GPUOperation CreateElementwiseOneRuntimeOneScalar(
|
||||||
: ElementwiseOperation(std::move(operation)) {}
|
|
||||||
|
|
||||||
ElementwiseOneInput& ElementwiseOneInput::operator=(
|
|
||||||
ElementwiseOneInput&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseOneInput CreateElementwiseOneInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type) {
|
|
||||||
ElementwiseOneInput operation(definition, op_type);
|
|
||||||
return operation;
|
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar(
|
|
||||||
const OperationDef& definition, const OperationType& op_type,
|
|
||||||
float scalar_parameter, CalculationsPrecision scalar_precision)
|
|
||||||
: ElementwiseOperation(definition) {
|
|
||||||
if (definition.precision == CalculationsPrecision::F32) {
|
|
||||||
args_.AddFloat("scalar", scalar_parameter);
|
|
||||||
} else {
|
|
||||||
args_.AddHalf("scalar", half(scalar_parameter));
|
|
||||||
}
|
|
||||||
code_ = GetTwoInputCode(op_type, "in_out_value", "args.scalar");
|
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar(
|
|
||||||
ElementwiseOneRuntimeOneScalar&& operation)
|
|
||||||
: ElementwiseOperation(std::move(operation)) {}
|
|
||||||
|
|
||||||
ElementwiseOneRuntimeOneScalar& ElementwiseOneRuntimeOneScalar::operator=(
|
|
||||||
ElementwiseOneRuntimeOneScalar&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseOneRuntimeOneScalar CreateElementwiseOneRuntimeOneScalar(
|
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const OperationType& op_type, float scalar_parameter) {
|
const OperationType& op_type, float scalar_parameter) {
|
||||||
const auto scalar_precision = creation_context.device->IsPowerVR()
|
GPUOperation op(definition);
|
||||||
? CalculationsPrecision::F32
|
op.elementwise_ = true;
|
||||||
: definition.precision;
|
if (definition.precision == CalculationsPrecision::F32) {
|
||||||
ElementwiseOneRuntimeOneScalar operation(definition, op_type,
|
op.args_.AddFloat("scalar", scalar_parameter);
|
||||||
scalar_parameter, scalar_precision);
|
} else {
|
||||||
return operation;
|
op.args_.AddHalf("scalar", half(scalar_parameter));
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type,
|
|
||||||
const BroadcastSettings& broadcast)
|
|
||||||
: ElementwiseOperation(definition),
|
|
||||||
broadcast_(broadcast) {
|
|
||||||
auto src_desc = definition.src_tensors[1];
|
|
||||||
if (definition.IsBatchSupported()) {
|
|
||||||
src_desc.SetStateVar("BatchedWidth", "true");
|
|
||||||
}
|
}
|
||||||
AddSrcTensor("second_tensor", src_desc);
|
op.code_ = GetTwoInputCode(op_type, "in_out_value", "args.scalar");
|
||||||
const std::string x_coord = broadcast.width ? "0" : "X_COORD";
|
return op;
|
||||||
const std::string y_coord = broadcast.height ? "0" : "Y_COORD";
|
|
||||||
const std::string s_coord = broadcast.channels ? "0" : "S_COORD";
|
|
||||||
code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
|
|
||||||
", ", y_coord, ", ", s_coord, ");\n");
|
|
||||||
if (broadcast.channels) {
|
|
||||||
code_ += " second_val.y = second_val.x;\n";
|
|
||||||
code_ += " second_val.z = second_val.x;\n";
|
|
||||||
code_ += " second_val.w = second_val.x;\n";
|
|
||||||
}
|
|
||||||
code_ += GetTwoInputCode(op_type, "in_out_value", "second_val");
|
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type,
|
|
||||||
const BroadcastSettings& broadcast,
|
|
||||||
Tensor&& constant_tensor)
|
|
||||||
: ElementwiseOperation(definition),
|
|
||||||
broadcast_(broadcast) {
|
|
||||||
auto descriptor = constant_tensor.GetDescriptor();
|
|
||||||
args_.AddObject("second_tensor", AccessType::READ,
|
|
||||||
absl::make_unique<Tensor>(std::move(constant_tensor)),
|
|
||||||
absl::make_unique<TensorDescriptor>(descriptor));
|
|
||||||
const std::string x_coord = broadcast.width ? "0" : "X_COORD";
|
|
||||||
const std::string y_coord = broadcast.height ? "0" : "Y_COORD";
|
|
||||||
const std::string s_coord = broadcast.channels ? "0" : "S_COORD";
|
|
||||||
code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
|
|
||||||
", ", y_coord, ", ", s_coord, ");\n");
|
|
||||||
if (broadcast.channels) {
|
|
||||||
code_ += " second_val.y = second_val.x;\n";
|
|
||||||
code_ += " second_val.z = second_val.x;\n";
|
|
||||||
code_ += " second_val.w = second_val.x;\n";
|
|
||||||
}
|
|
||||||
code_ += GetTwoInputCode(op_type, "in_out_value", "second_val");
|
|
||||||
}
|
|
||||||
|
|
||||||
ElementwiseTwoInput::ElementwiseTwoInput(ElementwiseTwoInput&& operation)
|
|
||||||
: ElementwiseOperation(std::move(operation)),
|
|
||||||
broadcast_(operation.broadcast_) {}
|
|
||||||
|
|
||||||
ElementwiseTwoInput& ElementwiseTwoInput::operator=(
|
|
||||||
ElementwiseTwoInput&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
broadcast_ = operation.broadcast_;
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status CreateElementwiseTwoInput(
|
absl::Status CreateElementwiseTwoInput(
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const OperationType& op_type,
|
const OperationType& op_type,
|
||||||
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
|
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
|
||||||
ElementwiseTwoInput* result) {
|
GPUOperation* result) {
|
||||||
const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
|
const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
|
||||||
TensorStorageType storage_type =
|
TensorStorageType storage_type =
|
||||||
SelectBestStorageType(*creation_context.context, *creation_context.device,
|
SelectBestStorageType(*creation_context.context, *creation_context.device,
|
||||||
@ -268,12 +173,21 @@ absl::Status CreateElementwiseTwoInput(
|
|||||||
&gpu_tensor));
|
&gpu_tensor));
|
||||||
RETURN_IF_ERROR(
|
RETURN_IF_ERROR(
|
||||||
gpu_tensor.WriteData(creation_context.queue, constant_tensor));
|
gpu_tensor.WriteData(creation_context.queue, constant_tensor));
|
||||||
BroadcastSettings broadcast;
|
|
||||||
broadcast.width = true;
|
*result = GPUOperation(definition);
|
||||||
broadcast.height = true;
|
result->elementwise_ = true;
|
||||||
broadcast.channels = shape.c == 1;
|
result->args_.AddObject("second_tensor", AccessType::READ,
|
||||||
*result = ElementwiseTwoInput(definition, op_type, broadcast,
|
absl::make_unique<Tensor>(std::move(gpu_tensor)),
|
||||||
std::move(gpu_tensor));
|
absl::make_unique<TensorDescriptor>(desc));
|
||||||
|
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
||||||
|
result->code_ = absl::StrCat(
|
||||||
|
"FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
|
||||||
|
if (shape.c == 1) {
|
||||||
|
result->code_ += " second_val.y = second_val.x;\n";
|
||||||
|
result->code_ += " second_val.z = second_val.x;\n";
|
||||||
|
result->code_ += " second_val.w = second_val.x;\n";
|
||||||
|
}
|
||||||
|
result->code_ += GetTwoInputCode(op_type, "in_out_value", "second_val");
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -281,7 +195,7 @@ absl::Status CreateElementwiseTwoInput(
|
|||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const OperationType& op_type,
|
const OperationType& op_type,
|
||||||
const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
|
const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
|
||||||
ElementwiseTwoInput* result) {
|
GPUOperation* result) {
|
||||||
const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
|
const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
|
||||||
constant_tensor.shape.c);
|
constant_tensor.shape.c);
|
||||||
TensorStorageType storage_type =
|
TensorStorageType storage_type =
|
||||||
@ -295,34 +209,49 @@ absl::Status CreateElementwiseTwoInput(
|
|||||||
&gpu_tensor));
|
&gpu_tensor));
|
||||||
RETURN_IF_ERROR(
|
RETURN_IF_ERROR(
|
||||||
gpu_tensor.WriteData(creation_context.queue, constant_tensor));
|
gpu_tensor.WriteData(creation_context.queue, constant_tensor));
|
||||||
BroadcastSettings broadcast;
|
|
||||||
broadcast.width = shape.w == 1;
|
*result = GPUOperation(definition);
|
||||||
broadcast.height = shape.h == 1;
|
result->elementwise_ = true;
|
||||||
broadcast.channels = shape.c == 1;
|
result->args_.AddObject("second_tensor", AccessType::READ,
|
||||||
*result = ElementwiseTwoInput(definition, op_type, broadcast,
|
absl::make_unique<Tensor>(std::move(gpu_tensor)),
|
||||||
std::move(gpu_tensor));
|
absl::make_unique<TensorDescriptor>(desc));
|
||||||
|
const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
|
||||||
|
const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
|
||||||
|
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
||||||
|
result->code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(",
|
||||||
|
x_coord, ", ", y_coord, ", ", s_coord, ");\n");
|
||||||
|
if (shape.c == 1) {
|
||||||
|
result->code_ += " second_val.y = second_val.x;\n";
|
||||||
|
result->code_ += " second_val.z = second_val.x;\n";
|
||||||
|
result->code_ += " second_val.w = second_val.x;\n";
|
||||||
|
}
|
||||||
|
result->code_ += GetTwoInputCode(op_type, "in_out_value", "second_val");
|
||||||
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
ElementwiseTwoInput CreateElementwiseTwoInput(const OperationDef& definition,
|
GPUOperation CreateElementwiseTwoInput(const OperationDef& definition,
|
||||||
const OperationType& op_type,
|
const OperationType& op_type,
|
||||||
const BHWC& shape) {
|
const BHWC& shape) {
|
||||||
BroadcastSettings broadcast;
|
GPUOperation op(definition);
|
||||||
broadcast.width = shape.w == 1;
|
op.elementwise_ = true;
|
||||||
broadcast.height = shape.h == 1;
|
auto src_desc = definition.src_tensors[1];
|
||||||
broadcast.channels = shape.c == 1;
|
if (definition.IsBatchSupported()) {
|
||||||
ElementwiseTwoInput operation(definition, op_type, broadcast);
|
src_desc.SetStateVar("BatchedWidth", "true");
|
||||||
return operation;
|
}
|
||||||
}
|
op.AddSrcTensor("second_tensor", src_desc);
|
||||||
|
const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
|
||||||
ElementwiseTwoInput CreateElementwiseTwoInput(const OperationDef& definition,
|
const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
|
||||||
const OperationType& op_type) {
|
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
||||||
BroadcastSettings broadcast;
|
op.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
|
||||||
broadcast.width = false;
|
", ", y_coord, ", ", s_coord, ");\n");
|
||||||
broadcast.height = false;
|
if (shape.c == 1) {
|
||||||
broadcast.channels = false;
|
op.code_ += " second_val.y = second_val.x;\n";
|
||||||
ElementwiseTwoInput operation(definition, op_type, broadcast);
|
op.code_ += " second_val.z = second_val.x;\n";
|
||||||
return operation;
|
op.code_ += " second_val.w = second_val.x;\n";
|
||||||
|
}
|
||||||
|
op.code_ += GetTwoInputCode(op_type, "in_out_value", "second_val");
|
||||||
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
@ -26,93 +26,38 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
|
||||||
// Class for simple one input operations without any parameters, for example
|
// Creates simple one input operation without any parameters, for example
|
||||||
// log, sin, cos and etc.
|
// log, sin, cos, etc.
|
||||||
class ElementwiseOneInput : public ElementwiseOperation {
|
GPUOperation CreateElementwiseOneInput(const OperationDef& definition,
|
||||||
public:
|
const OperationType& op_type);
|
||||||
ElementwiseOneInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type);
|
|
||||||
|
|
||||||
// Move only
|
// Creates simple two input (first input is runtime tensor and second input is
|
||||||
ElementwiseOneInput(ElementwiseOneInput&& operation);
|
// scalar argument) operation, for example sub, div, pow, etc.
|
||||||
ElementwiseOneInput& operator=(ElementwiseOneInput&& operation);
|
GPUOperation CreateElementwiseOneRuntimeOneScalar(
|
||||||
ElementwiseOneInput(const ElementwiseOneInput&) = delete;
|
|
||||||
ElementwiseOneInput& operator=(const ElementwiseOneInput&) = delete;
|
|
||||||
};
|
|
||||||
|
|
||||||
ElementwiseOneInput CreateElementwiseOneInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type);
|
|
||||||
|
|
||||||
// Class for simple two input (first input is runtime tensor and second input is
|
|
||||||
// scalar argument) operations without any parameters, for example sub, div and
|
|
||||||
// etc.
|
|
||||||
class ElementwiseOneRuntimeOneScalar : public ElementwiseOperation {
|
|
||||||
public:
|
|
||||||
ElementwiseOneRuntimeOneScalar(const OperationDef& definition,
|
|
||||||
const OperationType& op_type,
|
|
||||||
float scalar_parameter,
|
|
||||||
CalculationsPrecision scalar_precision);
|
|
||||||
|
|
||||||
// Move only
|
|
||||||
ElementwiseOneRuntimeOneScalar(ElementwiseOneRuntimeOneScalar&& operation);
|
|
||||||
ElementwiseOneRuntimeOneScalar& operator=(
|
|
||||||
ElementwiseOneRuntimeOneScalar&& operation);
|
|
||||||
ElementwiseOneRuntimeOneScalar(const ElementwiseOneRuntimeOneScalar&) =
|
|
||||||
delete;
|
|
||||||
ElementwiseOneRuntimeOneScalar& operator=(
|
|
||||||
const ElementwiseOneRuntimeOneScalar&) = delete;
|
|
||||||
};
|
|
||||||
|
|
||||||
ElementwiseOneRuntimeOneScalar CreateElementwiseOneRuntimeOneScalar(
|
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const OperationType& op_type, float scalar_parameter);
|
const OperationType& op_type, float scalar_parameter);
|
||||||
|
|
||||||
struct BroadcastSettings {
|
// Creates simple two input(first input is runtime tensor and second input is
|
||||||
bool width;
|
// constant linear tensor) operation, for example sub, div and etc.
|
||||||
bool height;
|
|
||||||
bool channels;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Class for simple two input(first input is runtime tensor and second input is
|
|
||||||
// runtime or constant tensor) operations without any parameters, for example
|
|
||||||
// sub, div and etc.
|
|
||||||
class ElementwiseTwoInput : public ElementwiseOperation {
|
|
||||||
public:
|
|
||||||
ElementwiseTwoInput() = default;
|
|
||||||
ElementwiseTwoInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type,
|
|
||||||
const BroadcastSettings& broadcast);
|
|
||||||
|
|
||||||
ElementwiseTwoInput(const OperationDef& definition,
|
|
||||||
const OperationType& op_type,
|
|
||||||
const BroadcastSettings& broadcast,
|
|
||||||
Tensor&& constant_tensor);
|
|
||||||
|
|
||||||
// Move only
|
|
||||||
ElementwiseTwoInput(ElementwiseTwoInput&& operation);
|
|
||||||
ElementwiseTwoInput& operator=(ElementwiseTwoInput&& operation);
|
|
||||||
ElementwiseTwoInput(const ElementwiseTwoInput&) = delete;
|
|
||||||
ElementwiseTwoInput& operator=(const ElementwiseTwoInput&) = delete;
|
|
||||||
|
|
||||||
private:
|
|
||||||
BroadcastSettings broadcast_;
|
|
||||||
};
|
|
||||||
|
|
||||||
absl::Status CreateElementwiseTwoInput(
|
absl::Status CreateElementwiseTwoInput(
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const OperationType& op_type,
|
const OperationType& op_type,
|
||||||
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
|
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
|
||||||
ElementwiseTwoInput* result);
|
GPUOperation* result);
|
||||||
|
|
||||||
|
// Creates simple two input(first input is runtime tensor and second input is
|
||||||
|
// constant HWC tensor) operation, for example sub, div and etc.
|
||||||
absl::Status CreateElementwiseTwoInput(
|
absl::Status CreateElementwiseTwoInput(
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const OperationType& op_type,
|
const OperationType& op_type,
|
||||||
const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
|
const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
|
||||||
ElementwiseTwoInput* result);
|
GPUOperation* result);
|
||||||
|
|
||||||
ElementwiseTwoInput CreateElementwiseTwoInput(const OperationDef& definition,
|
// Creates simple two input(2 runtime tensors) operation, for example
|
||||||
const OperationType& op_type,
|
// sub, div and etc.
|
||||||
const BHWC& shape);
|
GPUOperation CreateElementwiseTwoInput(const OperationDef& definition,
|
||||||
|
const OperationType& op_type,
|
||||||
|
const BHWC& shape);
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -45,7 +45,7 @@ TEST_F(OpenCLOperationTest, Abs) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::ABS);
|
CreateElementwiseOneInput(op_def, OperationType::ABS);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -70,7 +70,7 @@ TEST_F(OpenCLOperationTest, Cos) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::COS);
|
CreateElementwiseOneInput(op_def, OperationType::COS);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -95,7 +95,7 @@ TEST_F(OpenCLOperationTest, Copy) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::COPY);
|
CreateElementwiseOneInput(op_def, OperationType::COPY);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -118,7 +118,7 @@ TEST_F(OpenCLOperationTest, Elu) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::ELU);
|
CreateElementwiseOneInput(op_def, OperationType::ELU);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 1, 1, 7), &dst_tensor));
|
BHWC(1, 1, 1, 7), &dst_tensor));
|
||||||
@ -144,7 +144,7 @@ TEST_F(OpenCLOperationTest, Exp) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::EXP);
|
CreateElementwiseOneInput(op_def, OperationType::EXP);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 1, 1, 7), &dst_tensor));
|
BHWC(1, 1, 1, 7), &dst_tensor));
|
||||||
@ -171,7 +171,7 @@ TEST_F(OpenCLOperationTest, HardSwish) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::HARD_SWISH);
|
CreateElementwiseOneInput(op_def, OperationType::HARD_SWISH);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
src_tensor.shape, &dst_tensor));
|
src_tensor.shape, &dst_tensor));
|
||||||
@ -197,7 +197,7 @@ TEST_F(OpenCLOperationTest, Log) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::LOG);
|
CreateElementwiseOneInput(op_def, OperationType::LOG);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -222,7 +222,7 @@ TEST_F(OpenCLOperationTest, Rsqrt) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::RSQRT);
|
CreateElementwiseOneInput(op_def, OperationType::RSQRT);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -249,7 +249,7 @@ TEST_F(OpenCLOperationTest, Sigmoid) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::SIGMOID);
|
CreateElementwiseOneInput(op_def, OperationType::SIGMOID);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -273,7 +273,7 @@ TEST_F(OpenCLOperationTest, Sin) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::SIN);
|
CreateElementwiseOneInput(op_def, OperationType::SIN);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -299,7 +299,7 @@ TEST_F(OpenCLOperationTest, Sqrt) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::SQRT);
|
CreateElementwiseOneInput(op_def, OperationType::SQRT);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -325,7 +325,7 @@ TEST_F(OpenCLOperationTest, Square) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::SQUARE);
|
CreateElementwiseOneInput(op_def, OperationType::SQUARE);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -349,7 +349,7 @@ TEST_F(OpenCLOperationTest, Tanh) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseOneInput(op_def, OperationType::TANH);
|
CreateElementwiseOneInput(op_def, OperationType::TANH);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -378,7 +378,7 @@ TEST_F(OpenCLOperationTest, Sub) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::SUB, src_tensor_1.shape);
|
op_def, OperationType::SUB, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -406,7 +406,7 @@ TEST_F(OpenCLOperationTest, SquaredDiff) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::SQUARED_DIFF, src_tensor_1.shape);
|
op_def, OperationType::SQUARED_DIFF, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -434,7 +434,7 @@ TEST_F(OpenCLOperationTest, Div) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::DIV, src_tensor_1.shape);
|
op_def, OperationType::DIV, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -462,7 +462,7 @@ TEST_F(OpenCLOperationTest, Pow) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::POW, src_tensor_1.shape);
|
op_def, OperationType::POW, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -490,7 +490,7 @@ TEST_F(OpenCLOperationTest, Add) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::ADD, src_tensor_1.shape);
|
op_def, OperationType::ADD, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -518,7 +518,7 @@ TEST_F(OpenCLOperationTest, Maximum) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::MAXIMUM, src_tensor_1.shape);
|
op_def, OperationType::MAXIMUM, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -547,9 +547,8 @@ TEST_F(OpenCLOperationTest, MaximumWithScalar) {
|
|||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
const float* scalar = absl::get_if<float>(&attr.param);
|
const float* scalar = absl::get_if<float>(&attr.param);
|
||||||
ElementwiseOneRuntimeOneScalar operation =
|
GPUOperation operation = CreateElementwiseOneRuntimeOneScalar(
|
||||||
CreateElementwiseOneRuntimeOneScalar(creation_context_, op_def,
|
creation_context_, op_def, OperationType::MAXIMUM, *scalar);
|
||||||
OperationType::MAXIMUM, *scalar);
|
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation,
|
||||||
BHWC(1, 4, 1, 1), &dst_tensor));
|
BHWC(1, 4, 1, 1), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -578,7 +577,7 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantLinearTensor) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def,
|
ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def,
|
||||||
OperationType::MAXIMUM, linear_tensor,
|
OperationType::MAXIMUM, linear_tensor,
|
||||||
&operation));
|
&operation));
|
||||||
@ -608,7 +607,7 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensor) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def,
|
ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def,
|
||||||
OperationType::MAXIMUM, hwc_tensor,
|
OperationType::MAXIMUM, hwc_tensor,
|
||||||
&operation));
|
&operation));
|
||||||
@ -637,7 +636,7 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensorBroadcastChannels) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def,
|
ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def,
|
||||||
OperationType::MAXIMUM, hwc_tensor,
|
OperationType::MAXIMUM, hwc_tensor,
|
||||||
&operation));
|
&operation));
|
||||||
@ -666,7 +665,7 @@ TEST_F(OpenCLOperationTest, Minimum) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::MINIMUM, src_tensor_1.shape);
|
op_def, OperationType::MINIMUM, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -695,9 +694,8 @@ TEST_F(OpenCLOperationTest, MinimumWithScalar) {
|
|||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
const float* scalar = absl::get_if<float>(&attr.param);
|
const float* scalar = absl::get_if<float>(&attr.param);
|
||||||
ElementwiseOneRuntimeOneScalar operation =
|
GPUOperation operation = CreateElementwiseOneRuntimeOneScalar(
|
||||||
CreateElementwiseOneRuntimeOneScalar(creation_context_, op_def,
|
creation_context_, op_def, OperationType::MINIMUM, *scalar);
|
||||||
OperationType::MINIMUM, *scalar);
|
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation,
|
||||||
BHWC(1, 4, 1, 1), &dst_tensor));
|
BHWC(1, 4, 1, 1), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -723,7 +721,7 @@ TEST_F(OpenCLOperationTest, Mul) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::MUL, src_tensor_1.shape);
|
op_def, OperationType::MUL, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -751,7 +749,7 @@ TEST_F(OpenCLOperationTest, MulBroadcastHW) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::MUL, src_tensor_1.shape);
|
op_def, OperationType::MUL, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
@ -779,7 +777,7 @@ TEST_F(OpenCLOperationTest, MulBroadcastChannels) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ElementwiseTwoInput operation = CreateElementwiseTwoInput(
|
GPUOperation operation = CreateElementwiseTwoInput(
|
||||||
op_def, OperationType::MUL, src_tensor_1.shape);
|
op_def, OperationType::MUL, src_tensor_1.shape);
|
||||||
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1},
|
||||||
creation_context_, &operation,
|
creation_context_, &operation,
|
||||||
|
@ -49,6 +49,20 @@ std::string GetElementWiseCode(const OperationDef& op_def,
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
absl::Status MergeOperations(const std::vector<GPUOperation*>& linked_ops,
|
||||||
|
Arguments* merged_args, std::string* merged_code) {
|
||||||
|
for (int i = 0; i < linked_ops.size(); ++i) {
|
||||||
|
std::string code = linked_ops[i]->code_;
|
||||||
|
std::string unique_postfix = absl::StrCat("_link", i + 1);
|
||||||
|
linked_ops[i]->args_.RenameArgs(unique_postfix, &code);
|
||||||
|
*merged_code += "{\n" + code + "\n}\n";
|
||||||
|
RETURN_IF_ERROR(
|
||||||
|
merged_args->Merge(std::move(linked_ops[i]->args_), unique_postfix));
|
||||||
|
linked_ops[i]->AddUniquePostfix(unique_postfix);
|
||||||
|
}
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
DataType OperationDef::GetDataType() const {
|
DataType OperationDef::GetDataType() const {
|
||||||
@ -108,14 +122,17 @@ void GPUOperation::SetDst(Tensor* ptr, int index) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
GPUOperation::GPUOperation(GPUOperation&& operation)
|
GPUOperation::GPUOperation(GPUOperation&& operation)
|
||||||
: definition_(std::move(operation.definition_)),
|
: args_(std::move(operation.args_)),
|
||||||
|
code_(std::move(operation.code_)),
|
||||||
|
elementwise_(operation.elementwise_),
|
||||||
|
linkable_(operation.linkable_),
|
||||||
|
check_src_channels_size_(operation.check_src_channels_size_),
|
||||||
|
definition_(std::move(operation.definition_)),
|
||||||
src_(std::move(operation.src_)),
|
src_(std::move(operation.src_)),
|
||||||
dst_(std::move(operation.dst_)),
|
dst_(std::move(operation.dst_)),
|
||||||
args_(std::move(operation.args_)),
|
|
||||||
kernel_(std::move(operation.kernel_)),
|
kernel_(std::move(operation.kernel_)),
|
||||||
work_group_size_(operation.work_group_size_),
|
work_group_size_(operation.work_group_size_),
|
||||||
grid_size_(operation.grid_size_),
|
grid_size_(operation.grid_size_),
|
||||||
code_(std::move(operation.code_)),
|
|
||||||
src_tensors_names_(std::move(operation.src_tensors_names_)),
|
src_tensors_names_(std::move(operation.src_tensors_names_)),
|
||||||
dst_tensors_names_(std::move(operation.dst_tensors_names_)),
|
dst_tensors_names_(std::move(operation.dst_tensors_names_)),
|
||||||
compiler_options_(std::move(operation.compiler_options_)),
|
compiler_options_(std::move(operation.compiler_options_)),
|
||||||
@ -123,14 +140,17 @@ GPUOperation::GPUOperation(GPUOperation&& operation)
|
|||||||
|
|
||||||
GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
|
GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
|
||||||
if (this != &operation) {
|
if (this != &operation) {
|
||||||
|
args_ = std::move(operation.args_);
|
||||||
|
code_ = std::move(operation.code_);
|
||||||
|
elementwise_ = operation.elementwise_;
|
||||||
|
linkable_ = operation.linkable_;
|
||||||
|
check_src_channels_size_ = operation.check_src_channels_size_;
|
||||||
definition_ = std::move(operation.definition_);
|
definition_ = std::move(operation.definition_);
|
||||||
src_ = std::move(operation.src_);
|
src_ = std::move(operation.src_);
|
||||||
dst_ = std::move(operation.dst_);
|
dst_ = std::move(operation.dst_);
|
||||||
args_ = std::move(operation.args_);
|
|
||||||
kernel_ = std::move(operation.kernel_);
|
kernel_ = std::move(operation.kernel_);
|
||||||
std::swap(work_group_size_, operation.work_group_size_);
|
std::swap(work_group_size_, operation.work_group_size_);
|
||||||
std::swap(grid_size_, operation.grid_size_);
|
std::swap(grid_size_, operation.grid_size_);
|
||||||
code_ = std::move(operation.code_);
|
|
||||||
src_tensors_names_ = std::move(operation.src_tensors_names_);
|
src_tensors_names_ = std::move(operation.src_tensors_names_);
|
||||||
dst_tensors_names_ = std::move(operation.dst_tensors_names_);
|
dst_tensors_names_ = std::move(operation.dst_tensors_names_);
|
||||||
compiler_options_ = std::move(operation.compiler_options_);
|
compiler_options_ = std::move(operation.compiler_options_);
|
||||||
@ -139,7 +159,7 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUOperation::AddOperation(ElementwiseOperation* operation) {
|
void GPUOperation::AddOperation(GPUOperation* operation) {
|
||||||
linked_operations_.push_back(operation);
|
linked_operations_.push_back(operation);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -183,73 +203,62 @@ absl::Status GPUOperation::UpdateParams() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
|
absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
|
||||||
std::string element_wise_code;
|
if (elementwise_) {
|
||||||
RETURN_IF_ERROR(
|
auto src_desc =
|
||||||
MergeOperations(linked_operations_, &args_, &element_wise_code));
|
absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
|
||||||
RETURN_IF_ERROR(args_.TransformToCLCode(
|
if (definition_.IsBatchSupported()) {
|
||||||
creation_context.device->GetInfo(),
|
src_desc->SetStateVar("BatchedWidth", "true");
|
||||||
{{dst_tensors_names_[0], element_wise_code}}, &code_));
|
}
|
||||||
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
|
src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
|
||||||
code_, "main_function", compiler_options_, *creation_context.context,
|
args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));
|
||||||
*creation_context.device, &kernel_));
|
|
||||||
|
auto dst_desc =
|
||||||
|
absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
|
||||||
|
if (definition_.IsBatchSupported()) {
|
||||||
|
dst_desc->SetStateVar("BatchedWidth", "true");
|
||||||
|
}
|
||||||
|
dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
|
||||||
|
args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
|
||||||
|
|
||||||
|
std::string code =
|
||||||
|
GetElementWiseCode(definition_, check_src_channels_size_);
|
||||||
|
std::string element_wise_code;
|
||||||
|
element_wise_code += "{\n" + code_ + "\n}\n";
|
||||||
|
RETURN_IF_ERROR(
|
||||||
|
MergeOperations(linked_operations_, &args_, &element_wise_code));
|
||||||
|
RETURN_IF_ERROR(args_.TransformToCLCode(
|
||||||
|
creation_context.device->GetInfo(),
|
||||||
|
{{dst_tensors_names_[0], element_wise_code}}, &code));
|
||||||
|
code = absl::Substitute(code, args_.GetListOfArgs());
|
||||||
|
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
|
||||||
|
code, "main_function", *creation_context.context,
|
||||||
|
*creation_context.device, &kernel_));
|
||||||
|
} else {
|
||||||
|
std::string element_wise_code;
|
||||||
|
RETURN_IF_ERROR(
|
||||||
|
MergeOperations(linked_operations_, &args_, &element_wise_code));
|
||||||
|
RETURN_IF_ERROR(args_.TransformToCLCode(
|
||||||
|
creation_context.device->GetInfo(),
|
||||||
|
{{dst_tensors_names_[0], element_wise_code}}, &code_));
|
||||||
|
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
|
||||||
|
code_, "main_function", compiler_options_, *creation_context.context,
|
||||||
|
*creation_context.device, &kernel_));
|
||||||
|
}
|
||||||
return PostCompileCheck(creation_context.device->GetInfo());
|
return PostCompileCheck(creation_context.device->GetInfo());
|
||||||
}
|
}
|
||||||
|
|
||||||
ElementwiseOperation::ElementwiseOperation(ElementwiseOperation&& operation)
|
int3 GPUOperation::GetGridSize() const {
|
||||||
: GPUOperation(std::move(operation)),
|
if (elementwise_) {
|
||||||
check_src_channels_size_(operation.check_src_channels_size_),
|
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||||
linkable_(operation.linkable_) {}
|
const int grid_y = dst_[0]->Height();
|
||||||
|
const int grid_z = dst_[0]->Slices();
|
||||||
ElementwiseOperation& ElementwiseOperation::operator=(
|
return int3(grid_x, grid_y, grid_z);
|
||||||
ElementwiseOperation&& operation) {
|
} else {
|
||||||
if (this != &operation) {
|
return int3(0, 0, 0);
|
||||||
check_src_channels_size_ = operation.check_src_channels_size_;
|
|
||||||
linkable_ = operation.linkable_;
|
|
||||||
GPUOperation::operator=(std::move(operation));
|
|
||||||
}
|
}
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int3 ElementwiseOperation::GetGridSize() const {
|
void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
|
||||||
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
|
||||||
const int grid_y = dst_[0]->Height();
|
|
||||||
const int grid_z = dst_[0]->Slices();
|
|
||||||
return int3(grid_x, grid_y, grid_z);
|
|
||||||
}
|
|
||||||
|
|
||||||
absl::Status ElementwiseOperation::Compile(
|
|
||||||
const CreationContext& creation_context) {
|
|
||||||
auto src_desc =
|
|
||||||
absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
|
|
||||||
if (definition_.IsBatchSupported()) {
|
|
||||||
src_desc->SetStateVar("BatchedWidth", "true");
|
|
||||||
}
|
|
||||||
src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
|
|
||||||
args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));
|
|
||||||
|
|
||||||
auto dst_desc =
|
|
||||||
absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
|
|
||||||
if (definition_.IsBatchSupported()) {
|
|
||||||
dst_desc->SetStateVar("BatchedWidth", "true");
|
|
||||||
}
|
|
||||||
dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
|
|
||||||
args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
|
|
||||||
|
|
||||||
std::string code = GetElementWiseCode(definition_, check_src_channels_size_);
|
|
||||||
std::string element_wise_code;
|
|
||||||
element_wise_code += "{\n" + code_ + "\n}\n";
|
|
||||||
RETURN_IF_ERROR(
|
|
||||||
MergeOperations(linked_operations_, &args_, &element_wise_code));
|
|
||||||
RETURN_IF_ERROR(args_.TransformToCLCode(
|
|
||||||
creation_context.device->GetInfo(),
|
|
||||||
{{dst_tensors_names_[0], element_wise_code}}, &code));
|
|
||||||
code = absl::Substitute(code, args_.GetListOfArgs());
|
|
||||||
return creation_context.cache->GetOrCreateCLKernel(
|
|
||||||
code, "main_function", *creation_context.context,
|
|
||||||
*creation_context.device, &kernel_);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ElementwiseOperation::AddUniquePostfix(const std::string& unique_postfix) {
|
|
||||||
for (int i = 0; i < src_tensors_names_.size(); ++i) {
|
for (int i = 0; i < src_tensors_names_.size(); ++i) {
|
||||||
src_tensors_names_[i] += unique_postfix;
|
src_tensors_names_[i] += unique_postfix;
|
||||||
}
|
}
|
||||||
@ -258,21 +267,6 @@ void ElementwiseOperation::AddUniquePostfix(const std::string& unique_postfix) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status MergeOperations(
|
|
||||||
const std::vector<ElementwiseOperation*>& linked_ops,
|
|
||||||
Arguments* merged_args, std::string* merged_code) {
|
|
||||||
for (int i = 0; i < linked_ops.size(); ++i) {
|
|
||||||
std::string code = linked_ops[i]->GetCode();
|
|
||||||
std::string unique_postfix = absl::StrCat("_link", i + 1);
|
|
||||||
auto&& link_args = linked_ops[i]->MoveArgs();
|
|
||||||
link_args.RenameArgs(unique_postfix, &code);
|
|
||||||
*merged_code += "{\n" + code + "\n}\n";
|
|
||||||
RETURN_IF_ERROR(merged_args->Merge(std::move(link_args), unique_postfix));
|
|
||||||
linked_ops[i]->AddUniquePostfix(unique_postfix);
|
|
||||||
}
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
@ -59,18 +59,15 @@ struct OperationDef {
|
|||||||
bool IsBatchSupported() const;
|
bool IsBatchSupported() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ElementwiseOperation;
|
|
||||||
|
|
||||||
// GPUOperation represents some implementation of neural network operation on
|
// GPUOperation represents some implementation of neural network operation on
|
||||||
// GPU. GPUOperation can contain ElementwiseOperation operations, in this case,
|
// GPU. GPUOperation can contain another GPU operations with flag elementwise_.
|
||||||
// ElementwiseOperation still hold necessary data and should be alive.
|
// When GPUOperation contains another GPU ops, this GPUoperation replaces
|
||||||
// When GPUOperation contains ElementwiseOperations, this GPUoperation replaces
|
// some sequence of operations Op + op0 + op1 + ...
|
||||||
// some sequence of operations Op + el_op0 + el_op1 + ...
|
|
||||||
// Because of this abilities of GPUOperation, usage scenario is next:
|
// Because of this abilities of GPUOperation, usage scenario is next:
|
||||||
// Create instance of GPUOperation.
|
// Create instance of GPUOperation.
|
||||||
// Create all instances of ElementwiseOperations that we will(probably) attach
|
// Create all instances of GPUOperations that we will(probably) attach
|
||||||
// to GPUOperation. Attach all ElementwiseOperations to GPUOperation. Call
|
// to GPUOperation. Attach all GPUOperations to GPUOperation. Call
|
||||||
// GPUOperation.Compile(). Don't call ElementwiseOperation.Compile() if it
|
// GPUOperation.Compile(). Don't call GPUOperations.Compile() if it
|
||||||
// attached, it useless(and may be error)
|
// attached, it useless(and may be error)
|
||||||
class GPUOperation {
|
class GPUOperation {
|
||||||
public:
|
public:
|
||||||
@ -83,7 +80,7 @@ class GPUOperation {
|
|||||||
GPUOperation(const GPUOperation&) = delete;
|
GPUOperation(const GPUOperation&) = delete;
|
||||||
GPUOperation& operator=(const GPUOperation&) = delete;
|
GPUOperation& operator=(const GPUOperation&) = delete;
|
||||||
|
|
||||||
void AddOperation(ElementwiseOperation* operation);
|
void AddOperation(GPUOperation* operation);
|
||||||
|
|
||||||
void SetSrc(Tensor* ptr, int index = 0);
|
void SetSrc(Tensor* ptr, int index = 0);
|
||||||
void SetDst(Tensor* ptr, int index = 0);
|
void SetDst(Tensor* ptr, int index = 0);
|
||||||
@ -116,64 +113,37 @@ class GPUOperation {
|
|||||||
void AddDstTensor(const std::string& tensor_name,
|
void AddDstTensor(const std::string& tensor_name,
|
||||||
const TensorDescriptor& desc);
|
const TensorDescriptor& desc);
|
||||||
|
|
||||||
|
bool IsLinkable() const { return elementwise_ && linkable_; }
|
||||||
|
|
||||||
|
// for linking
|
||||||
|
void AddUniquePostfix(const std::string& unique_postfix);
|
||||||
|
|
||||||
|
Arguments args_;
|
||||||
|
std::string code_;
|
||||||
|
|
||||||
|
bool elementwise_ = false;
|
||||||
|
// applicable only with elementwise_ = true;
|
||||||
|
bool linkable_ = true; // by default every elementwise is linkable
|
||||||
|
// applicable only with elementwise_ = true;
|
||||||
|
bool check_src_channels_size_ = false;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual absl::Status BindArguments() { return absl::OkStatus(); }
|
virtual absl::Status BindArguments() { return absl::OkStatus(); }
|
||||||
virtual int3 GetGridSize() const = 0;
|
virtual int3 GetGridSize() const;
|
||||||
|
|
||||||
// Defines operation calculation precision and format of src/dst tensors.
|
// Defines operation calculation precision and format of src/dst tensors.
|
||||||
OperationDef definition_;
|
OperationDef definition_;
|
||||||
std::vector<Tensor*> src_;
|
std::vector<Tensor*> src_;
|
||||||
std::vector<Tensor*> dst_;
|
std::vector<Tensor*> dst_;
|
||||||
Arguments args_;
|
|
||||||
CLKernel kernel_;
|
CLKernel kernel_;
|
||||||
int3 work_group_size_ = int3(8, 4, 1);
|
int3 work_group_size_ = int3(8, 4, 1);
|
||||||
int3 grid_size_ = int3(0, 0, 0);
|
int3 grid_size_ = int3(0, 0, 0);
|
||||||
std::string code_;
|
|
||||||
std::vector<std::string> src_tensors_names_;
|
std::vector<std::string> src_tensors_names_;
|
||||||
std::vector<std::string> dst_tensors_names_;
|
std::vector<std::string> dst_tensors_names_;
|
||||||
std::vector<CompilerOptions> compiler_options_;
|
std::vector<CompilerOptions> compiler_options_;
|
||||||
std::vector<ElementwiseOperation*> linked_operations_;
|
std::vector<GPUOperation*> linked_operations_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// ElementwiseOperation can be fused(linked) to another operation.
|
|
||||||
// field linked_ indicate about this
|
|
||||||
// link_index_ used mostly for generating of correct names for
|
|
||||||
// linked code variables
|
|
||||||
// link_index_ is number of operation in sequence of linked operations
|
|
||||||
// and should be unique in this sequence
|
|
||||||
// link_index_ = 0 is equivalent that operation not linked.
|
|
||||||
class ElementwiseOperation : public GPUOperation {
|
|
||||||
public:
|
|
||||||
ElementwiseOperation() {}
|
|
||||||
explicit ElementwiseOperation(const OperationDef& definition)
|
|
||||||
: GPUOperation(definition) {}
|
|
||||||
|
|
||||||
virtual ~ElementwiseOperation() {}
|
|
||||||
|
|
||||||
absl::Status Compile(const CreationContext& creation_context) override;
|
|
||||||
int3 GetGridSize() const override;
|
|
||||||
|
|
||||||
// Move only
|
|
||||||
ElementwiseOperation(ElementwiseOperation&& operation);
|
|
||||||
ElementwiseOperation& operator=(ElementwiseOperation&& operation);
|
|
||||||
ElementwiseOperation(const ElementwiseOperation&) = delete;
|
|
||||||
ElementwiseOperation& operator=(const ElementwiseOperation&) = delete;
|
|
||||||
|
|
||||||
Arguments&& MoveArgs() { return std::move(args_); }
|
|
||||||
std::string GetCode() const { return code_; }
|
|
||||||
void AddUniquePostfix(const std::string& unique_postfix);
|
|
||||||
|
|
||||||
bool IsLinkable() const { return linkable_; }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
bool check_src_channels_size_ = false;
|
|
||||||
bool linkable_ = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
absl::Status MergeOperations(
|
|
||||||
const std::vector<ElementwiseOperation*>& linked_ops,
|
|
||||||
Arguments* merged_args, std::string* merged_code);
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
@ -24,47 +24,43 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
|
||||||
PReLU::PReLU(const OperationDef& definition, const PReLUAttributes& attr,
|
absl::Status CreatePReLU(const CreationContext& creation_context,
|
||||||
CalculationsPrecision scalar_precision)
|
const OperationDef& definition,
|
||||||
: ElementwiseOperation(definition) {
|
const PReLUAttributes& attr, GPUOperation* result) {
|
||||||
|
*result = GPUOperation(definition);
|
||||||
|
result->elementwise_ = true;
|
||||||
if (attr.clip != 0) {
|
if (attr.clip != 0) {
|
||||||
if (definition.precision == CalculationsPrecision::F32) {
|
if (definition.precision == CalculationsPrecision::F32) {
|
||||||
args_.AddFloat("clip", attr.clip);
|
result->args_.AddFloat("clip", attr.clip);
|
||||||
} else {
|
} else {
|
||||||
args_.AddHalf("clip", half(attr.clip));
|
result->args_.AddHalf("clip", half(attr.clip));
|
||||||
}
|
}
|
||||||
code_ =
|
result->code_ =
|
||||||
"in_out_value = clamp(in_out_value, (FLT4)(0.0f), (FLT4)(args.clip)) + "
|
"in_out_value = clamp(in_out_value, (FLT4)(0.0f), (FLT4)(args.clip)) + "
|
||||||
"min((FLT4)(0.0f), in_out_value) * args.alpha.Read(S_COORD);";
|
"min((FLT4)(0.0f), in_out_value) * args.alpha.Read(S_COORD);";
|
||||||
} else {
|
} else {
|
||||||
code_ =
|
result->code_ =
|
||||||
"in_out_value = max((FLT4)(0.0f), in_out_value) + min((FLT4)(0.0f), "
|
"in_out_value = max((FLT4)(0.0f), in_out_value) + min((FLT4)(0.0f), "
|
||||||
"in_out_value) * args.alpha.Read(S_COORD);";
|
"in_out_value) * args.alpha.Read(S_COORD);";
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
PReLU::PReLU(PReLU&& operation) : ElementwiseOperation(std::move(operation)) {}
|
|
||||||
|
|
||||||
PReLU& PReLU::operator=(PReLU&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
absl::Status CreatePReLU(const CreationContext& creation_context,
|
|
||||||
const OperationDef& definition,
|
|
||||||
const PReLUAttributes& attr, PReLU* result) {
|
|
||||||
auto alpha =
|
auto alpha =
|
||||||
absl::get_if<tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(&attr.alpha);
|
absl::get_if<tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(&attr.alpha);
|
||||||
if (!alpha) {
|
if (!alpha) {
|
||||||
return absl::InvalidArgumentError("Alpha is missing");
|
return absl::InvalidArgumentError("Alpha is missing");
|
||||||
}
|
}
|
||||||
const auto scalar_precision = creation_context.device->IsPowerVR()
|
TensorLinearDescriptor desc;
|
||||||
? CalculationsPrecision::F32
|
desc.storage_type =
|
||||||
: definition.precision;
|
DeduceLinearStorageType(definition.GetPrimaryStorageType());
|
||||||
*result = PReLU(definition, attr, scalar_precision);
|
desc.element_type = definition.GetPrimaryDataType();
|
||||||
RETURN_IF_ERROR(result->UploadParameters(*alpha, creation_context.context));
|
|
||||||
|
LinearStorage lt;
|
||||||
|
RETURN_IF_ERROR(
|
||||||
|
CreateLinearStorage(desc, *alpha, creation_context.context, &lt));
|
||||||
|
result->args_.AddObject("alpha", AccessType::READ,
|
||||||
|
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||||
|
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||||
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,48 +31,9 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
|
||||||
class PReLU : public ElementwiseOperation {
|
|
||||||
public:
|
|
||||||
PReLU() = default;
|
|
||||||
// Move only
|
|
||||||
PReLU(PReLU&& operation);
|
|
||||||
PReLU& operator=(PReLU&& operation);
|
|
||||||
PReLU(const PReLU&) = delete;
|
|
||||||
PReLU& operator=(const PReLU&) = delete;
|
|
||||||
|
|
||||||
friend absl::Status CreatePReLU(const CreationContext& creation_context,
|
|
||||||
const OperationDef& definition,
|
|
||||||
const PReLUAttributes& attr, PReLU* result);
|
|
||||||
|
|
||||||
private:
|
|
||||||
PReLU(const OperationDef& definition, const PReLUAttributes& attr,
|
|
||||||
CalculationsPrecision scalar_precision);
|
|
||||||
|
|
||||||
template <DataType T>
|
|
||||||
absl::Status UploadParameters(
|
|
||||||
const tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context);
|
|
||||||
};
|
|
||||||
|
|
||||||
absl::Status CreatePReLU(const CreationContext& creation_context,
|
absl::Status CreatePReLU(const CreationContext& creation_context,
|
||||||
const OperationDef& definition,
|
const OperationDef& definition,
|
||||||
const PReLUAttributes& attr, PReLU* result);
|
const PReLUAttributes& attr, GPUOperation* result);
|
||||||
|
|
||||||
template <DataType T>
|
|
||||||
absl::Status PReLU::UploadParameters(
|
|
||||||
const tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context) {
|
|
||||||
TensorLinearDescriptor desc;
|
|
||||||
desc.storage_type =
|
|
||||||
DeduceLinearStorageType(definition_.GetPrimaryStorageType());
|
|
||||||
desc.element_type = definition_.GetPrimaryDataType();
|
|
||||||
|
|
||||||
LinearStorage lt;
|
|
||||||
RETURN_IF_ERROR(CreateLinearStorage(desc, parameters, context, &lt));
|
|
||||||
args_.AddObject("alpha", AccessType::READ,
|
|
||||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
|
||||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
|
||||||
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -52,7 +52,7 @@ TEST_F(OpenCLOperationTest, PReLUAlpha) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
PReLU operation;
|
GPUOperation operation;
|
||||||
ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
|
ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
@ -83,7 +83,7 @@ TEST_F(OpenCLOperationTest, PReLUAlphaClip) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
PReLU operation;
|
GPUOperation operation;
|
||||||
ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
|
ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
|
@ -25,59 +25,37 @@ limitations under the License.
|
|||||||
namespace tflite {
|
namespace tflite {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
GPUOperation CreateQuantizeAndDequantize(
|
||||||
QuantizeAndDequantize::QuantizeAndDequantize(
|
|
||||||
const OperationDef& definition, const QuantizeAndDequantizeAttributes& attr,
|
|
||||||
CalculationsPrecision scalar_precision)
|
|
||||||
: ElementwiseOperation(definition) {
|
|
||||||
if (definition.precision == CalculationsPrecision::F32) {
|
|
||||||
args_.AddFloat("min", attr.min);
|
|
||||||
args_.AddFloat("max", attr.max);
|
|
||||||
args_.AddFloat("scale", attr.scale);
|
|
||||||
} else {
|
|
||||||
args_.AddHalf("min", half(attr.min));
|
|
||||||
args_.AddHalf("max", half(attr.max));
|
|
||||||
args_.AddHalf("scale", half(attr.scale));
|
|
||||||
}
|
|
||||||
code_ = R"(
|
|
||||||
FLT4 clamped_value = min((FLT4)(args.max), max((FLT4)(args.min), in_out_value));
|
|
||||||
FLT4 quantized_value = round((clamped_value - (FLT4)(args.min)) / (FLT4)(args.scale));
|
|
||||||
FLT4 dequantized_value = quantized_value * (FLT4)(args.scale) + (FLT4)(args.min);
|
|
||||||
in_out_value = dequantized_value;)";
|
|
||||||
}
|
|
||||||
|
|
||||||
QuantizeAndDequantize::QuantizeAndDequantize(QuantizeAndDequantize&& operation)
|
|
||||||
: ElementwiseOperation(std::move(operation)) {}
|
|
||||||
|
|
||||||
QuantizeAndDequantize& QuantizeAndDequantize::operator=(
|
|
||||||
QuantizeAndDequantize&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
absl::Status CreateQuantizeAndDequantize(
|
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const QuantizeAndDequantizeAttributes& attr,
|
const QuantizeAndDequantizeAttributes& attr) {
|
||||||
QuantizeAndDequantize* result) {
|
QuantizeAndDequantizeAttributes adjusted_attr = attr;
|
||||||
const auto scalar_precision = creation_context.device->IsPowerVR()
|
|
||||||
? CalculationsPrecision::F32
|
|
||||||
: definition.precision;
|
|
||||||
const bool is_fp16 = definition.precision == CalculationsPrecision::F16 ||
|
const bool is_fp16 = definition.precision == CalculationsPrecision::F16 ||
|
||||||
definition.precision == CalculationsPrecision::F32_F16;
|
definition.precision == CalculationsPrecision::F32_F16;
|
||||||
if (is_fp16 && attr.scale < 0.000062f) {
|
if (is_fp16 && attr.scale < 0.000062f) {
|
||||||
// The smallest positive normal number for Half-precision floating-point
|
// The smallest positive normal number for Half-precision floating-point
|
||||||
// format is 2^-14 ~ 0.000062f. Therefore, if the scale is lesser than this
|
// format is 2^-14 ~ 0.000062f. Therefore, if the scale is lesser than this
|
||||||
// number, we just reset it accordingly.
|
// number, we just reset it accordingly.
|
||||||
QuantizeAndDequantizeAttributes adjusted_attr = attr;
|
|
||||||
adjusted_attr.scale = 0.000062f;
|
adjusted_attr.scale = 0.000062f;
|
||||||
*result =
|
|
||||||
QuantizeAndDequantize(definition, adjusted_attr, scalar_precision);
|
|
||||||
} else {
|
|
||||||
*result = QuantizeAndDequantize(definition, attr, scalar_precision);
|
|
||||||
}
|
}
|
||||||
return absl::OkStatus();
|
|
||||||
|
GPUOperation op(definition);
|
||||||
|
op.elementwise_ = true;
|
||||||
|
if (definition.precision == CalculationsPrecision::F32) {
|
||||||
|
op.args_.AddFloat("min", adjusted_attr.min);
|
||||||
|
op.args_.AddFloat("max", adjusted_attr.max);
|
||||||
|
op.args_.AddFloat("scale", adjusted_attr.scale);
|
||||||
|
} else {
|
||||||
|
op.args_.AddHalf("min", half(adjusted_attr.min));
|
||||||
|
op.args_.AddHalf("max", half(adjusted_attr.max));
|
||||||
|
op.args_.AddHalf("scale", half(adjusted_attr.scale));
|
||||||
|
}
|
||||||
|
op.code_ = R"(
|
||||||
|
FLT4 clamped_value = min((FLT4)(args.max), max((FLT4)(args.min), in_out_value));
|
||||||
|
FLT4 quantized_value = round((clamped_value - (FLT4)(args.min)) / (FLT4)(args.scale));
|
||||||
|
FLT4 dequantized_value = quantized_value * (FLT4)(args.scale) + (FLT4)(args.min);
|
||||||
|
in_out_value = dequantized_value;)";
|
||||||
|
|
||||||
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
@ -43,43 +43,9 @@ namespace cl {
|
|||||||
//
|
//
|
||||||
// NOTE: We do not need to nudge min/max values in this op, since they would
|
// NOTE: We do not need to nudge min/max values in this op, since they would
|
||||||
// already be adjusted while generating the quantized model.
|
// already be adjusted while generating the quantized model.
|
||||||
class QuantizeAndDequantize : public ElementwiseOperation {
|
GPUOperation CreateQuantizeAndDequantize(
|
||||||
public:
|
|
||||||
QuantizeAndDequantize() = default;
|
|
||||||
// Move only
|
|
||||||
QuantizeAndDequantize(QuantizeAndDequantize&& operation);
|
|
||||||
QuantizeAndDequantize& operator=(QuantizeAndDequantize&& operation);
|
|
||||||
QuantizeAndDequantize(const QuantizeAndDequantize&) = delete;
|
|
||||||
QuantizeAndDequantize& operator=(const QuantizeAndDequantize&) = delete;
|
|
||||||
|
|
||||||
friend absl::Status CreateQuantizeAndDequantize(
|
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
|
||||||
const QuantizeAndDequantizeAttributes& attr,
|
|
||||||
QuantizeAndDequantize* result);
|
|
||||||
|
|
||||||
private:
|
|
||||||
QuantizeAndDequantize(const OperationDef& definition,
|
|
||||||
const QuantizeAndDequantizeAttributes& attr,
|
|
||||||
CalculationsPrecision scalar_precision);
|
|
||||||
|
|
||||||
template <DataType T>
|
|
||||||
absl::Status UploadParameters(
|
|
||||||
const tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context);
|
|
||||||
};
|
|
||||||
|
|
||||||
absl::Status CreateQuantizeAndDequantize(
|
|
||||||
const CreationContext& creation_context, const OperationDef& definition,
|
const CreationContext& creation_context, const OperationDef& definition,
|
||||||
const QuantizeAndDequantizeAttributes& attr, QuantizeAndDequantize* result);
|
const QuantizeAndDequantizeAttributes& attr);
|
||||||
|
|
||||||
template <DataType T>
|
|
||||||
absl::Status QuantizeAndDequantize::UploadParameters(
|
|
||||||
const tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context) {
|
|
||||||
LinearStorageCreateInfo create_info;
|
|
||||||
create_info.storage_type =
|
|
||||||
DeduceLinearStorageType(definition_.GetPrimaryStorageType());
|
|
||||||
create_info.data_type = definition_.GetPrimaryDataType();
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -56,9 +56,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits8) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
QuantizeAndDequantize operation;
|
GPUOperation operation =
|
||||||
ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
|
CreateQuantizeAndDequantize(creation_context_, op_def, attr);
|
||||||
&operation));
|
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 3, 2, 1), &dst_tensor));
|
BHWC(1, 3, 2, 1), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -92,9 +91,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits8_NegativeRange) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
QuantizeAndDequantize operation;
|
GPUOperation operation =
|
||||||
ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
|
CreateQuantizeAndDequantize(creation_context_, op_def, attr);
|
||||||
&operation));
|
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 3, 1, 2), &dst_tensor));
|
BHWC(1, 3, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -128,9 +126,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits16) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
QuantizeAndDequantize operation;
|
GPUOperation operation =
|
||||||
ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
|
CreateQuantizeAndDequantize(creation_context_, op_def, attr);
|
||||||
&operation));
|
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 3, 1, 2), &dst_tensor));
|
BHWC(1, 3, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -164,9 +161,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits16_NegativeRange) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
QuantizeAndDequantize operation;
|
GPUOperation operation =
|
||||||
ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
|
CreateQuantizeAndDequantize(creation_context_, op_def, attr);
|
||||||
&operation));
|
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 3, 2, 1), &dst_tensor));
|
BHWC(1, 3, 2, 1), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
|
@ -21,50 +21,36 @@ limitations under the License.
|
|||||||
namespace tflite {
|
namespace tflite {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
GPUOperation CreateReLU(const CreationContext& creation_context,
|
||||||
|
const OperationDef& definition,
|
||||||
|
const ReLUAttributes& attr) {
|
||||||
|
GPUOperation op(definition);
|
||||||
|
op.elementwise_ = true;
|
||||||
|
|
||||||
ReLU::ReLU(const OperationDef& definition, const ReLUAttributes& attr,
|
|
||||||
CalculationsPrecision scalar_precision)
|
|
||||||
: ElementwiseOperation(definition) {
|
|
||||||
std::string min_func;
|
std::string min_func;
|
||||||
if (attr.alpha != 0.0f) {
|
if (attr.alpha != 0.0f) {
|
||||||
min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))";
|
min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))";
|
||||||
if (definition.precision == CalculationsPrecision::F32) {
|
if (definition.precision == CalculationsPrecision::F32) {
|
||||||
args_.AddFloat("alpha", attr.alpha);
|
op.args_.AddFloat("alpha", attr.alpha);
|
||||||
} else {
|
} else {
|
||||||
args_.AddHalf("alpha", half(attr.alpha));
|
op.args_.AddHalf("alpha", half(attr.alpha));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
min_func = "(FLT)(0.0f)";
|
min_func = "(FLT)(0.0f)";
|
||||||
}
|
}
|
||||||
if (attr.clip != 0.0f) {
|
if (attr.clip != 0.0f) {
|
||||||
if (definition.precision == CalculationsPrecision::F32) {
|
if (definition.precision == CalculationsPrecision::F32) {
|
||||||
args_.AddFloat("clip", attr.clip);
|
op.args_.AddFloat("clip", attr.clip);
|
||||||
} else {
|
} else {
|
||||||
args_.AddHalf("clip", half(attr.clip));
|
op.args_.AddHalf("clip", half(attr.clip));
|
||||||
}
|
}
|
||||||
code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func +
|
op.code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func +
|
||||||
", args.clip);");
|
", args.clip);");
|
||||||
} else {
|
} else {
|
||||||
code_ = absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");");
|
op.code_ =
|
||||||
|
absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");");
|
||||||
}
|
}
|
||||||
}
|
return op;
|
||||||
|
|
||||||
ReLU::ReLU(ReLU&& operation) : ElementwiseOperation(std::move(operation)) {}
|
|
||||||
|
|
||||||
ReLU& ReLU::operator=(ReLU&& operation) {
|
|
||||||
if (this != &operation) {
|
|
||||||
ElementwiseOperation::operator=(std::move(operation));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ReLU CreateReLU(const CreationContext& creation_context,
|
|
||||||
const OperationDef& definition, const ReLUAttributes& attr) {
|
|
||||||
const auto scalar_precision = creation_context.device->IsPowerVR()
|
|
||||||
? CalculationsPrecision::F32
|
|
||||||
: definition.precision;
|
|
||||||
ReLU operation(definition, attr, scalar_precision);
|
|
||||||
return operation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
@ -25,25 +25,9 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
|
|
||||||
class ReLU : public ElementwiseOperation {
|
GPUOperation CreateReLU(const CreationContext& creation_context,
|
||||||
public:
|
const OperationDef& definition,
|
||||||
// Move only
|
const ReLUAttributes& attr);
|
||||||
ReLU(ReLU&& operation);
|
|
||||||
ReLU& operator=(ReLU&& operation);
|
|
||||||
ReLU(const ReLU&) = delete;
|
|
||||||
ReLU& operator=(const ReLU&) = delete;
|
|
||||||
|
|
||||||
friend ReLU CreateReLU(const CreationContext& creation_context,
|
|
||||||
const OperationDef& definition,
|
|
||||||
const ReLUAttributes& attr);
|
|
||||||
|
|
||||||
private:
|
|
||||||
ReLU(const OperationDef& definition, const ReLUAttributes& attr,
|
|
||||||
CalculationsPrecision scalar_precision);
|
|
||||||
};
|
|
||||||
|
|
||||||
ReLU CreateReLU(const CreationContext& creation_context,
|
|
||||||
const OperationDef& definition, const ReLUAttributes& attr);
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -49,7 +49,7 @@ TEST_F(OpenCLOperationTest, ReLUNoClipNoAlpha) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ReLU operation = CreateReLU(creation_context_, op_def, attr);
|
GPUOperation operation = CreateReLU(creation_context_, op_def, attr);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -76,7 +76,7 @@ TEST_F(OpenCLOperationTest, ReLUClip) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ReLU operation = CreateReLU(creation_context_, op_def, attr);
|
GPUOperation operation = CreateReLU(creation_context_, op_def, attr);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -103,7 +103,7 @@ TEST_F(OpenCLOperationTest, ReLUAlpha) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ReLU operation = CreateReLU(creation_context_, op_def, attr);
|
GPUOperation operation = CreateReLU(creation_context_, op_def, attr);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
@ -130,7 +130,7 @@ TEST_F(OpenCLOperationTest, ReLUAlphaClip) {
|
|||||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||||
TensorFloat32 dst_tensor;
|
TensorFloat32 dst_tensor;
|
||||||
ReLU operation = CreateReLU(creation_context_, op_def, attr);
|
GPUOperation operation = CreateReLU(creation_context_, op_def, attr);
|
||||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||||
BHWC(1, 2, 1, 2), &dst_tensor));
|
BHWC(1, 2, 1, 2), &dst_tensor));
|
||||||
EXPECT_THAT(dst_tensor.data,
|
EXPECT_THAT(dst_tensor.data,
|
||||||
|
@ -144,9 +144,9 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
if (inputs.size() == 2 &&
|
if (inputs.size() == 2 &&
|
||||||
(inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
|
(inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
|
||||||
inputs[1]->tensor.shape.c == 1)) {
|
inputs[1]->tensor.shape.c == 1)) {
|
||||||
ElementwiseTwoInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
|
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
|
||||||
*gpu_op = absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (inputs.size() >= 2) {
|
} else if (inputs.size() >= 2) {
|
||||||
auto output = outputs[0];
|
auto output = outputs[0];
|
||||||
@ -167,25 +167,21 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(
|
absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(
|
||||||
&attr.param);
|
&attr.param);
|
||||||
if (scalar) {
|
if (scalar) {
|
||||||
ElementwiseOneRuntimeOneScalar operation =
|
GPUOperation operation = CreateElementwiseOneRuntimeOneScalar(
|
||||||
CreateElementwiseOneRuntimeOneScalar(creation_context, op_def,
|
creation_context, op_def, op_type, *scalar);
|
||||||
op_type, *scalar);
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
*gpu_op = absl::make_unique<ElementwiseOneRuntimeOneScalar>(
|
|
||||||
std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (linear_tensor) {
|
} else if (linear_tensor) {
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
||||||
creation_context, op_def, op_type, *linear_tensor, &operation));
|
creation_context, op_def, op_type, *linear_tensor, &operation));
|
||||||
*gpu_op =
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (hwc_tensor) {
|
} else if (hwc_tensor) {
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
||||||
creation_context, op_def, op_type, *hwc_tensor, &operation));
|
creation_context, op_def, op_type, *hwc_tensor, &operation));
|
||||||
*gpu_op =
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -295,9 +291,9 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
}
|
}
|
||||||
case OperationType::MUL: {
|
case OperationType::MUL: {
|
||||||
if (inputs.size() == 2) {
|
if (inputs.size() == 2) {
|
||||||
ElementwiseTwoInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
|
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
|
||||||
*gpu_op = absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
|
} else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
|
||||||
auto attr =
|
auto attr =
|
||||||
@ -310,25 +306,21 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(
|
absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(
|
||||||
&attr.param);
|
&attr.param);
|
||||||
if (scalar) {
|
if (scalar) {
|
||||||
ElementwiseOneRuntimeOneScalar operation =
|
GPUOperation operation = CreateElementwiseOneRuntimeOneScalar(
|
||||||
CreateElementwiseOneRuntimeOneScalar(creation_context, op_def,
|
creation_context, op_def, op_type, *scalar);
|
||||||
op_type, *scalar);
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
*gpu_op = absl::make_unique<ElementwiseOneRuntimeOneScalar>(
|
|
||||||
std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (linear_tensor) {
|
} else if (linear_tensor) {
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
||||||
creation_context, op_def, op_type, *linear_tensor, &operation));
|
creation_context, op_def, op_type, *linear_tensor, &operation));
|
||||||
*gpu_op =
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (hwc_tensor) {
|
} else if (hwc_tensor) {
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
||||||
creation_context, op_def, op_type, *hwc_tensor, &operation));
|
creation_context, op_def, op_type, *hwc_tensor, &operation));
|
||||||
*gpu_op =
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -353,8 +345,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
case OperationType::QUANTIZE_AND_DEQUANTIZE: {
|
case OperationType::QUANTIZE_AND_DEQUANTIZE: {
|
||||||
auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
|
auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
|
||||||
node.operation.attributes);
|
node.operation.attributes);
|
||||||
return SelectQuantizeAndDequantize(attr, creation_context, op_def,
|
SelectQuantizeAndDequantize(attr, creation_context, op_def, gpu_op);
|
||||||
gpu_op);
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
case OperationType::RELU: {
|
case OperationType::RELU: {
|
||||||
auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
|
auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
|
||||||
@ -405,9 +397,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
case OperationType::SQRT:
|
case OperationType::SQRT:
|
||||||
case OperationType::SQUARE:
|
case OperationType::SQUARE:
|
||||||
case OperationType::TANH: {
|
case OperationType::TANH: {
|
||||||
ElementwiseOneInput operation =
|
GPUOperation operation = CreateElementwiseOneInput(op_def, op_type);
|
||||||
CreateElementwiseOneInput(op_def, op_type);
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
*gpu_op = absl::make_unique<ElementwiseOneInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
case OperationType::DIV:
|
case OperationType::DIV:
|
||||||
@ -417,9 +408,9 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
case OperationType::SQUARED_DIFF:
|
case OperationType::SQUARED_DIFF:
|
||||||
case OperationType::SUB: {
|
case OperationType::SUB: {
|
||||||
if (inputs.size() == 2) {
|
if (inputs.size() == 2) {
|
||||||
ElementwiseTwoInput operation =
|
GPUOperation operation =
|
||||||
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
|
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
|
||||||
*gpu_op = absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
|
} else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
|
||||||
auto attr =
|
auto attr =
|
||||||
@ -432,25 +423,21 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(
|
absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(
|
||||||
&attr.param);
|
&attr.param);
|
||||||
if (scalar) {
|
if (scalar) {
|
||||||
ElementwiseOneRuntimeOneScalar operation =
|
GPUOperation operation = CreateElementwiseOneRuntimeOneScalar(
|
||||||
CreateElementwiseOneRuntimeOneScalar(creation_context, op_def,
|
creation_context, op_def, op_type, *scalar);
|
||||||
op_type, *scalar);
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
*gpu_op = absl::make_unique<ElementwiseOneRuntimeOneScalar>(
|
|
||||||
std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (linear_tensor) {
|
} else if (linear_tensor) {
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
||||||
creation_context, op_def, op_type, *linear_tensor, &operation));
|
creation_context, op_def, op_type, *linear_tensor, &operation));
|
||||||
*gpu_op =
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
} else if (hwc_tensor) {
|
} else if (hwc_tensor) {
|
||||||
ElementwiseTwoInput operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
RETURN_IF_ERROR(CreateElementwiseTwoInput(
|
||||||
creation_context, op_def, op_type, *hwc_tensor, &operation));
|
creation_context, op_def, op_type, *hwc_tensor, &operation));
|
||||||
*gpu_op =
|
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
absl::make_unique<ElementwiseTwoInput>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,17 +54,17 @@ void SelectLSTM(const OperationDef& op_def, const DeviceInfo& device_info,
|
|||||||
void SelectReLU(const CreationContext& creation_context,
|
void SelectReLU(const CreationContext& creation_context,
|
||||||
const ReLUAttributes& attr, const OperationDef& op_def,
|
const ReLUAttributes& attr, const OperationDef& op_def,
|
||||||
std::unique_ptr<GPUOperation>* ptr) {
|
std::unique_ptr<GPUOperation>* ptr) {
|
||||||
ReLU relu = CreateReLU(creation_context, op_def, attr);
|
GPUOperation relu = CreateReLU(creation_context, op_def, attr);
|
||||||
*ptr = absl::make_unique<ReLU>(std::move(relu));
|
*ptr = absl::make_unique<GPUOperation>(std::move(relu));
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status SelectPReLU(const PReLUAttributes& attr,
|
absl::Status SelectPReLU(const PReLUAttributes& attr,
|
||||||
const CreationContext& creation_context,
|
const CreationContext& creation_context,
|
||||||
const OperationDef& op_def,
|
const OperationDef& op_def,
|
||||||
std::unique_ptr<GPUOperation>* ptr) {
|
std::unique_ptr<GPUOperation>* ptr) {
|
||||||
PReLU operation;
|
GPUOperation operation;
|
||||||
RETURN_IF_ERROR(CreatePReLU(creation_context, op_def, attr, &operation));
|
RETURN_IF_ERROR(CreatePReLU(creation_context, op_def, attr, &operation));
|
||||||
*ptr = absl::make_unique<PReLU>(std::move(operation));
|
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,8 +85,8 @@ void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr,
|
|||||||
|
|
||||||
void SelectAdd(const OperationDef& op_def, const std::vector<int>& channels,
|
void SelectAdd(const OperationDef& op_def, const std::vector<int>& channels,
|
||||||
int dst_channels, std::unique_ptr<GPUOperation>* ptr) {
|
int dst_channels, std::unique_ptr<GPUOperation>* ptr) {
|
||||||
Add operation = CreateAdd(op_def, channels, dst_channels);
|
GPUOperation operation = CreateAdd(op_def, channels, dst_channels);
|
||||||
*ptr = absl::make_unique<Add>(std::move(operation));
|
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status SelectResize(const Resize2DAttributes& attr,
|
absl::Status SelectResize(const Resize2DAttributes& attr,
|
||||||
@ -203,15 +203,13 @@ absl::Status SelectWinograd36To4x4(
|
|||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status SelectQuantizeAndDequantize(
|
void SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr,
|
||||||
const QuantizeAndDequantizeAttributes& attr,
|
const CreationContext& creation_context,
|
||||||
const CreationContext& creation_context, const OperationDef& op_def,
|
const OperationDef& op_def,
|
||||||
std::unique_ptr<GPUOperation>* ptr) {
|
std::unique_ptr<GPUOperation>* ptr) {
|
||||||
QuantizeAndDequantize operation;
|
GPUOperation operation =
|
||||||
RETURN_IF_ERROR(
|
CreateQuantizeAndDequantize(creation_context, op_def, attr);
|
||||||
CreateQuantizeAndDequantize(creation_context, op_def, attr, &operation));
|
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
|
||||||
*ptr = absl::make_unique<QuantizeAndDequantize>(std::move(operation));
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
@ -97,10 +97,10 @@ absl::Status SelectWinograd36To4x4(
|
|||||||
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases,
|
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases,
|
||||||
std::unique_ptr<GPUOperation>* ptr);
|
std::unique_ptr<GPUOperation>* ptr);
|
||||||
|
|
||||||
absl::Status SelectQuantizeAndDequantize(
|
void SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr,
|
||||||
const QuantizeAndDequantizeAttributes& attr,
|
const CreationContext& creation_context,
|
||||||
const CreationContext& creation_context, const OperationDef& op_def,
|
const OperationDef& op_def,
|
||||||
std::unique_ptr<GPUOperation>* ptr);
|
std::unique_ptr<GPUOperation>* ptr);
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
Loading…
x
Reference in New Issue
Block a user