Removed useless Status and CreationContext from convolution transposed kernels.

PiperOrigin-RevId: 327367790
Change-Id: I0f1e3a6e38900674f87a3def14325c1a2e40956a
Raman Sarokin authored on 2020-08-18 21:32:13 -07:00, committed by TensorFlower Gardener
parent b829e9ace3
commit 3b9cb438e5
20 changed files with 174 additions and 267 deletions
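
In short, the factory functions for these kernels now return the constructed operation by value instead of reporting absl::Status through an out parameter, and they take a DeviceInfo instead of a full CreationContext. A minimal before/after sketch of a call site, modeled on the test changes further down (creation_context_, op_def and attr are names taken from those tests and assumed to exist):

// Illustrative sketch of the call-site change implied by this commit.
// Before: status return plus out-parameter, full CreationContext.
ConvolutionTransposed4x4 operation;
ASSERT_OK(CreateConvolutionTransposed4x4(creation_context_, op_def, attr,
                                         &operation));

// After: the factory returns the operation and needs only DeviceInfo.
// The attribute check moves to the caller, which is expected to test
// IsConvolutionTransposed4x4Supported(op_def, attr) first (see the selector diff).
ConvolutionTransposed4x4 operation = CreateConvolutionTransposed4x4(
    creation_context_.GetDeviceInfo(), op_def, attr);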


@@ -358,23 +358,20 @@ void ConvolutionTransposed::GetPossibleKernelWorkGroups(
       work_groups);
 }
-absl::Status CreateConvolutionTransposed(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed* result) {
-  *result =
-      ConvolutionTransposed(definition, attr, creation_context.device->info_);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+ConvolutionTransposed CreateConvolutionTransposed(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposed result(definition, attr, device_info);
+  result.UploadWeights(attr.weights);
   TensorLinearDescriptor desc;
   desc.storage_type =
       DeduceLinearStorageType(definition.GetPrimaryStorageType());
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 } // namespace cl


@@ -52,16 +52,14 @@ class ConvolutionTransposed : public GPUOperation {
   ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
  private:
-  friend absl::Status CreateConvolutionTransposed(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed* result);
+  friend ConvolutionTransposed CreateConvolutionTransposed(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   explicit ConvolutionTransposed(const OperationDef& definition,
                                  const ConvolutionTransposedAttributes& attr,
                                  const DeviceInfo& device_info);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -82,8 +80,8 @@ class ConvolutionTransposed : public GPUOperation {
 };
 template <DataType T>
-absl::Status ConvolutionTransposed::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+void ConvolutionTransposed::UploadWeights(
+    const tflite::gpu::Tensor<OHWI, T>& weights) {
   const int dst_depth =
       AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.z);
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
@@ -146,8 +144,6 @@ absl::Status ConvolutionTransposed::UploadWeights(
     args_.AddObject("weights3",
                     absl::make_unique<Texture2DDescriptor>(std::move(desc3)));
   }
-  return absl::OkStatus();
 }
 template <DataType S, typename T>
@@ -202,9 +198,9 @@ void ConvolutionTransposed::RearrangeWeightsData(
   }
 }
-absl::Status CreateConvolutionTransposed(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr, ConvolutionTransposed* result);
+ConvolutionTransposed CreateConvolutionTransposed(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 } // namespace cl
 } // namespace gpu


@@ -401,23 +401,20 @@ void ConvolutionTransposed3D::GetPossibleKernelWorkGroups(
       work_groups);
 }
-absl::Status CreateConvolutionTransposed3D(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposed3DAttributes& attr,
-    ConvolutionTransposed3D* result) {
-  *result =
-      ConvolutionTransposed3D(definition, attr, creation_context.device->info_);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+ConvolutionTransposed3D CreateConvolutionTransposed3D(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposed3DAttributes& attr) {
+  ConvolutionTransposed3D result(definition, attr, device_info);
+  result.UploadWeights(attr.weights);
   TensorLinearDescriptor desc;
   desc.storage_type =
       DeduceLinearStorageType(definition.GetPrimaryStorageType());
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 } // namespace cl


@@ -52,16 +52,14 @@ class ConvolutionTransposed3D : public GPUOperation {
   ConvolutionTransposed3D& operator=(const ConvolutionTransposed3D&) = delete;
  private:
-  friend absl::Status CreateConvolutionTransposed3D(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposed3DAttributes& attr,
-      ConvolutionTransposed3D* result);
+  friend ConvolutionTransposed3D CreateConvolutionTransposed3D(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposed3DAttributes& attr);
   ConvolutionTransposed3D(const OperationDef& definition,
                           const ConvolutionTransposed3DAttributes& attr,
                           const DeviceInfo& device_info);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights);
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWDI, S>& weights,
@@ -81,8 +79,8 @@ class ConvolutionTransposed3D : public GPUOperation {
 };
 template <DataType T>
-absl::Status ConvolutionTransposed3D::UploadWeights(
-    const tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
+void ConvolutionTransposed3D::UploadWeights(
+    const tflite::gpu::Tensor<OHWDI, T>& weights) {
   const int dst_depth =
       AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.z);
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
@@ -149,8 +147,6 @@ absl::Status ConvolutionTransposed3D::UploadWeights(
     args_.AddObject("weights3",
                     absl::make_unique<Texture2DDescriptor>(std::move(desc3)));
   }
-  return absl::OkStatus();
 }
 template <DataType S, typename T>
@@ -209,10 +205,9 @@ void ConvolutionTransposed3D::RearrangeWeightsData(
   }
 }
-absl::Status CreateConvolutionTransposed3D(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposed3DAttributes& attr,
-    ConvolutionTransposed3D* result);
+ConvolutionTransposed3D CreateConvolutionTransposed3D(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposed3DAttributes& attr);
 } // namespace cl
 } // namespace gpu


@@ -28,16 +28,16 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 ConvolutionTransposed3x3::ConvolutionTransposed3x3(
-    const OperationDef& definition, const CLDevice& device, int2 padding)
+    const OperationDef& definition, const DeviceInfo& device_info, int2 padding)
     : GPUOperation(definition),
       padding_(padding),
       work_group_launch_order_(2, 0, 1) {
   work_group_size_ = int3(8, 4, 1);
-  if (device.IsPowerVR()) {
+  if (device_info.IsPowerVR()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC;
-  } else if (device.IsNvidia() || device.IsIntel()) {
+  } else if (device_info.IsNvidia() || device_info.IsIntel()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS;
-  } else if (device.IsAMD()) {
+  } else if (device_info.IsAMD()) {
     weights_upload_type_ = WeightsUploadType::CONSTANT_MEM;
   } else {
     weights_upload_type_ = WeightsUploadType::GLOBAL_MEM;
@@ -45,7 +45,7 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3(
   code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_,
                                             padding_, work_group_launch_order_);
   if (definition_.precision == CalculationsPrecision::F16 &&
-      device.IsPowerVR()) {
+      device_info.IsPowerVR()) {
     compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
   }
 }
@@ -329,34 +329,26 @@ int3 ConvolutionTransposed3x3::GetGridSize() const {
 }
 bool IsConvolutionTransposed3x3Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.w == 3 && attr.weights.shape.h == 3 &&
          attr.stride.w == 2 && attr.stride.h == 2;
 }
-absl::Status CreateConvolutionTransposed3x3(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3* result) {
-  if (!IsConvolutionTransposed3x3Supported(*creation_context.device, definition,
-                                           attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposed3x3 doesn't support this attributes");
-  }
+ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
   const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h);
-  *result =
-      ConvolutionTransposed3x3(definition, *creation_context.device, padding);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+  ConvolutionTransposed3x3 result(definition, device_info, padding);
+  result.UploadWeights(attr.weights);
   TensorLinearDescriptor desc;
   desc.storage_type = LinearStorageType::TEXTURE_2D;
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 } // namespace cl


@@ -61,14 +61,12 @@ class ConvolutionTransposed3x3 : public GPUOperation {
  private:
   ConvolutionTransposed3x3(const OperationDef& definition,
-                           const CLDevice& device, int2 padding);
-  friend absl::Status CreateConvolutionTransposed3x3(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed3x3* result);
+                           const DeviceInfo& device_info, int2 padding);
+  friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -85,8 +83,8 @@ class ConvolutionTransposed3x3 : public GPUOperation {
 };
 template <DataType T>
-absl::Status ConvolutionTransposed3x3::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+void ConvolutionTransposed3x3::UploadWeights(
+    const tflite::gpu::Tensor<OHWI, T>& weights) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
   const int kernel_x = 3; // This operation support only 3x3 kernel
@@ -117,8 +115,6 @@ absl::Status ConvolutionTransposed3x3::UploadWeights(
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-  return absl::OkStatus();
 }
 template <DataType S, typename T>
@@ -177,13 +173,12 @@ void ConvolutionTransposed3x3::RearrangeWeightsData(
 }
 bool IsConvolutionTransposed3x3Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr);
-absl::Status CreateConvolutionTransposed3x3(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3* result);
+ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 } // namespace cl
 } // namespace gpu


@@ -54,9 +54,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConvolutionTransposed3x3 operation;
-      ASSERT_OK(CreateConvolutionTransposed3x3(creation_context_, op_def, attr,
-                                               &operation));
+      ConvolutionTransposed3x3 operation = CreateConvolutionTransposed3x3(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(dst_tensor.data,


@@ -189,7 +189,7 @@ int3 ConvolutionTransposed3x3Thin::GetGridSize() const {
 }
 bool IsConvolutionTransposed3x3ThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr) {
+    const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.o <= 8 && attr.weights.shape.w == 3 &&
          attr.weights.shape.h == 3 && attr.stride.w == 2 &&
         attr.stride.h == 2 && attr.padding.prepended.w == 1 &&
@@ -197,19 +197,12 @@ bool IsConvolutionTransposed3x3ThinSupported(
         attr.padding.appended.h == 1;
 }
-absl::Status CreateConvolutionTransposed3x3Thin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3Thin* result) {
-  if (!IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
-                                               attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposed3x3Thin doesn't support this attributes");
-  }
-  *result = ConvolutionTransposed3x3Thin(definition, attr);
-  RETURN_IF_ERROR(
-      result->UploadData(attr.weights, attr.bias, creation_context.context));
-  return absl::OkStatus();
+ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposed3x3Thin result(definition, attr);
+  result.UploadData(attr.weights, attr.bias);
+  return result;
 }
 } // namespace cl


@@ -48,17 +48,15 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
       delete;
  private:
-  friend absl::Status CreateConvolutionTransposed3x3Thin(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed3x3Thin* result);
+  friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   explicit ConvolutionTransposed3x3Thin(
       const OperationDef& definition,
       const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
-                          const tflite::gpu::Tensor<Linear, T>& biases,
-                          CLContext* context);
+  void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
+                  const tflite::gpu::Tensor<Linear, T>& biases);
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -69,9 +67,9 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
 };
 template <DataType T>
-absl::Status ConvolutionTransposed3x3Thin::UploadData(
+void ConvolutionTransposed3x3Thin::UploadData(
     const tflite::gpu::Tensor<OHWI, T>& weights,
-    const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
+    const tflite::gpu::Tensor<Linear, T>& biases) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
   const int kernel_x = 3; // This operation support only 3x3 kernel
@@ -114,8 +112,6 @@ absl::Status ConvolutionTransposed3x3Thin::UploadData(
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-  return absl::OkStatus();
 }
 template <DataType S, typename T>
@@ -161,12 +157,11 @@ void ConvolutionTransposed3x3Thin::RearrangeWeightsData(
 }
 bool IsConvolutionTransposed3x3ThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr);
+    const ConvolutionTransposedAttributes& attr);
-absl::Status CreateConvolutionTransposed3x3Thin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3Thin* result);
+ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 } // namespace cl
 } // namespace gpu


@@ -54,9 +54,9 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3ThinSimpleWeights) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConvolutionTransposed3x3Thin operation;
-      ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
-                                                   attr, &operation));
+      ConvolutionTransposed3x3Thin operation =
+          CreateConvolutionTransposed3x3Thin(creation_context_.GetDeviceInfo(),
+                                             op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(dst_tensor.data,
@@ -90,9 +90,9 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3Thin) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConvolutionTransposed3x3Thin operation;
-      ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
-                                                   attr, &operation));
+      ConvolutionTransposed3x3Thin operation =
+          CreateConvolutionTransposed3x3Thin(creation_context_.GetDeviceInfo(),
+                                             op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(


@@ -28,14 +28,14 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 ConvolutionTransposed4x4::ConvolutionTransposed4x4(
-    const OperationDef& definition, const CLDevice& device)
+    const OperationDef& definition, const DeviceInfo& device_info)
     : GPUOperation(definition) {
   work_group_size_ = int3(8, 4, 1);
-  if (device.IsPowerVR()) {
+  if (device_info.IsPowerVR()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC;
-  } else if (device.IsNvidia() || device.IsIntel()) {
+  } else if (device_info.IsNvidia() || device_info.IsIntel()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS;
-  } else if (device.IsAMD()) {
+  } else if (device_info.IsAMD()) {
     weights_upload_type_ = WeightsUploadType::CONSTANT_MEM;
   } else {
     weights_upload_type_ = WeightsUploadType::GLOBAL_MEM;
@@ -43,7 +43,7 @@ ConvolutionTransposed4x4::ConvolutionTransposed4x4(
   code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_);
   if (definition_.precision == CalculationsPrecision::F16 &&
-      device.IsPowerVR()) {
+      device_info.IsPowerVR()) {
     compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
   }
 }
@@ -307,33 +307,26 @@ int3 ConvolutionTransposed4x4::GetGridSize() const {
 }
 bool IsConvolutionTransposed4x4Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.w == 4 && attr.weights.shape.h == 4 &&
          attr.stride.w == 2 && attr.stride.h == 2 &&
         attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1;
 }
-absl::Status CreateConvolutionTransposed4x4(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed4x4* result) {
-  if (!IsConvolutionTransposed4x4Supported(*creation_context.device, definition,
-                                           attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposed4x4 doesn't support this attributes");
-  }
-  *result = ConvolutionTransposed4x4(definition, *creation_context.device);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposed4x4 result(definition, device_info);
+  result.UploadWeights(attr.weights);
   TensorLinearDescriptor desc;
   desc.storage_type = LinearStorageType::TEXTURE_2D;
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 } // namespace cl


@@ -61,14 +61,12 @@ class ConvolutionTransposed4x4 : public GPUOperation {
  private:
   ConvolutionTransposed4x4(const OperationDef& definition,
-                           const CLDevice& device);
-  friend absl::Status CreateConvolutionTransposed4x4(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed4x4* result);
+                           const DeviceInfo& device_info);
+  friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -81,8 +79,8 @@ class ConvolutionTransposed4x4 : public GPUOperation {
 };
 template <DataType T>
-absl::Status ConvolutionTransposed4x4::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+void ConvolutionTransposed4x4::UploadWeights(
+    const tflite::gpu::Tensor<OHWI, T>& weights) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
   const int kernel_x = 4; // This operation support only 4x4 kernel
@@ -113,8 +111,6 @@ absl::Status ConvolutionTransposed4x4::UploadWeights(
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-  return absl::OkStatus();
 }
 template <DataType S, typename T>
@@ -160,13 +156,12 @@ void ConvolutionTransposed4x4::RearrangeWeightsData(
 }
 bool IsConvolutionTransposed4x4Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr);
-absl::Status CreateConvolutionTransposed4x4(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed4x4* result);
+ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 } // namespace cl
 } // namespace gpu


@@ -55,9 +55,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed4x4) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConvolutionTransposed4x4 operation;
-      ASSERT_OK(CreateConvolutionTransposed4x4(creation_context_, op_def, attr,
-                                               &operation));
+      ConvolutionTransposed4x4 operation = CreateConvolutionTransposed4x4(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(dst_tensor.data,


@@ -55,9 +55,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedSimpleWeights) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed operation;
-      ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
-                                            &operation));
+      ConvolutionTransposed operation = CreateConvolutionTransposed(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 2), &dst_tensor));
       EXPECT_THAT(
@@ -94,9 +93,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed operation;
-      ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
-                                            &operation));
+      ConvolutionTransposed operation = CreateConvolutionTransposed(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(


@@ -159,26 +159,19 @@ int3 ConvolutionTransposedThin::GetGridSize() const {
 }
 bool IsConvolutionTransposedThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr) {
+    const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.o <= 4 && attr.weights.shape.w == attr.stride.w &&
          attr.weights.shape.h == attr.stride.h &&
         attr.padding.prepended.w == 0 && attr.padding.prepended.h == 0 &&
         attr.padding.appended.w == 0 && attr.padding.appended.h == 0;
 }
-absl::Status CreateConvolutionTransposedThin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposedThin* result) {
-  if (!IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposedThin doesn't support this attributes");
-  }
-  *result = ConvolutionTransposedThin(definition, attr,
-                                      creation_context.device->info_);
-  RETURN_IF_ERROR(
-      result->UploadData(attr.weights, attr.bias, creation_context.context));
-  return absl::OkStatus();
+ConvolutionTransposedThin CreateConvolutionTransposedThin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposedThin result(definition, attr, device_info);
+  result.UploadData(attr.weights, attr.bias);
+  return result;
 }
 } // namespace cl


@@ -47,17 +47,15 @@ class ConvolutionTransposedThin : public GPUOperation {
       delete;
  private:
-  friend absl::Status CreateConvolutionTransposedThin(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposedThin* result);
+  friend ConvolutionTransposedThin CreateConvolutionTransposedThin(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   ConvolutionTransposedThin(const OperationDef& definition,
                             const ConvolutionTransposedAttributes& attr,
                             const DeviceInfo& device_info);
   template <DataType T>
-  absl::Status UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
-                          const tflite::gpu::Tensor<Linear, T>& biases,
-                          CLContext* context);
+  void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
+                  const tflite::gpu::Tensor<Linear, T>& biases);
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -68,9 +66,9 @@ class ConvolutionTransposedThin : public GPUOperation {
 };
 template <DataType T>
-absl::Status ConvolutionTransposedThin::UploadData(
+void ConvolutionTransposedThin::UploadData(
     const tflite::gpu::Tensor<OHWI, T>& weights,
-    const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
+    const tflite::gpu::Tensor<Linear, T>& biases) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int flt4_count =
       weights.shape.w * weights.shape.h * src_depth * weights.shape.o;
@@ -105,8 +103,6 @@ absl::Status ConvolutionTransposedThin::UploadData(
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-  return absl::OkStatus();
 }
 template <DataType S, typename T>
@@ -142,12 +138,11 @@ void ConvolutionTransposedThin::RearrangeWeightsData(
 }
 bool IsConvolutionTransposedThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr);
+    const ConvolutionTransposedAttributes& attr);
-absl::Status CreateConvolutionTransposedThin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposedThin* result);
+ConvolutionTransposedThin CreateConvolutionTransposedThin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 } // namespace cl
 } // namespace gpu


@@ -55,9 +55,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThinSimpleWeights) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposedThin operation;
-      ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
-                                                &operation));
+      ConvolutionTransposedThin operation = CreateConvolutionTransposedThin(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 2), &dst_tensor));
       EXPECT_THAT(
@@ -94,9 +93,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThin) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposedThin operation;
-      ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
-                                                &operation));
+      ConvolutionTransposedThin operation = CreateConvolutionTransposedThin(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(


@@ -29,95 +29,73 @@ namespace cl {
 namespace {
 absl::Status SelectConvolutionTransposedAdreno(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  if (IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
-    ConvolutionTransposedThin conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposedThin(creation_context, op_def, attr, &conv));
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
+  if (IsConvolutionTransposedThinSupported(attr)) {
+    ConvolutionTransposedThin conv =
+        CreateConvolutionTransposedThin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposedThin>(std::move(conv));
-  } else if (IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
-                                                     attr)) {
-    ConvolutionTransposed3x3Thin conv;
-    RETURN_IF_ERROR(CreateConvolutionTransposed3x3Thin(creation_context, op_def,
-                                                       attr, &conv));
+  } else if (IsConvolutionTransposed3x3ThinSupported(attr)) {
+    ConvolutionTransposed3x3Thin conv =
+        CreateConvolutionTransposed3x3Thin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed3x3Thin>(std::move(conv));
   } else {
-    ConvolutionTransposed conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed(creation_context, op_def, attr, &conv));
+    ConvolutionTransposed conv =
+        CreateConvolutionTransposed(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed>(std::move(conv));
   }
   return absl::OkStatus();
 }
 absl::Status SelectConvolutionTransposedPowerVR(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  if (IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
-    ConvolutionTransposedThin conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposedThin(creation_context, op_def, attr, &conv));
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
+  if (IsConvolutionTransposedThinSupported(attr)) {
+    ConvolutionTransposedThin conv =
+        CreateConvolutionTransposedThin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposedThin>(std::move(conv));
-  } else if (IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
-                                                     attr)) {
-    ConvolutionTransposed3x3Thin conv;
-    RETURN_IF_ERROR(CreateConvolutionTransposed3x3Thin(creation_context, op_def,
-                                                       attr, &conv));
+  } else if (IsConvolutionTransposed3x3ThinSupported(attr)) {
+    ConvolutionTransposed3x3Thin conv =
+        CreateConvolutionTransposed3x3Thin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed3x3Thin>(std::move(conv));
-  } else if (IsConvolutionTransposed3x3Supported(*creation_context.device,
-                                                 op_def, attr)) {
-    ConvolutionTransposed3x3 conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed3x3(creation_context, op_def, attr, &conv));
+  } else if (IsConvolutionTransposed3x3Supported(op_def, attr)) {
+    ConvolutionTransposed3x3 conv =
+        CreateConvolutionTransposed3x3(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed3x3>(std::move(conv));
-  } else if (IsConvolutionTransposed4x4Supported(*creation_context.device,
-                                                 op_def, attr)) {
-    ConvolutionTransposed4x4 conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed4x4(creation_context, op_def, attr, &conv));
+  } else if (IsConvolutionTransposed4x4Supported(op_def, attr)) {
+    ConvolutionTransposed4x4 conv =
+        CreateConvolutionTransposed4x4(device_info, op_def, attr);
    *ptr = absl::make_unique<ConvolutionTransposed4x4>(std::move(conv));
   } else {
-    ConvolutionTransposed conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed(creation_context, op_def, attr, &conv));
+    ConvolutionTransposed conv =
+        CreateConvolutionTransposed(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed>(std::move(conv));
   }
   return absl::OkStatus();
 }
 absl::Status SelectConvolutionTransposedMali(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  ConvolutionTransposed conv;
-  RETURN_IF_ERROR(
-      CreateConvolutionTransposed(creation_context, op_def, attr, &conv));
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
+  ConvolutionTransposed conv =
+      CreateConvolutionTransposed(device_info, op_def, attr);
   *ptr = absl::make_unique<ConvolutionTransposed>(std::move(conv));
   return absl::OkStatus();
 }
 } // namespace
 absl::Status SelectConvolutionTransposed(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  const auto& device_info = creation_context.device->info_;
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
   if (device_info.IsAdreno()) {
-    return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
-                                             ptr);
+    return SelectConvolutionTransposedAdreno(attr, device_info, op_def, ptr);
   } else if (device_info.IsPowerVR() || device_info.IsAMD() ||
              device_info.IsNvidia() || device_info.IsIntel()) {
-    return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def,
-                                              ptr);
+    return SelectConvolutionTransposedPowerVR(attr, device_info, op_def, ptr);
   } else if (device_info.IsMali()) {
-    return SelectConvolutionTransposedMali(attr, creation_context, op_def, ptr);
+    return SelectConvolutionTransposedMali(attr, device_info, op_def, ptr);
   } else {
-    return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
-                                             ptr);
+    return SelectConvolutionTransposedAdreno(attr, device_info, op_def, ptr);
   }
 }


@@ -27,9 +27,8 @@ namespace gpu {
 namespace cl {
 absl::Status SelectConvolutionTransposed(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr);
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr);
 } // namespace cl
 } // namespace gpu


@@ -231,8 +231,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
     case OperationType::CONVOLUTION_TRANSPOSED: {
       auto attr = absl::any_cast<ConvolutionTransposedAttributes>(
           node.operation.attributes);
-      return SelectConvolutionTransposed(attr, creation_context, op_def,
-                                         gpu_op);
+      return SelectConvolutionTransposed(attr, creation_context.GetDeviceInfo(),
+                                         op_def, gpu_op);
     }
     case OperationType::DEPTHWISE_CONVOLUTION: {
       auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(