Removed useless Status and CreationContext from convolution transposed kernels.
PiperOrigin-RevId: 327367790
Change-Id: I0f1e3a6e38900674f87a3def14325c1a2e40956a
Commit: 3b9cb438e5 (parent: b829e9ace3)
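All hunks below apply the same mechanical change: the Create* factories for the transposed-convolution kernels no longer report failure through absl::Status and an out-parameter, they take a DeviceInfo instead of a full CreationContext, and the UploadWeights/UploadData helpers become void. As a condensed before/after sketch of a call site, assembled from the test hunks further down (creation_context_, op_def, and attr come from the existing test fixtures, not from new code):

    // Old pattern: status-returning factory writing into an out-parameter.
    ConvolutionTransposed4x4 operation;
    ASSERT_OK(CreateConvolutionTransposed4x4(creation_context_, op_def, attr,
                                             &operation));

    // New pattern: value-returning factory that only needs DeviceInfo; weight
    // and bias upload happen inside the factory and cannot fail via Status.
    ConvolutionTransposed4x4 operation = CreateConvolutionTransposed4x4(
        creation_context_.GetDeviceInfo(), op_def, attr);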
@@ -358,23 +358,20 @@ void ConvolutionTransposed::GetPossibleKernelWorkGroups(
                             work_groups);
 }
 
-absl::Status CreateConvolutionTransposed(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed* result) {
-  *result =
-      ConvolutionTransposed(definition, attr, creation_context.device->info_);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+ConvolutionTransposed CreateConvolutionTransposed(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposed result(definition, attr, device_info);
+  result.UploadWeights(attr.weights);
 
   TensorLinearDescriptor desc;
   desc.storage_type =
       DeduceLinearStorageType(definition.GetPrimaryStorageType());
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 
 }  // namespace cl
@@ -52,16 +52,14 @@ class ConvolutionTransposed : public GPUOperation {
   ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
 
  private:
-  friend absl::Status CreateConvolutionTransposed(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed* result);
+  friend ConvolutionTransposed CreateConvolutionTransposed(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   explicit ConvolutionTransposed(const OperationDef& definition,
                                  const ConvolutionTransposedAttributes& attr,
                                  const DeviceInfo& device_info);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -82,8 +80,8 @@ class ConvolutionTransposed : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposed::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+void ConvolutionTransposed::UploadWeights(
+    const tflite::gpu::Tensor<OHWI, T>& weights) {
   const int dst_depth =
       AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.z);
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
@@ -146,8 +144,6 @@ absl::Status ConvolutionTransposed::UploadWeights(
     args_.AddObject("weights3",
                     absl::make_unique<Texture2DDescriptor>(std::move(desc3)));
   }
-
-  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
@@ -202,9 +198,9 @@ void ConvolutionTransposed::RearrangeWeightsData(
   }
 }
 
-absl::Status CreateConvolutionTransposed(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr, ConvolutionTransposed* result);
+ConvolutionTransposed CreateConvolutionTransposed(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -401,23 +401,20 @@ void ConvolutionTransposed3D::GetPossibleKernelWorkGroups(
                             work_groups);
 }
 
-absl::Status CreateConvolutionTransposed3D(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposed3DAttributes& attr,
-    ConvolutionTransposed3D* result) {
-  *result =
-      ConvolutionTransposed3D(definition, attr, creation_context.device->info_);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+ConvolutionTransposed3D CreateConvolutionTransposed3D(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposed3DAttributes& attr) {
+  ConvolutionTransposed3D result(definition, attr, device_info);
+  result.UploadWeights(attr.weights);
 
   TensorLinearDescriptor desc;
   desc.storage_type =
       DeduceLinearStorageType(definition.GetPrimaryStorageType());
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 
 }  // namespace cl
@@ -52,16 +52,14 @@ class ConvolutionTransposed3D : public GPUOperation {
   ConvolutionTransposed3D& operator=(const ConvolutionTransposed3D&) = delete;
 
  private:
-  friend absl::Status CreateConvolutionTransposed3D(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposed3DAttributes& attr,
-      ConvolutionTransposed3D* result);
+  friend ConvolutionTransposed3D CreateConvolutionTransposed3D(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposed3DAttributes& attr);
   ConvolutionTransposed3D(const OperationDef& definition,
                           const ConvolutionTransposed3DAttributes& attr,
                           const DeviceInfo& device_info);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWDI, S>& weights,
@@ -81,8 +79,8 @@ class ConvolutionTransposed3D : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposed3D::UploadWeights(
-    const tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
+void ConvolutionTransposed3D::UploadWeights(
+    const tflite::gpu::Tensor<OHWDI, T>& weights) {
   const int dst_depth =
       AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.z);
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
@@ -149,8 +147,6 @@ absl::Status ConvolutionTransposed3D::UploadWeights(
     args_.AddObject("weights3",
                     absl::make_unique<Texture2DDescriptor>(std::move(desc3)));
   }
-
-  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
@@ -209,10 +205,9 @@ void ConvolutionTransposed3D::RearrangeWeightsData(
   }
 }
 
-absl::Status CreateConvolutionTransposed3D(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposed3DAttributes& attr,
-    ConvolutionTransposed3D* result);
+ConvolutionTransposed3D CreateConvolutionTransposed3D(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposed3DAttributes& attr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -28,16 +28,16 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 ConvolutionTransposed3x3::ConvolutionTransposed3x3(
-    const OperationDef& definition, const CLDevice& device, int2 padding)
+    const OperationDef& definition, const DeviceInfo& device_info, int2 padding)
     : GPUOperation(definition),
       padding_(padding),
       work_group_launch_order_(2, 0, 1) {
   work_group_size_ = int3(8, 4, 1);
-  if (device.IsPowerVR()) {
+  if (device_info.IsPowerVR()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC;
-  } else if (device.IsNvidia() || device.IsIntel()) {
+  } else if (device_info.IsNvidia() || device_info.IsIntel()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS;
-  } else if (device.IsAMD()) {
+  } else if (device_info.IsAMD()) {
     weights_upload_type_ = WeightsUploadType::CONSTANT_MEM;
   } else {
     weights_upload_type_ = WeightsUploadType::GLOBAL_MEM;
@@ -45,7 +45,7 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3(
   code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_,
                                             padding_, work_group_launch_order_);
   if (definition_.precision == CalculationsPrecision::F16 &&
-      device.IsPowerVR()) {
+      device_info.IsPowerVR()) {
     compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
   }
 }
@@ -329,34 +329,26 @@ int3 ConvolutionTransposed3x3::GetGridSize() const {
 }
 
 bool IsConvolutionTransposed3x3Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.w == 3 && attr.weights.shape.h == 3 &&
          attr.stride.w == 2 && attr.stride.h == 2;
 }
 
-absl::Status CreateConvolutionTransposed3x3(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3* result) {
-  if (!IsConvolutionTransposed3x3Supported(*creation_context.device, definition,
-                                           attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposed3x3 doesn't support this attributes");
-  }
+ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
   const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h);
-  *result =
-      ConvolutionTransposed3x3(definition, *creation_context.device, padding);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+  ConvolutionTransposed3x3 result(definition, device_info, padding);
+  result.UploadWeights(attr.weights);
 
   TensorLinearDescriptor desc;
   desc.storage_type = LinearStorageType::TEXTURE_2D;
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 
 }  // namespace cl
@@ -61,14 +61,12 @@ class ConvolutionTransposed3x3 : public GPUOperation {
 
  private:
   ConvolutionTransposed3x3(const OperationDef& definition,
-                           const CLDevice& device, int2 padding);
-  friend absl::Status CreateConvolutionTransposed3x3(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed3x3* result);
+                           const DeviceInfo& device_info, int2 padding);
+  friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -85,8 +83,8 @@ class ConvolutionTransposed3x3 : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposed3x3::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+void ConvolutionTransposed3x3::UploadWeights(
+    const tflite::gpu::Tensor<OHWI, T>& weights) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
   const int kernel_x = 3;  // This operation support only 3x3 kernel
@@ -117,8 +115,6 @@ absl::Status ConvolutionTransposed3x3::UploadWeights(
 
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-
-  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
@@ -177,13 +173,12 @@ void ConvolutionTransposed3x3::RearrangeWeightsData(
 }
 
 bool IsConvolutionTransposed3x3Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr);
 
-absl::Status CreateConvolutionTransposed3x3(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3* result);
+ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -54,9 +54,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConvolutionTransposed3x3 operation;
-      ASSERT_OK(CreateConvolutionTransposed3x3(creation_context_, op_def, attr,
-                                               &operation));
+      ConvolutionTransposed3x3 operation = CreateConvolutionTransposed3x3(
+          creation_context_.GetDeviceInfo(), op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                     BHWC(1, 4, 4, 1), &dst_tensor));
       EXPECT_THAT(dst_tensor.data,
@@ -189,7 +189,7 @@ int3 ConvolutionTransposed3x3Thin::GetGridSize() const {
 }
 
 bool IsConvolutionTransposed3x3ThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr) {
+    const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.o <= 8 && attr.weights.shape.w == 3 &&
          attr.weights.shape.h == 3 && attr.stride.w == 2 &&
          attr.stride.h == 2 && attr.padding.prepended.w == 1 &&
@@ -197,19 +197,12 @@ bool IsConvolutionTransposed3x3ThinSupported(
          attr.padding.appended.h == 1;
 }
 
-absl::Status CreateConvolutionTransposed3x3Thin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3Thin* result) {
-  if (!IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
-                                               attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposed3x3Thin doesn't support this attributes");
-  }
-  *result = ConvolutionTransposed3x3Thin(definition, attr);
-  RETURN_IF_ERROR(
-      result->UploadData(attr.weights, attr.bias, creation_context.context));
-  return absl::OkStatus();
+ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposed3x3Thin result(definition, attr);
+  result.UploadData(attr.weights, attr.bias);
+  return result;
 }
 
 }  // namespace cl
@@ -48,17 +48,15 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
       delete;
 
  private:
-  friend absl::Status CreateConvolutionTransposed3x3Thin(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed3x3Thin* result);
+  friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   explicit ConvolutionTransposed3x3Thin(
       const OperationDef& definition,
       const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
-                          const tflite::gpu::Tensor<Linear, T>& biases,
-                          CLContext* context);
+  void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
+                  const tflite::gpu::Tensor<Linear, T>& biases);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -69,9 +67,9 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposed3x3Thin::UploadData(
+void ConvolutionTransposed3x3Thin::UploadData(
     const tflite::gpu::Tensor<OHWI, T>& weights,
-    const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
+    const tflite::gpu::Tensor<Linear, T>& biases) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
   const int kernel_x = 3;  // This operation support only 3x3 kernel
@@ -114,8 +112,6 @@ absl::Status ConvolutionTransposed3x3Thin::UploadData(
 
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-
-  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
@@ -161,12 +157,11 @@ void ConvolutionTransposed3x3Thin::RearrangeWeightsData(
 }
 
 bool IsConvolutionTransposed3x3ThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr);
+    const ConvolutionTransposedAttributes& attr);
 
-absl::Status CreateConvolutionTransposed3x3Thin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed3x3Thin* result);
+ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -54,9 +54,9 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3ThinSimpleWeights) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed3x3Thin operation;
-      ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
-                                                   attr, &operation));
+      ConvolutionTransposed3x3Thin operation =
+          CreateConvolutionTransposed3x3Thin(creation_context_.GetDeviceInfo(),
+                                             op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data,
@@ -90,9 +90,9 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3Thin) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed3x3Thin operation;
-      ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
-                                                   attr, &operation));
+      ConvolutionTransposed3x3Thin operation =
+          CreateConvolutionTransposed3x3Thin(creation_context_.GetDeviceInfo(),
+                                             op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 1), &dst_tensor));
      EXPECT_THAT(
@@ -28,14 +28,14 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 ConvolutionTransposed4x4::ConvolutionTransposed4x4(
-    const OperationDef& definition, const CLDevice& device)
+    const OperationDef& definition, const DeviceInfo& device_info)
     : GPUOperation(definition) {
   work_group_size_ = int3(8, 4, 1);
-  if (device.IsPowerVR()) {
+  if (device_info.IsPowerVR()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC;
-  } else if (device.IsNvidia() || device.IsIntel()) {
+  } else if (device_info.IsNvidia() || device_info.IsIntel()) {
     weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS;
-  } else if (device.IsAMD()) {
+  } else if (device_info.IsAMD()) {
     weights_upload_type_ = WeightsUploadType::CONSTANT_MEM;
   } else {
     weights_upload_type_ = WeightsUploadType::GLOBAL_MEM;
@@ -43,7 +43,7 @@ ConvolutionTransposed4x4::ConvolutionTransposed4x4(
 
   code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_);
   if (definition_.precision == CalculationsPrecision::F16 &&
-      device.IsPowerVR()) {
+      device_info.IsPowerVR()) {
     compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
   }
 }
@@ -307,33 +307,26 @@ int3 ConvolutionTransposed4x4::GetGridSize() const {
 }
 
 bool IsConvolutionTransposed4x4Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.w == 4 && attr.weights.shape.h == 4 &&
          attr.stride.w == 2 && attr.stride.h == 2 &&
         attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1;
 }
 
-absl::Status CreateConvolutionTransposed4x4(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed4x4* result) {
-  if (!IsConvolutionTransposed4x4Supported(*creation_context.device, definition,
-                                           attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposed4x4 doesn't support this attributes");
-  }
-  *result = ConvolutionTransposed4x4(definition, *creation_context.device);
-  RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposed4x4 result(definition, device_info);
+  result.UploadWeights(attr.weights);
 
   TensorLinearDescriptor desc;
   desc.storage_type = LinearStorageType::TEXTURE_2D;
   desc.element_type = definition.GetDataType();
   desc.UploadLinearData(attr.bias);
-  result->args_.AddObject(
+  result.args_.AddObject(
       "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return absl::OkStatus();
+  return result;
 }
 
 }  // namespace cl
@@ -61,14 +61,12 @@ class ConvolutionTransposed4x4 : public GPUOperation {
 
  private:
   ConvolutionTransposed4x4(const OperationDef& definition,
-                           const CLDevice& device);
-  friend absl::Status CreateConvolutionTransposed4x4(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposed4x4* result);
+                           const DeviceInfo& device_info);
+  friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -81,8 +79,8 @@ class ConvolutionTransposed4x4 : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposed4x4::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+void ConvolutionTransposed4x4::UploadWeights(
+    const tflite::gpu::Tensor<OHWI, T>& weights) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
   const int kernel_x = 4;  // This operation support only 4x4 kernel
@@ -113,8 +111,6 @@ absl::Status ConvolutionTransposed4x4::UploadWeights(
 
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-
-  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
@@ -160,13 +156,12 @@ void ConvolutionTransposed4x4::RearrangeWeightsData(
 }
 
 bool IsConvolutionTransposed4x4Supported(
-    const CLDevice& device, const OperationDef& definition,
+    const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr);
 
-absl::Status CreateConvolutionTransposed4x4(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposed4x4* result);
+ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -55,9 +55,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed4x4) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed4x4 operation;
-      ASSERT_OK(CreateConvolutionTransposed4x4(creation_context_, op_def, attr,
-                                               &operation));
+      ConvolutionTransposed4x4 operation = CreateConvolutionTransposed4x4(
+          creation_context_.GetDeviceInfo(), op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data,
@@ -55,9 +55,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedSimpleWeights) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed operation;
-      ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
-                                            &operation));
+      ConvolutionTransposed operation = CreateConvolutionTransposed(
+          creation_context_.GetDeviceInfo(), op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 2), &dst_tensor));
      EXPECT_THAT(
@@ -94,9 +93,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposed operation;
-      ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
-                                            &operation));
+      ConvolutionTransposed operation = CreateConvolutionTransposed(
+          creation_context_.GetDeviceInfo(), op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 1), &dst_tensor));
      EXPECT_THAT(
@@ -159,26 +159,19 @@ int3 ConvolutionTransposedThin::GetGridSize() const {
 }
 
 bool IsConvolutionTransposedThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr) {
+    const ConvolutionTransposedAttributes& attr) {
   return attr.weights.shape.o <= 4 && attr.weights.shape.w == attr.stride.w &&
          attr.weights.shape.h == attr.stride.h &&
         attr.padding.prepended.w == 0 && attr.padding.prepended.h == 0 &&
         attr.padding.appended.w == 0 && attr.padding.appended.h == 0;
 }
 
-absl::Status CreateConvolutionTransposedThin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposedThin* result) {
-  if (!IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
-    return absl::InvalidArgumentError(
-        "ConvolutionTransposedThin doesn't support this attributes");
-  }
-  *result = ConvolutionTransposedThin(definition, attr,
-                                      creation_context.device->info_);
-  RETURN_IF_ERROR(
-      result->UploadData(attr.weights, attr.bias, creation_context.context));
-  return absl::OkStatus();
+ConvolutionTransposedThin CreateConvolutionTransposedThin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  ConvolutionTransposedThin result(definition, attr, device_info);
+  result.UploadData(attr.weights, attr.bias);
+  return result;
 }
 
 }  // namespace cl
@@ -47,17 +47,15 @@ class ConvolutionTransposedThin : public GPUOperation {
       delete;
 
  private:
-  friend absl::Status CreateConvolutionTransposedThin(
-      const CreationContext& creation_context, const OperationDef& definition,
-      const ConvolutionTransposedAttributes& attr,
-      ConvolutionTransposedThin* result);
+  friend ConvolutionTransposedThin CreateConvolutionTransposedThin(
+      const DeviceInfo& device_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
   ConvolutionTransposedThin(const OperationDef& definition,
                             const ConvolutionTransposedAttributes& attr,
                             const DeviceInfo& device_info);
   template <DataType T>
-  absl::Status UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
-                          const tflite::gpu::Tensor<Linear, T>& biases,
-                          CLContext* context);
+  void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
+                  const tflite::gpu::Tensor<Linear, T>& biases);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -68,9 +66,9 @@ class ConvolutionTransposedThin : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposedThin::UploadData(
+void ConvolutionTransposedThin::UploadData(
     const tflite::gpu::Tensor<OHWI, T>& weights,
-    const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
+    const tflite::gpu::Tensor<Linear, T>& biases) {
   const int src_depth = DivideRoundUp(weights.shape.i, 4);
   const int flt4_count =
       weights.shape.w * weights.shape.h * src_depth * weights.shape.o;
@@ -105,8 +103,6 @@ absl::Status ConvolutionTransposedThin::UploadData(
 
   args_.AddObject("weights",
                   absl::make_unique<BufferDescriptor>(std::move(desc)));
-
-  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
@@ -142,12 +138,11 @@ void ConvolutionTransposedThin::RearrangeWeightsData(
 }
 
 bool IsConvolutionTransposedThinSupported(
-    const CLDevice& device, const ConvolutionTransposedAttributes& attr);
+    const ConvolutionTransposedAttributes& attr);
 
-absl::Status CreateConvolutionTransposedThin(
-    const CreationContext& creation_context, const OperationDef& definition,
-    const ConvolutionTransposedAttributes& attr,
-    ConvolutionTransposedThin* result);
+ConvolutionTransposedThin CreateConvolutionTransposedThin(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -55,9 +55,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThinSimpleWeights) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposedThin operation;
-      ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
-                                                &operation));
+      ConvolutionTransposedThin operation = CreateConvolutionTransposedThin(
+          creation_context_.GetDeviceInfo(), op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 2), &dst_tensor));
      EXPECT_THAT(
@@ -94,9 +93,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThin) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      ConvolutionTransposedThin operation;
-      ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
-                                                &operation));
+      ConvolutionTransposedThin operation = CreateConvolutionTransposedThin(
+          creation_context_.GetDeviceInfo(), op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 4, 4, 1), &dst_tensor));
      EXPECT_THAT(
@@ -29,95 +29,73 @@ namespace cl {
 namespace {
 
 absl::Status SelectConvolutionTransposedAdreno(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  if (IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
-    ConvolutionTransposedThin conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposedThin(creation_context, op_def, attr, &conv));
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
+  if (IsConvolutionTransposedThinSupported(attr)) {
+    ConvolutionTransposedThin conv =
+        CreateConvolutionTransposedThin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposedThin>(std::move(conv));
-  } else if (IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
-                                                     attr)) {
-    ConvolutionTransposed3x3Thin conv;
-    RETURN_IF_ERROR(CreateConvolutionTransposed3x3Thin(creation_context, op_def,
-                                                       attr, &conv));
+  } else if (IsConvolutionTransposed3x3ThinSupported(attr)) {
+    ConvolutionTransposed3x3Thin conv =
+        CreateConvolutionTransposed3x3Thin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed3x3Thin>(std::move(conv));
   } else {
-    ConvolutionTransposed conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed(creation_context, op_def, attr, &conv));
+    ConvolutionTransposed conv =
+        CreateConvolutionTransposed(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed>(std::move(conv));
   }
   return absl::OkStatus();
 }
 
 absl::Status SelectConvolutionTransposedPowerVR(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  if (IsConvolutionTransposedThinSupported(*creation_context.device, attr)) {
-    ConvolutionTransposedThin conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposedThin(creation_context, op_def, attr, &conv));
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
+  if (IsConvolutionTransposedThinSupported(attr)) {
+    ConvolutionTransposedThin conv =
+        CreateConvolutionTransposedThin(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposedThin>(std::move(conv));
-  } else if (IsConvolutionTransposed3x3ThinSupported(*creation_context.device,
-                                                     attr)) {
-    ConvolutionTransposed3x3Thin conv;
-    RETURN_IF_ERROR(CreateConvolutionTransposed3x3Thin(creation_context, op_def,
-                                                       attr, &conv));
+  } else if (IsConvolutionTransposed3x3ThinSupported(attr)) {
+    ConvolutionTransposed3x3Thin conv =
+        CreateConvolutionTransposed3x3Thin(device_info, op_def, attr);
    *ptr = absl::make_unique<ConvolutionTransposed3x3Thin>(std::move(conv));
-  } else if (IsConvolutionTransposed3x3Supported(*creation_context.device,
-                                                 op_def, attr)) {
-    ConvolutionTransposed3x3 conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed3x3(creation_context, op_def, attr, &conv));
+  } else if (IsConvolutionTransposed3x3Supported(op_def, attr)) {
+    ConvolutionTransposed3x3 conv =
+        CreateConvolutionTransposed3x3(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed3x3>(std::move(conv));
-  } else if (IsConvolutionTransposed4x4Supported(*creation_context.device,
-                                                 op_def, attr)) {
-    ConvolutionTransposed4x4 conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed4x4(creation_context, op_def, attr, &conv));
+  } else if (IsConvolutionTransposed4x4Supported(op_def, attr)) {
+    ConvolutionTransposed4x4 conv =
+        CreateConvolutionTransposed4x4(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed4x4>(std::move(conv));
   } else {
-    ConvolutionTransposed conv;
-    RETURN_IF_ERROR(
-        CreateConvolutionTransposed(creation_context, op_def, attr, &conv));
+    ConvolutionTransposed conv =
+        CreateConvolutionTransposed(device_info, op_def, attr);
     *ptr = absl::make_unique<ConvolutionTransposed>(std::move(conv));
   }
   return absl::OkStatus();
 }
 
 absl::Status SelectConvolutionTransposedMali(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  ConvolutionTransposed conv;
-  RETURN_IF_ERROR(
-      CreateConvolutionTransposed(creation_context, op_def, attr, &conv));
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
+  ConvolutionTransposed conv =
+      CreateConvolutionTransposed(device_info, op_def, attr);
   *ptr = absl::make_unique<ConvolutionTransposed>(std::move(conv));
   return absl::OkStatus();
 }
 
 }  // namespace
 
 absl::Status SelectConvolutionTransposed(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr) {
-  const auto& device_info = creation_context.device->info_;
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr) {
   if (device_info.IsAdreno()) {
-    return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
-                                             ptr);
+    return SelectConvolutionTransposedAdreno(attr, device_info, op_def, ptr);
   } else if (device_info.IsPowerVR() || device_info.IsAMD() ||
              device_info.IsNvidia() || device_info.IsIntel()) {
-    return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def,
-                                              ptr);
+    return SelectConvolutionTransposedPowerVR(attr, device_info, op_def, ptr);
   } else if (device_info.IsMali()) {
-    return SelectConvolutionTransposedMali(attr, creation_context, op_def, ptr);
+    return SelectConvolutionTransposedMali(attr, device_info, op_def, ptr);
   } else {
-    return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
-                                             ptr);
+    return SelectConvolutionTransposedAdreno(attr, device_info, op_def, ptr);
  }
 }
 
@@ -27,9 +27,8 @@ namespace gpu {
 namespace cl {
 
 absl::Status SelectConvolutionTransposed(
-    const ConvolutionTransposedAttributes& attr,
-    const CreationContext& creation_context, const OperationDef& op_def,
-    std::unique_ptr<GPUOperation>* ptr);
+    const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def, std::unique_ptr<GPUOperation>* ptr);
 
 }  // namespace cl
 }  // namespace gpu
@@ -231,8 +231,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
     case OperationType::CONVOLUTION_TRANSPOSED: {
      auto attr = absl::any_cast<ConvolutionTransposedAttributes>(
          node.operation.attributes);
-      return SelectConvolutionTransposed(attr, creation_context, op_def,
-                                         gpu_op);
+      return SelectConvolutionTransposed(attr, creation_context.GetDeviceInfo(),
+                                         op_def, gpu_op);
     }
     case OperationType::DEPTHWISE_CONVOLUTION: {
      auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
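For reference, a caller that wants a transposed-convolution kernel after this change only needs a DeviceInfo. A minimal sketch of the selection flow, mirroring the SelectConvolutionTransposedPowerVR hunk above (device_info, op_def, and attr are assumed to already be in scope inside the TFLite GPU CL code base; this is not a standalone program):

    std::unique_ptr<GPUOperation> gpu_op;
    if (IsConvolutionTransposed4x4Supported(op_def, attr)) {
      // Specialized 4x4 kernel when the attributes allow it.
      ConvolutionTransposed4x4 conv =
          CreateConvolutionTransposed4x4(device_info, op_def, attr);
      gpu_op = absl::make_unique<ConvolutionTransposed4x4>(std::move(conv));
    } else {
      // Generic fallback kernel.
      ConvolutionTransposed conv =
          CreateConvolutionTransposed(device_info, op_def, attr);
      gpu_op = absl::make_unique<ConvolutionTransposed>(std::move(conv));
    }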