Added create methods with dynamic weights for transposed convolutions.
PiperOrigin-RevId: 343595686 Change-Id: I90bd0b0a8221e3c419f0976f1569c612de193ec4
This commit is contained in:
parent
5d8eae5093
commit
bd60d491c2
@ -328,6 +328,7 @@ cc_library(
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:weights_conversion",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:weights_layout",
|
||||
],
|
||||
)
|
||||
|
||||
@ -368,6 +369,7 @@ cc_library(
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:weights_conversion",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:weights_layout",
|
||||
],
|
||||
)
|
||||
|
||||
@ -407,6 +409,7 @@ cc_library(
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:weights_conversion",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:weights_layout",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -80,6 +80,19 @@ std::string ConvolutionTransposed3x3::GenerateConvolutionTransposedCode(
|
||||
}
|
||||
AddDstTensor("dst_tensor", dst_desc);
|
||||
|
||||
if (op_def.src_tensors.size() == 2) {
|
||||
// dynamic weights
|
||||
BufferDescriptor desc;
|
||||
desc.element_type = op_def.src_tensors[1].data_type;
|
||||
desc.element_size = 4;
|
||||
desc.memory_type =
|
||||
weights_upload_type ==
|
||||
ConvolutionTransposed3x3::WeightsUploadType::CONSTANT_MEM
|
||||
? MemoryType::CONSTANT
|
||||
: MemoryType::GLOBAL;
|
||||
AddSrcBuffer("weights", desc);
|
||||
}
|
||||
|
||||
args_.AddInt("filter_offset");
|
||||
args_.AddInt("padding_x");
|
||||
args_.AddInt("padding_y");
|
||||
@ -389,6 +402,21 @@ ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
|
||||
return result;
|
||||
}
|
||||
|
||||
// Builds a ConvolutionTransposed3x3 for the dynamic-weights case: no weights
// object is uploaded here; only the bias taken from `attr` is attached to the
// returned operation.
// NOTE(review): presumably the weights are then supplied at run time as an
// additional source tensor -- confirm against the callers.
//
// gpu_info:   forwarded to the ConvolutionTransposed3x3 constructor.
// definition: operation definition; also supplies the bias element type.
// attr:       provides the prepended padding and the bias values.
ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr) {
|
||||
// x is taken from the prepended width padding, y from the prepended height.
const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h);
|
||||
ConvolutionTransposed3x3 result(definition, gpu_info, padding);
|
||||
|
||||
// Describe the bias as a linear tensor with TEXTURE_2D storage, using the
// data type reported by the operation definition.
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
desc.UploadLinearData(attr.bias);
|
||||
// Ownership of the descriptor is moved into the operation's arguments under
// the name "biases".
result.args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -29,6 +29,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
|
||||
@ -52,6 +53,13 @@ class ConvolutionTransposed3x3 : public GPUOperation {
|
||||
ConvolutionTransposed3x3(const ConvolutionTransposed3x3&) = delete;
|
||||
ConvolutionTransposed3x3& operator=(const ConvolutionTransposed3x3&) = delete;
|
||||
|
||||
// Returns the description of the weights this operation works with: the
// kOICustomSSpatialI4O4 layout together with the spatial remap produced by
// GetSpatialWeightsRemap().
// NOTE(review): presumably consumed by code that converts externally supplied
// (dynamic) weights into this layout -- confirm against the callers.
WeightsDescription GetWeightsDescription() const {
|
||||
WeightsDescription desc;
|
||||
desc.layout = WeightsLayout::kOICustomSSpatialI4O4;
|
||||
desc.spatial_remap = GetSpatialWeightsRemap();
|
||||
return desc;
|
||||
}
|
||||
|
||||
enum class WeightsUploadType {
|
||||
LOCAL_MEM_ASYNC,
|
||||
LOCAL_MEM_BY_THREADS,
|
||||
@ -65,6 +73,10 @@ class ConvolutionTransposed3x3 : public GPUOperation {
|
||||
friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
template <DataType T>
|
||||
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
|
||||
|
||||
@ -124,6 +136,10 @@ ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
// Factory for a ConvolutionTransposed3x3 that does not upload weights from
// `attr`; its definition attaches only the bias (as the "biases" object) and
// takes the padding from attr.padding.prepended.
ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -53,6 +53,15 @@ std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode(
|
||||
AddSrcTensor("src_tensor", src_desc);
|
||||
AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
|
||||
|
||||
if (op_def.src_tensors.size() == 2) {
|
||||
// dynamic weights
|
||||
BufferDescriptor desc;
|
||||
desc.element_type = op_def.src_tensors[1].data_type;
|
||||
desc.element_size = 4;
|
||||
desc.memory_type = MemoryType::CONSTANT;
|
||||
AddSrcBuffer("weights", desc);
|
||||
}
|
||||
|
||||
const auto src_tensor_type = op_def.src_tensors[0].storage_type;
|
||||
|
||||
std::string c = GetCommonDefines(op_def.precision);
|
||||
@ -160,8 +169,7 @@ std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode(
|
||||
for (int d = 0; d < dst_depth; ++d) {
|
||||
const std::string layer = std::to_string(d);
|
||||
c += " {\n";
|
||||
c += " FLT4 bias_val = args.weights.Read(" +
|
||||
std::to_string(36 * filters_index + d) + ");\n";
|
||||
c += " FLT4 bias_val = args.biases.Read(" + layer + ");\n";
|
||||
for (int y = 0; y < 2; ++y) {
|
||||
for (int x = 0; x < 2; ++x) {
|
||||
const std::string x_coord = "X + " + std::to_string(x);
|
||||
@ -205,7 +213,28 @@ ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr) {
|
||||
ConvolutionTransposed3x3Thin result(definition, attr);
|
||||
result.UploadData(attr.weights, attr.bias);
|
||||
result.UploadWeights(attr.weights);
|
||||
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result.args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Builds a ConvolutionTransposed3x3Thin for the dynamic-weights case: no
// weights are uploaded here; only the bias from `attr` is attached to the
// returned operation.
// NOTE(review): presumably the weights are then supplied at run time as an
// additional source tensor -- confirm against the callers.
//
// gpu_info:   not referenced in this function body.
// definition: operation definition; also supplies the bias element type.
// attr:       forwarded to the constructor and provides the bias values.
ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3ThinDynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr) {
|
||||
ConvolutionTransposed3x3Thin result(definition, attr);
|
||||
|
||||
// Describe the bias as a linear tensor with TEXTURE_2D storage, using the
// data type reported by the operation definition.
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
desc.UploadLinearData(attr.bias);
|
||||
// Ownership of the descriptor is moved into the operation's arguments under
// the name "biases".
result.args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
|
||||
@ -49,16 +50,28 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
|
||||
ConvolutionTransposed3x3Thin& operator=(const ConvolutionTransposed3x3Thin&) =
|
||||
delete;
|
||||
|
||||
// Returns the description of the weights this operation works with: the
// kOICustomSSpatialI4O4 layout together with the spatial remap produced by
// GetSpatialWeightsRemap().
// NOTE(review): presumably consumed by code that converts externally supplied
// (dynamic) weights into this layout -- confirm against the callers.
WeightsDescription GetWeightsDescription() const {
|
||||
WeightsDescription desc;
|
||||
desc.layout = WeightsLayout::kOICustomSSpatialI4O4;
|
||||
desc.spatial_remap = GetSpatialWeightsRemap();
|
||||
return desc;
|
||||
}
|
||||
|
||||
private:
|
||||
friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
explicit ConvolutionTransposed3x3Thin(
|
||||
const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
friend ConvolutionTransposed3x3Thin
|
||||
CreateConvolutionTransposed3x3ThinDynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
template <DataType T>
|
||||
void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
|
||||
const tflite::gpu::Tensor<Linear, T>& biases);
|
||||
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
|
||||
|
||||
std::vector<int> GetSpatialWeightsRemap() const;
|
||||
|
||||
@ -67,9 +80,8 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
|
||||
};
|
||||
|
||||
template <DataType T>
|
||||
void ConvolutionTransposed3x3Thin::UploadData(
|
||||
const tflite::gpu::Tensor<OHWI, T>& weights,
|
||||
const tflite::gpu::Tensor<Linear, T>& biases) {
|
||||
void ConvolutionTransposed3x3Thin::UploadWeights(
|
||||
const tflite::gpu::Tensor<OHWI, T>& weights) {
|
||||
const int src_depth = DivideRoundUp(weights.shape.i, 4);
|
||||
const int dst_depth = DivideRoundUp(weights.shape.o, 4);
|
||||
const int kernel_x = 3; // This operation support only 3x3 kernel
|
||||
@ -83,33 +95,17 @@ void ConvolutionTransposed3x3Thin::UploadData(
|
||||
desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
|
||||
desc.element_size = 4;
|
||||
desc.memory_type = MemoryType::CONSTANT;
|
||||
desc.size = flt4_size * (flt4_count + dst_depth);
|
||||
desc.size = flt4_size * flt4_count;
|
||||
desc.data.resize(desc.size);
|
||||
|
||||
if (f32_weights) {
|
||||
float4* gpu_data = reinterpret_cast<float4*>(desc.data.data());
|
||||
RearrangeWeightsToOICustomSpatialI4O4(weights, GetSpatialWeightsRemap(),
|
||||
absl::MakeSpan(gpu_data, flt4_count));
|
||||
for (int i = 0; i < dst_depth; ++i) {
|
||||
float4 bias_value(0.0f);
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
int ch = i * 4 + c;
|
||||
bias_value[c] = ch < weights.shape.o ? biases.data[ch] : 0.0f;
|
||||
}
|
||||
gpu_data[flt4_count + i] = bias_value;
|
||||
}
|
||||
} else {
|
||||
half4* gpu_data = reinterpret_cast<half4*>(desc.data.data());
|
||||
RearrangeWeightsToOICustomSpatialI4O4(weights, GetSpatialWeightsRemap(),
|
||||
absl::MakeSpan(gpu_data, flt4_count));
|
||||
for (int i = 0; i < dst_depth; ++i) {
|
||||
half4 bias_value(0.0f);
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
int ch = i * 4 + c;
|
||||
bias_value[c] = ch < weights.shape.o ? biases.data[ch] : 0.0f;
|
||||
}
|
||||
gpu_data[flt4_count + i] = bias_value;
|
||||
}
|
||||
}
|
||||
|
||||
args_.AddObject("weights",
|
||||
@ -123,6 +119,10 @@ ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
// Factory for a ConvolutionTransposed3x3Thin that does not upload weights
// from `attr`; its definition attaches only the bias (as the "biases" object).
ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3ThinDynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -25,24 +25,35 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
namespace {
|
||||
// Picks the WeightsUploadType to use for the given GPU:
//   PowerVR          -> LOCAL_MEM_ASYNC
//   Nvidia or Intel  -> LOCAL_MEM_BY_THREADS
//   AMD              -> CONSTANT_MEM
//   anything else    -> GLOBAL_MEM
ConvolutionTransposed4x4::WeightsUploadType GetBestWeightsUploadType(
|
||||
const GpuInfo& gpu_info) {
|
||||
// This initial value is always overwritten: every branch of the chain below
// (including the final else) assigns weights_upload_type.
ConvolutionTransposed4x4::WeightsUploadType weights_upload_type =
|
||||
ConvolutionTransposed4x4::WeightsUploadType::GLOBAL_MEM;
|
||||
if (gpu_info.IsPowerVR()) {
|
||||
weights_upload_type =
|
||||
ConvolutionTransposed4x4::WeightsUploadType::LOCAL_MEM_ASYNC;
|
||||
} else if (gpu_info.IsNvidia() || gpu_info.IsIntel()) {
|
||||
weights_upload_type =
|
||||
ConvolutionTransposed4x4::WeightsUploadType::LOCAL_MEM_BY_THREADS;
|
||||
} else if (gpu_info.IsAMD()) {
|
||||
weights_upload_type =
|
||||
ConvolutionTransposed4x4::WeightsUploadType::CONSTANT_MEM;
|
||||
} else {
|
||||
// Fallback for every vendor not matched above.
weights_upload_type =
|
||||
ConvolutionTransposed4x4::WeightsUploadType::GLOBAL_MEM;
|
||||
}
|
||||
return weights_upload_type;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
ConvolutionTransposed4x4::ConvolutionTransposed4x4(
|
||||
const OperationDef& definition, const GpuInfo& gpu_info,
|
||||
const ConvolutionTransposedAttributes& attr)
|
||||
const OperationDef& definition, const GpuInfo& gpu_info)
|
||||
: GPUOperation(definition) {
|
||||
work_group_size_ = int3(8, 4, 1);
|
||||
WeightsUploadType weights_upload_type = WeightsUploadType::GLOBAL_MEM;
|
||||
if (gpu_info.IsPowerVR()) {
|
||||
weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC;
|
||||
} else if (gpu_info.IsNvidia() || gpu_info.IsIntel()) {
|
||||
weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
|
||||
} else if (gpu_info.IsAMD()) {
|
||||
weights_upload_type = WeightsUploadType::CONSTANT_MEM;
|
||||
} else {
|
||||
weights_upload_type = WeightsUploadType::GLOBAL_MEM;
|
||||
}
|
||||
|
||||
code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type);
|
||||
UploadWeights(attr.weights, weights_upload_type);
|
||||
code_ = GenerateConvolutionTransposedCode(definition_,
|
||||
GetBestWeightsUploadType(gpu_info));
|
||||
if (definition_.precision == CalculationsPrecision::F16 &&
|
||||
gpu_info.IsPowerVR()) {
|
||||
compiler_options_.push_back(CompilerOptions::kClPowervrFp16);
|
||||
@ -76,6 +87,19 @@ std::string ConvolutionTransposed4x4::GenerateConvolutionTransposedCode(
|
||||
}
|
||||
AddDstTensor("dst_tensor", dst_desc);
|
||||
|
||||
if (op_def.src_tensors.size() == 2) {
|
||||
// dynamic weights
|
||||
BufferDescriptor desc;
|
||||
desc.element_type = op_def.src_tensors[1].data_type;
|
||||
desc.element_size = 4;
|
||||
desc.memory_type =
|
||||
weights_upload_type ==
|
||||
ConvolutionTransposed4x4::WeightsUploadType::CONSTANT_MEM
|
||||
? MemoryType::CONSTANT
|
||||
: MemoryType::GLOBAL;
|
||||
AddSrcBuffer("weights", desc);
|
||||
}
|
||||
|
||||
args_.AddInt("filter_offset");
|
||||
|
||||
const bool need_local_mem =
|
||||
@ -338,7 +362,22 @@ bool IsConvolutionTransposed4x4Supported(
|
||||
// Builds a ConvolutionTransposed4x4 with constant weights: the weights from
// `attr` are uploaded using the upload type chosen by
// GetBestWeightsUploadType(gpu_info), and the bias is attached as "biases".
//
// gpu_info:   forwarded to the constructor and used to pick the upload type.
// definition: operation definition; also supplies the bias element type.
// attr:       provides the weights and the bias values.
ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr) {
|
||||
// NOTE(review): this text is a rendered diff without +/- markers. The two
// `result` declarations below look like the removed (three-argument) and the
// added (two-argument) version of the same line -- confirm against the
// repository before treating both as live code.
ConvolutionTransposed4x4 result(definition, gpu_info, attr);
|
||||
ConvolutionTransposed4x4 result(definition, gpu_info);
|
||||
result.UploadWeights(attr.weights, GetBestWeightsUploadType(gpu_info));
|
||||
|
||||
// Describe the bias as a linear tensor with TEXTURE_2D storage, using the
// data type reported by the operation definition.
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
desc.UploadLinearData(attr.bias);
|
||||
// Ownership of the descriptor is moved into the operation's arguments under
// the name "biases".
result.args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return result;
|
||||
}
|
||||
|
||||
ConvolutionTransposed4x4 CreateConvolutionTransposed4x4DynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr) {
|
||||
ConvolutionTransposed4x4 result(definition, gpu_info);
|
||||
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
|
@ -29,6 +29,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
|
||||
@ -54,6 +55,13 @@ class ConvolutionTransposed4x4 : public GPUOperation {
|
||||
ConvolutionTransposed4x4(const ConvolutionTransposed4x4&) = delete;
|
||||
ConvolutionTransposed4x4& operator=(const ConvolutionTransposed4x4&) = delete;
|
||||
|
||||
// Returns the description of the weights this operation works with: the
// kOICustomSSpatialI4O4 layout together with the spatial remap produced by
// GetSpatialWeightsRemap().
// NOTE(review): presumably consumed by code that converts externally supplied
// (dynamic) weights into this layout -- confirm against the callers.
WeightsDescription GetWeightsDescription() const {
|
||||
WeightsDescription desc;
|
||||
desc.layout = WeightsLayout::kOICustomSSpatialI4O4;
|
||||
desc.spatial_remap = GetSpatialWeightsRemap();
|
||||
return desc;
|
||||
}
|
||||
|
||||
enum class WeightsUploadType {
|
||||
LOCAL_MEM_ASYNC,
|
||||
LOCAL_MEM_BY_THREADS,
|
||||
@ -63,11 +71,15 @@ class ConvolutionTransposed4x4 : public GPUOperation {
|
||||
|
||||
private:
|
||||
ConvolutionTransposed4x4(const OperationDef& definition,
|
||||
const GpuInfo& gpu_info,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
const GpuInfo& gpu_info);
|
||||
|
||||
friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4DynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
template <DataType T>
|
||||
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
|
||||
WeightsUploadType weights_upload_type);
|
||||
@ -124,6 +136,10 @@ ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
// Factory for the dynamic-weights variant of ConvolutionTransposed4x4.
// NOTE(review): the definition is only partially visible in this view; the
// visible part constructs the operation from (definition, gpu_info) and starts
// a TEXTURE_2D TensorLinearDescriptor. Presumably it attaches only the bias,
// like the constant-weights factory minus the weights upload -- confirm.
ConvolutionTransposed4x4 CreateConvolutionTransposed4x4DynamicWeights(
|
||||
const GpuInfo& gpu_info, const OperationDef& definition,
|
||||
const ConvolutionTransposedAttributes& attr);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
Loading…
Reference in New Issue
Block a user