Added create methods with dynamic weights for transposed convolutions.

PiperOrigin-RevId: 343595686
Change-Id: I90bd0b0a8221e3c419f0976f1569c612de193ec4
This commit is contained in:
Raman Sarokin 2020-11-20 17:34:30 -08:00 committed by TensorFlower Gardener
parent 5d8eae5093
commit bd60d491c2
7 changed files with 176 additions and 45 deletions

View File

@ -328,6 +328,7 @@ cc_library(
"//tensorflow/lite/delegates/gpu/common:tensor",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common/task:weights_conversion",
"//tensorflow/lite/delegates/gpu/common/task:weights_layout",
],
)
@ -368,6 +369,7 @@ cc_library(
"//tensorflow/lite/delegates/gpu/common:tensor",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common/task:weights_conversion",
"//tensorflow/lite/delegates/gpu/common/task:weights_layout",
],
)
@ -407,6 +409,7 @@ cc_library(
"//tensorflow/lite/delegates/gpu/common:tensor",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common/task:weights_conversion",
"//tensorflow/lite/delegates/gpu/common/task:weights_layout",
],
)

View File

@ -80,6 +80,19 @@ std::string ConvolutionTransposed3x3::GenerateConvolutionTransposedCode(
}
AddDstTensor("dst_tensor", dst_desc);
if (op_def.src_tensors.size() == 2) {
// dynamic weights
BufferDescriptor desc;
desc.element_type = op_def.src_tensors[1].data_type;
desc.element_size = 4;
desc.memory_type =
weights_upload_type ==
ConvolutionTransposed3x3::WeightsUploadType::CONSTANT_MEM
? MemoryType::CONSTANT
: MemoryType::GLOBAL;
AddSrcBuffer("weights", desc);
}
args_.AddInt("filter_offset");
args_.AddInt("padding_x");
args_.AddInt("padding_y");
@ -389,6 +402,21 @@ ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
return result;
}
// Builds a ConvolutionTransposed3x3 operation without uploading any weights.
// Only the bias from `attr` is attached: it is stored as a TEXTURE_2D linear
// tensor with the definition's data type and registered under "biases".
ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr) {
  // Prepended padding, passed to the operation as (w, h).
  const int2 prepended_padding(attr.padding.prepended.w,
                               attr.padding.prepended.h);
  ConvolutionTransposed3x3 op(definition, gpu_info, prepended_padding);

  // Descriptor fields are set before the upload so the bias data is
  // uploaded into a fully configured descriptor.
  TensorLinearDescriptor bias_desc;
  bias_desc.element_type = definition.GetDataType();
  bias_desc.storage_type = LinearStorageType::TEXTURE_2D;
  bias_desc.UploadLinearData(attr.bias);

  op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(
                                   std::move(bias_desc)));
  return op;
}
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
@ -52,6 +53,13 @@ class ConvolutionTransposed3x3 : public GPUOperation {
ConvolutionTransposed3x3(const ConvolutionTransposed3x3&) = delete;
ConvolutionTransposed3x3& operator=(const ConvolutionTransposed3x3&) = delete;
// Returns the weights description for this operation: the
// kOICustomSSpatialI4O4 layout together with the spatial remap produced by
// GetSpatialWeightsRemap().
WeightsDescription GetWeightsDescription() const {
  WeightsDescription weights_desc;
  weights_desc.spatial_remap = GetSpatialWeightsRemap();
  weights_desc.layout = WeightsLayout::kOICustomSSpatialI4O4;
  return weights_desc;
}
enum class WeightsUploadType {
LOCAL_MEM_ASYNC,
LOCAL_MEM_BY_THREADS,
@ -65,6 +73,10 @@ class ConvolutionTransposed3x3 : public GPUOperation {
friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
template <DataType T>
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
@ -124,6 +136,10 @@ ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
// Factory for the dynamic-weights variant of ConvolutionTransposed3x3.
// NOTE(review): presumably the weights are supplied as a second source tensor
// at runtime instead of being read from attr.weights - confirm against the
// definition in the .cc file.
ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -53,6 +53,15 @@ std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode(
AddSrcTensor("src_tensor", src_desc);
AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
if (op_def.src_tensors.size() == 2) {
// dynamic weights
BufferDescriptor desc;
desc.element_type = op_def.src_tensors[1].data_type;
desc.element_size = 4;
desc.memory_type = MemoryType::CONSTANT;
AddSrcBuffer("weights", desc);
}
const auto src_tensor_type = op_def.src_tensors[0].storage_type;
std::string c = GetCommonDefines(op_def.precision);
@ -160,8 +169,7 @@ std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode(
for (int d = 0; d < dst_depth; ++d) {
const std::string layer = std::to_string(d);
c += " {\n";
c += " FLT4 bias_val = args.weights.Read(" +
std::to_string(36 * filters_index + d) + ");\n";
c += " FLT4 bias_val = args.biases.Read(" + layer + ");\n";
for (int y = 0; y < 2; ++y) {
for (int x = 0; x < 2; ++x) {
const std::string x_coord = "X + " + std::to_string(x);
@ -205,7 +213,28 @@ ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr) {
ConvolutionTransposed3x3Thin result(definition, attr);
result.UploadData(attr.weights, attr.bias);
result.UploadWeights(attr.weights);
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
desc.UploadLinearData(attr.bias);
result.args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return result;
}
// Builds a ConvolutionTransposed3x3Thin operation without uploading any
// weights. Only the bias from `attr` is attached: it is stored as a
// TEXTURE_2D linear tensor with the definition's data type and registered
// under "biases". `gpu_info` is not used by this function.
ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3ThinDynamicWeights(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr) {
  ConvolutionTransposed3x3Thin op(definition, attr);

  // Descriptor fields are set before the upload so the bias data is
  // uploaded into a fully configured descriptor.
  TensorLinearDescriptor bias_desc;
  bias_desc.element_type = definition.GetDataType();
  bias_desc.storage_type = LinearStorageType::TEXTURE_2D;
  bias_desc.UploadLinearData(attr.bias);

  op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(
                                   std::move(bias_desc)));
  return op;
}

View File

@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
@ -49,16 +50,28 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
ConvolutionTransposed3x3Thin& operator=(const ConvolutionTransposed3x3Thin&) =
delete;
// Returns the weights description for this operation: the
// kOICustomSSpatialI4O4 layout together with the spatial remap produced by
// GetSpatialWeightsRemap().
WeightsDescription GetWeightsDescription() const {
  WeightsDescription weights_desc;
  weights_desc.spatial_remap = GetSpatialWeightsRemap();
  weights_desc.layout = WeightsLayout::kOICustomSSpatialI4O4;
  return weights_desc;
}
private:
friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
explicit ConvolutionTransposed3x3Thin(
const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
friend ConvolutionTransposed3x3Thin
CreateConvolutionTransposed3x3ThinDynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
template <DataType T>
void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
const tflite::gpu::Tensor<Linear, T>& biases);
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
std::vector<int> GetSpatialWeightsRemap() const;
@ -67,9 +80,8 @@ class ConvolutionTransposed3x3Thin : public GPUOperation {
};
template <DataType T>
void ConvolutionTransposed3x3Thin::UploadData(
const tflite::gpu::Tensor<OHWI, T>& weights,
const tflite::gpu::Tensor<Linear, T>& biases) {
void ConvolutionTransposed3x3Thin::UploadWeights(
const tflite::gpu::Tensor<OHWI, T>& weights) {
const int src_depth = DivideRoundUp(weights.shape.i, 4);
const int dst_depth = DivideRoundUp(weights.shape.o, 4);
const int kernel_x = 3; // This operation support only 3x3 kernel
@ -83,33 +95,17 @@ void ConvolutionTransposed3x3Thin::UploadData(
desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.element_size = 4;
desc.memory_type = MemoryType::CONSTANT;
desc.size = flt4_size * (flt4_count + dst_depth);
desc.size = flt4_size * flt4_count;
desc.data.resize(desc.size);
if (f32_weights) {
float4* gpu_data = reinterpret_cast<float4*>(desc.data.data());
RearrangeWeightsToOICustomSpatialI4O4(weights, GetSpatialWeightsRemap(),
absl::MakeSpan(gpu_data, flt4_count));
for (int i = 0; i < dst_depth; ++i) {
float4 bias_value(0.0f);
for (int c = 0; c < 4; ++c) {
int ch = i * 4 + c;
bias_value[c] = ch < weights.shape.o ? biases.data[ch] : 0.0f;
}
gpu_data[flt4_count + i] = bias_value;
}
} else {
half4* gpu_data = reinterpret_cast<half4*>(desc.data.data());
RearrangeWeightsToOICustomSpatialI4O4(weights, GetSpatialWeightsRemap(),
absl::MakeSpan(gpu_data, flt4_count));
for (int i = 0; i < dst_depth; ++i) {
half4 bias_value(0.0f);
for (int c = 0; c < 4; ++c) {
int ch = i * 4 + c;
bias_value[c] = ch < weights.shape.o ? biases.data[ch] : 0.0f;
}
gpu_data[flt4_count + i] = bias_value;
}
}
args_.AddObject("weights",
@ -123,6 +119,10 @@ ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
// Factory for the dynamic-weights variant of ConvolutionTransposed3x3Thin.
// NOTE(review): presumably the weights are supplied as a second source tensor
// at runtime instead of being read from attr.weights - confirm against the
// definition in the .cc file.
ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3ThinDynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -25,24 +25,35 @@ limitations under the License.
namespace tflite {
namespace gpu {
namespace cl {
namespace {
// Chooses the weights upload strategy for ConvolutionTransposed4x4 from the
// GPU vendor reported by `gpu_info`:
//   PowerVR         -> LOCAL_MEM_ASYNC
//   Nvidia or Intel -> LOCAL_MEM_BY_THREADS
//   AMD             -> CONSTANT_MEM
//   anything else   -> GLOBAL_MEM
ConvolutionTransposed4x4::WeightsUploadType GetBestWeightsUploadType(
    const GpuInfo& gpu_info) {
  using UploadType = ConvolutionTransposed4x4::WeightsUploadType;
  if (gpu_info.IsPowerVR()) {
    return UploadType::LOCAL_MEM_ASYNC;
  }
  if (gpu_info.IsNvidia() || gpu_info.IsIntel()) {
    return UploadType::LOCAL_MEM_BY_THREADS;
  }
  if (gpu_info.IsAMD()) {
    return UploadType::CONSTANT_MEM;
  }
  return UploadType::GLOBAL_MEM;
}
} // namespace
ConvolutionTransposed4x4::ConvolutionTransposed4x4(
const OperationDef& definition, const GpuInfo& gpu_info,
const ConvolutionTransposedAttributes& attr)
const OperationDef& definition, const GpuInfo& gpu_info)
: GPUOperation(definition) {
work_group_size_ = int3(8, 4, 1);
WeightsUploadType weights_upload_type = WeightsUploadType::GLOBAL_MEM;
if (gpu_info.IsPowerVR()) {
weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC;
} else if (gpu_info.IsNvidia() || gpu_info.IsIntel()) {
weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
} else if (gpu_info.IsAMD()) {
weights_upload_type = WeightsUploadType::CONSTANT_MEM;
} else {
weights_upload_type = WeightsUploadType::GLOBAL_MEM;
}
code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type);
UploadWeights(attr.weights, weights_upload_type);
code_ = GenerateConvolutionTransposedCode(definition_,
GetBestWeightsUploadType(gpu_info));
if (definition_.precision == CalculationsPrecision::F16 &&
gpu_info.IsPowerVR()) {
compiler_options_.push_back(CompilerOptions::kClPowervrFp16);
@ -76,6 +87,19 @@ std::string ConvolutionTransposed4x4::GenerateConvolutionTransposedCode(
}
AddDstTensor("dst_tensor", dst_desc);
if (op_def.src_tensors.size() == 2) {
// dynamic weights
BufferDescriptor desc;
desc.element_type = op_def.src_tensors[1].data_type;
desc.element_size = 4;
desc.memory_type =
weights_upload_type ==
ConvolutionTransposed4x4::WeightsUploadType::CONSTANT_MEM
? MemoryType::CONSTANT
: MemoryType::GLOBAL;
AddSrcBuffer("weights", desc);
}
args_.AddInt("filter_offset");
const bool need_local_mem =
@ -338,7 +362,22 @@ bool IsConvolutionTransposed4x4Supported(
// Creates a ConvolutionTransposed4x4 with constant weights: attr.weights are
// uploaded using the upload type chosen by GetBestWeightsUploadType(gpu_info),
// and attr.bias is attached as a TEXTURE_2D linear tensor named "biases".
ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr) {
ConvolutionTransposed4x4 result(definition, gpu_info, attr);
ConvolutionTransposed4x4 result(definition, gpu_info);
result.UploadWeights(attr.weights, GetBestWeightsUploadType(gpu_info));
// Bias descriptor: element type follows the operation definition.
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
desc.UploadLinearData(attr.bias);
result.args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return result;
}
ConvolutionTransposed4x4 CreateConvolutionTransposed4x4DynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr) {
ConvolutionTransposed4x4 result(definition, gpu_info);
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;

View File

@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
@ -54,6 +55,13 @@ class ConvolutionTransposed4x4 : public GPUOperation {
ConvolutionTransposed4x4(const ConvolutionTransposed4x4&) = delete;
ConvolutionTransposed4x4& operator=(const ConvolutionTransposed4x4&) = delete;
// Returns the weights description for this operation: the
// kOICustomSSpatialI4O4 layout together with the spatial remap produced by
// GetSpatialWeightsRemap().
WeightsDescription GetWeightsDescription() const {
  WeightsDescription weights_desc;
  weights_desc.spatial_remap = GetSpatialWeightsRemap();
  weights_desc.layout = WeightsLayout::kOICustomSSpatialI4O4;
  return weights_desc;
}
enum class WeightsUploadType {
LOCAL_MEM_ASYNC,
LOCAL_MEM_BY_THREADS,
@ -63,11 +71,15 @@ class ConvolutionTransposed4x4 : public GPUOperation {
private:
ConvolutionTransposed4x4(const OperationDef& definition,
const GpuInfo& gpu_info,
const ConvolutionTransposedAttributes& attr);
const GpuInfo& gpu_info);
friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4DynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
template <DataType T>
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
WeightsUploadType weights_upload_type);
@ -124,6 +136,10 @@ ConvolutionTransposed4x4 CreateConvolutionTransposed4x4(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
// Factory for the dynamic-weights variant of ConvolutionTransposed4x4.
// NOTE(review): presumably the weights are supplied as a second source tensor
// at runtime instead of being read from attr.weights - confirm against the
// definition in the .cc file.
ConvolutionTransposed4x4 CreateConvolutionTransposed4x4DynamicWeights(
const GpuInfo& gpu_info, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr);
} // namespace cl
} // namespace gpu
} // namespace tflite