Rename DepthWise to Depthwise.
PiperOrigin-RevId: 304512174 Change-Id: I8c3f785a77bacac0203dfb6cf3380d1f64123136
This commit is contained in:
parent
7d4df951af
commit
2e5190cf43
@ -73,7 +73,7 @@ std::string GetSrcValue(const TensorCodeGenerator& src_tensor,
|
||||
return c;
|
||||
}
|
||||
|
||||
std::string GenerateDepthWiseConvolutionCode(
|
||||
std::string GenerateDepthwiseConvolutionCode(
|
||||
const OperationDef& op_def, bool stride_correction,
|
||||
const LinearStorage& biases, int channel_multiplier,
|
||||
bool weights_are_buffer,
|
||||
@ -179,7 +179,7 @@ std::string GenerateDepthWiseConvolutionCode(
|
||||
}
|
||||
} // namespace
|
||||
|
||||
DepthWiseConvolution::DepthWiseConvolution(
|
||||
DepthwiseConvolution::DepthwiseConvolution(
|
||||
const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer)
|
||||
: GPUOperation(definition),
|
||||
@ -191,7 +191,7 @@ DepthWiseConvolution::DepthWiseConvolution(
|
||||
channel_multiplier_(attr.weights.shape.o),
|
||||
work_group_size_(8, 8, 1) {}
|
||||
|
||||
DepthWiseConvolution::DepthWiseConvolution(DepthWiseConvolution&& operation)
|
||||
DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation)
|
||||
: GPUOperation(std::move(operation)),
|
||||
weights_are_buffer_(operation.weights_are_buffer_),
|
||||
weights_tex2d_(std::move(operation.weights_tex2d_)),
|
||||
@ -206,8 +206,8 @@ DepthWiseConvolution::DepthWiseConvolution(DepthWiseConvolution&& operation)
|
||||
kernel_(std::move(operation.kernel_)),
|
||||
work_group_size_(operation.work_group_size_) {}
|
||||
|
||||
DepthWiseConvolution& DepthWiseConvolution::operator=(
|
||||
DepthWiseConvolution&& operation) {
|
||||
DepthwiseConvolution& DepthwiseConvolution::operator=(
|
||||
DepthwiseConvolution&& operation) {
|
||||
if (this != &operation) {
|
||||
std::swap(weights_are_buffer_, operation.weights_are_buffer_);
|
||||
weights_tex2d_ = std::move(operation.weights_tex2d_);
|
||||
@ -226,11 +226,11 @@ DepthWiseConvolution& DepthWiseConvolution::operator=(
|
||||
return *this;
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution::Compile(
|
||||
absl::Status DepthwiseConvolution::Compile(
|
||||
const CreationContext& creation_context) {
|
||||
const bool stride_correction =
|
||||
definition_.IsBatchSupported() && stride_.x != 1;
|
||||
const auto code = GenerateDepthWiseConvolutionCode(
|
||||
const auto code = GenerateDepthwiseConvolutionCode(
|
||||
definition_, stride_correction, biases_, channel_multiplier_,
|
||||
weights_are_buffer_, linked_operations_, *creation_context.device);
|
||||
return creation_context.cache->GetOrCreateCLKernel(
|
||||
@ -238,7 +238,7 @@ absl::Status DepthWiseConvolution::Compile(
|
||||
*creation_context.device, &kernel_);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution::BindArguments() {
|
||||
absl::Status DepthwiseConvolution::BindArguments() {
|
||||
kernel_.ResetBindingCounter();
|
||||
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
|
||||
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_));
|
||||
@ -259,29 +259,29 @@ absl::Status DepthWiseConvolution::BindArguments() {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
int3 DepthWiseConvolution::GetGridSize() const {
|
||||
int3 DepthwiseConvolution::GetGridSize() const {
|
||||
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
const int grid_y = dst_[0]->Height();
|
||||
const int grid_z = dst_[0]->Slices();
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution::Tune(const TuningParameters& params) {
|
||||
absl::Status DepthwiseConvolution::Tune(const TuningParameters& params) {
|
||||
RETURN_IF_ERROR(BindArguments());
|
||||
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution::AddToQueue(CLCommandQueue* queue) {
|
||||
absl::Status DepthwiseConvolution::AddToQueue(CLCommandQueue* queue) {
|
||||
RETURN_IF_ERROR(BindArguments());
|
||||
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
|
||||
}
|
||||
|
||||
absl::Status CreateDepthWiseConvolution(
|
||||
absl::Status CreateDepthwiseConvolution(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr,
|
||||
DepthWiseConvolution* result) {
|
||||
DepthwiseConvolution* result) {
|
||||
bool weights_are_buffer = creation_context.device->IsMali();
|
||||
*result = DepthWiseConvolution(definition, attr, weights_are_buffer);
|
||||
*result = DepthwiseConvolution(definition, attr, weights_are_buffer);
|
||||
RETURN_IF_ERROR(
|
||||
result->UploadWeights(attr.weights, creation_context.context));
|
||||
LinearStorageCreateInfo create_info;
|
||||
|
@ -35,26 +35,26 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
class DepthWiseConvolution : public GPUOperation {
|
||||
class DepthwiseConvolution : public GPUOperation {
|
||||
public:
|
||||
DepthWiseConvolution() = default;
|
||||
DepthwiseConvolution() = default;
|
||||
absl::Status AddToQueue(CLCommandQueue* queue) override;
|
||||
absl::Status Tune(const TuningParameters& params) override;
|
||||
|
||||
absl::Status Compile(const CreationContext& creation_context) override;
|
||||
|
||||
// Move only
|
||||
DepthWiseConvolution(DepthWiseConvolution&& operation);
|
||||
DepthWiseConvolution& operator=(DepthWiseConvolution&& operation);
|
||||
DepthWiseConvolution(const DepthWiseConvolution&) = delete;
|
||||
DepthWiseConvolution& operator=(const DepthWiseConvolution&) = delete;
|
||||
DepthwiseConvolution(DepthwiseConvolution&& operation);
|
||||
DepthwiseConvolution& operator=(DepthwiseConvolution&& operation);
|
||||
DepthwiseConvolution(const DepthwiseConvolution&) = delete;
|
||||
DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;
|
||||
|
||||
private:
|
||||
friend absl::Status CreateDepthWiseConvolution(
|
||||
friend absl::Status CreateDepthwiseConvolution(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr,
|
||||
DepthWiseConvolution* result);
|
||||
DepthWiseConvolution(const OperationDef& definition,
|
||||
DepthwiseConvolution* result);
|
||||
DepthwiseConvolution(const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr,
|
||||
bool weights_are_buffer);
|
||||
template <DataType T>
|
||||
@ -86,7 +86,7 @@ class DepthWiseConvolution : public GPUOperation {
|
||||
};
|
||||
|
||||
template <DataType T>
|
||||
absl::Status DepthWiseConvolution::UploadWeights(
|
||||
absl::Status DepthwiseConvolution::UploadWeights(
|
||||
const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
|
||||
const int dst_channels = weights.shape.i * weights.shape.o;
|
||||
const int dst_depth = IntegralDivideRoundUp(dst_channels, 4);
|
||||
@ -134,7 +134,7 @@ absl::Status DepthWiseConvolution::UploadWeights(
|
||||
}
|
||||
|
||||
template <DataType S, typename T>
|
||||
void DepthWiseConvolution::RearrangeWeightsData(
|
||||
void DepthwiseConvolution::RearrangeWeightsData(
|
||||
const tflite::gpu::Tensor<OHWI, S>& weights, absl::Span<T> dst) {
|
||||
const int dst_channels = weights.shape.i * weights.shape.o;
|
||||
const int dst_depth = IntegralDivideRoundUp(dst_channels, 4);
|
||||
@ -162,9 +162,9 @@ void DepthWiseConvolution::RearrangeWeightsData(
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status CreateDepthWiseConvolution(
|
||||
absl::Status CreateDepthwiseConvolution(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthWiseConvolution* result);
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthwiseConvolution* result);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
|
@ -79,7 +79,7 @@ std::string GetSrcValue(const TensorCodeGenerator& src_tensor,
|
||||
return c;
|
||||
}
|
||||
|
||||
std::string GenerateDepthWiseConvolution3DCode(
|
||||
std::string GenerateDepthwiseConvolution3DCode(
|
||||
const OperationDef& op_def, bool stride_correction,
|
||||
const LinearStorage& biases, int channel_multiplier,
|
||||
bool weights_are_buffer,
|
||||
@ -208,7 +208,7 @@ std::string GenerateDepthWiseConvolution3DCode(
|
||||
}
|
||||
} // namespace
|
||||
|
||||
DepthWiseConvolution3D::DepthWiseConvolution3D(
|
||||
DepthwiseConvolution3D::DepthwiseConvolution3D(
|
||||
const OperationDef& definition,
|
||||
const DepthwiseConvolution3DAttributes& attr, const CLDevice& device)
|
||||
: GPUOperation(definition),
|
||||
@ -222,8 +222,8 @@ DepthWiseConvolution3D::DepthWiseConvolution3D(
|
||||
channel_multiplier_(attr.weights.shape.o),
|
||||
work_group_size_(8, 8, 1) {}
|
||||
|
||||
DepthWiseConvolution3D::DepthWiseConvolution3D(
|
||||
DepthWiseConvolution3D&& operation)
|
||||
DepthwiseConvolution3D::DepthwiseConvolution3D(
|
||||
DepthwiseConvolution3D&& operation)
|
||||
: GPUOperation(std::move(operation)),
|
||||
weights_tex2d_(std::move(operation.weights_tex2d_)),
|
||||
weights_buf_(std::move(operation.weights_buf_)),
|
||||
@ -237,8 +237,8 @@ DepthWiseConvolution3D::DepthWiseConvolution3D(
|
||||
kernel_(std::move(operation.kernel_)),
|
||||
work_group_size_(operation.work_group_size_) {}
|
||||
|
||||
DepthWiseConvolution3D& DepthWiseConvolution3D::operator=(
|
||||
DepthWiseConvolution3D&& operation) {
|
||||
DepthwiseConvolution3D& DepthwiseConvolution3D::operator=(
|
||||
DepthwiseConvolution3D&& operation) {
|
||||
if (this != &operation) {
|
||||
weights_tex2d_ = std::move(operation.weights_tex2d_);
|
||||
weights_buf_ = std::move(operation.weights_buf_);
|
||||
@ -256,11 +256,11 @@ DepthWiseConvolution3D& DepthWiseConvolution3D::operator=(
|
||||
return *this;
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution3D::Compile(
|
||||
absl::Status DepthwiseConvolution3D::Compile(
|
||||
const CreationContext& creation_context) {
|
||||
const bool stride_correction =
|
||||
definition_.IsBatchSupported() && stride_.x != 1;
|
||||
const auto code = GenerateDepthWiseConvolution3DCode(
|
||||
const auto code = GenerateDepthwiseConvolution3DCode(
|
||||
definition_, stride_correction, biases_, channel_multiplier_,
|
||||
weights_are_buffer_, linked_operations_, *creation_context.device);
|
||||
return creation_context.cache->GetOrCreateCLKernel(
|
||||
@ -268,7 +268,7 @@ absl::Status DepthWiseConvolution3D::Compile(
|
||||
*creation_context.device, &kernel_);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution3D::BindArguments() {
|
||||
absl::Status DepthwiseConvolution3D::BindArguments() {
|
||||
kernel_.ResetBindingCounter();
|
||||
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
|
||||
if (weights_are_buffer_) {
|
||||
@ -298,28 +298,28 @@ absl::Status DepthWiseConvolution3D::BindArguments() {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
int3 DepthWiseConvolution3D::GetGridSize() const {
|
||||
int3 DepthwiseConvolution3D::GetGridSize() const {
|
||||
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
const int grid_y = dst_[0]->Height();
|
||||
const int grid_z = dst_[0]->Slices() * dst_[0]->Depth();
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution3D::Tune(const TuningParameters& params) {
|
||||
absl::Status DepthwiseConvolution3D::Tune(const TuningParameters& params) {
|
||||
RETURN_IF_ERROR(BindArguments());
|
||||
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConvolution3D::AddToQueue(CLCommandQueue* queue) {
|
||||
absl::Status DepthwiseConvolution3D::AddToQueue(CLCommandQueue* queue) {
|
||||
RETURN_IF_ERROR(BindArguments());
|
||||
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
|
||||
}
|
||||
|
||||
absl::Status CreateDepthWiseConvolution3D(
|
||||
absl::Status CreateDepthwiseConvolution3D(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution3DAttributes& attr,
|
||||
DepthWiseConvolution3D* result) {
|
||||
*result = DepthWiseConvolution3D(definition, attr, *creation_context.device);
|
||||
DepthwiseConvolution3D* result) {
|
||||
*result = DepthwiseConvolution3D(definition, attr, *creation_context.device);
|
||||
RETURN_IF_ERROR(
|
||||
result->UploadWeights(attr.weights, creation_context.context));
|
||||
LinearStorageCreateInfo create_info;
|
||||
|
@ -35,26 +35,26 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
class DepthWiseConvolution3D : public GPUOperation {
|
||||
class DepthwiseConvolution3D : public GPUOperation {
|
||||
public:
|
||||
DepthWiseConvolution3D() = default;
|
||||
DepthwiseConvolution3D() = default;
|
||||
absl::Status AddToQueue(CLCommandQueue* queue) override;
|
||||
absl::Status Tune(const TuningParameters& params) override;
|
||||
|
||||
absl::Status Compile(const CreationContext& creation_context) override;
|
||||
|
||||
// Move only
|
||||
DepthWiseConvolution3D(DepthWiseConvolution3D&& operation);
|
||||
DepthWiseConvolution3D& operator=(DepthWiseConvolution3D&& operation);
|
||||
DepthWiseConvolution3D(const DepthWiseConvolution3D&) = delete;
|
||||
DepthWiseConvolution3D& operator=(const DepthWiseConvolution3D&) = delete;
|
||||
DepthwiseConvolution3D(DepthwiseConvolution3D&& operation);
|
||||
DepthwiseConvolution3D& operator=(DepthwiseConvolution3D&& operation);
|
||||
DepthwiseConvolution3D(const DepthwiseConvolution3D&) = delete;
|
||||
DepthwiseConvolution3D& operator=(const DepthwiseConvolution3D&) = delete;
|
||||
|
||||
private:
|
||||
friend absl::Status CreateDepthWiseConvolution3D(
|
||||
friend absl::Status CreateDepthwiseConvolution3D(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution3DAttributes& attr,
|
||||
DepthWiseConvolution3D* result);
|
||||
DepthWiseConvolution3D(const OperationDef& definition,
|
||||
DepthwiseConvolution3D* result);
|
||||
DepthwiseConvolution3D(const OperationDef& definition,
|
||||
const DepthwiseConvolution3DAttributes& attr,
|
||||
const CLDevice& device);
|
||||
template <DataType T>
|
||||
@ -85,7 +85,7 @@ class DepthWiseConvolution3D : public GPUOperation {
|
||||
};
|
||||
|
||||
template <DataType T>
|
||||
absl::Status DepthWiseConvolution3D::UploadWeights(
|
||||
absl::Status DepthwiseConvolution3D::UploadWeights(
|
||||
const tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
|
||||
const int dst_channels = weights.shape.i * weights.shape.o;
|
||||
const int dst_slices = IntegralDivideRoundUp(dst_channels, 4);
|
||||
@ -127,7 +127,7 @@ absl::Status DepthWiseConvolution3D::UploadWeights(
|
||||
}
|
||||
|
||||
template <DataType S, typename T>
|
||||
void DepthWiseConvolution3D::RearrangeWeightsData(
|
||||
void DepthwiseConvolution3D::RearrangeWeightsData(
|
||||
const tflite::gpu::Tensor<OHWDI, S>& weights, absl::Span<T> dst) {
|
||||
const int dst_channels = weights.shape.i * weights.shape.o;
|
||||
const int dst_slices = IntegralDivideRoundUp(dst_channels, 4);
|
||||
@ -158,10 +158,10 @@ void DepthWiseConvolution3D::RearrangeWeightsData(
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status CreateDepthWiseConvolution3D(
|
||||
absl::Status CreateDepthwiseConvolution3D(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution3DAttributes& attr,
|
||||
DepthWiseConvolution3D* result);
|
||||
DepthwiseConvolution3D* result);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
|
@ -28,7 +28,7 @@ namespace gpu {
|
||||
namespace cl {
|
||||
namespace {
|
||||
|
||||
std::string GenerateDepthWiseConvCode(
|
||||
std::string GenerateDepthwiseConvCode(
|
||||
const OperationDef& op_def,
|
||||
const std::vector<ElementwiseOperation*>& linked_operations,
|
||||
const CLDevice& device, bool weights_are_buffer, bool local_mem_uploads) {
|
||||
@ -266,14 +266,14 @@ std::string GenerateDepthWiseConvCode(
|
||||
|
||||
} // namespace
|
||||
|
||||
DepthWiseConv3x3::DepthWiseConv3x3(const OperationDef& definition,
|
||||
DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition,
|
||||
bool weights_are_buffer,
|
||||
bool local_mem_uploads)
|
||||
: GPUOperation(definition),
|
||||
weights_are_buffer_(weights_are_buffer),
|
||||
local_mem_uploads_(local_mem_uploads) {}
|
||||
|
||||
DepthWiseConv3x3::DepthWiseConv3x3(DepthWiseConv3x3&& operation)
|
||||
DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation)
|
||||
: GPUOperation(std::move(operation)),
|
||||
weights_are_buffer_(operation.weights_are_buffer_),
|
||||
local_mem_uploads_(operation.local_mem_uploads_),
|
||||
@ -283,7 +283,7 @@ DepthWiseConv3x3::DepthWiseConv3x3(DepthWiseConv3x3&& operation)
|
||||
kernel_(std::move(operation.kernel_)),
|
||||
work_group_size_(operation.work_group_size_) {}
|
||||
|
||||
DepthWiseConv3x3& DepthWiseConv3x3::operator=(DepthWiseConv3x3&& operation) {
|
||||
DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) {
|
||||
if (this != &operation) {
|
||||
std::swap(weights_are_buffer_, operation.weights_are_buffer_);
|
||||
std::swap(local_mem_uploads_, operation.local_mem_uploads_);
|
||||
@ -297,9 +297,9 @@ DepthWiseConv3x3& DepthWiseConv3x3::operator=(DepthWiseConv3x3&& operation) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConv3x3::Compile(
|
||||
absl::Status DepthwiseConv3x3::Compile(
|
||||
const CreationContext& creation_context) {
|
||||
std::string code = GenerateDepthWiseConvCode(
|
||||
std::string code = GenerateDepthwiseConvCode(
|
||||
definition_, linked_operations_, *creation_context.device,
|
||||
weights_are_buffer_, local_mem_uploads_);
|
||||
std::vector<CompilerOptions> options;
|
||||
@ -312,7 +312,7 @@ absl::Status DepthWiseConv3x3::Compile(
|
||||
*creation_context.device, &kernel_);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConv3x3::BindArguments() {
|
||||
absl::Status DepthwiseConv3x3::BindArguments() {
|
||||
kernel_.ResetBindingCounter();
|
||||
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
|
||||
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_));
|
||||
@ -322,14 +322,14 @@ absl::Status DepthWiseConv3x3::BindArguments() {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
int3 DepthWiseConv3x3::GetGridSize() const {
|
||||
int3 DepthwiseConv3x3::GetGridSize() const {
|
||||
const int grid_x = IntegralDivideRoundUp(dst_[0]->Width(), 2);
|
||||
const int grid_y = IntegralDivideRoundUp(dst_[0]->Height(), 2);
|
||||
const int grid_z = dst_[0]->Slices();
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConv3x3::Tune(const TuningParameters& params) {
|
||||
absl::Status DepthwiseConv3x3::Tune(const TuningParameters& params) {
|
||||
if (local_mem_uploads_) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
@ -337,12 +337,12 @@ absl::Status DepthWiseConv3x3::Tune(const TuningParameters& params) {
|
||||
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
|
||||
}
|
||||
|
||||
absl::Status DepthWiseConv3x3::AddToQueue(CLCommandQueue* queue) {
|
||||
absl::Status DepthwiseConv3x3::AddToQueue(CLCommandQueue* queue) {
|
||||
RETURN_IF_ERROR(BindArguments());
|
||||
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
|
||||
}
|
||||
|
||||
bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
|
||||
bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
|
||||
return attr.weights.shape.o == 1 && attr.dilations.w == 1 &&
|
||||
attr.dilations.h == 1 && attr.weights.shape.w == 3 &&
|
||||
attr.weights.shape.h == 3 && attr.strides.w == 1 &&
|
||||
@ -351,18 +351,18 @@ bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
|
||||
attr.padding.appended.h == 1;
|
||||
}
|
||||
|
||||
absl::Status CreateDepthWiseConv3x3(
|
||||
absl::Status CreateDepthwiseConv3x3(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result) {
|
||||
if (!IsDepthWiseConv3x3Supported(attr)) {
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthwiseConv3x3* result) {
|
||||
if (!IsDepthwiseConv3x3Supported(attr)) {
|
||||
return absl::InvalidArgumentError(
|
||||
"DepthWiseConv3x3 doesn't support this attributes");
|
||||
"DepthwiseConv3x3 doesn't support this attributes");
|
||||
}
|
||||
bool weights_are_buffer =
|
||||
creation_context.device->IsPowerVR() || creation_context.device->IsMali();
|
||||
bool local_mem_uploads =
|
||||
weights_are_buffer && creation_context.device->IsPowerVR();
|
||||
*result = DepthWiseConv3x3(definition, weights_are_buffer, local_mem_uploads);
|
||||
*result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads);
|
||||
return result->UploadWeightsAndBiases(attr.weights, attr.bias,
|
||||
creation_context.context);
|
||||
}
|
||||
|
@ -35,31 +35,31 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
class DepthWiseConv3x3 : public GPUOperation {
|
||||
class DepthwiseConv3x3 : public GPUOperation {
|
||||
public:
|
||||
DepthWiseConv3x3() = default;
|
||||
DepthwiseConv3x3() = default;
|
||||
absl::Status AddToQueue(CLCommandQueue* queue) override;
|
||||
absl::Status Tune(const TuningParameters& params) override;
|
||||
|
||||
absl::Status Compile(const CreationContext& creation_context) override;
|
||||
|
||||
// Move only
|
||||
DepthWiseConv3x3(DepthWiseConv3x3&& operation);
|
||||
DepthWiseConv3x3& operator=(DepthWiseConv3x3&& operation);
|
||||
DepthWiseConv3x3(const DepthWiseConv3x3&) = delete;
|
||||
DepthWiseConv3x3& operator=(const DepthWiseConv3x3&) = delete;
|
||||
DepthwiseConv3x3(DepthwiseConv3x3&& operation);
|
||||
DepthwiseConv3x3& operator=(DepthwiseConv3x3&& operation);
|
||||
DepthwiseConv3x3(const DepthwiseConv3x3&) = delete;
|
||||
DepthwiseConv3x3& operator=(const DepthwiseConv3x3&) = delete;
|
||||
|
||||
private:
|
||||
explicit DepthWiseConv3x3(const OperationDef& definition,
|
||||
explicit DepthwiseConv3x3(const OperationDef& definition,
|
||||
bool weights_are_buffer, bool local_mem_uploads);
|
||||
template <DataType T>
|
||||
absl::Status UploadWeightsAndBiases(
|
||||
const tflite::gpu::Tensor<OHWI, T>& weights,
|
||||
const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context);
|
||||
|
||||
friend absl::Status CreateDepthWiseConv3x3(
|
||||
friend absl::Status CreateDepthwiseConv3x3(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result);
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthwiseConv3x3* result);
|
||||
|
||||
template <DataType S, typename T>
|
||||
void RearrangeWeightsAndBiasesData(
|
||||
@ -80,7 +80,7 @@ class DepthWiseConv3x3 : public GPUOperation {
|
||||
};
|
||||
|
||||
template <DataType T>
|
||||
absl::Status DepthWiseConv3x3::UploadWeightsAndBiases(
|
||||
absl::Status DepthwiseConv3x3::UploadWeightsAndBiases(
|
||||
const tflite::gpu::Tensor<OHWI, T>& weights,
|
||||
const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
|
||||
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
|
||||
@ -126,7 +126,7 @@ absl::Status DepthWiseConv3x3::UploadWeightsAndBiases(
|
||||
}
|
||||
|
||||
template <DataType S, typename T>
|
||||
void DepthWiseConv3x3::RearrangeWeightsAndBiasesData(
|
||||
void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(
|
||||
const tflite::gpu::Tensor<OHWI, S>& weights,
|
||||
const tflite::gpu::Tensor<Linear, S>& biases, absl::Span<T> dst) {
|
||||
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
|
||||
@ -158,11 +158,11 @@ void DepthWiseConv3x3::RearrangeWeightsAndBiasesData(
|
||||
}
|
||||
}
|
||||
|
||||
bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr);
|
||||
bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr);
|
||||
|
||||
absl::Status CreateDepthWiseConv3x3(
|
||||
absl::Status CreateDepthwiseConv3x3(
|
||||
const CreationContext& creation_context, const OperationDef& definition,
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result);
|
||||
const DepthwiseConvolution2DAttributes& attr, DepthwiseConv3x3* result);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
|
@ -31,7 +31,7 @@ namespace gpu {
|
||||
namespace cl {
|
||||
namespace {
|
||||
|
||||
TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
|
||||
TEST_F(OpenCLOperationTest, DepthwiseConv3x3SimpleWeights) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 2, 2);
|
||||
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
@ -56,9 +56,9 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
DepthWiseConv3x3 operation;
|
||||
DepthwiseConv3x3 operation;
|
||||
ASSERT_OK(
|
||||
CreateDepthWiseConv3x3(creation_context_, op_def, attr, &operation));
|
||||
CreateDepthwiseConv3x3(creation_context_, op_def, attr, &operation));
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 2, 2), &dst_tensor));
|
||||
EXPECT_THAT(dst_tensor.data,
|
||||
@ -68,7 +68,7 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(OpenCLOperationTest, DepthWiseConv3x3) {
|
||||
TEST_F(OpenCLOperationTest, DepthwiseConv3x3) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 2, 2);
|
||||
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
@ -93,9 +93,9 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
DepthWiseConv3x3 operation;
|
||||
DepthwiseConv3x3 operation;
|
||||
ASSERT_OK(
|
||||
CreateDepthWiseConv3x3(creation_context_, op_def, attr, &operation));
|
||||
CreateDepthwiseConv3x3(creation_context_, op_def, attr, &operation));
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 2, 2), &dst_tensor));
|
||||
EXPECT_THAT(dst_tensor.data,
|
||||
|
@ -31,7 +31,7 @@ namespace gpu {
|
||||
namespace cl {
|
||||
namespace {
|
||||
|
||||
TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
|
||||
TEST_F(OpenCLOperationTest, DepthwiseConvSimpleWeights) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 2, 2);
|
||||
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
@ -55,8 +55,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
DepthWiseConvolution operation;
|
||||
ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
|
||||
DepthwiseConvolution operation;
|
||||
ASSERT_OK(CreateDepthwiseConvolution(creation_context_, op_def, attr,
|
||||
&operation));
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 2, 2), &dst_tensor));
|
||||
@ -67,7 +67,7 @@ TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
|
||||
TEST_F(OpenCLOperationTest, DepthwiseConvNoMultiplier) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 2, 2);
|
||||
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
@ -91,8 +91,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
DepthWiseConvolution operation;
|
||||
ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
|
||||
DepthwiseConvolution operation;
|
||||
ASSERT_OK(CreateDepthwiseConvolution(creation_context_, op_def, attr,
|
||||
&operation));
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 2, 2), &dst_tensor));
|
||||
@ -103,7 +103,7 @@ TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(OpenCLOperationTest, DepthWiseConvMultiplier2) {
|
||||
TEST_F(OpenCLOperationTest, DepthwiseConvMultiplier2) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 2, 2);
|
||||
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
@ -128,8 +128,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvMultiplier2) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
DepthWiseConvolution operation;
|
||||
ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
|
||||
DepthwiseConvolution operation;
|
||||
ASSERT_OK(CreateDepthwiseConvolution(creation_context_, op_def, attr,
|
||||
&operation));
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 2, 4), &dst_tensor));
|
||||
|
@ -28,7 +28,7 @@ enum class CalculationsPrecision { F32, F32_F16, F16 };
|
||||
// F32 - all data and all math ops in F32
|
||||
// F16 - all data and all math ops in F16
|
||||
// F32_F16 - as F16, but some operations (Convolution,
|
||||
// DepthWiseConvolution, FullyConnected, ConvolutionTransposed)
|
||||
// DepthwiseConvolution, FullyConnected, ConvolutionTransposed)
|
||||
// have an accumulator in F32; usually it calculates 4 mads in F16, sums them,
|
||||
// then converts this partial sum to F32 and adds it to the accumulator.
|
||||
|
||||
|
@ -30,16 +30,16 @@ absl::Status SelectDWConvolutionAdreno(
|
||||
const DepthwiseConvolution2DAttributes& attr,
|
||||
const CreationContext& creation_context, const OperationDef& op_def,
|
||||
std::unique_ptr<GPUOperation>* ptr) {
|
||||
if (!op_def.IsBatchSupported() && IsDepthWiseConv3x3Supported(attr)) {
|
||||
DepthWiseConv3x3 dw_conv;
|
||||
if (!op_def.IsBatchSupported() && IsDepthwiseConv3x3Supported(attr)) {
|
||||
DepthwiseConv3x3 dw_conv;
|
||||
RETURN_IF_ERROR(
|
||||
CreateDepthWiseConv3x3(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthWiseConv3x3>(std::move(dw_conv));
|
||||
CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthwiseConv3x3>(std::move(dw_conv));
|
||||
} else {
|
||||
DepthWiseConvolution dw_conv;
|
||||
DepthwiseConvolution dw_conv;
|
||||
RETURN_IF_ERROR(
|
||||
CreateDepthWiseConvolution(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthWiseConvolution>(std::move(dw_conv));
|
||||
CreateDepthwiseConvolution(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthwiseConvolution>(std::move(dw_conv));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
@ -48,16 +48,16 @@ absl::Status SelectDWConvolutionPowerVR(
|
||||
const DepthwiseConvolution2DAttributes& attr,
|
||||
const CreationContext& creation_context, const OperationDef& op_def,
|
||||
std::unique_ptr<GPUOperation>* ptr) {
|
||||
if (!op_def.IsBatchSupported() && IsDepthWiseConv3x3Supported(attr)) {
|
||||
DepthWiseConv3x3 dw_conv;
|
||||
if (!op_def.IsBatchSupported() && IsDepthwiseConv3x3Supported(attr)) {
|
||||
DepthwiseConv3x3 dw_conv;
|
||||
RETURN_IF_ERROR(
|
||||
CreateDepthWiseConv3x3(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthWiseConv3x3>(std::move(dw_conv));
|
||||
CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthwiseConv3x3>(std::move(dw_conv));
|
||||
} else {
|
||||
DepthWiseConvolution dw_conv;
|
||||
DepthwiseConvolution dw_conv;
|
||||
RETURN_IF_ERROR(
|
||||
CreateDepthWiseConvolution(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthWiseConvolution>(std::move(dw_conv));
|
||||
CreateDepthwiseConvolution(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthwiseConvolution>(std::move(dw_conv));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
@ -70,18 +70,18 @@ absl::Status SelectDWConvolutionMali(
|
||||
bool buffer_type = storage_type == TensorStorageType::BUFFER ||
|
||||
storage_type == TensorStorageType::IMAGE_BUFFER;
|
||||
MaliInfo mali_info = creation_context.device->GetInfo().mali_info;
|
||||
if (IsDepthWiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
|
||||
if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
|
||||
!buffer_type && !op_def.IsBatchSupported() &&
|
||||
op_def.precision != CalculationsPrecision::F32) {
|
||||
DepthWiseConv3x3 dw_conv;
|
||||
DepthwiseConv3x3 dw_conv;
|
||||
RETURN_IF_ERROR(
|
||||
CreateDepthWiseConv3x3(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthWiseConv3x3>(std::move(dw_conv));
|
||||
CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthwiseConv3x3>(std::move(dw_conv));
|
||||
} else {
|
||||
DepthWiseConvolution dw_conv;
|
||||
DepthwiseConvolution dw_conv;
|
||||
RETURN_IF_ERROR(
|
||||
CreateDepthWiseConvolution(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthWiseConvolution>(std::move(dw_conv));
|
||||
CreateDepthwiseConvolution(creation_context, op_def, attr, &dw_conv));
|
||||
*ptr = absl::make_unique<DepthwiseConvolution>(std::move(dw_conv));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user