Rename DepthWise to Depthwise.

PiperOrigin-RevId: 304512174
Change-Id: I8c3f785a77bacac0203dfb6cf3380d1f64123136
This commit is contained in:
Juhyun Lee 2020-04-02 17:35:10 -07:00 committed by TensorFlower Gardener
parent 7d4df951af
commit 2e5190cf43
10 changed files with 122 additions and 122 deletions

View File

@ -73,7 +73,7 @@ std::string GetSrcValue(const TensorCodeGenerator& src_tensor,
return c;
}
std::string GenerateDepthWiseConvolutionCode(
std::string GenerateDepthwiseConvolutionCode(
const OperationDef& op_def, bool stride_correction,
const LinearStorage& biases, int channel_multiplier,
bool weights_are_buffer,
@ -179,7 +179,7 @@ std::string GenerateDepthWiseConvolutionCode(
}
} // namespace
DepthWiseConvolution::DepthWiseConvolution(
DepthwiseConvolution::DepthwiseConvolution(
const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer)
: GPUOperation(definition),
@ -191,7 +191,7 @@ DepthWiseConvolution::DepthWiseConvolution(
channel_multiplier_(attr.weights.shape.o),
work_group_size_(8, 8, 1) {}
DepthWiseConvolution::DepthWiseConvolution(DepthWiseConvolution&& operation)
DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation)
: GPUOperation(std::move(operation)),
weights_are_buffer_(operation.weights_are_buffer_),
weights_tex2d_(std::move(operation.weights_tex2d_)),
@ -206,8 +206,8 @@ DepthWiseConvolution::DepthWiseConvolution(DepthWiseConvolution&& operation)
kernel_(std::move(operation.kernel_)),
work_group_size_(operation.work_group_size_) {}
DepthWiseConvolution& DepthWiseConvolution::operator=(
DepthWiseConvolution&& operation) {
DepthwiseConvolution& DepthwiseConvolution::operator=(
DepthwiseConvolution&& operation) {
if (this != &operation) {
std::swap(weights_are_buffer_, operation.weights_are_buffer_);
weights_tex2d_ = std::move(operation.weights_tex2d_);
@ -226,11 +226,11 @@ DepthWiseConvolution& DepthWiseConvolution::operator=(
return *this;
}
absl::Status DepthWiseConvolution::Compile(
absl::Status DepthwiseConvolution::Compile(
const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
const auto code = GenerateDepthWiseConvolutionCode(
const auto code = GenerateDepthwiseConvolutionCode(
definition_, stride_correction, biases_, channel_multiplier_,
weights_are_buffer_, linked_operations_, *creation_context.device);
return creation_context.cache->GetOrCreateCLKernel(
@ -238,7 +238,7 @@ absl::Status DepthWiseConvolution::Compile(
*creation_context.device, &kernel_);
}
absl::Status DepthWiseConvolution::BindArguments() {
absl::Status DepthwiseConvolution::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_));
@ -259,29 +259,29 @@ absl::Status DepthWiseConvolution::BindArguments() {
return absl::OkStatus();
}
int3 DepthWiseConvolution::GetGridSize() const {
int3 DepthwiseConvolution::GetGridSize() const {
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
const int grid_y = dst_[0]->Height();
const int grid_z = dst_[0]->Slices();
return int3(grid_x, grid_y, grid_z);
}
absl::Status DepthWiseConvolution::Tune(const TuningParameters& params) {
absl::Status DepthwiseConvolution::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status DepthWiseConvolution::AddToQueue(CLCommandQueue* queue) {
absl::Status DepthwiseConvolution::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateDepthWiseConvolution(
absl::Status CreateDepthwiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConvolution* result) {
DepthwiseConvolution* result) {
bool weights_are_buffer = creation_context.device->IsMali();
*result = DepthWiseConvolution(definition, attr, weights_are_buffer);
*result = DepthwiseConvolution(definition, attr, weights_are_buffer);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));
LinearStorageCreateInfo create_info;

View File

@ -35,26 +35,26 @@ namespace tflite {
namespace gpu {
namespace cl {
class DepthWiseConvolution : public GPUOperation {
class DepthwiseConvolution : public GPUOperation {
public:
DepthWiseConvolution() = default;
DepthwiseConvolution() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
DepthWiseConvolution(DepthWiseConvolution&& operation);
DepthWiseConvolution& operator=(DepthWiseConvolution&& operation);
DepthWiseConvolution(const DepthWiseConvolution&) = delete;
DepthWiseConvolution& operator=(const DepthWiseConvolution&) = delete;
DepthwiseConvolution(DepthwiseConvolution&& operation);
DepthwiseConvolution& operator=(DepthwiseConvolution&& operation);
DepthwiseConvolution(const DepthwiseConvolution&) = delete;
DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;
private:
friend absl::Status CreateDepthWiseConvolution(
friend absl::Status CreateDepthwiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
DepthWiseConvolution* result);
DepthWiseConvolution(const OperationDef& definition,
DepthwiseConvolution* result);
DepthwiseConvolution(const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
bool weights_are_buffer);
template <DataType T>
@ -86,7 +86,7 @@ class DepthWiseConvolution : public GPUOperation {
};
template <DataType T>
absl::Status DepthWiseConvolution::UploadWeights(
absl::Status DepthwiseConvolution::UploadWeights(
const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_depth = IntegralDivideRoundUp(dst_channels, 4);
@ -134,7 +134,7 @@ absl::Status DepthWiseConvolution::UploadWeights(
}
template <DataType S, typename T>
void DepthWiseConvolution::RearrangeWeightsData(
void DepthwiseConvolution::RearrangeWeightsData(
const tflite::gpu::Tensor<OHWI, S>& weights, absl::Span<T> dst) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_depth = IntegralDivideRoundUp(dst_channels, 4);
@ -162,9 +162,9 @@ void DepthWiseConvolution::RearrangeWeightsData(
}
}
absl::Status CreateDepthWiseConvolution(
absl::Status CreateDepthwiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConvolution* result);
const DepthwiseConvolution2DAttributes& attr, DepthwiseConvolution* result);
} // namespace cl
} // namespace gpu

View File

@ -79,7 +79,7 @@ std::string GetSrcValue(const TensorCodeGenerator& src_tensor,
return c;
}
std::string GenerateDepthWiseConvolution3DCode(
std::string GenerateDepthwiseConvolution3DCode(
const OperationDef& op_def, bool stride_correction,
const LinearStorage& biases, int channel_multiplier,
bool weights_are_buffer,
@ -208,7 +208,7 @@ std::string GenerateDepthWiseConvolution3DCode(
}
} // namespace
DepthWiseConvolution3D::DepthWiseConvolution3D(
DepthwiseConvolution3D::DepthwiseConvolution3D(
const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr, const CLDevice& device)
: GPUOperation(definition),
@ -222,8 +222,8 @@ DepthWiseConvolution3D::DepthWiseConvolution3D(
channel_multiplier_(attr.weights.shape.o),
work_group_size_(8, 8, 1) {}
DepthWiseConvolution3D::DepthWiseConvolution3D(
DepthWiseConvolution3D&& operation)
DepthwiseConvolution3D::DepthwiseConvolution3D(
DepthwiseConvolution3D&& operation)
: GPUOperation(std::move(operation)),
weights_tex2d_(std::move(operation.weights_tex2d_)),
weights_buf_(std::move(operation.weights_buf_)),
@ -237,8 +237,8 @@ DepthWiseConvolution3D::DepthWiseConvolution3D(
kernel_(std::move(operation.kernel_)),
work_group_size_(operation.work_group_size_) {}
DepthWiseConvolution3D& DepthWiseConvolution3D::operator=(
DepthWiseConvolution3D&& operation) {
DepthwiseConvolution3D& DepthwiseConvolution3D::operator=(
DepthwiseConvolution3D&& operation) {
if (this != &operation) {
weights_tex2d_ = std::move(operation.weights_tex2d_);
weights_buf_ = std::move(operation.weights_buf_);
@ -256,11 +256,11 @@ DepthWiseConvolution3D& DepthWiseConvolution3D::operator=(
return *this;
}
absl::Status DepthWiseConvolution3D::Compile(
absl::Status DepthwiseConvolution3D::Compile(
const CreationContext& creation_context) {
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
const auto code = GenerateDepthWiseConvolution3DCode(
const auto code = GenerateDepthwiseConvolution3DCode(
definition_, stride_correction, biases_, channel_multiplier_,
weights_are_buffer_, linked_operations_, *creation_context.device);
return creation_context.cache->GetOrCreateCLKernel(
@ -268,7 +268,7 @@ absl::Status DepthWiseConvolution3D::Compile(
*creation_context.device, &kernel_);
}
absl::Status DepthWiseConvolution3D::BindArguments() {
absl::Status DepthwiseConvolution3D::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
if (weights_are_buffer_) {
@ -298,28 +298,28 @@ absl::Status DepthWiseConvolution3D::BindArguments() {
return absl::OkStatus();
}
int3 DepthWiseConvolution3D::GetGridSize() const {
int3 DepthwiseConvolution3D::GetGridSize() const {
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
const int grid_y = dst_[0]->Height();
const int grid_z = dst_[0]->Slices() * dst_[0]->Depth();
return int3(grid_x, grid_y, grid_z);
}
absl::Status DepthWiseConvolution3D::Tune(const TuningParameters& params) {
absl::Status DepthwiseConvolution3D::Tune(const TuningParameters& params) {
RETURN_IF_ERROR(BindArguments());
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status DepthWiseConvolution3D::AddToQueue(CLCommandQueue* queue) {
absl::Status DepthwiseConvolution3D::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
absl::Status CreateDepthWiseConvolution3D(
absl::Status CreateDepthwiseConvolution3D(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
DepthWiseConvolution3D* result) {
*result = DepthWiseConvolution3D(definition, attr, *creation_context.device);
DepthwiseConvolution3D* result) {
*result = DepthwiseConvolution3D(definition, attr, *creation_context.device);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));
LinearStorageCreateInfo create_info;

View File

@ -35,26 +35,26 @@ namespace tflite {
namespace gpu {
namespace cl {
class DepthWiseConvolution3D : public GPUOperation {
class DepthwiseConvolution3D : public GPUOperation {
public:
DepthWiseConvolution3D() = default;
DepthwiseConvolution3D() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
DepthWiseConvolution3D(DepthWiseConvolution3D&& operation);
DepthWiseConvolution3D& operator=(DepthWiseConvolution3D&& operation);
DepthWiseConvolution3D(const DepthWiseConvolution3D&) = delete;
DepthWiseConvolution3D& operator=(const DepthWiseConvolution3D&) = delete;
DepthwiseConvolution3D(DepthwiseConvolution3D&& operation);
DepthwiseConvolution3D& operator=(DepthwiseConvolution3D&& operation);
DepthwiseConvolution3D(const DepthwiseConvolution3D&) = delete;
DepthwiseConvolution3D& operator=(const DepthwiseConvolution3D&) = delete;
private:
friend absl::Status CreateDepthWiseConvolution3D(
friend absl::Status CreateDepthwiseConvolution3D(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
DepthWiseConvolution3D* result);
DepthWiseConvolution3D(const OperationDef& definition,
DepthwiseConvolution3D* result);
DepthwiseConvolution3D(const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
const CLDevice& device);
template <DataType T>
@ -85,7 +85,7 @@ class DepthWiseConvolution3D : public GPUOperation {
};
template <DataType T>
absl::Status DepthWiseConvolution3D::UploadWeights(
absl::Status DepthwiseConvolution3D::UploadWeights(
const tflite::gpu::Tensor<OHWDI, T>& weights, CLContext* context) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = IntegralDivideRoundUp(dst_channels, 4);
@ -127,7 +127,7 @@ absl::Status DepthWiseConvolution3D::UploadWeights(
}
template <DataType S, typename T>
void DepthWiseConvolution3D::RearrangeWeightsData(
void DepthwiseConvolution3D::RearrangeWeightsData(
const tflite::gpu::Tensor<OHWDI, S>& weights, absl::Span<T> dst) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = IntegralDivideRoundUp(dst_channels, 4);
@ -158,10 +158,10 @@ void DepthWiseConvolution3D::RearrangeWeightsData(
}
}
absl::Status CreateDepthWiseConvolution3D(
absl::Status CreateDepthwiseConvolution3D(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
DepthWiseConvolution3D* result);
DepthwiseConvolution3D* result);
} // namespace cl
} // namespace gpu

View File

@ -28,7 +28,7 @@ namespace gpu {
namespace cl {
namespace {
std::string GenerateDepthWiseConvCode(
std::string GenerateDepthwiseConvCode(
const OperationDef& op_def,
const std::vector<ElementwiseOperation*>& linked_operations,
const CLDevice& device, bool weights_are_buffer, bool local_mem_uploads) {
@ -266,14 +266,14 @@ std::string GenerateDepthWiseConvCode(
} // namespace
DepthWiseConv3x3::DepthWiseConv3x3(const OperationDef& definition,
DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition,
bool weights_are_buffer,
bool local_mem_uploads)
: GPUOperation(definition),
weights_are_buffer_(weights_are_buffer),
local_mem_uploads_(local_mem_uploads) {}
DepthWiseConv3x3::DepthWiseConv3x3(DepthWiseConv3x3&& operation)
DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation)
: GPUOperation(std::move(operation)),
weights_are_buffer_(operation.weights_are_buffer_),
local_mem_uploads_(operation.local_mem_uploads_),
@ -283,7 +283,7 @@ DepthWiseConv3x3::DepthWiseConv3x3(DepthWiseConv3x3&& operation)
kernel_(std::move(operation.kernel_)),
work_group_size_(operation.work_group_size_) {}
DepthWiseConv3x3& DepthWiseConv3x3::operator=(DepthWiseConv3x3&& operation) {
DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) {
if (this != &operation) {
std::swap(weights_are_buffer_, operation.weights_are_buffer_);
std::swap(local_mem_uploads_, operation.local_mem_uploads_);
@ -297,9 +297,9 @@ DepthWiseConv3x3& DepthWiseConv3x3::operator=(DepthWiseConv3x3&& operation) {
return *this;
}
absl::Status DepthWiseConv3x3::Compile(
absl::Status DepthwiseConv3x3::Compile(
const CreationContext& creation_context) {
std::string code = GenerateDepthWiseConvCode(
std::string code = GenerateDepthwiseConvCode(
definition_, linked_operations_, *creation_context.device,
weights_are_buffer_, local_mem_uploads_);
std::vector<CompilerOptions> options;
@ -312,7 +312,7 @@ absl::Status DepthWiseConv3x3::Compile(
*creation_context.device, &kernel_);
}
absl::Status DepthWiseConv3x3::BindArguments() {
absl::Status DepthwiseConv3x3::BindArguments() {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_));
@ -322,14 +322,14 @@ absl::Status DepthWiseConv3x3::BindArguments() {
return absl::OkStatus();
}
int3 DepthWiseConv3x3::GetGridSize() const {
int3 DepthwiseConv3x3::GetGridSize() const {
const int grid_x = IntegralDivideRoundUp(dst_[0]->Width(), 2);
const int grid_y = IntegralDivideRoundUp(dst_[0]->Height(), 2);
const int grid_z = dst_[0]->Slices();
return int3(grid_x, grid_y, grid_z);
}
absl::Status DepthWiseConv3x3::Tune(const TuningParameters& params) {
absl::Status DepthwiseConv3x3::Tune(const TuningParameters& params) {
if (local_mem_uploads_) {
return absl::OkStatus();
}
@ -337,12 +337,12 @@ absl::Status DepthWiseConv3x3::Tune(const TuningParameters& params) {
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
absl::Status DepthWiseConv3x3::AddToQueue(CLCommandQueue* queue) {
absl::Status DepthwiseConv3x3::AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(BindArguments());
return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
return attr.weights.shape.o == 1 && attr.dilations.w == 1 &&
attr.dilations.h == 1 && attr.weights.shape.w == 3 &&
attr.weights.shape.h == 3 && attr.strides.w == 1 &&
@ -351,18 +351,18 @@ bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
attr.padding.appended.h == 1;
}
absl::Status CreateDepthWiseConv3x3(
absl::Status CreateDepthwiseConv3x3(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result) {
if (!IsDepthWiseConv3x3Supported(attr)) {
const DepthwiseConvolution2DAttributes& attr, DepthwiseConv3x3* result) {
if (!IsDepthwiseConv3x3Supported(attr)) {
return absl::InvalidArgumentError(
"DepthWiseConv3x3 doesn't support this attributes");
"DepthwiseConv3x3 doesn't support this attributes");
}
bool weights_are_buffer =
creation_context.device->IsPowerVR() || creation_context.device->IsMali();
bool local_mem_uploads =
weights_are_buffer && creation_context.device->IsPowerVR();
*result = DepthWiseConv3x3(definition, weights_are_buffer, local_mem_uploads);
*result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads);
return result->UploadWeightsAndBiases(attr.weights, attr.bias,
creation_context.context);
}

View File

@ -35,31 +35,31 @@ namespace tflite {
namespace gpu {
namespace cl {
class DepthWiseConv3x3 : public GPUOperation {
class DepthwiseConv3x3 : public GPUOperation {
public:
DepthWiseConv3x3() = default;
DepthwiseConv3x3() = default;
absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
DepthWiseConv3x3(DepthWiseConv3x3&& operation);
DepthWiseConv3x3& operator=(DepthWiseConv3x3&& operation);
DepthWiseConv3x3(const DepthWiseConv3x3&) = delete;
DepthWiseConv3x3& operator=(const DepthWiseConv3x3&) = delete;
DepthwiseConv3x3(DepthwiseConv3x3&& operation);
DepthwiseConv3x3& operator=(DepthwiseConv3x3&& operation);
DepthwiseConv3x3(const DepthwiseConv3x3&) = delete;
DepthwiseConv3x3& operator=(const DepthwiseConv3x3&) = delete;
private:
explicit DepthWiseConv3x3(const OperationDef& definition,
explicit DepthwiseConv3x3(const OperationDef& definition,
bool weights_are_buffer, bool local_mem_uploads);
template <DataType T>
absl::Status UploadWeightsAndBiases(
const tflite::gpu::Tensor<OHWI, T>& weights,
const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context);
friend absl::Status CreateDepthWiseConv3x3(
friend absl::Status CreateDepthwiseConv3x3(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result);
const DepthwiseConvolution2DAttributes& attr, DepthwiseConv3x3* result);
template <DataType S, typename T>
void RearrangeWeightsAndBiasesData(
@ -80,7 +80,7 @@ class DepthWiseConv3x3 : public GPUOperation {
};
template <DataType T>
absl::Status DepthWiseConv3x3::UploadWeightsAndBiases(
absl::Status DepthwiseConv3x3::UploadWeightsAndBiases(
const tflite::gpu::Tensor<OHWI, T>& weights,
const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
@ -126,7 +126,7 @@ absl::Status DepthWiseConv3x3::UploadWeightsAndBiases(
}
template <DataType S, typename T>
void DepthWiseConv3x3::RearrangeWeightsAndBiasesData(
void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(
const tflite::gpu::Tensor<OHWI, S>& weights,
const tflite::gpu::Tensor<Linear, S>& biases, absl::Span<T> dst) {
const int src_depth = IntegralDivideRoundUp(weights.shape.i, 4);
@ -158,11 +158,11 @@ void DepthWiseConv3x3::RearrangeWeightsAndBiasesData(
}
}
bool IsDepthWiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr);
bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr);
absl::Status CreateDepthWiseConv3x3(
absl::Status CreateDepthwiseConv3x3(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, DepthWiseConv3x3* result);
const DepthwiseConvolution2DAttributes& attr, DepthwiseConv3x3* result);
} // namespace cl
} // namespace gpu

View File

@ -31,7 +31,7 @@ namespace gpu {
namespace cl {
namespace {
TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
TEST_F(OpenCLOperationTest, DepthwiseConv3x3SimpleWeights) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 2, 2);
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
@ -56,9 +56,9 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthWiseConv3x3 operation;
DepthwiseConv3x3 operation;
ASSERT_OK(
CreateDepthWiseConv3x3(creation_context_, op_def, attr, &operation));
CreateDepthwiseConv3x3(creation_context_, op_def, attr, &operation));
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 2), &dst_tensor));
EXPECT_THAT(dst_tensor.data,
@ -68,7 +68,7 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
}
}
TEST_F(OpenCLOperationTest, DepthWiseConv3x3) {
TEST_F(OpenCLOperationTest, DepthwiseConv3x3) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 2, 2);
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
@ -93,9 +93,9 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthWiseConv3x3 operation;
DepthwiseConv3x3 operation;
ASSERT_OK(
CreateDepthWiseConv3x3(creation_context_, op_def, attr, &operation));
CreateDepthwiseConv3x3(creation_context_, op_def, attr, &operation));
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 2), &dst_tensor));
EXPECT_THAT(dst_tensor.data,

View File

@ -31,7 +31,7 @@ namespace gpu {
namespace cl {
namespace {
TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
TEST_F(OpenCLOperationTest, DepthwiseConvSimpleWeights) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 2, 2);
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
@ -55,8 +55,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthWiseConvolution operation;
ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
DepthwiseConvolution operation;
ASSERT_OK(CreateDepthwiseConvolution(creation_context_, op_def, attr,
&operation));
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 2), &dst_tensor));
@ -67,7 +67,7 @@ TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
}
}
TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
TEST_F(OpenCLOperationTest, DepthwiseConvNoMultiplier) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 2, 2);
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
@ -91,8 +91,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthWiseConvolution operation;
ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
DepthwiseConvolution operation;
ASSERT_OK(CreateDepthwiseConvolution(creation_context_, op_def, attr,
&operation));
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 2), &dst_tensor));
@ -103,7 +103,7 @@ TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
}
}
TEST_F(OpenCLOperationTest, DepthWiseConvMultiplier2) {
TEST_F(OpenCLOperationTest, DepthwiseConvMultiplier2) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 2, 2);
src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
@ -128,8 +128,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvMultiplier2) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthWiseConvolution operation;
ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
DepthwiseConvolution operation;
ASSERT_OK(CreateDepthwiseConvolution(creation_context_, op_def, attr,
&operation));
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 4), &dst_tensor));

View File

@ -28,7 +28,7 @@ enum class CalculationsPrecision { F32, F32_F16, F16 };
// F32 - all data and all math ops in F32
// F16 - all data and all math ops in F16
// F32_F16 - as F16, but some operations (Convolution,
// DepthWiseConvolution, FullyConnected, ConvolutionTransposed)
// DepthwiseConvolution, FullyConnected, ConvolutionTransposed)
// have accumulator in F32 and usually it calculates 4 mads in F16, sums them,
// then converts this partial sum to F32 and adds it to the accumulator.

View File

@ -30,16 +30,16 @@ absl::Status SelectDWConvolutionAdreno(
const DepthwiseConvolution2DAttributes& attr,
const CreationContext& creation_context, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
if (!op_def.IsBatchSupported() && IsDepthWiseConv3x3Supported(attr)) {
DepthWiseConv3x3 dw_conv;
if (!op_def.IsBatchSupported() && IsDepthwiseConv3x3Supported(attr)) {
DepthwiseConv3x3 dw_conv;
RETURN_IF_ERROR(
CreateDepthWiseConv3x3(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthWiseConv3x3>(std::move(dw_conv));
CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthwiseConv3x3>(std::move(dw_conv));
} else {
DepthWiseConvolution dw_conv;
DepthwiseConvolution dw_conv;
RETURN_IF_ERROR(
CreateDepthWiseConvolution(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthWiseConvolution>(std::move(dw_conv));
CreateDepthwiseConvolution(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthwiseConvolution>(std::move(dw_conv));
}
return absl::OkStatus();
}
@ -48,16 +48,16 @@ absl::Status SelectDWConvolutionPowerVR(
const DepthwiseConvolution2DAttributes& attr,
const CreationContext& creation_context, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
if (!op_def.IsBatchSupported() && IsDepthWiseConv3x3Supported(attr)) {
DepthWiseConv3x3 dw_conv;
if (!op_def.IsBatchSupported() && IsDepthwiseConv3x3Supported(attr)) {
DepthwiseConv3x3 dw_conv;
RETURN_IF_ERROR(
CreateDepthWiseConv3x3(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthWiseConv3x3>(std::move(dw_conv));
CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthwiseConv3x3>(std::move(dw_conv));
} else {
DepthWiseConvolution dw_conv;
DepthwiseConvolution dw_conv;
RETURN_IF_ERROR(
CreateDepthWiseConvolution(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthWiseConvolution>(std::move(dw_conv));
CreateDepthwiseConvolution(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthwiseConvolution>(std::move(dw_conv));
}
return absl::OkStatus();
}
@ -70,18 +70,18 @@ absl::Status SelectDWConvolutionMali(
bool buffer_type = storage_type == TensorStorageType::BUFFER ||
storage_type == TensorStorageType::IMAGE_BUFFER;
MaliInfo mali_info = creation_context.device->GetInfo().mali_info;
if (IsDepthWiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
!buffer_type && !op_def.IsBatchSupported() &&
op_def.precision != CalculationsPrecision::F32) {
DepthWiseConv3x3 dw_conv;
DepthwiseConv3x3 dw_conv;
RETURN_IF_ERROR(
CreateDepthWiseConv3x3(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthWiseConv3x3>(std::move(dw_conv));
CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthwiseConv3x3>(std::move(dw_conv));
} else {
DepthWiseConvolution dw_conv;
DepthwiseConvolution dw_conv;
RETURN_IF_ERROR(
CreateDepthWiseConvolution(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthWiseConvolution>(std::move(dw_conv));
CreateDepthwiseConvolution(creation_context, op_def, attr, &dw_conv));
*ptr = absl::make_unique<DepthwiseConvolution>(std::move(dw_conv));
}
return absl::OkStatus();
}