Added an enum for tensor-to-grid mapping.

Added a default GetGridSize to the base class GPUOperation.
The default GetGridSize makes it possible to reduce the number of specialized ops and have more 'generic' operations.
Demonstrated on ConcatXY.
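A minimal sketch of the resulting pattern, assuming only the members touched by this change (GPUOperation, AddSrcTensor, AddDstTensor, code_, tensor_to_grid_); the factory name and kernel-code generator below are hypothetical, not part of this commit:

  // Hypothetical op built as a plain GPUOperation: no subclass, no GetGridSize override.
  GPUOperation CreateSomeOp(const OperationDef& definition) {
    GPUOperation op(definition);
    op.AddSrcTensor("src_tensor", definition.src_tensors[0]);
    op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
    op.code_ = GetSomeOpKernelCode(definition);  // hypothetical OpenCL code generator
    // Base-class grid: grid_x = W * B, grid_y = H * D, grid_z = Slices.
    op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
    return op;
  }

The CreateConcatXY change below demonstrates the same idea on a real operation.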

PiperOrigin-RevId: 326483127
Change-Id: Ib8d04d0841f217b2b7262e6d09554bc82a212730
Raman Sarokin 2020-08-13 11:06:47 -07:00 committed by TensorFlower Gardener
parent c979f5a424
commit bc3589eb2b
6 changed files with 46 additions and 54 deletions


@@ -51,7 +51,7 @@ TEST_F(OpenCLOperationTest, ConcatWidth) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConcatXY operation = CreateConcatXY(op_def, attr);
+      GPUOperation operation = CreateConcatXY(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
                                     BHWC(1, 2, 3, 2), &dst_tensor));
       EXPECT_THAT(
@@ -83,7 +83,7 @@ TEST_F(OpenCLOperationTest, ConcatHeight) {
       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
-      ConcatXY operation = CreateConcatXY(op_def, attr);
+      GPUOperation operation = CreateConcatXY(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
                                     BHWC(1, 3, 1, 2), &dst_tensor));
       EXPECT_THAT(


@@ -27,28 +27,13 @@ limitations under the License.
 namespace tflite {
 namespace gpu {
 namespace cl {
-ConcatXY::ConcatXY(const OperationDef& definition, const ConcatAttributes& attr)
-    : GPUOperation(definition) {
-  code_ = GetConcatKernelCode(definition, attr);
-}
-ConcatXY::ConcatXY(ConcatXY&& operation) : GPUOperation(std::move(operation)) {}
-ConcatXY& ConcatXY::operator=(ConcatXY&& operation) {
-  if (this != &operation) {
-    GPUOperation::operator=(std::move(operation));
-  }
-  return *this;
-}
-std::string ConcatXY::GetConcatKernelCode(const OperationDef& op_def,
-                                          const ConcatAttributes& attr) {
+namespace {
+std::string GetConcatKernelCode(const OperationDef& op_def,
+                                const ConcatAttributes& attr) {
   std::vector<std::string> tensor_names(op_def.src_tensors.size());
   for (int i = 0; i < op_def.src_tensors.size(); ++i) {
     tensor_names[i] = "src_tensor_" + std::to_string(i);
-    AddSrcTensor(tensor_names[i], op_def.src_tensors[i]);
   }
-  AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
   std::map<Axis, std::string> axis_to_selector = {
       {Axis::WIDTH, "Width"}, {Axis::HEIGHT, "Height"},
@@ -127,17 +112,19 @@ std::string ConcatXY::GetConcatKernelCode(const OperationDef& op_def,
   c += "}\n";
   return c;
 }
+}  // namespace
-int3 ConcatXY::GetGridSize() const {
-  const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
-  const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
-  const int grid_z = dst_[0]->Slices();
-  return int3(grid_x, grid_y, grid_z);
-}
-ConcatXY CreateConcatXY(const OperationDef& definition,
-                        const ConcatAttributes& attr) {
-  return ConcatXY(definition, attr);
+GPUOperation CreateConcatXY(const OperationDef& definition,
+                            const ConcatAttributes& attr) {
+  GPUOperation op(definition);
+  for (int i = 0; i < definition.src_tensors.size(); ++i) {
+    const std::string name = "src_tensor_" + std::to_string(i);
+    op.AddSrcTensor(name, definition.src_tensors[i]);
+  }
+  op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
+  op.code_ = GetConcatKernelCode(definition, attr);
+  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
+  return op;
 }
 }  // namespace cl


@@ -26,24 +26,8 @@ namespace tflite {
 namespace gpu {
 namespace cl {
-class ConcatXY : public GPUOperation {
- public:
-  ConcatXY(const OperationDef& definition, const ConcatAttributes& attr);
-  int3 GetGridSize() const override;
-  // Move only
-  ConcatXY(ConcatXY&& operation);
-  ConcatXY& operator=(ConcatXY&& operation);
-  ConcatXY(const ConcatXY&) = delete;
-  ConcatXY& operator=(const ConcatXY&) = delete;
- private:
-  std::string GetConcatKernelCode(const OperationDef& op_def,
-                                  const ConcatAttributes& attr);
-};
-ConcatXY CreateConcatXY(const OperationDef& definition,
-                        const ConcatAttributes& attr);
+GPUOperation CreateConcatXY(const OperationDef& definition,
+                            const ConcatAttributes& attr);
 }  // namespace cl
 }  // namespace gpu


@@ -124,6 +124,7 @@ void GPUOperation::SetDst(Tensor* ptr, int index) {
 GPUOperation::GPUOperation(GPUOperation&& operation)
     : args_(std::move(operation.args_)),
       code_(std::move(operation.code_)),
+      tensor_to_grid_(operation.tensor_to_grid_),
       elementwise_(operation.elementwise_),
       linkable_(operation.linkable_),
       check_src_channels_size_(operation.check_src_channels_size_),
@@ -142,6 +143,7 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
   if (this != &operation) {
     args_ = std::move(operation.args_);
     code_ = std::move(operation.code_);
+    tensor_to_grid_ = operation.tensor_to_grid_;
     elementwise_ = operation.elementwise_;
     linkable_ = operation.linkable_;
     check_src_channels_size_ = operation.check_src_channels_size_;
@@ -277,14 +279,19 @@ absl::Status GPUOperation::Tune(const TuningParameters& params) {
 }
 int3 GPUOperation::GetGridSize() const {
-  if (elementwise_) {
+  if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ) {
     const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
-    const int grid_y = dst_[0]->Height();
+    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
     const int grid_z = dst_[0]->Slices();
     return int3(grid_x, grid_y, grid_z);
-  } else {
-    return int3(0, 0, 0);
   }
+  if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1) {
+    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
+    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
+    const int grid_z = 1;
+    return int3(grid_x, grid_y, grid_z);
+  }
+  return int3(0, 0, 0);
 }
 void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
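For intuition, with a hypothetical dst_[0] of Width() = 8, Batch() = 2, Height() = 6, Depth() = 1, Slices() = 4 (sizes not taken from this commit): kWBToX_HDToY_SToZ dispatches int3(8 * 2, 6 * 1, 4) = (16, 6, 4), kWBToX_HDToY_ZIs1 dispatches (16, 6, 1), and kCustom on a non-elementwise operation still falls through to int3(0, 0, 0) unless GetGridSize is overridden.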


@@ -37,6 +37,18 @@ namespace tflite {
 namespace gpu {
 namespace cl {
+// kCustom: default value
+//   GPUOperation::GetGridSize must be overloaded
+// kWBToX_HDToY_SToZ:
+//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
+//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
+//   grid_z = dst_[0]->Slices();
+// kWBToX_HDToY_ZIs1:
+//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
+//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
+//   grid_z = 1;
+enum class TensorToGrid { kCustom, kWBToX_HDToY_SToZ, kWBToX_HDToY_ZIs1 };
 struct CreationContext {
   const CLDevice* device;
   CLContext* context;
@@ -122,6 +134,8 @@ class GPUOperation {
   Arguments args_;
   std::string code_;
+  // not applicable to elementwise
+  TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
   bool elementwise_ = false;
   // applicable only with elementwise_ = true;
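The commit itself only exercises kWBToX_HDToY_SToZ (via CreateConcatXY above). A sketch of where the other non-custom mapping would fit, with a hypothetical factory and kernel-code generator that are not part of this change:

  // Hypothetical op whose kernel loops over all slices inside each (x, y) work item
  // (e.g. a per-pixel reduction), so the dispatch grid needs no Z extent.
  GPUOperation CreatePerPixelReduce(const OperationDef& definition) {
    GPUOperation op(definition);
    op.AddSrcTensor("src_tensor", definition.src_tensors[0]);
    op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
    op.code_ = GetPerPixelReduceKernelCode(definition);  // hypothetical generator
    op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1;  // grid_z fixed to 1
    return op;
  }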


@@ -110,8 +110,8 @@ absl::Status SelectConcat(const ConcatAttributes& attr,
     case Axis::DEPTH:
     case Axis::HEIGHT:
     case Axis::WIDTH: {
-      ConcatXY operation = CreateConcatXY(op_def, attr);
-      *ptr = absl::make_unique<ConcatXY>(std::move(operation));
+      GPUOperation operation = CreateConcatXY(op_def, attr);
+      *ptr = absl::make_unique<GPUOperation>(std::move(operation));
       return absl::OkStatus();
     }
     default: