From 1f05cc5973c1182c35cd4b11ee8cf0f01ed707dc Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 17 Jun 2020 17:18:45 -0700 Subject: [PATCH] Better FullyConnected/ConvTransposed selection for Intel. PiperOrigin-RevId: 316997745 Change-Id: I28befdd528917c3846ff6ae79b0f8427389dfc39 --- .../delegates/gpu/cl/kernels/convolution_transposed_3x3.cc | 2 +- .../delegates/gpu/cl/kernels/convolution_transposed_4x4.cc | 2 +- .../gpu/cl/selectors/convolution_transposed_selector.cc | 1 + .../delegates/gpu/cl/selectors/fully_connected_selector.cc | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index 4a68eda1d95..9b028721d2d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -269,7 +269,7 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3( work_group_launch_order_(2, 0, 1) { if (device.IsPowerVR()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; - } else if (device.IsNvidia()) { + } else if (device.IsNvidia() || device.IsIntel()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; } else if (device.IsAMD()) { weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index 0f7f90989e8..209b675087e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -270,7 +270,7 @@ ConvolutionTransposed4x4::ConvolutionTransposed4x4( : GPUOperation(definition) { if (device.IsPowerVR()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; - } else if (device.IsNvidia()) { + } else if (device.IsNvidia() || device.IsIntel()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; } else if (device.IsAMD()) { weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc index 12e99b57aa7..5fdfdca073e 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc @@ -112,6 +112,7 @@ absl::Status SelectConvolutionTransposed( case Vendor::POWERVR: case Vendor::NVIDIA: case Vendor::AMD: + case Vendor::INTEL: return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def, ptr); case Vendor::MALI: diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc index 12a1d726368..eacbea8b586 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc @@ -109,6 +109,9 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr, return SelectFullyConnectedAdreno(attr, creation_context, op_def, batch_size, ptr); case Vendor::POWERVR: + case Vendor::AMD: + case Vendor::NVIDIA: + case Vendor::INTEL: return SelectFullyConnectedPowerVR(attr, creation_context, op_def, batch_size, ptr); case Vendor::MALI: