From 6a7391ca0231d4ee5426595cec8ecf181f2fc6eb Mon Sep 17 00:00:00 2001
From: Peter Hawkins
Date: Wed, 7 Oct 2020 18:43:53 -0700
Subject: [PATCH] [TPU] Don't pass host_shapes across the TPU API boundary.
 They can be computed from device shapes.

PiperOrigin-RevId: 335996516
Change-Id: I702ff81ce311042399ef87bddb5e0d68b7464331
---
 tensorflow/compiler/xla/service/executable.h               | 3 +++
 tensorflow/core/tpu/tpu_on_demand_compiler.cc              | 2 --
 tensorflow/stream_executor/tpu/c_api_conversions.cc        | 5 +----
 tensorflow/stream_executor/tpu/c_api_decl.h                | 2 --
 tensorflow/stream_executor/tpu/tpu_executable_interface.cc | 3 +--
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 9216e5de85d..55d11a79011 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -153,6 +153,9 @@ class ExecutionOutput {
                   std::vector<se::OwningDeviceMemory> to_be_released)
       : result_(std::move(result)),
         to_be_released_(std::move(to_be_released)) {}
+  ExecutionOutput(Shape on_device_shape, se::DeviceMemoryAllocator* allocator,
+                  int device_ordinal)
+      : result_(std::move(on_device_shape), allocator, device_ordinal) {}
   ExecutionOutput(Shape on_host_shape, Shape on_device_shape,
                   se::DeviceMemoryAllocator* allocator, int device_ordinal)
       : result_(std::move(on_host_shape), std::move(on_device_shape), allocator,
diff --git a/tensorflow/core/tpu/tpu_on_demand_compiler.cc b/tensorflow/core/tpu/tpu_on_demand_compiler.cc
index 08153f62063..01ea9f5848a 100644
--- a/tensorflow/core/tpu/tpu_on_demand_compiler.cc
+++ b/tensorflow/core/tpu/tpu_on_demand_compiler.cc
@@ -119,7 +119,6 @@ class TpuExecutable : public TpuExecutableInterface {
     }
 
     ApiConverter::ToC(arg.shape(), &se_args[i]->dynamic_shape);
-    ApiConverter::ToC(arg.host_shape(), &se_args[i]->host_shape);
     const auto& unowned_indices = arg.unowned_indices();
     se_args[i]->unowned_indices_size = unowned_indices.size();
     se_args[i]->unowned_indices = new XLA_ShapeIndex[unowned_indices.size()];
@@ -142,7 +141,6 @@ class TpuExecutable : public TpuExecutableInterface {
   for (int i = 0; i < arguments.size(); ++i) {
     ApiConverter::Free(&se_args[i]->shape_tree.shape);
     ApiConverter::Free(&se_args[i]->dynamic_shape);
-    ApiConverter::Free(&se_args[i]->host_shape);
     delete[] se_args[i]->unowned_indices;
     delete[] se_args[i]->shape_tree.buffers;
     delete se_args[i];
diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.cc b/tensorflow/stream_executor/tpu/c_api_conversions.cc
index 674a1fdfb68..0a7801f45fc 100644
--- a/tensorflow/stream_executor/tpu/c_api_conversions.cc
+++ b/tensorflow/stream_executor/tpu/c_api_conversions.cc
@@ -23,7 +23,6 @@ limitations under the License.
 namespace ApiConverter {
 
 xla::ShapedBuffer FromC(XLA_ShapedBuffer* c_buffer) {
-  xla::Shape xla_on_host_shape = ApiConverter::FromC(&c_buffer->on_host_shape);
   xla::Shape xla_on_device_shape =
       ApiConverter::FromC(&c_buffer->on_device_shape);
 
@@ -36,7 +35,7 @@ xla::ShapedBuffer FromC(XLA_ShapedBuffer* c_buffer) {
   }
 
   xla::ShapedBuffer xla_shaped_buffer(
-      xla_on_host_shape, xla_on_device_shape,
+      xla_on_device_shape,
       tensorflow::tpu::TpuPlatformInterface::GetRegisteredPlatform(),
       c_buffer->device_ordinal);
   xla_shaped_buffer.set_buffers(xla_shape_tree);
@@ -199,7 +198,6 @@ xla::MutableBorrowingLiteral FromC(XLA_Literal* c_literal) {
 }
 
 void ToC(const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer) {
-  ApiConverter::ToC(buffer.on_host_shape(), &c_device_buffer->on_host_shape);
   ApiConverter::ToC(buffer.on_device_shape(),
                     &c_device_buffer->on_device_shape);
   c_device_buffer->device_ordinal = buffer.device_ordinal();
@@ -226,7 +224,6 @@ void Free(XLA_Literal* c_literal) {
 
 void Free(XLA_ShapedBuffer* c_buffer) {
   ApiConverter::Free(&c_buffer->on_device_shape);
-  ApiConverter::Free(&c_buffer->on_host_shape);
   delete[] c_buffer->bases;
 }
 
diff --git a/tensorflow/stream_executor/tpu/c_api_decl.h b/tensorflow/stream_executor/tpu/c_api_decl.h
index 7953670dec7..dcb53823e0c 100644
--- a/tensorflow/stream_executor/tpu/c_api_decl.h
+++ b/tensorflow/stream_executor/tpu/c_api_decl.h
@@ -177,7 +177,6 @@ typedef struct XLA_Shape {
 
 // Represents a leaf node for a XLA shaped buffer.
 typedef struct XLA_ShapedBuffer {
-  XLA_Shape on_host_shape;
   XLA_Shape on_device_shape;
   int device_ordinal;
 
@@ -208,7 +207,6 @@ typedef struct SE_ExecutionInput {
   XLA_ShapeIndex* unowned_indices;
   int unowned_indices_size;
   XLA_Shape dynamic_shape;
-  XLA_Shape host_shape;
 } SE_ExecutionInput;
 
 typedef struct SE_ExecutionOutput {
diff --git a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc
index af29f2e2b06..84ce8444420 100644
--- a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc
+++ b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc
@@ -90,8 +90,7 @@ TpuExecutableInterface::AllocateOutputMemoryWithInputReuse(
     }
   }
 
-  ExecutionOutput result(host_shape, std::move(device_shape), allocator,
-                         device_ordinal);
+  ExecutionOutput result(std::move(device_shape), allocator, device_ordinal);
   // Iterate through and allocate a buffer for each shape index, checking for
   // possible input buffer reuse.
   int64 reused_buffer_bytes = 0;
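
Note on the approach: the patch can stop passing host shapes across the C API
boundary because a host shape is derivable from the corresponding device
shape; the device shape only adds device-specific layout detail (such as
tiling and memory-space assignments) on top of the element type and
dimensions. Below is a minimal self-contained C++ sketch of that derivation.
The Layout and Shape structs and the DeviceShapeToHostShape function here are
hypothetical stand-ins for illustration, not XLA's real types; the actual
logic lives in XLA's shape machinery.

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for xla::Layout / xla::Shape (illustration only).
struct Layout {
  std::vector<int> minor_to_major;  // dimension ordering, kept on the host
  std::vector<int> tiles;           // device-only tiling information
  int memory_space;                 // device-only memory-space annotation
};

struct Shape {
  std::string element_type;
  std::vector<int> dimensions;
  Layout layout;
  std::vector<Shape> tuple_shapes;  // non-empty when this shape is a tuple
};

// Derives a host shape from a device shape by clearing the device-specific
// layout fields, recursing through tuple elements. Because this mapping is
// deterministic, carrying a separate host shape across the API is redundant;
// the patch above relies on exactly that.
Shape DeviceShapeToHostShape(Shape shape) {
  if (!shape.tuple_shapes.empty()) {
    for (Shape& element : shape.tuple_shapes) {
      element = DeviceShapeToHostShape(element);
    }
  } else {
    shape.layout.tiles.clear();
    shape.layout.memory_space = 0;  // reset to the default memory space
  }
  return shape;
}

int main() {
  // A tiled f32[128,256] device shape placed in memory space 1.
  Shape device_shape{"f32", {128, 256}, Layout{{1, 0}, {8, 128}, 1}, {}};
  Shape host_shape = DeviceShapeToHostShape(device_shape);
  std::cout << "device tiles: " << device_shape.layout.tiles.size()
            << ", host tiles: " << host_shape.layout.tiles.size() << "\n";
  return 0;
}

This is also why the diff can replace the two-shape ExecutionOutput and
ShapedBuffer constructors with single-shape ones: any caller that still needs
the host view can recompute it on demand rather than shipping both shapes
through every struct in the C API.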