Merge pull request #45022 from nouiz:upstream_maybeowning

PiperOrigin-RevId: 351103206
Change-Id: Ifa9821e3dc1d4fd15c440da19a7e03d26daaa89e
commit c4b65fb314
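The first hunk makes the ShapedBuffer-span form of Executable::ExecuteAsyncOnStream virtual, and GpuExecutable now declares its own version of it. The remaining hunks funnel both argument forms that GpuExecutable can receive (an owned std::vector<ExecutionInput> and an unowned absl::Span<const ShapedBuffer* const>) through a single ExecuteAsyncOnStreamImpl taking a VariantArguments, an absl::variant over the two span types, dispatching with absl::get_if / absl::get wherever the two forms differ. Below is a minimal, self-contained sketch of that dispatch pattern; Owned and Unowned are placeholder types standing in for ExecutionInput and ShapedBuffer, not the real XLA classes.

// Minimal sketch of the variant-of-spans dispatch introduced in this commit.
// Owned/Unowned are placeholders for ExecutionInput and ShapedBuffer; only
// the absl::variant / absl::Span machinery is real.
#include <iostream>
#include <vector>

#include "absl/types/span.h"
#include "absl/types/variant.h"

struct Unowned { int value; };
struct Owned { int value; };

using VariantArgs =
    absl::variant<absl::Span<const Unowned* const>, absl::Span<Owned>>;

// Dispatch on whichever alternative the caller handed in, mirroring the
// absl::get_if / absl::get branches in BufferForAllocation below.
int ValueAt(VariantArgs args, int index) {
  if (auto* unowned = absl::get_if<absl::Span<const Unowned* const>>(&args)) {
    return (*unowned)[index]->value;
  }
  return absl::get<absl::Span<Owned>>(args)[index].value;
}

int main() {
  // Owned form: a mutable span over a vector, as absl::MakeSpan(arguments)
  // produces in ExecuteAsyncOnStream below.
  std::vector<Owned> owned = {{1}, {2}, {3}};
  std::cout << ValueAt(absl::MakeSpan(owned), 1) << "\n";  // prints 2

  // Unowned form: a span of const pointers whose storage the caller keeps.
  Unowned a{4}, b{5};
  const Unowned* const ptrs[] = {&a, &b};
  std::cout << ValueAt(absl::Span<const Unowned* const>(ptrs), 0) << "\n";  // 4
  return 0;
}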
@@ -278,7 +278,7 @@ class Executable {
   // If the hlo_execution_profile is provided as non-nullptr, profiling will be
   // enabled. Note that profiling is tricky to use correctly, as the profiling
   // objects (when they exist) must out-live the task.
-  StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream(
+  virtual StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
       absl::Span<const ShapedBuffer* const> arguments,
       HloExecutionProfile* hlo_execution_profile);
@@ -299,7 +299,7 @@ GpuExecutable::ResolveConstantGlobals(se::Stream* stream) {
 }
 
 StatusOr<se::DeviceMemoryBase> GpuExecutable::BufferForAllocation(
-    absl::Span<ExecutionInput const> arguments,
+    VariantArguments arguments,
     const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
     const BufferAllocation& allocation,
     se::DeviceMemoryAllocator* const memory_allocator, int device_ordinal,
@@ -308,10 +308,17 @@ StatusOr<se::DeviceMemoryBase> GpuExecutable::BufferForAllocation(
     return se::DeviceMemoryBase{};
   } else if (allocation.is_entry_computation_parameter()) {
     int64 param_no = allocation.parameter_number();
-    se::DeviceMemoryBase registered_buffer =
-        arguments[param_no]
-            .Buffer(allocation.param_shape_index())
-            .AsDeviceMemoryBase();
+    se::DeviceMemoryBase registered_buffer = [&] {
+      if (auto unowned_shapedbuffers =
+              absl::get_if<absl::Span<const ShapedBuffer* const>>(&arguments)) {
+        return (*unowned_shapedbuffers)[param_no]->buffers().element(
+            allocation.param_shape_index());
+      } else {
+        return absl::get<absl::Span<ExecutionInput>>(arguments)[param_no]
+            .Buffer(allocation.param_shape_index())
+            .AsDeviceMemoryBase();
+      }
+    }();
     if (registered_buffer.is_null() && registered_buffer.size() > 0) {
       return FailedPrecondition(
           "Cannot run XLA computation because pointer to (sub-)buffer at "
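The immediately-invoked lambda above lets registered_buffer still be initialized from a single expression even though the value now comes from either variant alternative; return-type deduction succeeds because both branches produce an se::DeviceMemoryBase by value. A stripped-down sketch of the same idiom, with placeholder Buffer, UnownedParam and OwnedParam types standing in for the XLA classes:

// Sketch of the immediately-invoked-lambda initializer used for
// `registered_buffer`.  Both branches copy out the same value type, so the
// lambda's deduced return type is unambiguous.  Buffer, UnownedParam and
// OwnedParam are placeholders, not the real XLA classes.
#include <cstddef>

#include "absl/types/span.h"
#include "absl/types/variant.h"

struct Buffer {
  const void* opaque = nullptr;
  std::size_t size = 0;
  bool is_null() const { return opaque == nullptr; }
};

struct UnownedParam { Buffer buffer; };
struct OwnedParam { Buffer buffer; };

using Params = absl::variant<absl::Span<const UnownedParam* const>,
                             absl::Span<OwnedParam>>;

bool ParamBufferIsNull(Params params, int param_no) {
  Buffer registered_buffer = [&] {
    if (auto* unowned =
            absl::get_if<absl::Span<const UnownedParam* const>>(&params)) {
      return (*unowned)[param_no]->buffer;  // Buffer by value
    }
    return absl::get<absl::Span<OwnedParam>>(params)[param_no].buffer;
  }();
  return registered_buffer.is_null();
}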
@@ -364,7 +371,7 @@ static Status CheckAlignment(const BufferAllocation& allocation,
 }
 
 StatusOr<BufferAllocations> GpuExecutable::GenerateBufferAllocations(
-    absl::Span<ExecutionInput const> arguments,
+    VariantArguments arguments,
     const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
     se::DeviceMemoryAllocator* const memory_allocator,
     se::StreamExecutor* executor) {
@@ -391,8 +398,25 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
     std::vector<ExecutionInput> arguments,
     HloExecutionProfile* hlo_execution_profile) {
-  XLA_SCOPED_LOGGING_TIMER(
-      absl::StrCat("GpuExecutable::ExecuteAsyncOnStream(", module_name_, ")"));
+  return ExecuteAsyncOnStreamImpl(run_options, absl::MakeSpan(arguments),
+                                  hlo_execution_profile);
+}
+
+StatusOr<ScopedShapedBuffer> GpuExecutable::ExecuteAsyncOnStream(
+    const ServiceExecutableRunOptions* run_options,
+    absl::Span<const ShapedBuffer* const> arguments,
+    HloExecutionProfile* hlo_execution_profile) {
+  TF_ASSIGN_OR_RETURN(
+      ExecutionOutput out,
+      ExecuteAsyncOnStreamImpl(run_options, arguments, hlo_execution_profile));
+  return out.ConsumeResult();
+}
+
+StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStreamImpl(
+    const ServiceExecutableRunOptions* run_options, VariantArguments arguments,
+    HloExecutionProfile* hlo_execution_profile) {
+  XLA_SCOPED_LOGGING_TIMER(absl::StrCat(
+      "GpuExecutable::ExecuteAsyncOnStreamImpl(", module_name_, ")"));
   se::DeviceMemoryAllocator* const memory_allocator = run_options->allocator();
   // Force synchronous execution if the allocator requires it.
   const bool block_host_until_done =
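After this hunk both public ExecuteAsyncOnStream overloads are thin wrappers: the owned-argument overload forwards absl::MakeSpan(arguments) to ExecuteAsyncOnStreamImpl, while the unowned overload forwards its span and then unwraps the ExecutionOutput into a ScopedShapedBuffer via ConsumeResult(). A hedged sketch of that shape, with placeholder Runner, Owned, Unowned, Output and Result types, and with error handling (StatusOr, TF_ASSIGN_OR_RETURN) omitted:

// Sketch of the two-thin-overloads-over-one-Impl shape used above.  Runner,
// Owned, Unowned, Output and Result are placeholders, not the XLA types.
#include <utility>
#include <vector>

#include "absl/types/span.h"
#include "absl/types/variant.h"

struct Owned {};
struct Unowned {};

struct Result {};
struct Output {
  Result result;
  Result ConsumeResult() { return std::move(result); }  // mirrors ExecutionOutput
};

class Runner {
 public:
  // Owned form: the overload keeps the vector alive and hands Impl a span.
  Output Run(std::vector<Owned> arguments) {
    return RunImpl(absl::MakeSpan(arguments));
  }

  // Unowned form: forward the span, then keep only the plain result.
  Result Run(absl::Span<const Unowned* const> arguments) {
    Output out = RunImpl(arguments);
    return out.ConsumeResult();
  }

 private:
  using VariantArgs =
      absl::variant<absl::Span<const Unowned* const>, absl::Span<Owned>>;

  Output RunImpl(VariantArgs arguments) {
    // All of the real work (and the absl::get_if dispatch) lives here.
    (void)arguments;
    return Output{};
  }
};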
@@ -443,18 +467,31 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
             << " @ index: " << index.ToString();
 
     if (output_info.alias_config) {
-      ExecutionInput& input = arguments[allocation->parameter_number()];
       MaybeOwningDeviceMemory* maybe_owning_memory =
-          input.MutableBuffer(allocation->param_shape_index());
-      if (output_info.alias_config->must_alias() &&
+          [&]() -> xla::MaybeOwningDeviceMemory* {
+        // ScopedBuffer is never an owned buffer.
+        if (auto* unowned_shapedbuffers =
+                absl::get_if<absl::Span<const ShapedBuffer* const>>(
+                    &arguments)) {
+          return nullptr;
+        } else {
+          auto unowned_execution_input =
+              absl::get<absl::Span<ExecutionInput>>(arguments);
+          ExecutionInput& input =
+              unowned_execution_input[allocation->parameter_number()];
+          return input.MutableBuffer(allocation->param_shape_index());
+        }
+      }();
+      if (output_info.alias_config->must_alias() && maybe_owning_memory &&
           !maybe_owning_memory->HasOwnership()) {
         return InvalidArgument(
             "An input was configured to be must-alias at "
             "compile time but not donated at runtime: allocation %d",
             output_info.allocation_index);
       }
-      if (absl::optional<se::OwningDeviceMemory> owning =
-              maybe_owning_memory->Release()) {
+      if (maybe_owning_memory && maybe_owning_memory->HasOwnership()) {
+        absl::optional<tensorflow::se::OwningDeviceMemory> owning =
+            maybe_owning_memory->Release();
         // If the caller passes the ownership of the device memory, reuse it
         // as the output buffer. It is up to the caller whether or not to
         // donate a buffer; the aliasing information describes which buffers
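The lambda above spells out its return type (-> xla::MaybeOwningDeviceMemory*) because one branch returns nullptr while the other returns a real pointer; with a deduced return type the branches would disagree (std::nullptr_t versus a pointer type) and the code would not compile. A small sketch of that detail, using a placeholder Slot type rather than MaybeOwningDeviceMemory:

// Sketch of why the lambda above needs an explicit trailing return type:
// one branch returns nullptr, the other a real pointer, and return-type
// deduction cannot reconcile std::nullptr_t with Slot*.  Slot and the
// variant alternatives are placeholders, not XLA types.
#include "absl/types/span.h"
#include "absl/types/variant.h"

struct Slot { bool owned = false; };

struct UnownedArg {};
struct OwnedArg { Slot slot; };

using Args =
    absl::variant<absl::Span<const UnownedArg* const>, absl::Span<OwnedArg>>;

bool ArgumentIsOwned(Args args, int index) {
  Slot* maybe_owning = [&]() -> Slot* {  // deduction would fail without "-> Slot*"
    if (absl::holds_alternative<absl::Span<const UnownedArg* const>>(args)) {
      return nullptr;  // the unowned form never donates its buffers
    }
    return &absl::get<absl::Span<OwnedArg>>(args)[index].slot;
  }();
  return maybe_owning != nullptr && maybe_owning->owned;
}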
@@ -520,7 +557,9 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
       buffer_allocations.TearDown(buffers_in_result, allocations_));
 
   // Free allocations for arguments.
-  MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
+  if (auto args = absl::get_if<absl::Span<ExecutionInput>>(&arguments)) {
+    MarkToBeReleasedArguments(*args, result);
+  }
   return std::move(result);
 }
 
@@ -116,6 +116,17 @@ class GpuExecutable : public Executable {
       std::vector<ExecutionInput> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
+  StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream(
+      const ServiceExecutableRunOptions* run_options,
+      absl::Span<const ShapedBuffer* const> arguments,
+      HloExecutionProfile* hlo_execution_profile);
+
+  using VariantArguments = absl::variant<absl::Span<const ShapedBuffer* const>,
+                                         absl::Span<ExecutionInput>>;
+  StatusOr<ExecutionOutput> ExecuteAsyncOnStreamImpl(
+      const ServiceExecutableRunOptions* run_options,
+      VariantArguments arguments, HloExecutionProfile* hlo_execution_profile);
+
   absl::Span<const BufferAllocation> GetAllocations() const {
     return allocations_;
   }
@@ -146,13 +157,13 @@ class GpuExecutable : public Executable {
       const ServiceExecutableRunOptions* run_options);
 
   StatusOr<BufferAllocations> GenerateBufferAllocations(
-      absl::Span<ExecutionInput const> arguments,
+      VariantArguments arguments,
       const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
       se::DeviceMemoryAllocator* const memory_allocator,
       se::StreamExecutor* executor);
 
   StatusOr<se::DeviceMemoryBase> BufferForAllocation(
-      absl::Span<ExecutionInput const> arguments,
+      VariantArguments arguments,
       const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
       const BufferAllocation& allocation,
       se::DeviceMemoryAllocator* const memory_allocator, int device_ordinal,