[XLA] [NFC] Reduce duplication between Executable subclasses
Factor out the code to mark owning arguments as to-be-released.
PiperOrigin-RevId: 316507574
Change-Id: I22fce6e69d0933baa25db09e25bda4037beceb17
This commit is contained in:
parent
d1a34523f4
commit
555be8943e
tensorflow/compiler/xla/service
@ -350,16 +350,7 @@ StatusOr<ExecutionOutput> CpuExecutable::ExecuteAsyncOnStream(
|
||||
std::move(buffers)),
|
||||
hlo_execution_profile});
|
||||
|
||||
// TODO(cheshire): Duplication with other executables.
|
||||
for (ExecutionInput& argument : arguments) {
|
||||
for (auto& index_buffer : *argument.MutableBuffers()) {
|
||||
absl::optional<se::OwningDeviceMemory> maybe_owning_buffer =
|
||||
index_buffer.second.Release();
|
||||
if (maybe_owning_buffer) {
|
||||
result.AddToBeReleased(std::move(*maybe_owning_buffer));
|
||||
}
|
||||
}
|
||||
}
|
||||
MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
|
@ -258,4 +258,16 @@ StatusOr<ExecutionOutput> Executable::ExecuteAsyncOnStreamWrapper(
|
||||
|
||||
int64 Executable::SizeOfGeneratedCodeInBytes() const { return -1; }
|
||||
|
||||
// Walks every buffer entry of every ExecutionInput in `arguments`, calls
// Release() on it, and, whenever Release() yields a value, moves that value
// into `result` through AddToBeReleased(). Nothing is deallocated here; the
// collected buffers are handed back to the caller via `result`.
void Executable::MarkToBeReleasedArguments(absl::Span<ExecutionInput> arguments,
                                           ExecutionOutput& result) {
  for (ExecutionInput& input : arguments) {
    for (auto& entry : *input.MutableBuffers()) {
      absl::optional<se::OwningDeviceMemory> released = entry.second.Release();
      if (!released) {
        // Release() produced no owning buffer for this entry; nothing to
        // forward to the caller.
        continue;
      }
      result.AddToBeReleased(std::move(*released));
    }
  }
}
|
||||
|
||||
} // namespace xla
|
||||
|
@ -331,6 +331,15 @@ class Executable {
|
||||
bool dumping_snapshot() const { return hlo_proto_ != nullptr; }
|
||||
HloProto const* hlo_proto() const { return hlo_proto_.get(); }
|
||||
|
||||
// Gather unused but donated buffers, return them to the caller of this API.
|
||||
// We don't free buffers inside this function since the caller could have
|
||||
// different preferences for buffer deallocation. For example, in TensorFlow,
|
||||
// buffers are most efficiently deallocated as soon as a program has been
|
||||
// launched. However, in XRT, the buffers are expected to be deallocated after
|
||||
// the program has finished since XRT doesn't support async deallocation.
|
||||
void MarkToBeReleasedArguments(absl::Span<ExecutionInput> arguments,
|
||||
ExecutionOutput& result);
|
||||
|
||||
protected:
|
||||
// HloModule this was compiled from. BufferAssignment keeps pointers to
|
||||
// HloInstructions owned by the HloModule so we need to keep the HloModule
|
||||
|
@ -541,15 +541,7 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
|
||||
buffer_allocations.TearDown(buffers_in_result, assignment_.get()));
|
||||
|
||||
// Free allocations for arguments.
|
||||
for (ExecutionInput& argument : arguments) {
|
||||
for (auto& index_buffer : *argument.MutableBuffers()) {
|
||||
if (absl::optional<se::OwningDeviceMemory> owning =
|
||||
index_buffer.second.Release()) {
|
||||
result.AddToBeReleased(std::move(*owning));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
|
@ -122,14 +122,7 @@ StatusOr<ExecutionOutput> InterpreterExecutableBase::ExecuteAsyncOnStream(
|
||||
const double nanoseconds = (end_micros - start_micros) * 1000.0;
|
||||
profile->set_compute_time_ns(std::max(nanoseconds, 1.0));
|
||||
}
|
||||
for (auto& argument : arguments) {
|
||||
for (auto& index_buffer : *argument.MutableBuffers()) {
|
||||
auto maybe_owning_buffer = index_buffer.second.Release();
|
||||
if (maybe_owning_buffer) {
|
||||
result.AddToBeReleased(std::move(*maybe_owning_buffer));
|
||||
}
|
||||
}
|
||||
}
|
||||
MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user