From a749d9dfa85df4900a5b1e211e9df9b13d691216 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 16 Mar 2020 11:53:56 -0700
Subject: [PATCH] Support alternative Evaluators in the XLA Interpreter
 Executable

PiperOrigin-RevId: 301206533
Change-Id: I8aee5751d2714f1c88bbff0d883a8482e63dd52e
---
 .../compiler/xla/service/interpreter/BUILD     |  25 ++++
 .../xla/service/interpreter/executable.cc      | 109 +-------------
 .../xla/service/interpreter/executable.h       |  14 +-
 .../service/interpreter/executable_base.cc     | 137 ++++++++++++++++++
 .../xla/service/interpreter/executable_base.h  |  57 ++++++++
 .../xla/service/interpreter/executor.h         |   8 +-
 6 files changed, 236 insertions(+), 114 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/interpreter/executable_base.cc
 create mode 100644 tensorflow/compiler/xla/service/interpreter/executable_base.h

diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD
index 552c8eb1ae5..7a4eefc1ab6 100644
--- a/tensorflow/compiler/xla/service/interpreter/BUILD
+++ b/tensorflow/compiler/xla/service/interpreter/BUILD
@@ -71,11 +71,36 @@ cc_library(
     ),
 )
 
+cc_library(
+    name = "executable_base",
+    srcs = ["executable_base.cc"],
+    hdrs = ["executable_base.h"],
+    deps = [
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:shape_tree",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:xla_proto_cc",
+        "//tensorflow/compiler/xla/service:dynamic_dimension_inference",
+        "//tensorflow/compiler/xla/service:executable",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_execution_profile",
+        "//tensorflow/compiler/xla/service:maybe_owning_device_memory",
+        "//tensorflow/compiler/xla/service:shaped_buffer",
+        "//tensorflow/compiler/xla/service:transfer_manager",
+        "//tensorflow/stream_executor:event",
+        "//tensorflow/stream_executor:stream",
+        "//tensorflow/stream_executor/lib",
+        "@com_google_absl//absl/types:optional",
+    ],
+)
+
 cc_library(
     name = "executable",
     srcs = ["executable.cc"],
     hdrs = ["executable.h"],
     deps = [
+        ":executable_base",
         ":executor",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc
index 725cb437f8c..cc7fdeaf0f6 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executable.cc
@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/interpreter/executable_base.h" #include "tensorflow/compiler/xla/service/interpreter/executor.h" #include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" @@ -41,8 +42,7 @@ InterpreterExecutable::InterpreterExecutable( std::unique_ptr hlo_module, std::unique_ptr evaluator, absl::optional dynamic_dymension_inference) - : Executable(std::move(hlo_module), /*hlo_profile_printer_data=*/nullptr, - /*hlo_profile_index_map=*/nullptr), + : InterpreterExecutableBase(std::move(hlo_module)), evaluator_(std::move(evaluator)), dynamic_dimension_inference_(std::move(dynamic_dymension_inference)) { if (dynamic_dimension_inference_.has_value()) { @@ -51,107 +51,12 @@ InterpreterExecutable::InterpreterExecutable( } } -InterpreterExecutable::~InterpreterExecutable() {} - -StatusOr InterpreterExecutable::ExecuteAsyncOnStream( - const ServiceExecutableRunOptions* run_options, - std::vector arguments, - HloExecutionProfile* hlo_execution_profile) { - se::Stream* stream = run_options->stream(); - se::StreamExecutor* executor = stream->parent(); - const se::Platform* platform = executor->platform(); - - // Convert the ShapeTree to a ShapedBuffer. We do this so we can call - // TransferManager methods below. - std::vector argument_buffers; - argument_buffers.reserve(arguments.size()); - for (auto& argument : arguments) { - const ShapeTree& buffers = argument.Buffers(); - argument_buffers.push_back(ShapedBuffer(buffers.shape(), buffers.shape(), - /*platform=*/nullptr, - /*device_ordinal=*/0)); - auto in_it = buffers.begin(); - auto out_it = argument_buffers.back().buffers().begin(); - for (; in_it != buffers.end(); ++in_it, ++out_it) { - out_it->second = in_it->second.AsDeviceMemoryBase(); - } - } - - VLOG(1) << "Execute " << module().name(); - if (VLOG_IS_ON(2)) { - for (const auto& a : argument_buffers) { - VLOG(2) << "-- argument " << a; - } - } - - uint64 start_micros = tensorflow::Env::Default()->NowMicros(); - - const HloComputation* computation = module().entry_computation(); - if (computation->num_parameters() != arguments.size()) { - return tensorflow::errors::Internal( - "Mismatch between argument count and graph parameter count."); - } - - // Check that the args have the right shape. - for (int64 i = 0; i < computation->num_parameters(); ++i) { - const auto& expected_shape = computation->parameter_instruction(i)->shape(); - const auto& actual_shape = argument_buffers[i].on_device_shape(); - if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, - actual_shape)) { - return InvalidArgument( - "Shape mismatch on parameter %d. Expected %s, but was %s.", i, - ShapeUtil::HumanStringWithLayout(expected_shape), - ShapeUtil::HumanStringWithLayout(actual_shape)); - } - } - - TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager, - TransferManager::GetForPlatform(platform)); - - // Transform the ShapedBuffer arguments into literals which the evaluator - // consumes. 
-  std::vector<Literal> arg_literals;
-  for (int64 p = 0; p < computation->num_parameters(); ++p) {
-    TF_ASSIGN_OR_RETURN(Literal arg_literal,
-                        transfer_manager->TransferLiteralFromDevice(
-                            run_options->stream(), argument_buffers[p]));
-    arg_literals.push_back(std::move(arg_literal));
-  }
-
+StatusOr<Literal> InterpreterExecutable::Evaluate(
+    const HloComputation& computation, absl::Span<const Literal> arg_literals) {
   // Execute the graph using the HloEvaluator.
-  Literal result_literal;
-  {
-    tensorflow::mutex_lock lock(evaluator_lock_);
-    evaluator_->ResetVisitStates();
-    TF_ASSIGN_OR_RETURN(result_literal,
-                        evaluator_->Evaluate(*computation, arg_literals));
-  }
-
-  // Transform the result literal back into a ShapedBuffer.
-  TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result_buffers,
-                      transfer_manager->AllocateScopedShapedBuffer(
-                          result_literal.shape(), run_options->allocator(),
-                          executor->device_ordinal()));
-  TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice(
-      run_options->stream(), result_literal, result_buffers));
-  ExecutionOutput result(std::move(result_buffers));
-
-  uint64 end_micros = tensorflow::Env::Default()->NowMicros();
-
-  ExecutionProfile* profile = run_options->run_options().execution_profile();
-  if (profile) {
-    const double nanoseconds = (end_micros - start_micros) * 1000.0;
-    profile->set_compute_time_ns(std::max(nanoseconds, 1.0));
-  }
-  for (auto& argument : arguments) {
-    for (auto& index_buffer : *argument.MutableBuffers()) {
-      auto maybe_owning_buffer = index_buffer.second.Release();
-      if (maybe_owning_buffer) {
-        result.AddToBeReleased(std::move(*maybe_owning_buffer));
-      }
-    }
-  }
-  return std::move(result);
+  tensorflow::mutex_lock lock(evaluator_lock_);
+  evaluator_->ResetVisitStates();
+  return evaluator_->Evaluate(computation, arg_literals);
 }
 
 /*static*/ int64 InterpreterExecutable::ShapeSizeBytes(const Shape& shape) {
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h
index b6dbc886f35..ce68a8472f5 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.h
+++ b/tensorflow/compiler/xla/service/interpreter/executable.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/interpreter/executable_base.h"
 #include "tensorflow/compiler/xla/service/service_executable_run_options.h"
 #include "tensorflow/compiler/xla/service/shaped_buffer.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -40,23 +41,20 @@ namespace interpreter {
 
 // Responsible for running a HLO graph through the HloEvaluator and output
 // buffer allocation. Refer to interpreter/README.md for more.
-class InterpreterExecutable : public Executable {
+class InterpreterExecutable : public InterpreterExecutableBase {
  public:
   InterpreterExecutable(
       std::unique_ptr<HloModule> hlo_module,
       std::unique_ptr<HloEvaluator> evaluator,
       absl::optional<DynamicDimensionInference> dynamic_dymension_inference);
-  ~InterpreterExecutable() override;
-
-  StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      std::vector<ExecutionInput> arguments,
-      HloExecutionProfile* hlo_execution_profile) override
-      TF_LOCKS_EXCLUDED(evaluator_lock_);
 
   static int64 ShapeSizeBytes(const Shape& shape);
 
  protected:
+  StatusOr<Literal> Evaluate(const HloComputation& computation,
+                             absl::Span<const Literal> arg_literals) override
+      TF_LOCKS_EXCLUDED(evaluator_lock_);
+
   // The interpreter interprets executables with an HloEvaluator.
   std::unique_ptr<HloEvaluator> evaluator_ TF_PT_GUARDED_BY(evaluator_lock_);
   mutable tensorflow::mutex evaluator_lock_;
diff --git a/tensorflow/compiler/xla/service/interpreter/executable_base.cc b/tensorflow/compiler/xla/service/interpreter/executable_base.cc
new file mode 100644
index 00000000000..5850cbf005b
--- /dev/null
+++ b/tensorflow/compiler/xla/service/interpreter/executable_base.cc
@@ -0,0 +1,137 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/interpreter/executable_base.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h"
+#include "tensorflow/compiler/xla/service/shaped_buffer.h"
+#include "tensorflow/compiler/xla/service/transfer_manager.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/stream_executor/lib/statusor.h"
+#include "tensorflow/stream_executor/platform.h"
+#include "tensorflow/stream_executor/stream.h"
+#include "tensorflow/stream_executor/stream_executor_pimpl.h"
+
+namespace xla {
+namespace interpreter {
+
+InterpreterExecutableBase::InterpreterExecutableBase(
+    std::unique_ptr<HloModule> hlo_module)
+    : Executable(std::move(hlo_module), /*hlo_profile_printer_data=*/nullptr,
+                 /*hlo_profile_index_map=*/nullptr) {}
+
+StatusOr<ExecutionOutput> InterpreterExecutableBase::ExecuteAsyncOnStream(
+    const ServiceExecutableRunOptions* run_options,
+    std::vector<ExecutionInput> arguments,
+    HloExecutionProfile* hlo_execution_profile) {
+  se::Stream* stream = run_options->stream();
+  se::StreamExecutor* executor = stream->parent();
+  const se::Platform* platform = executor->platform();
+
+  // Convert the ShapeTree to a ShapedBuffer. We do this so we can call
+  // TransferManager methods below.
+  std::vector<ShapedBuffer> argument_buffers;
+  argument_buffers.reserve(arguments.size());
+  for (auto& argument : arguments) {
+    const ShapeTree<MaybeOwningDeviceMemory>& buffers = argument.Buffers();
+    argument_buffers.push_back(ShapedBuffer(buffers.shape(), buffers.shape(),
+                                            /*platform=*/nullptr,
+                                            /*device_ordinal=*/0));
+    auto in_it = buffers.begin();
+    auto out_it = argument_buffers.back().buffers().begin();
+    for (; in_it != buffers.end(); ++in_it, ++out_it) {
+      out_it->second = in_it->second.AsDeviceMemoryBase();
+    }
+  }
+
+  VLOG(1) << "Execute " << module().name();
+  if (VLOG_IS_ON(2)) {
+    for (const auto& a : argument_buffers) {
+      VLOG(2) << "-- argument " << a;
+    }
+  }
+
+  uint64 start_micros = tensorflow::Env::Default()->NowMicros();
+
+  const HloComputation* computation = module().entry_computation();
+  if (computation->num_parameters() != arguments.size()) {
+    return tensorflow::errors::Internal(
+        "Mismatch between argument count and graph parameter count.");
+  }
+
+  // Check that the args have the right shape.
+  for (int64 i = 0; i < computation->num_parameters(); ++i) {
+    const auto& expected_shape = computation->parameter_instruction(i)->shape();
+    const auto& actual_shape = argument_buffers[i].on_device_shape();
+    if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape,
+                                                   actual_shape)) {
+      return InvalidArgument(
+          "Shape mismatch on parameter %d. Expected %s, but was %s.", i,
+          ShapeUtil::HumanStringWithLayout(expected_shape),
+          ShapeUtil::HumanStringWithLayout(actual_shape));
+    }
+  }
+
+  TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager,
+                      TransferManager::GetForPlatform(platform));
+
+  // Transform the ShapedBuffer arguments into literals which the evaluator
+  // consumes.
+  std::vector<Literal> arg_literals;
+  for (int64 p = 0; p < computation->num_parameters(); ++p) {
+    TF_ASSIGN_OR_RETURN(Literal arg_literal,
+                        transfer_manager->TransferLiteralFromDevice(
+                            run_options->stream(), argument_buffers[p]));
+    arg_literals.push_back(std::move(arg_literal));
+  }
+
+  TF_ASSIGN_OR_RETURN(Literal result_literal,
+                      Evaluate(*computation, arg_literals));
+
+  // Transform the result literal back into a ShapedBuffer.
+  TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result_buffers,
+                      transfer_manager->AllocateScopedShapedBuffer(
+                          result_literal.shape(), run_options->allocator(),
+                          executor->device_ordinal()));
+  TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice(
+      run_options->stream(), result_literal, result_buffers));
+  ExecutionOutput result(std::move(result_buffers));
+
+  uint64 end_micros = tensorflow::Env::Default()->NowMicros();
+
+  ExecutionProfile* profile = run_options->run_options().execution_profile();
+  if (profile) {
+    const double nanoseconds = (end_micros - start_micros) * 1000.0;
+    profile->set_compute_time_ns(std::max(nanoseconds, 1.0));
+  }
+  for (auto& argument : arguments) {
+    for (auto& index_buffer : *argument.MutableBuffers()) {
+      auto maybe_owning_buffer = index_buffer.second.Release();
+      if (maybe_owning_buffer) {
+        result.AddToBeReleased(std::move(*maybe_owning_buffer));
+      }
+    }
+  }
+  return std::move(result);
+}
+
+}  // namespace interpreter
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/interpreter/executable_base.h b/tensorflow/compiler/xla/service/interpreter/executable_base.h
new file mode 100644
index 00000000000..a02ab7af8d0
--- /dev/null
+++ b/tensorflow/compiler/xla/service/interpreter/executable_base.h
@@ -0,0 +1,57 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTABLE_BASE_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTABLE_BASE_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/service/dynamic_dimension_inference.h"
+#include "tensorflow/compiler/xla/service/executable.h"
+#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/service_executable_run_options.h"
+#include "tensorflow/compiler/xla/shape.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/xla.pb.h"
+namespace xla {
+namespace interpreter {
+
+// Responsible for running a HLO graph through the HloEvaluator and output
+// buffer allocation. Refer to interpreter/README.md for more.
+class InterpreterExecutableBase : public Executable {
+ public:
+  explicit InterpreterExecutableBase(std::unique_ptr<HloModule> hlo_module);
+
+  StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
+      const ServiceExecutableRunOptions* run_options,
+      std::vector<ExecutionInput> arguments,
+      HloExecutionProfile* hlo_execution_profile) override;
+
+ protected:
+  virtual StatusOr<Literal> Evaluate(
+      const HloComputation& computation,
+      absl::Span<const Literal> arg_literals) = 0;
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(InterpreterExecutableBase);
+};
+
+}  // namespace interpreter
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTABLE_BASE_H_
diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h
index 2279be7d2e5..3c35fda55f1 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.h
+++ b/tensorflow/compiler/xla/service/interpreter/executor.h
@@ -130,19 +130,19 @@ class XlaInterpreterExecutor : public internal::StreamExecutorInterface {
                     std::function<port::Status()> callback) override;
 
   port::Status AllocateEvent(Event *event) override {
-    return port::Status{port::error::UNIMPLEMENTED, ""};
+    return port::Status::OK();
   }
 
   port::Status DeallocateEvent(Event *event) override {
-    return port::Status{port::error::UNIMPLEMENTED, ""};
+    return port::Status::OK();
  }
 
   port::Status RecordEvent(Stream *stream, Event *event) override {
-    return port::Status{port::error::UNIMPLEMENTED, ""};
+    return port::Status{port::error::UNIMPLEMENTED, "RecordEvent"};
   }
 
   port::Status WaitForEvent(Stream *stream, Event *event) override {
-    return port::Status{port::error::UNIMPLEMENTED, ""};
+    return port::Status{port::error::UNIMPLEMENTED, "WaitForEvent"};
   }
 
   Event::Status PollForEventStatus(Event *event) override {
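
Usage sketch (not part of this patch): after this refactoring,
InterpreterExecutableBase::ExecuteAsyncOnStream owns argument transfer, shape
checking, result allocation, and profiling, and delegates the actual
computation to the pure-virtual Evaluate() hook, so an alternative evaluator
only has to map argument literals to a result literal. The subclass below is
a minimal illustration of that contract; the name LoggingInterpreterExecutable
and its logging behavior are invented for exposition, while the base-class and
HloEvaluator calls mirror the code added above.

  #include <memory>
  #include <utility>

  #include "absl/memory/memory.h"
  #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
  #include "tensorflow/compiler/xla/service/interpreter/executable_base.h"
  #include "tensorflow/core/platform/logging.h"

  namespace xla {
  namespace interpreter {

  // Hypothetical subclass: wraps the stock HloEvaluator but logs each entry
  // computation before evaluating it. A symbolic or instrumented evaluator
  // would slot in the same way by overriding Evaluate().
  class LoggingInterpreterExecutable : public InterpreterExecutableBase {
   public:
    explicit LoggingInterpreterExecutable(
        std::unique_ptr<HloModule> hlo_module)
        : InterpreterExecutableBase(std::move(hlo_module)),
          evaluator_(absl::make_unique<HloEvaluator>()) {}

   protected:
    StatusOr<Literal> Evaluate(const HloComputation& computation,
                               absl::Span<const Literal> arg_literals) override {
      VLOG(1) << "Evaluating " << computation.name() << " with "
              << arg_literals.size() << " argument(s)";
      // Visit states are per-run; reset them as InterpreterExecutable does.
      // (Unlike InterpreterExecutable, this sketch omits the evaluator mutex
      // and assumes single-threaded execution.)
      evaluator_->ResetVisitStates();
      return evaluator_->Evaluate(computation, arg_literals);
    }

   private:
    std::unique_ptr<HloEvaluator> evaluator_;
  };

  }  // namespace interpreter
  }  // namespace xla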