/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LOCAL_CLIENT_H_
#define TENSORFLOW_COMPILER_XLA_CLIENT_LOCAL_CLIENT_H_

#include <memory>

#include "tensorflow/compiler/xla/client/client.h"
#include "tensorflow/compiler/xla/client/computation.h"
#include "tensorflow/compiler/xla/executable_run_options.h"
#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/executable.h"
#include "tensorflow/compiler/xla/service/local_service.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
namespace xla {
// Class containing options for building a LocalExecutable with
// LocalClient::Compile.
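//
// Example (a minimal usage sketch; the device ordinal and `shape_with_layout`
// are illustrative assumptions, and any unset option keeps its default):
//
//   ExecutableBuildOptions build_options;
//   build_options.set_device_ordinal(0).set_result_layout(shape_with_layout);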
class ExecutableBuildOptions {
public:
// If set, this is the platform to build the computation for. This must match
// the underlying platform of the service. A value of nullptr indicates the
// option has not been set.
//
// TODO(b/28616830): Support multiple platforms.
ExecutableBuildOptions& set_platform(perftools::gputools::Platform* platform);
perftools::gputools::Platform* platform() const;
// If set, this is the device to build the computation for. Valid
// device_ordinal values are: 0 to # of devices - 1. These values are
// identical to the device ordinal values used by StreamExecutor. The built
// executable will be executable on any device equivalent to the specified
// device as determined by Backend::devices_equivalent(). A value of -1
// indicates this option has not been set.
ExecutableBuildOptions& set_device_ordinal(int device_ordinal);
int device_ordinal() const;
// If set, this specifies the layout of the result of the computation. If not
// set, the service will choose the layout of the result. A Shape is used to
// store the layout to accommodate tuple result shapes. A value of nullptr
// indicates the option has not been set.
ExecutableBuildOptions& set_result_layout(const Shape& shape_with_layout);
const Shape* result_layout() const;
// If set, the executable will be built to output a hybrid
// ShapedBuffer with top-level tuple pointers in host memory and
// result buffers in device memory.
ExecutableBuildOptions& set_has_hybrid_result(bool has_hybrid_result);
bool has_hybrid_result() const;

private:
perftools::gputools::Platform* platform_ = nullptr;
int device_ordinal_ = -1;
Shape result_layout_;
bool result_layout_set_ = false;
bool has_hybrid_result_ = true;
};

class LocalExecutable {
public:
// Run the compiled computation with the given arguments and options and
// return the result.
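//
// Example (a minimal sketch; `client`, `executable`, and `arg_buffer` are
// assumed to exist, with `arg_buffer` already placed on the target device):
//
//   ExecutableRunOptions run_options;
//   run_options.set_allocator(client->backend().memory_allocator());
//   StatusOr<std::unique_ptr<ShapedBuffer>> result =
//       executable->Run({&arg_buffer}, run_options);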
StatusOr<std::unique_ptr<ShapedBuffer>> Run(
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
const ExecutableRunOptions& options);
// Return the layout (contained in a shape) of the result produced by the
// computation.
const Shape& result_layout() const {
return executable_->module_config()
.entry_computation_layout()
.result_layout()
.shape();
}
// Return the options used to build the executable.
const ExecutableBuildOptions& build_options() const { return build_options_; }
// Return the built executable.
Executable* executable() const { return executable_.get(); }

private:
// Only a local client can construct these objects.
friend class LocalClient;
// Constructor invoked by LocalClient.
LocalExecutable(std::unique_ptr<Executable> executable, Backend* backend,
int device_ordinal,
const ExecutableBuildOptions& build_options);
// Validates that the given arguments and options satisfy various constraints
// of the computation.
tensorflow::Status ValidateExecutionOptions(
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
const ExecutableRunOptions& options, const Backend& backend);
// Records the computation in a SessionModule proto with the arguments used to
// invoke it, and the result. Enabled by flag: --xla_dump_executions_to.
StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAndDump(
const ServiceExecutableRunOptions* run_options,
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments);
// Records the arguments used to invoke the computation in a SessionModule
// proto.
tensorflow::Status RecordArguments(
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
SessionModule* session_module);
// Records the result of the computation in a SessionModule proto.
tensorflow::Status RecordResult(const ShapedBuffer* result,
SessionModule* session_module);
// Copies the contents of a ShapedBuffer into a Literal proto.
tensorflow::Status LiteralFromShapedBuffer(const ShapedBuffer& shaped_buffer,
Literal* literal);
// Compiled computation.
std::unique_ptr<Executable> executable_;
// Execution backend.
Backend* backend_;
// The ordinal of the device which this executable was compiled for. The
// executable can run on all equivalent devices (as determined by
// Backend::devices_equivalent).
int build_device_ordinal_;
// Options used to build the executable.
const ExecutableBuildOptions& build_options_;
};

// An XLA service client object for use when the client and service run in
// the same process.
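//
// Example (an illustrative sketch; obtaining the client through
// ClientLibrary::LocalClientOrDie() assumes the client library header is
// included):
//
//   LocalClient* client = ClientLibrary::LocalClientOrDie();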
class LocalClient : public Client {
public:
explicit LocalClient(LocalService* service)
: Client(service), local_service_(service) {}
LocalClient(const LocalClient&) = delete;
void operator=(const LocalClient&) = delete;
// For an array of arguments held on the local service, validate
// that each is placed on the specified device_ordinal, and return
// the DeviceMemoryBase corresponding to each argument.
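//
// Example (a sketch; `handles` is assumed to hold GlobalDataHandle pointers
// for arguments already resident on device 0):
//
//   std::vector<perftools::gputools::DeviceMemoryBase> argument_ptrs;
//   TF_RETURN_IF_ERROR(client->ResolveArguments(
//       handles, /*device_ordinal=*/0, &argument_ptrs));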
tensorflow::Status ResolveArguments(
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
int device_ordinal,
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs);
// Return a handle to a buffer large enough to hold shape, allocated
// on device_ordinal on the local service. If
// allocate_space_for_deep_copy, the buffer is large enough to hold
// all sub-buffers of a tuple shape, otherwise it is only as large
// as the top-level tuple pointer array.
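//
// Example (a sketch; the device ordinal and the deep-copy flag value are
// illustrative assumptions):
//
//   StatusOr<std::unique_ptr<GlobalData>> buffer =
//       client->AllocateBufferOnDevice(tuple_shape, /*device_ordinal=*/0,
//                                      /*allocate_space_for_deep_copy=*/true);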
StatusOr<std::unique_ptr<GlobalData>> AllocateBufferOnDevice(
const Shape& shape, int device_ordinal,
bool allocate_space_for_deep_copy);
// Build and return a LocalExecutable object. The executable is compiled using
// the given argument layouts and options.
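//
// Example (a minimal sketch; `computation` is a built Computation taking a
// single argument whose layout is described by `arg_shape`):
//
//   StatusOr<std::unique_ptr<LocalExecutable>> executable =
//       client->Compile(computation, {&arg_shape}, ExecutableBuildOptions());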
StatusOr<std::unique_ptr<LocalExecutable>> Compile(
const Computation& computation,
const tensorflow::gtl::ArraySlice<const Shape*> argument_layouts,
const ExecutableBuildOptions& options);
// A description of a computation to compile using CompileAheadOfTime.
struct AheadOfTimeComputationInstance {
const Computation* computation;
// Inform the compiler of the expected layout for arguments.
std::vector<const Shape*> argument_layouts;
// Specifies the expected result layout.
const Shape* result_layout;
};
// Compiles a list of computations for ahead-of-time execution. This is
// intended for use in static compilation. The |options| parameter describes
// the target for which the compiler should emit code.
//
// TODO(b/31222190): This doesn't really belong in LocalClient. Move it to its
// own library.
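//
// Example (an illustrative sketch; `aot_options` stands in for a
// backend-specific AotCompilationOptions subclass, and the shapes are
// assumptions):
//
//   LocalClient::AheadOfTimeComputationInstance instance;
//   instance.computation = &computation;
//   instance.argument_layouts = {&arg_shape};
//   instance.result_layout = &result_shape;
//   auto aot_results = client->CompileAheadOfTime({instance}, aot_options);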
StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
CompileAheadOfTime(
const tensorflow::gtl::ArraySlice<AheadOfTimeComputationInstance>
computations,
const AotCompilationOptions& options);
// Returns the size of a pointer in bytes for a given LLVM target triple.
static int64 PointerSizeForTriple(tensorflow::StringPiece triple);
// Returns the platform that the underlying service targets.
perftools::gputools::Platform* platform() const;
// Returns the number of devices on the system of the service platform
// type. Not all devices may be supported by the service (see
// device_ordinal_supported method).
int device_count() const;
// Returns the default device ordinal that the service will run computations
// on if no device ordinal is specified in execute options.
int default_device_ordinal() const;
// Returns whether the device with the given ordinal can be used by the
// service to execute computations. Not all devices of a particular platform
// may be usable by the service (e.g., a GPU with insufficient CUDA compute
// capability).
bool device_ordinal_supported(int device_ordinal) const;
// Returns the backend used to execute computations.
const Backend& backend() const;
Backend* mutable_backend();

private:
LocalService* local_service_;
};

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LOCAL_CLIENT_H_