STT-tensorflow/tensorflow/compiler/xla/executable_run_options.h
Davide Libenzi 70e9708e23 Enable XRT cache to be shared among multiple GPU devices.
Allow XRT GPU work with multi-threaded based replication, where a single process see all the available devices.

PiperOrigin-RevId: 310376508
Change-Id: I25715feaf74ceca421ba8939405f58a0bf68ee59
2020-05-07 09:34:58 -07:00

174 lines
6.5 KiB
C++

/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_
#define TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_
#include <functional>
#include <string>
#include <utility>

#include "tensorflow/compiler/xla/types.h"
// Only forward declarations are provided for the types below. That lets
// ExecutableRunOptions be linked into an XLA-compiled binary without also
// linking the objects it merely points to (for example, an ahead-of-time
// compiled CPU binary has no need for the gpu tools).
namespace Eigen {
struct ThreadPoolDevice;
}  // namespace Eigen

namespace stream_executor {
class DeviceMemoryAllocator;
class Platform;
class Stream;
}  // namespace stream_executor
namespace xla {
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not needed here.
class DeviceAssignment;
class ExecutionProfile;
class GpuExecutableRunOptions;
// Identifies one particular "logical execution" of an XLA model.
//
// A single logical execution may span several executions of one or more
// HloModules. Runs belonging to the same logical execution are able to talk to
// each other through collective ops (e.g. kAllToAll); runs belonging to
// different logical executions are isolated from one another.
class RunId {
 public:
  // Creates a new, unique RunId.
  RunId();

  // Wraps the caller-supplied `value` as the identifier.
  explicit RunId(int64 value) : data_(value) {}

  // Copy construction and copy assignment use the compiler defaults.
  RunId(const RunId&) = default;
  RunId& operator=(const RunId&) = default;

  // Conversions of the identifier to std::string / int64; both are implemented
  // outside this header.
  std::string ToString() const;
  int64 ToInt() const;

  // Equality comparison; implemented outside this header.
  friend bool operator==(const RunId& lhs, const RunId& rhs);

  // Hashing hook: combines the stored identifier into the given hash state.
  template <typename H>
  friend H AbslHashValue(H hash_state, const RunId& run_id) {
    return H::combine(std::move(hash_state), run_id.data_);
  }

 private:
  int64 data_;
};
// Callback used by the GPU backend only. This is a "one-sided" version of
// ThenDoHostCallback that enqueues a callback onto a stream. The difference
// with ThenDoHostCallback is that the device does not block waiting for the
// callback to complete; instead the callback is scheduled by the runtime.
// This functionality must be provided by the caller, and hence is provided in
// callback form.
//
// The callable takes a stream pointer and a nullary std::function, and returns
// nothing.
using ThenExecuteFunction =
std::function<void(stream_executor::Stream*, std::function<void()>)>;
// Bundle of options used when running a LocalExecutable.
//
// Every setter returns a reference to this object so that calls can be
// chained. All pointer-typed options start out as nullptr.
class ExecutableRunOptions {
 public:
  // Allocator to use during execution.
  stream_executor::DeviceMemoryAllocator* allocator() const;
  ExecutableRunOptions& set_allocator(
      stream_executor::DeviceMemoryAllocator* allocator);

  // Device on which to run the computation, if set. Valid device_ordinal
  // values range from 0 to (number of devices - 1) and are identical to the
  // device ordinals used by StreamExecutor. The device has to be of the type
  // the executable was compiled for. -1 means the option has not been set.
  int device_ordinal() const;
  ExecutableRunOptions& set_device_ordinal(int device_ordinal);

  // Stream on which to run the computation, if set. The stream's platform has
  // to match the platform the executable was built for. nullptr means the
  // option has not been set.
  stream_executor::Stream* stream() const;
  ExecutableRunOptions& set_stream(stream_executor::Stream* stream);

  // Stream on which to perform any pre-computation transfers, if set. The
  // stream's platform has to match the platform the executable was built for.
  // nullptr means the option has not been set.
  stream_executor::Stream* host_to_device_stream() const;
  ExecutableRunOptions& set_host_to_device_stream(
      stream_executor::Stream* stream);

  // Thread pool device on which Eigen subcomputations run.
  //
  // XLA:CPU models that call Eigen routines require this field to be set; for
  // everything else it may be null. Any routine using the field should always
  // CHECK (or TF_RET_CHECK) that it is non-null before dereferencing it, so
  // that users see a clean crash instead of a segfault.
  //
  // Ownership is not taken.
  const Eigen::ThreadPoolDevice* intra_op_thread_pool() const;
  ExecutableRunOptions& set_intra_op_thread_pool(
      const Eigen::ThreadPoolDevice* intra_op_thread_pool);

  // When set, profiling information is written to 'profile'.
  ExecutionProfile* execution_profile() const;
  ExecutableRunOptions& set_execution_profile(ExecutionProfile* profile);

  // Device assignment for the run, held as a pointer to const.
  // NOTE(review): ownership/lifetime expectations are not stated in this
  // header -- confirm against the implementation.
  const DeviceAssignment* device_assignment() const;
  ExecutableRunOptions& set_device_assignment(
      const DeviceAssignment* device_assignment);

  // RNG seed option; the stored value is initialized to 0.
  int rng_seed() const;
  ExecutableRunOptions& set_rng_seed(int rng_seed);

  // Launch id option; the stored value is initialized to 0.
  int32 launch_id() const { return launch_id_; }
  ExecutableRunOptions& set_launch_id(int32 launch_id) {
    launch_id_ = launch_id;
    return *this;
  }

  // RunId associated with this set of options (see RunId).
  RunId run_id() const;
  ExecutableRunOptions& set_run_id(RunId id);

  // See documentation on ThenExecuteFunction. Only the pointer is stored.
  ThenExecuteFunction* then_execute_function() const {
    return then_execute_function_;
  }
  ExecutableRunOptions& set_then_execute_function(ThenExecuteFunction* f) {
    then_execute_function_ = f;
    return *this;
  }

  // Options specific to the GPU backend. They live out-of-line so that this
  // dependency does not grow for CPU-only AOT builds.
  const GpuExecutableRunOptions* gpu_executable_run_options() const;
  ExecutableRunOptions& set_gpu_executable_run_options(
      const GpuExecutableRunOptions* gpu_executable_run_options);

 private:
  stream_executor::DeviceMemoryAllocator* allocator_ = nullptr;
  int device_ordinal_ = -1;  // -1: not set.
  const DeviceAssignment* device_assignment_ = nullptr;
  stream_executor::Stream* stream_ = nullptr;
  const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
  ExecutionProfile* execution_profile_ = nullptr;
  int rng_seed_ = 0;
  int32 launch_id_ = 0;
  stream_executor::Stream* host_to_device_stream_ = nullptr;
  ThenExecuteFunction* then_execute_function_ = nullptr;
  RunId run_id_;
  const GpuExecutableRunOptions* gpu_executable_run_options_ = nullptr;
};
} // namespace xla
#endif // TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_