Prefixing TensorFlow thread annotation macros with TF_.

PiperOrigin-RevId: 299110761
Change-Id: I66ecaa9d01dc441f091888bef3f24d220e9180c5
parent 5536be153f
commit 83d65b152b
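The change itself is mechanical: the Clang thread-safety annotation macros (defined in tensorflow/core/platform/thread_annotations.h) gain a TF_ prefix — GUARDED_BY becomes TF_GUARDED_BY, EXCLUSIVE_LOCKS_REQUIRED becomes TF_EXCLUSIVE_LOCKS_REQUIRED, LOCKS_EXCLUDED becomes TF_LOCKS_EXCLUDED, and so on — and every call site in the diff below is updated accordingly. A minimal sketch of the new spellings follows; the Counter class is hypothetical and only the macro usage mirrors this commit:

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

namespace tensorflow {

// Hypothetical example class, shown only to illustrate the renamed macros.
class Counter {
 public:
  // Caller must already hold mu_ (previously EXCLUSIVE_LOCKS_REQUIRED(mu_)).
  void IncrementLocked() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { ++value_; }

  // Acquires mu_ itself, so callers must not hold it
  // (previously LOCKS_EXCLUDED(mu_)).
  int Get() TF_LOCKS_EXCLUDED(mu_) {
    mutex_lock l(mu_);
    return value_;
  }

 private:
  mutex mu_;
  int value_ TF_GUARDED_BY(mu_) = 0;  // previously GUARDED_BY(mu_)
};

}  // namespace tensorflow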
@@ -774,7 +774,7 @@ extern "C" {
 static TF_OperationDescription* TF_NewOperationLocked(TF_Graph* graph,
                                                        const char* op_type,
                                                        const char* oper_name)
-    EXCLUSIVE_LOCKS_REQUIRED(graph->mu) {
+    TF_EXCLUSIVE_LOCKS_REQUIRED(graph->mu) {
   return new TF_OperationDescription(graph, op_type, oper_name);
 }
 
@@ -1032,7 +1032,7 @@ void TF_SetAttrValueProto(TF_OperationDescription* desc, const char* attr_name,
 
 static TF_Operation* TF_FinishOperationLocked(TF_OperationDescription* desc,
                                               TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(desc->graph->mu) {
+    TF_EXCLUSIVE_LOCKS_REQUIRED(desc->graph->mu) {
   Node* ret = nullptr;
 
   if (desc->graph->name_map.count(desc->node_builder.node_name())) {
@@ -1706,7 +1706,7 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def,
                                       const TF_ImportGraphDefOptions* opts,
                                       TF_ImportGraphDefResults* tf_results,
                                       TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(graph->mu) {
+    TF_EXCLUSIVE_LOCKS_REQUIRED(graph->mu) {
   const int last_node_id = graph->graph.num_node_ids();
   tensorflow::ImportGraphDefResults results;
   status->status = tensorflow::ImportGraphDef(opts->opts, def, &graph->graph,
@@ -51,7 +51,7 @@ Status ProcessInputs(
     const TF_Graph* fn_body, const char* fn_name, int ninputs,
     const TF_Output* inputs, std::vector<OutputTensor>* input_tensors,
     std::unordered_map<const Node*, std::vector<int>>* input_nodes)
-    EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
+    TF_EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
   input_tensors->reserve(ninputs);
   for (int i = 0; i < ninputs; ++i) {
     Node* node = &inputs[i].oper->node;
@@ -87,7 +87,7 @@ Status ProcessInputs(
 Status ProcessOutputs(const TF_Graph* fn_body, const char* fn_name,
                       int noutputs, const TF_Output* outputs,
                       std::vector<OutputTensor>* output_tensors)
-    EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
+    TF_EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
   output_tensors->reserve(noutputs);
   for (int i = 0; i < noutputs; ++i) {
     Node* node = &outputs[i].oper->node;
@@ -111,7 +111,7 @@ Status ComputeBodyNodes(
     const TF_Operation* const* opers,
     const std::unordered_map<const Node*, std::vector<int>>& input_nodes,
     std::vector<const Node*>* body_nodes)
-    EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
+    TF_EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
   if (num_opers == -1) {
     for (const Node* node : fn_body->graph.op_nodes()) {
       const auto& iter = input_nodes.find(node);
@@ -71,14 +71,14 @@ struct TF_Graph {
   TF_Graph();
 
   tensorflow::mutex mu;
-  tensorflow::Graph graph GUARDED_BY(mu);
+  tensorflow::Graph graph TF_GUARDED_BY(mu);
 
   // Runs shape inference.
-  tensorflow::ShapeRefiner refiner GUARDED_BY(mu);
+  tensorflow::ShapeRefiner refiner TF_GUARDED_BY(mu);
 
   // Maps from name of an operation to the Node* in 'graph'.
   std::unordered_map<tensorflow::string, tensorflow::Node*> name_map
-      GUARDED_BY(mu);
+      TF_GUARDED_BY(mu);
 
   // The keys of this map are all the active sessions using this graph. Each
   // value records whether the graph has been mutated since the corresponding
@@ -94,8 +94,8 @@ struct TF_Graph {
   // TODO(b/74949947): mutations currently trigger a warning instead of a bad
   // status, this should be reverted when possible.
   tensorflow::gtl::FlatMap<TF_Session*, tensorflow::string> sessions
-      GUARDED_BY(mu);
-  bool delete_requested GUARDED_BY(mu);  // set true by TF_DeleteGraph
+      TF_GUARDED_BY(mu);
+  bool delete_requested TF_GUARDED_BY(mu);  // set true by TF_DeleteGraph
 
   // Used to link graphs contained in TF_WhileParams to the parent graph that
   // will eventually contain the full while loop.
@@ -123,7 +123,7 @@ struct TF_Session {
   tensorflow::Session* session;
   TF_Graph* const graph;
 
-  tensorflow::mutex mu ACQUIRED_AFTER(TF_Graph::mu);
+  tensorflow::mutex mu TF_ACQUIRED_AFTER(TF_Graph::mu);
   int last_num_graph_nodes;
 
   // If true, TF_SessionRun and similar methods will call
@@ -169,9 +169,9 @@ struct TF_ApiDefMap {
   }
 
 #if !defined(IS_MOBILE_PLATFORM) && !defined(IS_SLIM_BUILD)
-  tensorflow::ApiDefMap api_def_map GUARDED_BY(lock);
+  tensorflow::ApiDefMap api_def_map TF_GUARDED_BY(lock);
 #endif  // !defined(IS_MOBILE_PLATFORM) && !defined(IS_SLIM_BUILD)
-  bool update_docs_called GUARDED_BY(lock);
+  bool update_docs_called TF_GUARDED_BY(lock);
   tensorflow::mutex lock;
 };
 
@@ -210,10 +210,10 @@ void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output,
 
 void RecordMutation(TF_Graph* graph, const TF_Operation& op,
                     const char* mutation_type)
-    EXCLUSIVE_LOCKS_REQUIRED(graph->mu);
+    TF_EXCLUSIVE_LOCKS_REQUIRED(graph->mu);
 
 bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status)
-    LOCKS_EXCLUDED(session->graph->mu, session->mu);
+    TF_LOCKS_EXCLUDED(session->graph->mu, session->mu);
 
 std::string getTF_OutputDebugString(TF_Output node);
 
@@ -41,7 +41,7 @@ class ClientSession::Impl {
   std::shared_ptr<Graph> graph_;
 
   mutable mutex mu_;
-  mutable int last_num_graph_nodes_ GUARDED_BY(mu_) = 0;
+  mutable int last_num_graph_nodes_ TF_GUARDED_BY(mu_) = 0;
 };
 
 ClientSession::ClientSession(const Scope& scope, const string& target)
@@ -114,14 +114,14 @@ class Coordinator {
   condition_variable wait_for_stop_;
 
   mutex mu_;
-  bool should_stop_ GUARDED_BY(mu_);
+  bool should_stop_ TF_GUARDED_BY(mu_);
 
   mutex status_lock_;
-  Status status_ GUARDED_BY(status_lock_);
+  Status status_ TF_GUARDED_BY(status_lock_);
 
   mutable mutex runners_lock_;
   std::vector<std::unique_ptr<RunnerInterface>> runners_
-      GUARDED_BY(runners_lock_);
+      TF_GUARDED_BY(runners_lock_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(Coordinator);
 };
@@ -119,8 +119,8 @@ class QueueRunner : public RunnerInterface {
   std::unique_ptr<thread::ThreadPool> thread_pool_;
   mutex mu_;
   int runs_ = 0;
-  Status status_ GUARDED_BY(mu_);
-  Status enqueue_status_ GUARDED_BY(mu_);
+  Status status_ TF_GUARDED_BY(mu_);
+  Status enqueue_status_ TF_GUARDED_BY(mu_);
   std::unique_ptr<BlockingCounter> counter_;
 
   Coordinator* coord_;
@@ -131,7 +131,7 @@ class QueueRunner : public RunnerInterface {
   std::vector<std::function<void(Status)>> callbacks_;
 
   mutable std::unique_ptr<mutex> cg_mu_;
-  std::unique_ptr<CostGraphDef> cost_graph_ GUARDED_BY(cg_mu_);
+  std::unique_ptr<CostGraphDef> cost_graph_ TF_GUARDED_BY(cg_mu_);
   RunOptions run_options_;
 };
 
@@ -172,8 +172,9 @@ class XlaExecutableClosureStore {
 
  private:
   mutex mutex_;
-  int64 key_counter_ GUARDED_BY(mutex_);
-  absl::flat_hash_map<KeyT, XlaExecutableClosure> closures_ GUARDED_BY(mutex_);
+  int64 key_counter_ TF_GUARDED_BY(mutex_);
+  absl::flat_hash_map<KeyT, XlaExecutableClosure> closures_
+      TF_GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosureStore);
 };
@@ -165,7 +165,8 @@ class XlaCompileOp : public OpKernel {
   // error when compiling the cluster this _XlaCompile is supposed to compile.
   // If `cannot_compile_cluster_` is true then we avoid compiling this cluster
   // on any future calls to _XlaCompile.
-  bool cannot_compile_cluster_ GUARDED_BY(cannot_compile_cluster_mu_) = false;
+  bool cannot_compile_cluster_ TF_GUARDED_BY(cannot_compile_cluster_mu_) =
+      false;
 
   mutex cannot_compile_cluster_mu_;
 };
@@ -18,13 +18,15 @@ limitations under the License.
 #include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/jit/xla_activity.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/thread_annotations.h"
 
 namespace tensorflow {
 namespace {
 // The list of all registered `XlaActivityListener`s.
 struct XlaActivityListenerList {
   absl::Mutex mutex;
-  std::vector<std::unique_ptr<XlaActivityListener>> listeners GUARDED_BY(mutex);
+  std::vector<std::unique_ptr<XlaActivityListener>> listeners
+      TF_GUARDED_BY(mutex);
 };
 
 void FlushAllListeners();
@@ -151,19 +151,19 @@ class XlaCompilationCache : public ResourceBase {
     int64 request_count = 0;
 
     // Did compilation succeed?
-    Status compilation_status GUARDED_BY(mu);
+    Status compilation_status TF_GUARDED_BY(mu);
 
     // Output of the XlaCompiler.
-    XlaCompiler::CompilationResult compilation_result GUARDED_BY(mu);
+    XlaCompiler::CompilationResult compilation_result TF_GUARDED_BY(mu);
 
     // The XLA executable compiled from <computation>. May be null if no
     // executable has been built.
-    std::unique_ptr<xla::LocalExecutable> executable GUARDED_BY(mu);
+    std::unique_ptr<xla::LocalExecutable> executable TF_GUARDED_BY(mu);
   };
 
   mutex compile_cache_mu_;
   absl::flat_hash_map<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
-      GUARDED_BY(compile_cache_mu_);
+      TF_GUARDED_BY(compile_cache_mu_);
 
   struct ClusterCompileStats {
     // Number of times the cluster has been (re-)compiled.
@@ -185,7 +185,7 @@ class XlaCompilationCache : public ResourceBase {
 
   // Maps cluster names to compilation statistics for said cluster.
   absl::flat_hash_map<string, ClusterCompileStats> cluster_compile_stats_
-      GUARDED_BY(cluster_compile_stats_mu_);
+      TF_GUARDED_BY(cluster_compile_stats_mu_);
 
   // The number of times a lazy compilation must be requested for a specific
   // signature before we attempt to compile it.
@@ -83,7 +83,7 @@ class XlaDeviceAllocatorState {
   std::unordered_map<std::pair<const xla::Backend*, int>,
                      std::unique_ptr<XlaDeviceAllocator>,
                      hash<std::pair<const xla::Backend*, int>>>
-      allocators_ GUARDED_BY(allocator_mutex_);
+      allocators_ TF_GUARDED_BY(allocator_mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaDeviceAllocatorState);
 };
@ -137,7 +137,7 @@ class XlaDevice : public LocalDevice {
|
||||
~XlaDevice() override;
|
||||
|
||||
Allocator* GetAllocator(AllocatorAttributes attr) override
|
||||
LOCKS_EXCLUDED(mu_);
|
||||
TF_LOCKS_EXCLUDED(mu_);
|
||||
void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
|
||||
void ComputeAsync(AsyncOpKernel* op_kernel, OpKernelContext* context,
|
||||
AsyncOpKernel::DoneCallback done) override;
|
||||
@ -145,18 +145,18 @@ class XlaDevice : public LocalDevice {
|
||||
void Sync(const DoneCallback& done) override;
|
||||
|
||||
Status TryGetDeviceContext(DeviceContext** out_context) override
|
||||
LOCKS_EXCLUDED(mu_);
|
||||
TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
Status MakeTensorFromProto(const TensorProto& tensor_proto,
|
||||
const AllocatorAttributes alloc_attrs,
|
||||
Tensor* tensor) override LOCKS_EXCLUDED(mu_);
|
||||
Tensor* tensor) override TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
// Allocate tensor on fast memory space. This is only applied to the new TPU
|
||||
// hardware which has faster read/write memory. If the hardware doesn't
|
||||
// have such memory space, we fallback to the ordinary memory space.
|
||||
Status MakeFastMemTensorFromProto(const TensorProto& tensor_proto,
|
||||
const AllocatorAttributes alloc_attrs,
|
||||
Tensor* tensor) LOCKS_EXCLUDED(mu_);
|
||||
Tensor* tensor) TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
const Metadata& metadata() { return xla_metadata_; }
|
||||
|
||||
@ -166,34 +166,35 @@ class XlaDevice : public LocalDevice {
|
||||
//
|
||||
// TODO(b/111859745): The Eager context needs to call this method to recover
|
||||
// from failures.
|
||||
Status EnsureDeviceContextOk() LOCKS_EXCLUDED(mu_);
|
||||
Status EnsureDeviceContextOk() TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
// Instructs this XlaDevice to set a GpuDeviceInfo, which holds extra
|
||||
// information for GPU and TPU devices.
|
||||
Status UseGpuDeviceInfo() LOCKS_EXCLUDED(mu_);
|
||||
Status UseGpuDeviceInfo() TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
// Instructs this XlaDevice to return 'sync_on_completion' for
|
||||
// AllowsSyncOnCompletion().
|
||||
void SetAllowsSyncOnCompletion(bool sync_on_completion) LOCKS_EXCLUDED(mu_);
|
||||
bool AllowsSyncOnCompletion() const override LOCKS_EXCLUDED(mu_);
|
||||
void SetAllowsSyncOnCompletion(bool sync_on_completion)
|
||||
TF_LOCKS_EXCLUDED(mu_);
|
||||
bool AllowsSyncOnCompletion() const override TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
// Installs an error handling callback when RefreshStatus sees !status.ok().
|
||||
void SetHandleDeviceErrorCallback(std::function<Status()> callback);
|
||||
|
||||
Status RefreshStatus() override LOCKS_EXCLUDED(mu_);
|
||||
Status RefreshStatus() override TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
private:
|
||||
xla::StatusOr<xla::LocalClient*> GetOrCreateClient() const;
|
||||
Allocator* GetAllocatorLocked(AllocatorAttributes attr)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
Status EnsureStreamOkLocked(xla::Backend* backend, const string& name,
|
||||
std::shared_ptr<se::Stream>* stream,
|
||||
bool* stream_was_changed)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// Return a pair of device context, the second one is fast_mem device context.
|
||||
xla::StatusOr<std::pair<XlaDeviceContext*, XlaDeviceContext*>>
|
||||
GetDeviceContextLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
GetDeviceContextLocked() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
static Status GetMetadataFromDevice(DeviceBase* device,
|
||||
const XlaDevice::Metadata** metadata);
|
||||
@ -218,13 +219,13 @@ class XlaDevice : public LocalDevice {
|
||||
// Intra-op threads to spawn (from SessionOptions).
|
||||
const int intra_op_parallelism_threads_;
|
||||
// Memory allocator associated with this device.
|
||||
Allocator* xla_allocator_ GUARDED_BY(mu_) = nullptr; // Not owned.
|
||||
Allocator* xla_allocator_ TF_GUARDED_BY(mu_) = nullptr; // Not owned.
|
||||
|
||||
// Stream associated with this device. Operations enqueued on this
|
||||
// stream are executed on the device. Operations include data
|
||||
// copying back and forth between CPU and the device, and
|
||||
// computations enqueued by XLA.
|
||||
std::shared_ptr<se::Stream> stream_ GUARDED_BY(mu_);
|
||||
std::shared_ptr<se::Stream> stream_ TF_GUARDED_BY(mu_);
|
||||
// If false, only stream_ is valid and all computation and transfers use
|
||||
// stream_. If true, computation is performed by stream_ and transfers are
|
||||
// performed by host_to_device/device_to_device stream or borrowing a stream
|
||||
@ -232,36 +233,36 @@ class XlaDevice : public LocalDevice {
|
||||
const bool use_multiple_streams_;
|
||||
// If use_multiple_streams_, host to device transfers are performed using this
|
||||
// stream.
|
||||
std::shared_ptr<se::Stream> host_to_device_stream_ GUARDED_BY(mu_);
|
||||
std::shared_ptr<se::Stream> host_to_device_stream_ TF_GUARDED_BY(mu_);
|
||||
// If use_multiple_streams_, transfers between different devices are performed
|
||||
// using these streams.
|
||||
std::vector<std::shared_ptr<se::Stream>> device_to_device_streams_
|
||||
GUARDED_BY(mu_);
|
||||
TF_GUARDED_BY(mu_);
|
||||
|
||||
const XlaCompiler::ShapeRepresentationFn shape_representation_fn_;
|
||||
|
||||
// The device context accessed by all users of the XlaDevice, set by calls to
|
||||
// EnsureDeviceContextOk. If gpu_device_info_ is non-null, this pointer is
|
||||
// also filled in to that struct. XlaDeviceContext is a ref-counted object.
|
||||
XlaDeviceContext* device_context_ GUARDED_BY(mu_) = nullptr;
|
||||
XlaDeviceContext* device_context_ TF_GUARDED_BY(mu_) = nullptr;
|
||||
|
||||
// The device context will allocate memory on fast memory space on TPU.
|
||||
// XlaDeviceContext is a ref-counted object.
|
||||
XlaDeviceContext* fast_mem_device_context_ GUARDED_BY(mu_) = nullptr;
|
||||
XlaDeviceContext* fast_mem_device_context_ TF_GUARDED_BY(mu_) = nullptr;
|
||||
|
||||
// Holds extra information for GPU and TPU devices, e.g. the device context.
|
||||
bool use_gpu_device_info_ GUARDED_BY(mu_) = false;
|
||||
std::unique_ptr<GpuDeviceInfo> gpu_device_info_ GUARDED_BY(mu_);
|
||||
bool use_gpu_device_info_ TF_GUARDED_BY(mu_) = false;
|
||||
std::unique_ptr<GpuDeviceInfo> gpu_device_info_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// Thread pool used for running closures
|
||||
std::unique_ptr<thread::ThreadPool> thread_pool_;
|
||||
|
||||
// True if the device allows XlaDevice::Sync to be called on completion
|
||||
// regardless of status.
|
||||
bool sync_on_completion_ GUARDED_BY(mu_) = true;
|
||||
bool sync_on_completion_ TF_GUARDED_BY(mu_) = true;
|
||||
|
||||
// A callback that will be invoked when RefreshStatus sees a status error.
|
||||
std::function<Status()> device_error_callback_ GUARDED_BY(mu_);
|
||||
std::function<Status()> device_error_callback_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// Set of devices to use. This controls which of the devices on the given
|
||||
// platform will have resources allocated. For GPUs this will be
|
||||
|
@ -117,7 +117,7 @@ class XlaDeviceContext : public DeviceContext {
|
||||
bool use_fast_mem_;
|
||||
|
||||
absl::Mutex mu_;
|
||||
int next_stream_ GUARDED_BY(mu_) = 0;
|
||||
int next_stream_ TF_GUARDED_BY(mu_) = 0;
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -18,7 +18,6 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_
|
||||
#define TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_
|
||||
|
||||
#include "absl/base/thread_annotations.h"
|
||||
#include "tensorflow/compiler/jit/xla_compilation_cache.h"
|
||||
#include "tensorflow/compiler/jit/xla_tensor.h"
|
||||
#include "tensorflow/compiler/tf2xla/xla_compiler.h"
|
||||
@ -30,6 +29,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/lib/gtl/array_slice.h"
|
||||
#include "tensorflow/core/platform/thread_annotations.h"
|
||||
#include "tensorflow/stream_executor/device_memory_allocator.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -102,7 +102,7 @@ class VariableInfo {
|
||||
// `variables` is allowed to contain instances that don't track a resource
|
||||
// variable (i.e. variables[i].var() can be null for some i).
|
||||
Status LockVariables(absl::Span<VariableInfo> variables)
|
||||
EXCLUSIVE_LOCK_FUNCTION();
|
||||
TF_EXCLUSIVE_LOCK_FUNCTION();
|
||||
|
||||
// Helper class to perform the marshalling of TensorFlow inputs and outputs to
|
||||
// ShapedBuffers suitable for passing to an XLA computation.
|
||||
|
@ -122,7 +122,7 @@ class XlaTensor {
|
||||
std::shared_ptr<se::Event> definition_event_;
|
||||
// A list of all streams for which the tensor's content is defined for any
|
||||
// newly enqueued command.
|
||||
absl::InlinedVector<se::Stream*, 2> streams_defined_on_ GUARDED_BY(mu_);
|
||||
absl::InlinedVector<se::Stream*, 2> streams_defined_on_ TF_GUARDED_BY(mu_);
|
||||
mutex mu_;
|
||||
};
|
||||
|
||||
|
@ -47,7 +47,7 @@ class LoggerRegistryImpl : public LoggerRegistry {
|
||||
private:
|
||||
mutable mutex mu_;
|
||||
mutable std::unordered_map<string, std::unique_ptr<nvinfer1::ILogger>>
|
||||
registry_ GUARDED_BY(mu_);
|
||||
registry_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
LoggerRegistry* GetLoggerRegistry() {
|
||||
|
@ -71,7 +71,7 @@ class CreateTRTResourceHandle : public OpKernel {
|
||||
string resource_name_;
|
||||
Tensor handle_;
|
||||
mutex mutex_;
|
||||
bool initialized_ GUARDED_BY(mutex_) = false;
|
||||
bool initialized_ TF_GUARDED_BY(mutex_) = false;
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(CreateTRTResourceHandle);
|
||||
};
|
||||
|
@ -136,7 +136,7 @@ struct EngineContext {
|
||||
TrtUniquePtrType<nvinfer1::ICudaEngine> cuda_engine;
|
||||
|
||||
Status GetExecutionContext(int idx, nvinfer1::IExecutionContext** exec_ctx)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu) {
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mu) {
|
||||
if (idx >= execution_context.size()) {
|
||||
return errors::Internal("Requested engine context with index ", idx,
|
||||
", but only ", execution_context.size(),
|
||||
@ -152,7 +152,7 @@ struct EngineContext {
|
||||
// for inference at a time therefore we need a mutex. More details at
|
||||
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-best-practices/index.html#thread-safety
|
||||
std::vector<TrtUniquePtrType<nvinfer1::IExecutionContext>> execution_context
|
||||
GUARDED_BY(mu);
|
||||
TF_GUARDED_BY(mu);
|
||||
};
|
||||
|
||||
// Contains the context required to build the calibration data.
|
||||
@ -174,8 +174,8 @@ class CalibrationContext {
|
||||
|
||||
private:
|
||||
mutex mu_;
|
||||
bool terminated_ GUARDED_BY(mu_) = false;
|
||||
std::string calibration_table_ GUARDED_BY(mu_);
|
||||
bool terminated_ TF_GUARDED_BY(mu_) = false;
|
||||
std::string calibration_table_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
ABSL_CONST_INIT extern const absl::string_view kTfTrtContainerName;
|
||||
|
@ -229,11 +229,11 @@ class XlaOpRegistry {
|
||||
};
|
||||
|
||||
// Map from compilation device names to a description of the backend.
|
||||
std::unordered_map<string, Backend> backends_ GUARDED_BY(mutex_);
|
||||
std::unordered_map<string, Backend> backends_ TF_GUARDED_BY(mutex_);
|
||||
|
||||
// Map from Tensorflow device names to the corresponding JIT device metadata.
|
||||
std::unordered_map<string, DeviceRegistration> compilation_devices_
|
||||
GUARDED_BY(mutex_);
|
||||
TF_GUARDED_BY(mutex_);
|
||||
|
||||
// A description of a Tensorflow operator that can be compiled to XLA.
|
||||
struct OpRegistration {
|
||||
@ -292,7 +292,7 @@ class XlaOpRegistry {
|
||||
// Registrations present under the same key must satisfy IsCompatible above,
|
||||
// and this is checked during registration.
|
||||
std::unordered_map<string, std::vector<std::unique_ptr<OpRegistration>>> ops_
|
||||
GUARDED_BY(mutex_);
|
||||
TF_GUARDED_BY(mutex_);
|
||||
|
||||
// Have we already registered the JIT kernels on the JIT devices?
|
||||
bool jit_kernels_registered_ = false;
|
||||
@ -301,7 +301,7 @@ class XlaOpRegistry {
|
||||
// registrations created by RegisterCompilationKernels() and
|
||||
// RegisterDeviceKernels().
|
||||
std::vector<std::unique_ptr<kernel_factory::OpKernelRegistrar>>
|
||||
kernel_registrars_ GUARDED_BY(mutex_);
|
||||
kernel_registrars_ TF_GUARDED_BY(mutex_);
|
||||
};
|
||||
|
||||
// REGISTER_XLA_OP() registers an XLA OpKernel by name, for example:
|
||||
|
@ -134,10 +134,10 @@ class ClientLibrary {
|
||||
|
||||
tensorflow::mutex service_mutex_; // Guards the singleton creation state.
|
||||
std::unordered_map<se::Platform::Id, std::unique_ptr<LocalInstance>>
|
||||
local_instances_ GUARDED_BY(service_mutex_);
|
||||
local_instances_ TF_GUARDED_BY(service_mutex_);
|
||||
|
||||
std::unordered_map<se::Platform::Id, std::unique_ptr<CompileOnlyInstance>>
|
||||
compile_only_instances_ GUARDED_BY(service_mutex_);
|
||||
compile_only_instances_ TF_GUARDED_BY(service_mutex_);
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(ClientLibrary);
|
||||
};
|
||||
|
@ -68,7 +68,7 @@ class EventPool {
|
||||
const bool allow_reuse_;
|
||||
|
||||
absl::Mutex mu_;
|
||||
std::stack<std::unique_ptr<se::Event>> free_events_ GUARDED_BY(mu_);
|
||||
std::stack<std::unique_ptr<se::Event>> free_events_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace xla
|
||||
|
@ -237,7 +237,7 @@ class PyLocalBuffer {
|
||||
const Shape on_device_shape_;
|
||||
const std::shared_ptr<Device> device_;
|
||||
mutable absl::Mutex mu_;
|
||||
std::shared_ptr<SharedDeviceBuffer> device_buffer_ GUARDED_BY(mu_);
|
||||
std::shared_ptr<SharedDeviceBuffer> device_buffer_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// The cached value of the buffer on the host, produced either from a call to
|
||||
// CopyToHost or from a call to ToLiteral. Once a value has been fetched to
|
||||
@ -249,7 +249,7 @@ class PyLocalBuffer {
|
||||
Status status;
|
||||
std::shared_ptr<Literal> value;
|
||||
};
|
||||
std::shared_ptr<HostValue> host_value_ GUARDED_BY(mu_);
|
||||
std::shared_ptr<HostValue> host_value_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
// Represents a compiled computation that can be executed given handles to
|
||||
|
@ -129,12 +129,12 @@ class LocalDeviceState {
|
||||
static constexpr int kNumDeviceToDeviceStreams = 4;
|
||||
|
||||
absl::Mutex mu_;
|
||||
int next_device_to_host_stream_ GUARDED_BY(mu_) = 0;
|
||||
int next_device_to_device_stream_ GUARDED_BY(mu_) = 0;
|
||||
int next_device_to_host_stream_ TF_GUARDED_BY(mu_) = 0;
|
||||
int next_device_to_device_stream_ TF_GUARDED_BY(mu_) = 0;
|
||||
|
||||
std::random_device prng_seed_device_ GUARDED_BY(mu_);
|
||||
std::mt19937 prng_seed_generator_ GUARDED_BY(mu_);
|
||||
std::uniform_int_distribution<> prng_seed_distribution_ GUARDED_BY(mu_);
|
||||
std::random_device prng_seed_device_ TF_GUARDED_BY(mu_);
|
||||
std::mt19937 prng_seed_generator_ TF_GUARDED_BY(mu_);
|
||||
std::uniform_int_distribution<> prng_seed_distribution_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// Callback stream is used for running short host-side callbacks after device
|
||||
// side events, without preventing the device-side stream from doing useful
|
||||
|
@ -66,7 +66,7 @@ class BufferDefinitionEvent {
|
||||
void WaitForEventOnStream(se::Stream* stream);
|
||||
|
||||
private:
|
||||
bool EventHasBeenRecorded() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
bool EventHasBeenRecorded() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// An event that is triggered when the content of one or more buffers is
|
||||
// ready. If this event is nullptr, it is assumed that the buffer's content is
|
||||
@ -77,7 +77,7 @@ class BufferDefinitionEvent {
|
||||
|
||||
// A list of all streams for which the buffer's content is known to be defined
|
||||
// at the tail of the queue, i.e., for any newly enqueued command.
|
||||
absl::InlinedVector<se::Stream*, 2> streams_defined_on_ GUARDED_BY(mu_);
|
||||
absl::InlinedVector<se::Stream*, 2> streams_defined_on_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
// Class that represents a node in a reference-counted DAG of device buffers.
|
||||
|
@ -241,8 +241,9 @@ class PyTpuBuffer {
|
||||
// `child_buffers_` stores the child buffers; else, `device_buffer_` stores
|
||||
// the data content and `child_buffers_` is empty.
|
||||
mutable absl::Mutex mu_;
|
||||
std::shared_ptr<TpuSharedBuffer> device_buffer_ GUARDED_BY(mu_);
|
||||
std::vector<std::shared_ptr<TpuSharedBuffer>> child_buffers_ GUARDED_BY(mu_);
|
||||
std::shared_ptr<TpuSharedBuffer> device_buffer_ TF_GUARDED_BY(mu_);
|
||||
std::vector<std::shared_ptr<TpuSharedBuffer>> child_buffers_
|
||||
TF_GUARDED_BY(mu_);
|
||||
// The cached value of the buffer on the host, produced either from a call to
|
||||
// CopyToHost or from a call to ToLiteral. Once a value has been fetched to
|
||||
// the host, it persists Delete() is called or the PyTpuBuffer is destroyed.
|
||||
@ -255,7 +256,7 @@ class PyTpuBuffer {
|
||||
Status status;
|
||||
std::shared_ptr<Literal> value;
|
||||
};
|
||||
std::shared_ptr<HostValue> host_value_ GUARDED_BY(mu_);
|
||||
std::shared_ptr<HostValue> host_value_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
// Represents a compiled computation that can be executed given handles to
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/thread_annotations.h"
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/container/inlined_vector.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
|
@ -40,11 +40,11 @@ class WorkerThread {
|
||||
void Schedule(std::function<void()> fn);
|
||||
|
||||
private:
|
||||
bool WorkAvailable() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
bool WorkAvailable() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
void WorkLoop();
|
||||
|
||||
absl::Mutex mu_;
|
||||
std::queue<std::function<void()>> work_queue_ GUARDED_BY(mu_);
|
||||
std::queue<std::function<void()>> work_queue_ TF_GUARDED_BY(mu_);
|
||||
|
||||
std::unique_ptr<tensorflow::Thread> thread_;
|
||||
};
|
||||
|
@ -73,7 +73,7 @@ namespace {
|
||||
|
||||
struct Uniquer {
|
||||
absl::Mutex mu;
|
||||
NameUniquer name_uniquer GUARDED_BY(mu);
|
||||
NameUniquer name_uniquer TF_GUARDED_BY(mu);
|
||||
};
|
||||
|
||||
Uniquer* GetUniquer() {
|
||||
|
@ -87,7 +87,7 @@ class AllocationTracker {
|
||||
// Internal helper which resolves the given GlobalDataHandle to a
|
||||
// list of ScopedShapedBuffers.
|
||||
StatusOr<std::vector<const ShapedBuffer*>> ResolveInternal(
|
||||
const GlobalDataHandle& data) const EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
const GlobalDataHandle& data) const TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// Internal helper which registers a vector of shaped buffers, one per
|
||||
// replica. ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer. If
|
||||
@ -96,18 +96,19 @@ class AllocationTracker {
|
||||
template <typename ShapedBufferTy>
|
||||
StatusOr<GlobalDataHandle> RegisterInternal(
|
||||
std::vector<ShapedBufferTy> replicated_buffers, const string& tag)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// Adds the given device address to the allocation tracker, or if it already
|
||||
// exists, then increment its reference count.
|
||||
void AddAllocationOrIncrementRefCount(se::DeviceMemoryBase device_memory,
|
||||
int device_ordinal)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// Decrements the reference count of the given device memory. Then, if it is
|
||||
// zero, deallocate the memory.
|
||||
Status DecrementRefCount(se::DeviceMemoryBase device_memory,
|
||||
int device_ordinal) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
int device_ordinal)
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// A map from device memory opaque value to allocation. One such map is
|
||||
// maintained per device ordinal.
|
||||
@ -121,11 +122,11 @@ class AllocationTracker {
|
||||
|
||||
// The next handle to assign to an allocation, guarded by the same mutex as
|
||||
// the mapping as they'll be mutated at the same time.
|
||||
int64 next_handle_ GUARDED_BY(mutex_);
|
||||
int64 next_handle_ TF_GUARDED_BY(mutex_);
|
||||
|
||||
// A map from device ordinal to AllocationMap.
|
||||
absl::flat_hash_map<int, AllocationMap> opaque_to_allocation_map_
|
||||
GUARDED_BY(mutex_);
|
||||
TF_GUARDED_BY(mutex_);
|
||||
|
||||
// A map from data handle to a vector of shaped buffers that represent the
|
||||
// buffers for different replicas.
|
||||
@ -145,7 +146,7 @@ class AllocationTracker {
|
||||
// free'd when both the view *and* the original tuple are Unregistered. This
|
||||
// refcounting is managed in opaque_to_allocation_map_.
|
||||
absl::flat_hash_map<int64, std::vector<std::unique_ptr<ShapedBuffer>>>
|
||||
handle_to_shaped_buffers_ GUARDED_BY(mutex_);
|
||||
handle_to_shaped_buffers_ TF_GUARDED_BY(mutex_);
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker);
|
||||
};
|
||||
|
@ -176,7 +176,7 @@ class Backend {
|
||||
|
||||
// Mapping from stream executor to stream pools, used by `BorrowStream` above.
|
||||
absl::flat_hash_map<se::StreamExecutor*, std::unique_ptr<StreamPool>>
|
||||
stream_pools_ GUARDED_BY(mu_);
|
||||
stream_pools_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// The default memory allocator to use.
|
||||
std::unique_ptr<se::StreamExecutorMemoryAllocator> memory_allocator_;
|
||||
|
@ -68,24 +68,24 @@ class ChannelTracker {
|
||||
// Bumps the next_channel_ number and returns the allocated number
|
||||
// wrapped in a ChannelHandle.
|
||||
ChannelHandle AllocateHandle(ChannelHandle::ChannelType type)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(channel_mutex_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(channel_mutex_);
|
||||
|
||||
Status RegisterSendInternal(const ChannelHandle& handle)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(channel_mutex_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(channel_mutex_);
|
||||
|
||||
Status RegisterRecvInternal(const ChannelHandle& handle)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(channel_mutex_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(channel_mutex_);
|
||||
|
||||
// Guards the channel mapping.
|
||||
tensorflow::mutex channel_mutex_;
|
||||
|
||||
// The next sequence number to assign to a channel.
|
||||
int64 next_channel_ GUARDED_BY(channel_mutex_);
|
||||
int64 next_channel_ TF_GUARDED_BY(channel_mutex_);
|
||||
|
||||
// Mapping from ChannelHandle value to the corresponding registered
|
||||
// Channel object.
|
||||
absl::flat_hash_map<int64, Channel> opaque_to_channel_
|
||||
GUARDED_BY(channel_mutex_);
|
||||
TF_GUARDED_BY(channel_mutex_);
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(ChannelTracker);
|
||||
};
|
||||
|
@ -241,9 +241,9 @@ class Rendezvous {
|
||||
|
||||
tensorflow::mutex mu_;
|
||||
|
||||
bool initialized_ GUARDED_BY(mu_) = false;
|
||||
bool initialized_ TF_GUARDED_BY(mu_) = false;
|
||||
|
||||
std::vector<AllReduceParticipantData> participants_ GUARDED_BY(mu_);
|
||||
std::vector<AllReduceParticipantData> participants_ TF_GUARDED_BY(mu_);
|
||||
|
||||
private:
|
||||
// Runs the all-reduce on the given thread. If successful, returns
|
||||
|
@ -51,7 +51,7 @@ class CompilationCache {
|
||||
using CacheKey = int64;
|
||||
|
||||
absl::flat_hash_map<CacheKey, std::shared_ptr<Executable>> cache_
|
||||
GUARDED_BY(mutex_);
|
||||
TF_GUARDED_BY(mutex_);
|
||||
|
||||
private:
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(CompilationCache);
|
||||
|
@ -23,7 +23,7 @@ namespace orc_jit_memory_mapper {
|
||||
|
||||
static tensorflow::mutex mapper_instance_mutex(tensorflow::LINKER_INITIALIZED);
|
||||
static llvm::SectionMemoryManager::MemoryMapper* mapper_instance
|
||||
GUARDED_BY(mapper_instance_mutex) = nullptr;
|
||||
TF_GUARDED_BY(mapper_instance_mutex) = nullptr;
|
||||
|
||||
llvm::SectionMemoryManager::MemoryMapper* GetInstance() {
|
||||
tensorflow::mutex_lock lock(mapper_instance_mutex);
|
||||
|
@ -274,7 +274,7 @@ static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
|
||||
// dies. But we only add an entry if dumping is enabled for this module, and
|
||||
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
|
||||
// than this hashtable leaks memory.
|
||||
static auto& module_id_to_step_number GUARDED_BY(mu) =
|
||||
static auto& module_id_to_step_number TF_GUARDED_BY(mu) =
|
||||
*new absl::flat_hash_map<int64, int64>();
|
||||
|
||||
// Maps a module's unique ID to a timestamp indicating when we've first dumped
|
||||
@ -285,7 +285,7 @@ static auto& module_id_to_step_number GUARDED_BY(mu) =
|
||||
// dies. But we only add an entry if dumping is enabled for this module, and
|
||||
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
|
||||
// than this hashtable leaks memory.
|
||||
static auto& module_id_to_timestamp GUARDED_BY(mu) =
|
||||
static auto& module_id_to_timestamp TF_GUARDED_BY(mu) =
|
||||
*new absl::flat_hash_map<int64, uint64>();
|
||||
|
||||
int64 StepNumberForModule(const HloModule& module) {
|
||||
@ -432,7 +432,7 @@ void DumpHloSnapshotIfEnabled(const HloModule& module,
|
||||
int64 execution_count;
|
||||
uint64 timestamp;
|
||||
{
|
||||
static auto& module_id_to_execution_count GUARDED_BY(mu) =
|
||||
static auto& module_id_to_execution_count TF_GUARDED_BY(mu) =
|
||||
*new absl::flat_hash_map<int64, int64>();
|
||||
tensorflow::mutex_lock lock(mu);
|
||||
execution_count = module_id_to_execution_count[module.unique_id()]++;
|
||||
@ -469,7 +469,7 @@ void DumpHloSnapshotIfEnabled(const HloSnapshot& snapshot,
|
||||
// have to use its name.
|
||||
int64 execution_count;
|
||||
{
|
||||
static auto& module_name_to_execution_count GUARDED_BY(mu) =
|
||||
static auto& module_name_to_execution_count TF_GUARDED_BY(mu) =
|
||||
*new absl::flat_hash_map<string, int64>();
|
||||
tensorflow::mutex_lock lock(mu);
|
||||
execution_count = module_name_to_execution_count[name]++;
|
||||
|
@ -86,12 +86,12 @@ class ExecutionTracker {
|
||||
|
||||
private:
|
||||
// The next handle to assign to an execution.
|
||||
int64 next_handle_ GUARDED_BY(execution_mutex_);
|
||||
int64 next_handle_ TF_GUARDED_BY(execution_mutex_);
|
||||
|
||||
// Mapping from ExecutionHandle handle to the corresponding registered
|
||||
// AsyncExecution object.
|
||||
std::map<int64, std::unique_ptr<AsyncExecution>> handle_to_execution_
|
||||
GUARDED_BY(execution_mutex_);
|
||||
TF_GUARDED_BY(execution_mutex_);
|
||||
|
||||
tensorflow::mutex execution_mutex_; // Guards the execution mapping.
|
||||
|
||||
|
@ -16,7 +16,6 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CHOLESKY_THUNK_H_
|
||||
#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CHOLESKY_THUNK_H_
|
||||
|
||||
#include "absl/base/thread_annotations.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/compiler/xla/service/buffer_assignment.h"
|
||||
#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
|
||||
@ -29,6 +28,7 @@ limitations under the License.
|
||||
#include "tensorflow/compiler/xla/xla_data.pb.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
|
||||
#include "tensorflow/core/platform/thread_annotations.h"
|
||||
#include "tensorflow/stream_executor/blas.h"
|
||||
|
||||
namespace xla {
|
||||
@ -66,7 +66,8 @@ class CholeskyThunk : public Thunk {
|
||||
const int64 n_;
|
||||
|
||||
tensorflow::mutex mu_;
|
||||
absl::flat_hash_map<se::Stream*, CusolverContext> contexts_ GUARDED_BY(mu_);
|
||||
absl::flat_hash_map<se::Stream*, CusolverContext> contexts_
|
||||
TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
@ -127,12 +127,12 @@ class Rendezvous {
|
||||
std::make_shared<BlockingCounter>(key_.num_participants)};
|
||||
|
||||
tensorflow::mutex mu_;
|
||||
bool initialized_ GUARDED_BY(mu_) = false;
|
||||
bool initialized_ TF_GUARDED_BY(mu_) = false;
|
||||
|
||||
// We use an std::map so that we can iterate over it below in a guaranteed
|
||||
// order. The order shouldn't actually matter, but why be nondeterministic if
|
||||
// we don't have to be?
|
||||
std::map<int64, ParticipantData> participants_ GUARDED_BY(mu_);
|
||||
std::map<int64, ParticipantData> participants_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
void EnqueueCopy(se::DeviceMemoryBase src, se::Stream* src_stream,
|
||||
|
@ -45,11 +45,11 @@ using GemmCacheKey =
|
||||
std::tuple<se::StreamExecutor*, Shape, Shape, Shape, std::string>;
|
||||
|
||||
static tensorflow::mutex autotune_cache_mu(tensorflow::LINKER_INITIALIZED);
|
||||
static auto& autotune_cache GUARDED_BY(autotune_cache_mu) =
|
||||
static auto& autotune_cache TF_GUARDED_BY(autotune_cache_mu) =
|
||||
*new absl::flat_hash_map<GemmCacheKey,
|
||||
absl::optional<se::blas::AlgorithmType>>();
|
||||
static int64 cache_hits GUARDED_BY(autotune_cache_mu) = 0;
|
||||
static int64 cache_misses GUARDED_BY(autotune_cache_mu) = 0;
|
||||
static int64 cache_hits TF_GUARDED_BY(autotune_cache_mu) = 0;
|
||||
static int64 cache_misses TF_GUARDED_BY(autotune_cache_mu) = 0;
|
||||
|
||||
// Experimentally tries to pick the best algorithm for the given gemm.
|
||||
//
|
||||
|
@ -268,9 +268,9 @@ ConvCacheKey AutotuneCacheKeyfromInstruction(
|
||||
}
|
||||
|
||||
tensorflow::mutex autotune_cache_lock(tensorflow::LINKER_INITIALIZED);
|
||||
auto& autotune_cache GUARDED_BY(autotune_cache_lock) =
|
||||
auto& autotune_cache TF_GUARDED_BY(autotune_cache_lock) =
|
||||
*new absl::flat_hash_map<ConvCacheKey, AutotuneResult>();
|
||||
auto& autotune_cache_stats GUARDED_BY(autotune_cache_lock) =
|
||||
auto& autotune_cache_stats TF_GUARDED_BY(autotune_cache_lock) =
|
||||
*new ConvCacheStats();
|
||||
} // anonymous namespace
|
||||
|
||||
|
@ -126,17 +126,17 @@ class GpuDebugInfoManager {
|
||||
};
|
||||
|
||||
tensorflow::mutex mutex_;
|
||||
bool tracing_active_ GUARDED_BY(mutex_) = false;
|
||||
bool tracing_active_ TF_GUARDED_BY(mutex_) = false;
|
||||
// Modules that was running currently. Because multiple instances of the
|
||||
// modules can be running in the same time, a reference count is maintained
|
||||
// as map value.
|
||||
absl::flat_hash_map<ModuleIdentifier, int> running_module_ids_
|
||||
GUARDED_BY(mutex_);
|
||||
TF_GUARDED_BY(mutex_);
|
||||
// Active modules are those still tracked by us. There could be much more
|
||||
// active modules than running modules, we will try to reduce the trace size
|
||||
// by only transfer those modules that were running during tracing period.
|
||||
absl::flat_hash_map<ModuleIdentifier, GpuModuleEntry> active_modules_
|
||||
GUARDED_BY(mutex_);
|
||||
TF_GUARDED_BY(mutex_);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
@ -159,9 +159,9 @@ class GpuExecutable : public Executable {
|
||||
// `ResolveConstantGlobals`.
|
||||
tensorflow::mutex module_handle_mutex_;
|
||||
std::map<stream_executor::StreamExecutor*, se::ScopedModuleHandle>
|
||||
module_handles_ GUARDED_BY(module_handle_mutex_);
|
||||
module_handles_ TF_GUARDED_BY(module_handle_mutex_);
|
||||
std::map<stream_executor::StreamExecutor*, BufferAllocToDeviceMemoryMap>
|
||||
module_globals_ GUARDED_BY(module_handle_mutex_);
|
||||
module_globals_ TF_GUARDED_BY(module_handle_mutex_);
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(GpuExecutable);
|
||||
};
|
||||
|
@ -84,7 +84,7 @@ class KernelThunk : public Thunk {
|
||||
// Loaded kernels for each `StreamExecutor`. Requires pointer stability of
|
||||
// values.
|
||||
std::unordered_map<se::StreamExecutor*, std::unique_ptr<se::KernelBase>>
|
||||
kernel_cache_ GUARDED_BY(mutex_);
|
||||
kernel_cache_ TF_GUARDED_BY(mutex_);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
@ -492,7 +492,7 @@ void RendezvousNcclAllReduce::CleanupImpl(std::shared_ptr<NcclClique> handle,
|
||||
// lives, which is how we avoid expensive reinitialization of NCCL cliques.
|
||||
struct NcclAllReduceThunk::AuxData {
|
||||
tensorflow::mutex mu;
|
||||
absl::flat_hash_set<std::shared_ptr<NcclClique>> cliques GUARDED_BY(mu);
|
||||
absl::flat_hash_set<std::shared_ptr<NcclClique>> cliques TF_GUARDED_BY(mu);
|
||||
};
|
||||
|
||||
/*static*/ bool NcclAllReduceThunk::CanImplement(const HloInstruction* crs) {
|
||||
|
@ -62,8 +62,8 @@ class NVPTXCompiler : public GpuCompiler {
|
||||
// We cache the cuda_data_dir() and the result of our search, so that if the
|
||||
// next module we have to compile has the same cuda_data_dir(), we can skip
|
||||
// the search.
|
||||
string cached_cuda_data_dir_ GUARDED_BY(mutex_);
|
||||
string cached_libdevice_dir_ GUARDED_BY(mutex_);
|
||||
string cached_cuda_data_dir_ TF_GUARDED_BY(mutex_);
|
||||
string cached_libdevice_dir_ TF_GUARDED_BY(mutex_);
|
||||
|
||||
// Tries to compile the given ptx string to cubin. Returns a vector with the
|
||||
// compiled cubin. If compilation was unsuccessful, returns an empty vector.
|
||||
@ -116,7 +116,7 @@ class NVPTXCompiler : public GpuCompiler {
|
||||
// is critical here.
|
||||
absl::node_hash_map<CompilationCacheKey, CompilationCacheValue,
|
||||
CompilationCacheHash, CompilationCacheEq>
|
||||
compilation_cache_ GUARDED_BY(mutex_);
|
||||
compilation_cache_ TF_GUARDED_BY(mutex_);
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(NVPTXCompiler);
|
||||
};
|
||||
|
@ -1557,7 +1557,7 @@ string WrapDotInHtml(absl::string_view dot) {
|
||||
|
||||
tensorflow::mutex url_renderer_mu(tensorflow::LINKER_INITIALIZED);
|
||||
std::function<StatusOr<string>(absl::string_view)>* url_renderer
|
||||
GUARDED_BY(url_renderer_mu) = nullptr;
|
||||
TF_GUARDED_BY(url_renderer_mu) = nullptr;
|
||||
|
||||
// Precondition: url_renderer != nullptr.
|
||||
//
|
||||
@ -1567,7 +1567,7 @@ std::function<StatusOr<string>(absl::string_view)>* url_renderer
|
||||
// of producing dot for the graph.)
|
||||
StatusOr<string> WrapDotInFormat(absl::string_view dot,
|
||||
RenderedGraphFormat format)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(url_renderer_mu) {
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(url_renderer_mu) {
|
||||
switch (format) {
|
||||
case RenderedGraphFormat::kUrl:
|
||||
CHECK(url_renderer != nullptr)
|
||||
|
@ -52,13 +52,13 @@ class InterpreterExecutable : public Executable {
|
||||
const ServiceExecutableRunOptions* run_options,
|
||||
std::vector<ExecutionInput> arguments,
|
||||
HloExecutionProfile* hlo_execution_profile) override
|
||||
LOCKS_EXCLUDED(evaluator_lock_);
|
||||
TF_LOCKS_EXCLUDED(evaluator_lock_);
|
||||
|
||||
static int64 ShapeSizeBytes(const Shape& shape);
|
||||
|
||||
protected:
|
||||
// The interpreter interprets executables with an HloEvaluator.
|
||||
std::unique_ptr<HloEvaluator> evaluator_ PT_GUARDED_BY(evaluator_lock_);
|
||||
std::unique_ptr<HloEvaluator> evaluator_ TF_PT_GUARDED_BY(evaluator_lock_);
|
||||
mutable tensorflow::mutex evaluator_lock_;
|
||||
|
||||
private:
|
||||
|
@ -56,7 +56,7 @@ class StreamPool {
|
||||
void ReturnStream(se::Stream* stream);
|
||||
|
||||
tensorflow::mutex mu_;
|
||||
std::vector<std::unique_ptr<se::Stream>> streams_ GUARDED_BY(mu_);
|
||||
std::vector<std::unique_ptr<se::Stream>> streams_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace xla
|
||||
|
@ -66,12 +66,12 @@ class TestAllocator : public se::StreamExecutorMemoryAllocator {
|
||||
mutable tensorflow::mutex count_mutex_;
|
||||
|
||||
// Global counts of allocations and deallocations.
|
||||
int64 allocation_count_ GUARDED_BY(count_mutex_) = 0;
|
||||
int64 deallocation_count_ GUARDED_BY(count_mutex_) = 0;
|
||||
int64 allocation_count_ TF_GUARDED_BY(count_mutex_) = 0;
|
||||
int64 deallocation_count_ TF_GUARDED_BY(count_mutex_) = 0;
|
||||
|
||||
// Per-device counts of allocations and deallocations.
|
||||
std::map<int, int64> device_allocation_count_ GUARDED_BY(count_mutex_);
|
||||
std::map<int, int64> device_deallocation_count_ GUARDED_BY(count_mutex_);
|
||||
std::map<int, int64> device_allocation_count_ TF_GUARDED_BY(count_mutex_);
|
||||
std::map<int, int64> device_deallocation_count_ TF_GUARDED_BY(count_mutex_);
|
||||
};
|
||||
|
||||
// A base class for tests which exercise the LocalClient interface.
|
||||
|
@ -173,11 +173,11 @@ class XRTCompilationCache : public ResourceBase {
|
||||
// last reference to entry is released, entry is removed from cache_.
|
||||
void DiscardEntryRef(CompiledSubgraph* entry);
|
||||
void DiscardEntryRefLocked(CompiledSubgraph* entry)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// Marks the oldest unmarked entry for eviction. Requires that there is at
|
||||
// least one such entry.
|
||||
void MarkOldestEntryForEviction() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
void MarkOldestEntryForEviction() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// Updates datastructures to indicate that entry, which had been marked for
|
||||
// eviction, has been looked up. This is called by CompileIfKeyAbsent when an
|
||||
@ -195,7 +195,7 @@ class XRTCompilationCache : public ResourceBase {
|
||||
// is never marked for eviction, so an entry larger than the max cache entries
|
||||
// will remain in the cache until it is replaced by something else.
|
||||
void LookupEntryMarkedForEviction(CompiledSubgraph* entry)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// Creates a new entry by running initialize_program and places it in the
|
||||
// cache to be looked up by key. The new entry is in the 'marked for eviction'
|
||||
@ -206,7 +206,7 @@ class XRTCompilationCache : public ResourceBase {
|
||||
CompiledSubgraph* InitializeEntry(
|
||||
const string& key,
|
||||
const std::function<Status(std::unique_ptr<xla::LocalExecutable>*)>&
|
||||
initialize_program) EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
initialize_program) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// The maximum number of entries that are stored in the cache before entries
|
||||
// are marked for eviction.
|
||||
@ -214,23 +214,24 @@ class XRTCompilationCache : public ResourceBase {
|
||||
|
||||
mutable absl::Mutex mu_;
|
||||
// The total number of entries that are stored and not marked for eviction.
|
||||
int cache_entries_ GUARDED_BY(mu_) = 0;
|
||||
int cache_entries_ TF_GUARDED_BY(mu_) = 0;
|
||||
// The total number of entries that are marked for eviction.
|
||||
int marked_for_eviction_entries_ GUARDED_BY(mu_) = 0;
|
||||
int marked_for_eviction_entries_ TF_GUARDED_BY(mu_) = 0;
|
||||
// The value to assign to the last_use field of the next entry that is looked
|
||||
// up.
|
||||
int64 use_counter_ GUARDED_BY(mu_) = 0;
|
||||
int64 use_counter_ TF_GUARDED_BY(mu_) = 0;
|
||||
// All the executables that can be looked up in the cache index by key. An
|
||||
// entry is marked for eviction iff it is present in cache_ and not in
|
||||
// entries_by_last_use_.
|
||||
std::unordered_map<string, CompiledSubgraph*> cache_ GUARDED_BY(mu_);
|
||||
std::unordered_map<string, CompiledSubgraph*> cache_ TF_GUARDED_BY(mu_);
|
||||
// All the executable entries that can be looked up in the cache indexed by
|
||||
// uid.
|
||||
std::unordered_map<int64, CompiledSubgraph*> entries_by_uid_ GUARDED_BY(mu_);
|
||||
std::unordered_map<int64, CompiledSubgraph*> entries_by_uid_
|
||||
TF_GUARDED_BY(mu_);
|
||||
// Map from last_use to entry, used to mark entries for eviction in LRU
|
||||
// order. If an entry's last_use counter is not present as a key in
|
||||
// entries_by_last_use_ then the entry has been marked for eviction.
|
||||
std::map<int64, CompiledSubgraph*> entries_by_last_use_ GUARDED_BY(mu_);
|
||||
std::map<int64, CompiledSubgraph*> entries_by_last_use_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
// Looks up or create an XRTCompilationCache object within the given resource
|
||||
|
@ -165,14 +165,14 @@ class BaseCollectiveExecutor : public CollectiveExecutor {
|
||||
condition_variable launch_cv_;
|
||||
// collective instance key -> number of local devices for which NCCL ops have
|
||||
// been launched.
|
||||
std::unordered_map<int32, int32> launched_ GUARDED_BY(launch_mu_);
|
||||
std::unordered_map<int32, int32> launched_ TF_GUARDED_BY(launch_mu_);
|
||||
|
||||
private:
|
||||
Status CreateCollective(const CollectiveParams& col_params,
|
||||
CollectiveImplementationInterface** col_impl);
|
||||
// Check if all ops on which this collective depends on have launched.
|
||||
bool CheckDependencies(const CollectiveParams& col_params)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(launch_mu_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(launch_mu_);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -269,7 +269,7 @@ size_t BFCAllocator::RoundedBytes(size_t bytes) {
|
||||
}
|
||||
|
||||
bool BFCAllocator::DeallocateFreeRegions(size_t rounded_bytes)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
// Do nothing if garbage collection is off.
|
||||
if (!garbage_collection_) {
|
||||
return false;
|
||||
@ -326,7 +326,7 @@ bool BFCAllocator::DeallocateFreeRegions(size_t rounded_bytes)
|
||||
|
||||
void BFCAllocator::DeallocateRegions(
|
||||
const absl::flat_hash_set<void*>& region_ptrs)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
// Explicitly remove the const qualifier as some compilers disallow passing
|
||||
// const_iterator to std::vector::erase(), which is used in
|
||||
// RemoveAllocationRegion().
|
||||
@ -450,7 +450,7 @@ void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
|
||||
#endif
|
||||
|
||||
tensorflow::profiler::TraceMe trace_me(
|
||||
[&]() EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
[&]() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
AllocatorStats stats = stats_;
|
||||
int64 bytes_available =
|
||||
memory_limit_ - stats.bytes_reserved - stats.bytes_in_use;
|
||||
|
@ -113,12 +113,12 @@ class BFCAllocator : public Allocator {
|
||||
// constituents so they're only useful for allocations not requiring a
|
||||
// particular timestamp.
|
||||
bool MergeTimestampedChunks(size_t required_bytes)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// Add TraceMe (in memory allocation and deallocation) for memory stats
|
||||
// profiling. The requested_bytes can be negative if it's a deallocation.
|
||||
void AddTraceMe(absl::string_view traceme_name, int64 requested_bytes)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator
// kInvalidChunkHandle means an invalid chunk
@ -186,7 +186,7 @@ class BFCAllocator : public Allocator {
#endif

string DebugString(BFCAllocator* a,
bool recurse) NO_THREAD_SAFETY_ANALYSIS {
bool recurse) TF_NO_THREAD_SAFETY_ANALYSIS {
string dbg;
strings::StrAppend(
&dbg, " Size: ", strings::HumanReadableNumBytes(size),
@ -221,7 +221,7 @@ class BFCAllocator : public Allocator {
: allocator_(allocator) {}
// Sort first by size and then use pointer address as a tie breaker.
bool operator()(const ChunkHandle ha,
const ChunkHandle hb) const NO_THREAD_SAFETY_ANALYSIS {
const ChunkHandle hb) const TF_NO_THREAD_SAFETY_ANALYSIS {
const Chunk* a = allocator_->ChunkFromHandle(ha);
const Chunk* b = allocator_->ChunkFromHandle(hb);
if (a->size != b->size) {
@ -382,7 +382,7 @@ class BFCAllocator : public Allocator {
// 'rounded_bytes' bytes. Returns true on success and false on
// failure.
bool Extend(size_t alignment, size_t rounded_bytes)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Deallocate free regions to give back the memory to suballocator, so that
// we can re-allocate a larger region. The main use scenario of this function
@ -394,58 +394,58 @@ class BFCAllocator : public Allocator {

// Helper function to deallocate regions.
void DeallocateRegions(const absl::flat_hash_set<void*>& region_ptrs)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Returns a pointer to an underlying allocated chunk of size
// 'rounded_bytes'.
void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes,
uint64 freed_before) EXCLUSIVE_LOCKS_REQUIRED(lock_);
uint64 freed_before) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Splits the chunk specified by 'h' into two chunks, one at least
// of size 'num_bytes'.
void SplitChunk(ChunkHandle h, size_t num_bytes)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Merges the two chunk handles. Requires that the chunks are
// contiguous in their allocation.
void Merge(ChunkHandle h, ChunkHandle h2) EXCLUSIVE_LOCKS_REQUIRED(lock_);
void Merge(ChunkHandle h, ChunkHandle h2) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Adds the chunk 'h' to the proper free bin.
void InsertFreeChunkIntoBin(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);
void InsertFreeChunkIntoBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Removes the free chunk pointed to by 'c' from the set free_chunks.
void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
const Bin::FreeChunkSet::iterator& c)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Removes a free chunk from the bin.
void RemoveFreeChunkFromBin(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);
void RemoveFreeChunkFromBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
void MaybeRemoveFreeChunkFromBin(ChunkHandle h)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Removes the chunk metadata represented by 'h'.
void DeleteChunk(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);
void DeleteChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

string RenderOccupancy() EXCLUSIVE_LOCKS_REQUIRED(lock_);
void DumpMemoryLog(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
MemoryDump RecordMemoryMapInternal() EXCLUSIVE_LOCKS_REQUIRED(lock_);
void MaybeWriteMemoryMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
string RenderOccupancy() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
void DumpMemoryLog(size_t num_bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
MemoryDump RecordMemoryMapInternal() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
void MaybeWriteMemoryMap() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

ChunkHandle AllocateChunk() EXCLUSIVE_LOCKS_REQUIRED(lock_);
void DeallocateChunk(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);
ChunkHandle AllocateChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
void DeallocateChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

Chunk* ChunkFromHandle(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);
Chunk* ChunkFromHandle(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
const Chunk* ChunkFromHandle(ChunkHandle h) const
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

void MarkFree(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);
void MarkFree(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

ChunkHandle TryToCoalesce(ChunkHandle h, bool ignore_freed_at)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Fragmentation is calculated as the reverse ratio of the largest free chunk
// size over total free memory, and returns a value within [0, 1].
double GetFragmentation() EXCLUSIVE_LOCKS_REQUIRED(lock_);
double GetFragmentation() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

// Information about a Bin that is useful for debugging.
struct BinDebugInfo {
@ -458,7 +458,7 @@ class BFCAllocator : public Allocator {

// Computes and returns a BinDebugInfo for each Bin.
std::array<BinDebugInfo, kNumBins> get_bin_debug_info()
EXCLUSIVE_LOCKS_REQUIRED(lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

AllocatorRetry retry_helper_;

@ -526,21 +526,21 @@ class BFCAllocator : public Allocator {

// Structures mutable after construction
mutable mutex lock_;
RegionManager region_manager_ GUARDED_BY(lock_);
RegionManager region_manager_ TF_GUARDED_BY(lock_);

std::vector<Chunk> chunks_ GUARDED_BY(lock_);
std::vector<Chunk> chunks_ TF_GUARDED_BY(lock_);

// Pointer to head of linked list of free Chunks
ChunkHandle free_chunks_list_ GUARDED_BY(lock_);
ChunkHandle free_chunks_list_ TF_GUARDED_BY(lock_);

// Counter containing the next unique identifier to assign to a
// newly-created chunk.
int64 next_allocation_id_ GUARDED_BY(lock_);
int64 next_allocation_id_ TF_GUARDED_BY(lock_);

// Stats.
AllocatorStats stats_ GUARDED_BY(lock_);
AllocatorStats stats_ TF_GUARDED_BY(lock_);
#ifdef TENSORFLOW_MEM_DEBUG
int64 action_counter_ GUARDED_BY(lock_);
int64 action_counter_ TF_GUARDED_BY(lock_);
#define MEM_DEBUG_SIZE_HISTORY_SIZE 4096
int64 size_history_[MEM_DEBUG_SIZE_HISTORY_SIZE];
#endif

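For reference, the annotation pattern these hunks standardize on can be shown with a small standalone sketch. The class name and include paths below are illustrative assumptions, not part of the change; the macros are assumed to come from tensorflow/core/platform/thread_annotations.h and the mutex types from tensorflow/core/platform/mutex.h.

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

// Hypothetical example: TF_GUARDED_BY marks data that must only be touched
// with lock_ held; TF_EXCLUSIVE_LOCKS_REQUIRED marks helpers that assume the
// caller already holds the lock, as in the BFCAllocator methods above.
class ChunkStats {
 public:
  void Add(size_t bytes) {
    tensorflow::mutex_lock l(lock_);
    AddLocked(bytes);  // OK: lock_ is held here.
  }

 private:
  void AddLocked(size_t bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
    total_bytes_ += bytes;
  }

  tensorflow::mutex lock_;
  size_t total_bytes_ TF_GUARDED_BY(lock_) = 0;
};

With Clang's -Wthread-safety enabled, calling AddLocked() without holding lock_, or touching total_bytes_ unlocked, is reported at compile time.
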
@ -103,9 +103,9 @@ class BufRendezvous {
const uint64 step_id_;
const DeviceMgr* const dev_mgr_; // Not owned.
mutex mu_;
Status status_ GUARDED_BY(mu_);
Status status_ TF_GUARDED_BY(mu_);
typedef absl::flat_hash_map<string, Hook*> HookTable;
HookTable hook_table_ GUARDED_BY(mu_);
HookTable hook_table_ TF_GUARDED_BY(mu_);

void PurgeTable(const Status& s, HookTable* table);
};

@ -72,7 +72,8 @@ class CollectiveExecutorMgr : public CollectiveExecutorMgrInterface {
private:
mutex exec_mu_;
// Map from step_id to CollectiveExecutor
gtl::FlatMap<int64, CollectiveExecutor*> executor_table_ GUARDED_BY(exec_mu_);
gtl::FlatMap<int64, CollectiveExecutor*> executor_table_
TF_GUARDED_BY(exec_mu_);
};

} // namespace tensorflow

@ -507,7 +507,7 @@ void CollectiveParamResolverLocal::InitInstanceSharedParams(
ir->shared.instance.task_names, // NOLINT
attributes,
[this, gr, cp, ir, attributes, done](const Status& s)
EXCLUSIVE_LOCK_FUNCTION(ir->out_mu) {
TF_EXCLUSIVE_LOCK_FUNCTION(ir->out_mu) {
// Then we recover the lock in the callback thread that will hold it
// through the rest of the call chain. Signal the cv now, any
// waiting threads will wake only when out_mu is released later.
@ -607,7 +607,7 @@ void CollectiveParamResolverLocal::FindInstanceRec(

void CollectiveParamResolverLocal::CallInitInstanceSharedParams(
const GroupRec* gr, const CollectiveParams* cp, InstanceRec* ir,
const InstanceRecCallback& done) NO_THREAD_SAFETY_ANALYSIS {
const InstanceRecCallback& done) TF_NO_THREAD_SAFETY_ANALYSIS {
// This function serves merely to make a function call that should
// be thread/mutex safe but violates the simple model applied by
// static analysis, so we turn off analysis only within this
@ -630,7 +630,7 @@ void CollectiveParamResolverLocal::CallInitInstanceSharedParams(
ir->known.resize(cp->group.group_size, false);
InitInstanceSharedParams(
gr, cp, ir,
[this, ir, done](const Status& s) UNLOCK_FUNCTION(ir->out_mu) {
[this, ir, done](const Status& s) TF_UNLOCK_FUNCTION(ir->out_mu) {
DCHECK(ir->out_mu_available);
ir->status.Update(s);
ir->out_mu.unlock();

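The lambdas in this file acquire out_mu in one callback and release it in a later one; TF_EXCLUSIVE_LOCK_FUNCTION and TF_UNLOCK_FUNCTION tell the analysis which side does what. A minimal hypothetical sketch of that hand-off follows; the names and includes are illustrative, not from the change.

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

class LockHandoff {
 public:
  // Acquires mu_ and intentionally returns while still holding it.
  void Begin() TF_EXCLUSIVE_LOCK_FUNCTION(mu_) { mu_.lock(); }

  // Releases the lock taken by Begin(), typically from a later callback.
  void Finish() TF_UNLOCK_FUNCTION(mu_) { mu_.unlock(); }

 private:
  tensorflow::mutex mu_;
};
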
@ -66,12 +66,12 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
struct GroupRec {
CollGroupParams group;
mutable mutex mu;
Status status GUARDED_BY(mu);
std::set<string> device_set GUARDED_BY(mu);
std::vector<string> device_list GUARDED_BY(mu);
std::set<string> task_set GUARDED_BY(mu);
std::vector<string> task_list GUARDED_BY(mu);
std::vector<StatusCallback> waiting GUARDED_BY(mu);
Status status TF_GUARDED_BY(mu);
std::set<string> device_set TF_GUARDED_BY(mu);
std::vector<string> device_list TF_GUARDED_BY(mu);
std::set<string> task_set TF_GUARDED_BY(mu);
std::vector<string> task_list TF_GUARDED_BY(mu);
std::vector<StatusCallback> waiting TF_GUARDED_BY(mu);
};

// Finds the GroupRec that corresponds to cp->group_key.
@ -84,7 +84,7 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
GroupRecCallback;
void CompleteGroupLocal(const string& device, CollectiveParams* cp,
const GroupRecCallback& done)
LOCKS_EXCLUDED(group_mu_);
TF_LOCKS_EXCLUDED(group_mu_);

// Used to complete/verify CollInstance.
struct InstanceRec;
@ -116,29 +116,29 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
// drop that lock, then take a lock on out_mu before
// reading/modifying its values.
mutex in_mu;
bool is_init GUARDED_BY(in_mu);
std::vector<IRConsumer> init_waiters GUARDED_BY(in_mu);
bool is_init TF_GUARDED_BY(in_mu);
std::vector<IRConsumer> init_waiters TF_GUARDED_BY(in_mu);

// A thread that wishes to acquire out_mu must ensure that it is available
// by invoking WaitForOutMu().
mutex out_mu;
condition_variable out_cv;
bool out_mu_available GUARDED_BY(out_mu);
bool out_mu_available TF_GUARDED_BY(out_mu);
// Values to be shared by all instances, constant after initialization.
CollectiveParams shared GUARDED_BY(out_mu);
CollectiveParams shared TF_GUARDED_BY(out_mu);
// If an error occurs during initialization this structure stays in
// the table with a non-OK status. Purging the table and restarting
// needs to be done at a higher level.
Status status GUARDED_BY(out_mu);
Status status TF_GUARDED_BY(out_mu);

// These fields are used to count the instances that have called
// in and become known while resolving broadcast source identity and
// communicator key.
int source_rank GUARDED_BY(out_mu);
string communicator_key GUARDED_BY(out_mu);
int known_count GUARDED_BY(out_mu);
std::vector<bool> known GUARDED_BY(out_mu);
std::vector<IRConsumer> known_waiters GUARDED_BY(out_mu);
int source_rank TF_GUARDED_BY(out_mu);
string communicator_key TF_GUARDED_BY(out_mu);
int known_count TF_GUARDED_BY(out_mu);
std::vector<bool> known TF_GUARDED_BY(out_mu);
std::vector<IRConsumer> known_waiters TF_GUARDED_BY(out_mu);

InstanceRec()
: is_init(false),
@ -148,7 +148,7 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {

// If out_mu is unavailable during distributed device locality
// initialization, wait on out_cv until it is available again.
void WaitForOutMu(mutex_lock& lock) EXCLUSIVE_LOCKS_REQUIRED(out_mu);
void WaitForOutMu(mutex_lock& lock) TF_EXCLUSIVE_LOCKS_REQUIRED(out_mu);
};

// Find the InstanceRec with the same instance_key as cp. If it doesn't
@ -162,7 +162,7 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
InstanceRecCallback;
void FindInstanceRec(const GroupRec* gr, CollectiveParams* cp,
const InstanceRecCallback& done)
LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);
TF_LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);

// Populate *ir with device membership from gr, then initialize to be specific
// to cp->instance_key, i.e. order the devices and tasks.
@ -171,26 +171,26 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
// cp is populated with all DeviceLocalities
void InitInstanceSharedParams(const GroupRec* gr, const CollectiveParams* cp,
InstanceRec* ir, const StatusCallback& done)
UNLOCK_FUNCTION(ir->out_mu) LOCKS_EXCLUDED(gr->mu);
TF_UNLOCK_FUNCTION(ir->out_mu) TF_LOCKS_EXCLUDED(gr->mu);

void CallInitInstanceSharedParams(const GroupRec* gr,
const CollectiveParams* cp, InstanceRec* ir,
const InstanceRecCallback& done)
LOCKS_EXCLUDED(ir->out_mu, gr->mu);
TF_LOCKS_EXCLUDED(ir->out_mu, gr->mu);

// Establishes the final order of ir->shared.instance.device_names and
// ir->shared.instance.task_names by considering localities of all devices.
void CompleteDefaultRanking(const GroupRec* gr, const CollectiveParams* cp,
InstanceRec* ir,
const std::vector<DeviceAttributes>& attributes)
EXCLUSIVE_LOCKS_REQUIRED(ir->out_mu);
TF_EXCLUSIVE_LOCKS_REQUIRED(ir->out_mu);

// Finish populating *cp.
// Precondition: *gr has been fully populated by CompleteGroupLocal.
void CompleteInstanceLocal(const string& device, const GroupRec* gr,
CollectiveParams* cp, bool is_source,
const StatusCallback& done)
LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);
TF_LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);

// Finish populating *cp from fully initialized *ir.
// Precondition: *gr and *ir are fully populated.
@ -199,12 +199,12 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
CollectiveParams* cp,
InstanceRec* ir, bool is_source,
const StatusCallback& done)
LOCKS_EXCLUDED(ir->out_mu);
TF_LOCKS_EXCLUDED(ir->out_mu);

// Complete instance params after waiting for group.
// Precondition: *cp has complete group data and default_rank.
void WaitForGroup(InstanceRec* ir, CollectiveParams* cp, bool is_source,
const IRConsumer& f) LOCKS_EXCLUDED(ir->out_mu);
const IRConsumer& f) TF_LOCKS_EXCLUDED(ir->out_mu);

// If cp.device_names contains only devices local to this process
// populates *localities, else returns an error.
@ -225,7 +225,7 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {

// Helper to grab status under lock, invoke callback out of lock.
void CallbackWithStatus(const InstanceRecCallback& done, InstanceRec* irec)
LOCKS_EXCLUDED(irec->out_mu);
TF_LOCKS_EXCLUDED(irec->out_mu);

const bool nccl_;
const DeviceMgr* dev_mgr_;
@ -233,10 +233,10 @@ class CollectiveParamResolverLocal : public ParamResolverInterface {
string task_name_;
mutex group_mu_;
gtl::FlatMap<int32, std::unique_ptr<GroupRec>> group_table_
GUARDED_BY(group_mu_);
TF_GUARDED_BY(group_mu_);
mutex instance_mu_;
gtl::FlatMap<int32, std::unique_ptr<InstanceRec>> instance_table_
GUARDED_BY(instance_mu_);
TF_GUARDED_BY(instance_mu_);
};

} // namespace tensorflow

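TF_LOCKS_EXCLUDED, used throughout this header, marks entry points that take the lock themselves and therefore must not be called with it already held. A small hypothetical sketch of the pattern follows; the class and member names are illustrative only.

#include <cstdint>
#include <map>

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

class GroupTable {
 public:
  // Callers must not hold group_mu_; this method acquires it internally.
  void CompleteGroup(int32_t group_key) TF_LOCKS_EXCLUDED(group_mu_) {
    tensorflow::mutex_lock l(group_mu_);
    groups_[group_key] = true;
  }

 private:
  tensorflow::mutex group_mu_;
  std::map<int32_t, bool> groups_ TF_GUARDED_BY(group_mu_);
};
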
@ -47,7 +47,7 @@ class CostModelManager {

private:
mutex mu_;
CostModelMap cost_models_ GUARDED_BY(mu_);
CostModelMap cost_models_ TF_GUARDED_BY(mu_);
};

} // namespace tensorflow

@ -138,13 +138,14 @@ class DynamicDeviceMgr : public DeviceMgr {
mutable mutex devices_mu_;

std::unordered_map<Device*, std::unique_ptr<Device>> dynamic_devices_
GUARDED_BY(devices_mu_);
TF_GUARDED_BY(devices_mu_);

std::unordered_map<string, Device*> device_map_ GUARDED_BY(devices_mu_);
std::unordered_map<string, Device*> device_map_ TF_GUARDED_BY(devices_mu_);

std::unordered_map<string, int> device_type_counts_ GUARDED_BY(devices_mu_);
std::unordered_map<string, int> device_type_counts_
TF_GUARDED_BY(devices_mu_);

mutable Device* cpu_device_ GUARDED_BY(devices_mu_);
mutable Device* cpu_device_ TF_GUARDED_BY(devices_mu_);

TF_DISALLOW_COPY_AND_ASSIGN(DynamicDeviceMgr);
};

@ -38,7 +38,7 @@ class DeviceSet {
~DeviceSet();

// Does not take ownership of 'device'.
void AddDevice(Device* device) LOCKS_EXCLUDED(devices_mu_);
void AddDevice(Device* device) TF_LOCKS_EXCLUDED(devices_mu_);

// Set the device designated as the "client". This device
// must also be registered via AddDevice().
@ -70,7 +70,7 @@ class DeviceSet {
// Return the prioritized list of devices in this set.
// Devices are prioritized first by `DeviceTypeOrder`, then by name.
const PrioritizedDeviceVector& prioritized_devices() const
LOCKS_EXCLUDED(devices_mu_);
TF_LOCKS_EXCLUDED(devices_mu_);

// Return the prioritized list of unique device types in this set.
//
@ -78,7 +78,7 @@ class DeviceSet {
// element in the list's `std::pair<DeviceType, int32>`) will be initialized
// to the value of `DeviceTypeOrder` for the device types.
const PrioritizedDeviceTypeVector& prioritized_device_types() const
LOCKS_EXCLUDED(devices_mu_);
TF_LOCKS_EXCLUDED(devices_mu_);

// An order to sort by device types according to system-determined
// priority.
@ -112,12 +112,13 @@ class DeviceSet {

// Cached prioritized vector, created on-the-fly when
// prioritized_devices() is called.
mutable PrioritizedDeviceVector prioritized_devices_ GUARDED_BY(devices_mu_);
mutable PrioritizedDeviceVector prioritized_devices_
TF_GUARDED_BY(devices_mu_);

// Cached prioritized vector, created on-the-fly when
// prioritized_device_types() is called.
mutable PrioritizedDeviceTypeVector prioritized_device_types_
GUARDED_BY(devices_mu_);
TF_GUARDED_BY(devices_mu_);

// Fullname -> device* for device in devices_.
std::unordered_map<string, Device*> device_by_name_;

@ -236,8 +236,9 @@ class DirectSessionFactory : public SessionFactory {
}

mutex sessions_lock_;
std::vector<DirectSession*> sessions_ GUARDED_BY(sessions_lock_);
absl::flat_hash_set<string> session_metadata_keys_ GUARDED_BY(sessions_lock_);
std::vector<DirectSession*> sessions_ TF_GUARDED_BY(sessions_lock_);
absl::flat_hash_set<string> session_metadata_keys_
TF_GUARDED_BY(sessions_lock_);
};

class DirectSessionRegistrar {

@ -198,7 +198,7 @@ class DirectSession : public Session {
// 'status' is the current status of the execution.
struct RunState {
mutex mu;
Status status GUARDED_BY(mu);
Status status TF_GUARDED_BY(mu);
std::unique_ptr<CollectiveExecutor::Handle> collective_executor;
std::unique_ptr<StepStatsCollector> collector;
TensorStore tensor_store;
@ -275,7 +275,7 @@ class DirectSession : public Session {
bool ShouldUseRunHandlerPool(const RunOptions& run_options) const;

::tensorflow::Status ExtendLocked(GraphDef graph)
EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
TF_EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);

::tensorflow::Status ResourceHandleToInputTensor(
const Tensor& resource_tensor, Tensor* retrieved_tensor);
@ -343,8 +343,8 @@ class DirectSession : public Session {
// Unique session identifier.
string session_handle_;
mutex graph_state_lock_;
bool graph_created_ GUARDED_BY(graph_state_lock_) = false;
bool finalized_ GUARDED_BY(graph_state_lock_) = false;
bool graph_created_ TF_GUARDED_BY(graph_state_lock_) = false;
bool finalized_ TF_GUARDED_BY(graph_state_lock_) = false;

// The thread-pools to use for running ops, with a bool indicating if the pool
// is owned.
@ -356,7 +356,7 @@ class DirectSession : public Session {
bool sync_on_finish_ = true;

std::vector<std::unique_ptr<FunctionInfo>> functions_
GUARDED_BY(executor_lock_);
TF_GUARDED_BY(executor_lock_);

mutex executor_lock_; // protects executors_
// Holds mappings from signature to the executors that process
@ -365,7 +365,7 @@ class DirectSession : public Session {
// The map value is a shared_ptr since multiple map keys can point to the
// same ExecutorsAndKey object.
std::unordered_map<string, std::shared_ptr<ExecutorsAndKeys>> executors_
GUARDED_BY(executor_lock_);
TF_GUARDED_BY(executor_lock_);

class RunCallableCallFrame;
struct Callable {
@ -374,12 +374,12 @@ class DirectSession : public Session {
~Callable();
};
mutex callables_lock_;
int64 next_callable_handle_ GUARDED_BY(callables_lock_) = 0;
std::unordered_map<int64, Callable> callables_ GUARDED_BY(callables_lock_);
int64 next_callable_handle_ TF_GUARDED_BY(callables_lock_) = 0;
std::unordered_map<int64, Callable> callables_ TF_GUARDED_BY(callables_lock_);

// Holds mappings from handle to partial run state.
std::unordered_map<string, std::unique_ptr<PartialRunState>> partial_runs_
GUARDED_BY(executor_lock_);
TF_GUARDED_BY(executor_lock_);

// This holds all the tensors that are currently alive in the session.
SessionState session_state_;
@ -393,11 +393,11 @@ class DirectSession : public Session {
// nodes can not be moved to a different device. Maps node names to
// device names.
std::unordered_map<string, string> stateful_placements_
GUARDED_BY(graph_state_lock_);
TF_GUARDED_BY(graph_state_lock_);

// Execution_state; used when placing the entire graph.
std::unique_ptr<GraphExecutionState> execution_state_
GUARDED_BY(graph_state_lock_);
TF_GUARDED_BY(graph_state_lock_);

// The function library, before any rewrites or optimizations have been
// performed. In particular, CreateGraphs() may need to modify the function
@ -406,7 +406,7 @@ class DirectSession : public Session {

// true if the Session has been Closed.
mutex closed_lock_;
bool closed_ GUARDED_BY(closed_lock_) = false;
bool closed_ TF_GUARDED_BY(closed_lock_) = false;

// For generating unique names for this session instance.
std::atomic<int64> edge_name_counter_ = {0};
@ -423,7 +423,7 @@ class DirectSession : public Session {

// For testing collective graph key generation.
mutex collective_graph_key_lock_;
int64 collective_graph_key_ GUARDED_BY(collective_graph_key_lock_) = -1;
int64 collective_graph_key_ TF_GUARDED_BY(collective_graph_key_lock_) = -1;

// Run in caller's thread if RunOptions.inter_op_thread_pool is negative or
// all of following conditions are met:

@ -293,11 +293,11 @@ class EagerContext : public core::RefCounted {
}

// TODO(apassos) clean up RunMetadata storage.
mutex* MetadataMu() LOCK_RETURNED(metadata_mu_) { return &metadata_mu_; }
bool ShouldStoreGraphs() LOCKS_EXCLUDED(metadata_mu_);
mutex* MetadataMu() TF_LOCK_RETURNED(metadata_mu_) { return &metadata_mu_; }
bool ShouldStoreGraphs() TF_LOCKS_EXCLUDED(metadata_mu_);
void SetShouldStoreGraphs(bool value);
RunMetadata* RunMetadataProto() { return &run_metadata_; }
void ClearRunMetadata() EXCLUSIVE_LOCKS_REQUIRED(metadata_mu_);
void ClearRunMetadata() TF_EXCLUSIVE_LOCKS_REQUIRED(metadata_mu_);

void ListDevices(std::vector<tensorflow::DeviceAttributes>* devices);

@ -511,9 +511,9 @@ class EagerContext : public core::RefCounted {
// thread-local-object-local variable in C++11.
mutable mutex policy_map_mu_;
std::unordered_map<std::thread::id, ContextDevicePlacementPolicy>
device_placement_policy_ GUARDED_BY(policy_map_mu_);
device_placement_policy_ TF_GUARDED_BY(policy_map_mu_);
std::unordered_map<std::thread::id, ContextMirroringPolicy> mirroring_policy_
GUARDED_BY(policy_map_mu_);
TF_GUARDED_BY(policy_map_mu_);

OwnedOrUnownedHelper<const DeviceMgr> local_device_manager_;

@ -552,14 +552,14 @@ class EagerContext : public core::RefCounted {
};
std::unordered_map<Fprint128, core::RefCountPtr<KernelAndDevice>,
Fprint128Hasher>
kernel_cache_ GUARDED_BY(cache_mu_);
kernel_cache_ TF_GUARDED_BY(cache_mu_);
std::unordered_map<string, RegisteredFunction*> registered_functions_
GUARDED_BY(cache_mu_);
TF_GUARDED_BY(cache_mu_);

// Whether we should compute RunMetadata.
std::atomic<bool> should_store_graphs_{false};
mutex metadata_mu_;
RunMetadata run_metadata_ GUARDED_BY(metadata_mu_);
RunMetadata run_metadata_ TF_GUARDED_BY(metadata_mu_);
GraphCollector graph_collector_;
// TODO(fishx): Allow update following two bool after context creation.
const bool log_device_placement_;
@ -567,13 +567,14 @@ class EagerContext : public core::RefCounted {

// Information related to step containers.
std::atomic<int> num_active_steps_;
std::unique_ptr<ScopedStepContainer> step_container_ GUARDED_BY(metadata_mu_);
std::unique_ptr<ScopedStepContainer> step_container_
TF_GUARDED_BY(metadata_mu_);

EagerExecutor default_executor_;
mutable mutex executor_map_mu_;
// Not owned.
std::unordered_map<std::thread::id, EagerExecutor*> thread_local_executor_
GUARDED_BY(executor_map_mu_);
TF_GUARDED_BY(executor_map_mu_);

const bool log_memory_;

@ -607,28 +608,28 @@ class EagerContext : public core::RefCounted {

mutable mutex remote_state_mu_;

uint64 context_id_ GUARDED_BY(remote_state_mu_);
uint64 context_id_ TF_GUARDED_BY(remote_state_mu_);
// The view id of an eager context should be set to 0 when context is created,
// and continuously incremented when context with the same context_id gets
// updated. The view id should be consistent between master and workers.
uint64 context_view_id_ GUARDED_BY(remote_state_mu_);
uint64 context_view_id_ TF_GUARDED_BY(remote_state_mu_);
std::vector<string> remote_contexts_;

int keep_alive_secs_ GUARDED_BY(remote_state_mu_);
int keep_alive_secs_ TF_GUARDED_BY(remote_state_mu_);
std::atomic<int> sleep_for_secs_;

std::unique_ptr<Thread> keep_alive_thread_;
mutex keep_alive_thread_shutdown_mu_;
condition_variable keep_alive_thread_cv_;
bool shutting_down_ GUARDED_BY(keep_alive_thread_shutdown_mu_) = false;
bool shutting_down_ TF_GUARDED_BY(keep_alive_thread_shutdown_mu_) = false;

std::unique_ptr<eager::RemoteMgr, std::function<void(eager::RemoteMgr*)>>
remote_mgr_;
bool is_master_ GUARDED_BY(remote_state_mu_);
bool is_master_ TF_GUARDED_BY(remote_state_mu_);

// Maps from a remote worker to a list of parsed device filters.
std::unordered_map<string, std::vector<DeviceNameUtils::ParsedName>>
cluster_device_filters_ GUARDED_BY(remote_state_mu_);
cluster_device_filters_ TF_GUARDED_BY(remote_state_mu_);

#endif // IS_MOBILE_PLATFORM

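MetadataMu() above hands the mutex itself to callers; TF_LOCK_RETURNED tells the analysis that the returned pointer aliases metadata_mu_, so locking through it satisfies the other annotations. A hypothetical standalone sketch, with illustrative names and includes:

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

class MetadataHolder {
 public:
  // The returned pointer is the same lock that guards run_metadata_.
  tensorflow::mutex* MetadataMu() TF_LOCK_RETURNED(metadata_mu_) {
    return &metadata_mu_;
  }

  void ClearRunMetadata() TF_EXCLUSIVE_LOCKS_REQUIRED(metadata_mu_) {
    run_metadata_ = 0;  // stand-in for resetting the real RunMetadata proto
  }

 private:
  tensorflow::mutex metadata_mu_;
  int run_metadata_ TF_GUARDED_BY(metadata_mu_) = 0;
};

A caller can then write tensorflow::mutex_lock l(*holder.MetadataMu()); holder.ClearRunMetadata(); and the analysis treats the lock as held.
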
@ -151,7 +151,7 @@ class EagerExecutor {
return status_;
}

bool ok() const NO_THREAD_SAFETY_ANALYSIS { return ok_; }
bool ok() const TF_NO_THREAD_SAFETY_ANALYSIS { return ok_; }

private:
// Possible states for this executor.
@ -183,11 +183,12 @@ class EagerExecutor {
NodeState state;
};

const char* StateStringLocked() EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);
const char* StateStringLocked()
TF_EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);

void NodeDone(const core::RefCountPtr<NodeItem>& item, const Status& status,
bool from_queue);
void NotifyWaiters(uint64 id) EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);
void NotifyWaiters(uint64 id) TF_EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);

// Starts execution of pending EagerNodes. This function loops till executor
// state_ is set to kShutDown. If any errors are encountered, these are set
@ -201,7 +202,7 @@ class EagerExecutor {
// The impl of WaitForAllPendingNodes
// `lock` is the lock that holds node_queue_mutex_.
Status WaitForAllPendingNodesLocked(mutex_lock* lock)
EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);
TF_EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);

Status WaitImpl(bool wait_all, uint64 node_id);

@ -210,27 +211,27 @@ class EagerExecutor {
mutable mutex node_queue_mutex_;

// Used to signal that some EagerNodes are pending execution.
condition_variable nodes_pending_ GUARDED_BY(node_queue_mutex_);
condition_variable nodes_pending_ TF_GUARDED_BY(node_queue_mutex_);

// Queue of pending NodeItems. Ordered by NodeItem::id.
std::queue<core::RefCountPtr<NodeItem>> node_queue_
GUARDED_BY(node_queue_mutex_);
TF_GUARDED_BY(node_queue_mutex_);

// Ordered by NodeItem::id.
std::map<uint64, core::RefCountPtr<NodeItem>, std::less<uint64>>
unfinished_nodes_ GUARDED_BY(node_queue_mutex_);
unfinished_nodes_ TF_GUARDED_BY(node_queue_mutex_);

// `status_` is set based on any errors raised during execution of a
// EagerNode. It remains set until ClearError is called.
Status status_ GUARDED_BY(node_queue_mutex_);
std::atomic<bool> ok_ GUARDED_BY(node_queue_mutex_);
Status status_ TF_GUARDED_BY(node_queue_mutex_);
std::atomic<bool> ok_ TF_GUARDED_BY(node_queue_mutex_);

// Map from id of a EagerNode to condition_variables (not owned by the map).
// These condition_variables are notified and removed when that EagerNode is
// done executing, or if an error is found in execution of any EagerNode.
// The map is ordered by id.
std::multimap<uint64, condition_variable*, std::less<uint64>>
node_done_notifications_ GUARDED_BY(node_queue_mutex_);
node_done_notifications_ TF_GUARDED_BY(node_queue_mutex_);

// thread_exited_notification_ is notified by the `thread_` right before it
// exits.
@ -238,7 +239,8 @@ class EagerExecutor {

// When state_ is set to kShutDown, it indicates that `thread_` should stop as
// soon as it is done executing the current EagerNode.
ExecutorState state_ GUARDED_BY(node_queue_mutex_) = ExecutorState::kActive;
ExecutorState state_ TF_GUARDED_BY(node_queue_mutex_) =
ExecutorState::kActive;

// Thread object that calls the `Run` method in async mode.This thread runs
// until state_ is set to kShuttingDown. It is `nullptr` in sync mode.

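ok() above reads an atomic flag without taking node_queue_mutex_, so the check is disabled for that one accessor rather than weakening the annotations on the data. A hypothetical sketch of the same benign-race pattern, with illustrative names:

#include <atomic>

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

class StatusFlag {
 public:
  StatusFlag() : ok_(true) {}

  // Monitoring-only read; a momentarily stale value is acceptable, so the
  // analysis is switched off here instead of taking mu_.
  bool ok() const TF_NO_THREAD_SAFETY_ANALYSIS { return ok_; }

  void SetError() {
    tensorflow::mutex_lock l(mu_);
    ok_ = false;
  }

 private:
  mutable tensorflow::mutex mu_;
  std::atomic<bool> ok_ TF_GUARDED_BY(mu_);
};
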
@ -241,17 +241,17 @@ class TensorHandle : public core::RefCounted {

// Map of local mirrors. This can include both ready and non-ready mirrors.
std::unordered_map<const tensorflow::Device*, LocalTensorHandleData>
local_mirrors_ GUARDED_BY(mu_);
local_mirrors_ TF_GUARDED_BY(mu_);
#if !defined(IS_MOBILE_PLATFORM)
// TODO(yujingzhang): Remove resource_shape_mirrors_ once scalable per-replica
// variable is ready, since we could get the shape locally without remote copy
// then.
std::unordered_map<string, RemoteTensorHandleData> resource_shape_mirrors_
GUARDED_BY(mu_);
TF_GUARDED_BY(mu_);
// TODO(gjn): Is std::map the most optimal choice here? Perhaps this should be
// a fixed size map.
std::unordered_map<string, RemoteTensorHandleData> remote_mirrors_
GUARDED_BY(mu_);
TF_GUARDED_BY(mu_);
#endif

// `ctx` is only guaranteed to be set if the handle is not "ready". This is

@ -98,8 +98,8 @@ class LocalTensorHandleData {

private:
mutable mutex mu_;
bool is_ready_ GUARDED_BY(mu_);
Status is_poisoned_ GUARDED_BY(mu_);
bool is_ready_ TF_GUARDED_BY(mu_);
Status is_poisoned_ TF_GUARDED_BY(mu_);
};

absl::variant<NonBlockingControl, BlockingControl> ctrl_;

@ -1117,16 +1117,16 @@ class ExecutorState {
int num_pending_inputs = 0;

// The highest iteration number we have reached so far in this frame.
int64 iteration_count GUARDED_BY(mu) = 0;
int64 iteration_count TF_GUARDED_BY(mu) = 0;

// The number of outstanding iterations.
int num_outstanding_iterations GUARDED_BY(mu) = 1;
int num_outstanding_iterations TF_GUARDED_BY(mu) = 1;

private:
// The active iteration states of this frame.
gtl::InlinedVector<IterationState*, 12> iterations;
IterationState** const iterations_raw GUARDED_BY(mu);
IterationState* iterations_first GUARDED_BY(mu);
IterationState** const iterations_raw TF_GUARDED_BY(mu);
IterationState* iterations_first TF_GUARDED_BY(mu);

public:
// The NextIteration nodes to enter a new iteration. If the number of
@ -1134,18 +1134,18 @@ class ExecutorState {
// the next iteration until the number of outstanding iterations falls
// below the limit.
std::vector<std::pair<const NodeItem*, Entry>> next_iter_roots
GUARDED_BY(mu);
TF_GUARDED_BY(mu);

// The values of the loop invariants for this loop. They are added into
// this list as they "enter" the frame. When a loop invariant enters,
// we make it available to all active iterations. When the frame starts
// a new iteration, we make all the current loop invariants available
// to the new iteration.
std::vector<std::pair<const NodeItem*, Entry>> inv_values GUARDED_BY(mu);
std::vector<std::pair<const NodeItem*, Entry>> inv_values TF_GUARDED_BY(mu);

// The list of dead exit node items for the current highest iteration. We
// will only "execute" the dead exits of the final iteration.
std::vector<const NodeItem*> dead_exits GUARDED_BY(mu);
std::vector<const NodeItem*> dead_exits TF_GUARDED_BY(mu);

// Static information specific to this frame.
PendingCounts* pending_counts = nullptr;
@ -1167,7 +1167,7 @@ class ExecutorState {
}

inline IterationState* GetIteration(int64 iter)
EXCLUSIVE_LOCKS_REQUIRED(mu) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mu) {
if (TF_PREDICT_TRUE(iter == 0)) {
return iterations_first;
} else {
@ -1177,7 +1177,7 @@ class ExecutorState {
}

inline void SetIteration(int64 iter, IterationState* state)
EXCLUSIVE_LOCKS_REQUIRED(mu) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mu) {
size_t index = iter % (max_parallel_iterations + 1);
DCHECK(state == nullptr || iterations[index] == nullptr);
iterations_raw[index] = state;
@ -1198,7 +1198,7 @@ class ExecutorState {
// frame. Return true iff the execution of the frame is done.
inline bool DecrementOutstandingOpsLocked(const GraphView* gview,
int64 iter, TaggedNodeSeq* ready)
EXCLUSIVE_LOCKS_REQUIRED(mu) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mu) {
IterationState* istate = GetIteration(iter);
istate->outstanding_ops--;
if (istate->outstanding_ops != 0) {
@ -1209,39 +1209,40 @@ class ExecutorState {
}

// Returns true if the computation in the frame is completed.
inline bool IsFrameDone() EXCLUSIVE_LOCKS_REQUIRED(mu) {
inline bool IsFrameDone() TF_EXCLUSIVE_LOCKS_REQUIRED(mu) {
return (num_pending_inputs == 0 && num_outstanding_iterations == 0);
}

// Returns true if the iteration of the frame is completed.
bool IsIterationDone(int64 iter) EXCLUSIVE_LOCKS_REQUIRED(mu);
bool IsIterationDone(int64 iter) TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

// Increments the iteration id. If this is a new iteration, initialize it.
void IncrementIteration(const GraphView* gview, TaggedNodeSeq* ready)
EXCLUSIVE_LOCKS_REQUIRED(mu);
TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

// Activate all the deferred NextIteration nodes in a new iteration.
void ActivateNexts(const GraphView* gview, int64 iter, TaggedNodeSeq* ready)
EXCLUSIVE_LOCKS_REQUIRED(mu);
TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

// Activate all the current loop invariants in a new iteration.
void ActivateLoopInvs(const GraphView* gview, int64 iter,
TaggedNodeSeq* ready) EXCLUSIVE_LOCKS_REQUIRED(mu);
TaggedNodeSeq* ready) TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

// Add a new loop invariant and make it available to all active
// iterations.
void AddLoopInv(const NodeItem* item, const Entry& entry,
TaggedNodeSeq* ready) EXCLUSIVE_LOCKS_REQUIRED(mu);
TaggedNodeSeq* ready) TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

// Activate the successors of a node. Contents of *outputs are left in an
// indeterminate state after returning from this method.
void ActivateNodes(const NodeItem* item, const bool is_dead, int64 iter,
EntryVector* outputs, TaggedNodeSeq* ready)
EXCLUSIVE_LOCKS_REQUIRED(mu);
TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

// Cleanup iterations of this frame starting from iteration iter.
bool CleanupIterations(const GraphView* gview, int64 iter,
TaggedNodeSeq* ready) EXCLUSIVE_LOCKS_REQUIRED(mu);
TaggedNodeSeq* ready)
TF_EXCLUSIVE_LOCKS_REQUIRED(mu);

void DumpIterationState(ExecutorState* parent) {
mutex_lock l(mu);
@ -1361,18 +1362,19 @@ class ExecutorState {

// Available via OpKernelContext to every OpKernel invocation.
mutex num_deferred_ops_mu_;
int64 num_deferred_ops_ GUARDED_BY(num_deferred_ops_mu_) = 0;
bool finish_when_deferred_ops_done_ GUARDED_BY(num_deferred_ops_mu_) = false;
int64 num_deferred_ops_ TF_GUARDED_BY(num_deferred_ops_mu_) = 0;
bool finish_when_deferred_ops_done_ TF_GUARDED_BY(num_deferred_ops_mu_) =
false;

mutex mu_;
Status status_ GUARDED_BY(mu_);
Status status_ TF_GUARDED_BY(mu_);

// Mapping from frame name to outstanding frames. A new frame is created
// at some iteration of an active frame. So the unique key for the new
// child frame is composed of the name of the parent frame, the iteration
// number at which the parent frame is creating the new frame, and the
// name of the new frame from nodedef.
gtl::FlatMap<string, FrameState*> outstanding_frames_ GUARDED_BY(mu_);
gtl::FlatMap<string, FrameState*> outstanding_frames_ TF_GUARDED_BY(mu_);

// The unique name of a frame.
inline string MakeFrameName(FrameState* frame, int64 iter_id,
@ -1452,7 +1454,7 @@ class ExecutorState {
// resizes and this particular iteration's array element will not
// be changed out from under us because the iteration is still alive).
Entry* GetInputTensors(FrameState* input_frame,
int64 input_iter) const NO_THREAD_SAFETY_ANALYSIS {
int64 input_iter) const TF_NO_THREAD_SAFETY_ANALYSIS {
return input_frame->GetIteration(input_iter)->input_tensors;
}
};

@ -188,8 +188,8 @@ class ExecutorBarrier {
StatusCallback done_cb_ = nullptr;

mutable mutex mu_;
int pending_ GUARDED_BY(mu_) = 0;
StatusGroup status_group_ GUARDED_BY(mu_);
int pending_ TF_GUARDED_BY(mu_) = 0;
StatusGroup status_group_ TF_GUARDED_BY(mu_);

void WhenDone(const Status& s) {
Rendezvous* error_rendez = nullptr;

@ -48,7 +48,7 @@ void ExecutorFactory::Register(const string& executor_type,

namespace {
const string RegisteredFactoriesErrorMessageLocked()
SHARED_LOCKS_REQUIRED(executor_factory_lock) {
TF_SHARED_LOCKS_REQUIRED(executor_factory_lock) {
std::vector<string> factory_types;
for (const auto& executor_factory : *executor_factories()) {
factory_types.push_back(executor_factory.first);

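RegisteredFactoriesErrorMessageLocked() only reads the registry, so it asks for at least a shared hold on the lock. A hypothetical sketch of the reader-lock variant, with illustrative names and assuming tensorflow::tf_shared_lock from the same mutex header:

#include <set>
#include <string>

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

class FactoryRegistry {
 public:
  bool Contains(const std::string& name) const TF_LOCKS_EXCLUDED(mu_) {
    tensorflow::tf_shared_lock l(mu_);  // a reader lock is enough here
    return ContainsLocked(name);
  }

 private:
  // Requires at least a shared (reader) hold on mu_.
  bool ContainsLocked(const std::string& name) const
      TF_SHARED_LOCKS_REQUIRED(mu_) {
    return names_.count(name) != 0;
  }

  mutable tensorflow::mutex mu_;
  std::set<std::string> names_ TF_GUARDED_BY(mu_);
};
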
@ -402,7 +402,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {

mutable mutex mu_;

int next_handle_ GUARDED_BY(mu_);
int next_handle_ TF_GUARDED_BY(mu_);

// The instantiated and transformed function is encoded as a Graph
// object, and an executor is created for the graph.
@ -423,7 +423,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
}
};
std::unique_ptr<std::unordered_map<Handle, std::unique_ptr<Item>>> items_
GUARDED_BY(mu_);
TF_GUARDED_BY(mu_);

ProcessFunctionLibraryRuntime* parent_ = nullptr; // not owned.

@ -58,7 +58,7 @@ class FakeAllocator {
AllocatorRetry retry_;
void* good_ptr_ = reinterpret_cast<void*>(0xdeadbeef);
mutex mu_;
size_t memory_capacity_ GUARDED_BY(mu_);
size_t memory_capacity_ TF_GUARDED_BY(mu_);
int millis_to_wait_;
};

@ -100,7 +100,7 @@ class AlternatingBarrier {
}

private:
void IncrementTurn() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
void IncrementTurn() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
int skipped = 0;
while (skipped < num_users_) {
next_turn_ = (next_turn_ + 1) % num_users_;
@ -112,8 +112,8 @@ class AlternatingBarrier {
mutex mu_;
condition_variable cv_;
int num_users_;
int next_turn_ GUARDED_BY(mu_);
std::vector<bool> done_ GUARDED_BY(mu_);
int next_turn_ TF_GUARDED_BY(mu_);
std::vector<bool> done_ TF_GUARDED_BY(mu_);
};

class GPUAllocatorRetryTest : public ::testing::Test {
@ -174,8 +174,8 @@ class GPUAllocatorRetryTest : public ::testing::Test {
std::vector<int> consumer_count_;
Notification notifier_;
mutex mu_;
bool has_failed_ GUARDED_BY(mu_) = false;
int count_ GUARDED_BY(mu_) = 0;
bool has_failed_ TF_GUARDED_BY(mu_) = false;
int count_ TF_GUARDED_BY(mu_) = 0;
};

// Verifies correct retrying when memory is slightly overcommitted but

@ -233,7 +233,7 @@ class GPUKernelTracker {
// Caller is responsible for ensuring that RecordTerminate() is eventually
// called with the same counter value.
void RecordQueued(uint64 queued_count, int weight)
EXCLUSIVE_LOCKS_REQUIRED(mu_);
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);

// Takes a count value returned by RecordQueued and finds the corresponding
// PendingKernel record in the ring buffer. Marks the kernel as completed and
@ -259,7 +259,7 @@ class GPUKernelTracker {

// Yield current thread until number of pending kernels no longer
// exceeds the cap.
void PauseWhilePendingExceeds(int cap) LOCKS_EXCLUDED(mu_) {
void PauseWhilePendingExceeds(int cap) TF_LOCKS_EXCLUDED(mu_) {
mutex_lock l(mu_);
while (num_pending_ > cap) {
VLOG(1) << "num_pending_=" << num_pending_ << " cap=" << cap;
@ -293,20 +293,20 @@ class GPUKernelTracker {
PendingKernel() : queued_count(0), weight(0), terminated(false) {}
};
mutex mu_;
int32 mem_since_last_ GUARDED_BY(mu_);
int32 ops_since_last_ GUARDED_BY(mu_);
int32 mem_since_last_ TF_GUARDED_BY(mu_);
int32 ops_since_last_ TF_GUARDED_BY(mu_);
// Ring buffer of PendingKernel records.
std::vector<PendingKernel> pending_kernels_ GUARDED_BY(mu_);
std::vector<PendingKernel> pending_kernels_ TF_GUARDED_BY(mu_);
// Next unused slot in pending_kernels_.
int first_available_ GUARDED_BY(mu_) = 0;
int first_available_ TF_GUARDED_BY(mu_) = 0;
// Last completed PendingKernel such that all prior PendingKernels are
// also completed. With out-of-order completion there may be a mixture
// of completed and uncompleted entries between last_completed_ and
// first_available_.
int last_completed_ GUARDED_BY(mu_) = -1;
int last_completed_ TF_GUARDED_BY(mu_) = -1;
// Sum of weights of the outstanding events marking tracked kernels.
int num_pending_ GUARDED_BY(mu_) = 0;
condition_variable pending_decreased_ GUARDED_BY(mu_);
int num_pending_ TF_GUARDED_BY(mu_) = 0;
condition_variable pending_decreased_ TF_GUARDED_BY(mu_);
};

class BaseGPUDeviceFactory : public DeviceFactory {

@ -112,9 +112,9 @@ class EventMgr {
const int64 deferred_bytes_threshold_;
const int32 polling_active_delay_usecs_;
mutex mu_;
condition_variable events_pending_ GUARDED_BY(mu_);
condition_variable events_pending_ TF_GUARDED_BY(mu_);

void FlushAccumulatedTensors() EXCLUSIVE_LOCKS_REQUIRED(mu_);
void FlushAccumulatedTensors() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);

struct InUse {
se::Event* event;
@ -152,20 +152,20 @@ class EventMgr {
// Tensors and/or a BufRec to be deleted only after the Event
// records.
void QueueInUse(se::Stream* stream, InUse in_use)
EXCLUSIVE_LOCKS_REQUIRED(mu_);
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);

void QueueTensors(se::Stream* stream, TensorReferenceVector* tensors)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
QueueInUse(stream, {nullptr, tensors, BufRec(), nullptr});
}

void QueueBuffer(se::Stream* stream, BufRec bufrec)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
QueueInUse(stream, {nullptr, nullptr, bufrec, nullptr});
}

void QueueFunc(se::Stream* stream, std::function<void()> func)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
QueueInUse(stream, {nullptr, nullptr, BufRec(), std::move(func)});
}

@ -175,7 +175,7 @@ class EventMgr {
// to "*to_free". The caller should call FreeMemory(to_free)
// when this returns.
void PollEvents(bool is_dedicated_poller, ToFreeVector* to_free)
EXCLUSIVE_LOCKS_REQUIRED(mu_);
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);

// An internal polling loop that runs at a low frequency to clear
// straggler Events.
@ -186,18 +186,18 @@ class EventMgr {
void StopPollingLoop();

// A stack of unused events
std::vector<se::Event*> free_events_ GUARDED_BY(mu_);
std::vector<se::Event*> free_events_ TF_GUARDED_BY(mu_);

// Buffered list of tensors waiting to have an event queued for deletion
se::Stream* accumulated_stream_ GUARDED_BY(mu_);
TensorReferenceVector* accumulated_tensors_ GUARDED_BY(mu_);
se::Stream* accumulated_stream_ TF_GUARDED_BY(mu_);
TensorReferenceVector* accumulated_tensors_ TF_GUARDED_BY(mu_);
// Sum of the TotalBytes() of the tensors in "accumulated_tensors_"
int64 accumulated_tensor_bytes_ GUARDED_BY(mu_);
int64 accumulated_tensor_bytes_ TF_GUARDED_BY(mu_);

// A FIFO queue of InUse events and associated tensors.
std::deque<InUse> used_events_ GUARDED_BY(mu_);
std::deque<InUse> used_events_ TF_GUARDED_BY(mu_);

bool stop_polling_ GUARDED_BY(mu_);
bool stop_polling_ TF_GUARDED_BY(mu_);
std::unique_ptr<Notification> polling_stopped_;

// The main PollLoop for the event manager runs in this threadpool.
@ -216,7 +216,7 @@ class EventMgrFactory {

// Maintain one EventMgr per physical device (StreamExecutor is
// per-physical-device).
std::map<se::StreamExecutor*, EventMgr*> event_mgr_map_ GUARDED_BY(mu_);
std::map<se::StreamExecutor*, EventMgr*> event_mgr_map_ TF_GUARDED_BY(mu_);
};

} // namespace tensorflow

@ -35,7 +35,7 @@ class TfToPlatformGpuIdMap {
}

Status Insert(TfGpuId tf_gpu_id, PlatformGpuId platform_gpu_id)
LOCKS_EXCLUDED(mu_) {
TF_LOCKS_EXCLUDED(mu_) {
std::pair<IdMapType::iterator, bool> result;
{
mutex_lock lock(mu_);
@ -58,7 +58,7 @@ class TfToPlatformGpuIdMap {
}

bool Find(TfGpuId tf_gpu_id, PlatformGpuId* platform_gpu_id) const
LOCKS_EXCLUDED(mu_) {
TF_LOCKS_EXCLUDED(mu_) {
// TODO(mrry): Consider replacing this with an atomic `is_initialized` bit,
// to avoid writing to a shared cache line in the tf_shared_lock.
tf_shared_lock lock(mu_);
@ -71,14 +71,14 @@ class TfToPlatformGpuIdMap {
private:
TfToPlatformGpuIdMap() = default;

void TestOnlyReset() LOCKS_EXCLUDED(mu_) {
void TestOnlyReset() TF_LOCKS_EXCLUDED(mu_) {
mutex_lock lock(mu_);
id_map_.clear();
}

using IdMapType = std::unordered_map<int32, int32>;
mutable mutex mu_;
IdMapType id_map_ GUARDED_BY(mu_);
IdMapType id_map_ TF_GUARDED_BY(mu_);

friend class ::tensorflow::GpuIdManager;
TF_DISALLOW_COPY_AND_ASSIGN(TfToPlatformGpuIdMap);

@ -56,13 +56,13 @@ class GPUProcessState {

// Query whether any GPU device has been created so far.
// Disable thread safety analysis since a race is benign here.
bool HasGPUDevice() const NO_THREAD_SAFETY_ANALYSIS {
bool HasGPUDevice() const TF_NO_THREAD_SAFETY_ANALYSIS {
return gpu_device_enabled_;
}

// Set the flag to indicate a GPU device has been created.
// Disable thread safety analysis since a race is benign here.
void EnableGPUDevice() NO_THREAD_SAFETY_ANALYSIS {
void EnableGPUDevice() TF_NO_THREAD_SAFETY_ANALYSIS {
gpu_device_enabled_ = true;
}

@ -147,14 +147,15 @@ class GPUProcessState {
SubAllocator* sub_allocator; // owned by allocator
std::unique_ptr<Allocator> recording_allocator;
};
std::vector<AllocatorParts> gpu_allocators_ GUARDED_BY(mu_);
std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_ GUARDED_BY(mu_);
std::vector<AllocatorParts> gpu_allocators_ TF_GUARDED_BY(mu_);
std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_
TF_GUARDED_BY(mu_);

std::vector<AllocatorParts> gpu_host_allocators_ GUARDED_BY(mu_);
std::vector<AllocatorParts> gpu_host_allocators_ TF_GUARDED_BY(mu_);
std::vector<std::vector<SubAllocator::Visitor>> gpu_host_alloc_visitors_
GUARDED_BY(mu_);
TF_GUARDED_BY(mu_);
std::vector<std::vector<SubAllocator::Visitor>> gpu_host_free_visitors_
GUARDED_BY(mu_);
TF_GUARDED_BY(mu_);
};

} // namespace tensorflow

@ -86,7 +86,7 @@ class SimpleRendezvous : public RendezvousInterface {
typedef std::unordered_map<string, Tensor> Table;

mutex mu_;
Table table_ GUARDED_BY(mu_);
Table table_ TF_GUARDED_BY(mu_);
};

} // namespace

@ -312,7 +312,7 @@ void HierarchicalTreeBroadcaster::RunTree() {
}

mutex mu; // also guards status_ while callbacks are pending
int pending_count = 0; // GUARDED_BY(mu)
int pending_count = 0; // TF_GUARDED_BY(mu)
condition_variable all_done;

if (my_rank >= 0 && my_rank != source_rank) {

@ -188,7 +188,7 @@ class FailTestRMA : public CollectiveRemoteAccessLocal {
}

mutex mu_;
int fail_after_ GUARDED_BY(mu_);
int fail_after_ TF_GUARDED_BY(mu_);
};

class HierarchicalTreeBroadcasterTest : public ::testing::Test {
@ -725,9 +725,9 @@ class HierarchicalTreeBroadcasterTest : public ::testing::Test {
std::unique_ptr<tensorflow::DeviceMgr> dev_mgr_;
std::unique_ptr<string> gpu_ring_order_;
mutex mu_;
int bcast_recv_counter_ GUARDED_BY(mu_) = 0;
int bcast_send_counter_ GUARDED_BY(mu_) = 0;
int failure_count_ GUARDED_BY(mu_) = 0;
int bcast_recv_counter_ TF_GUARDED_BY(mu_) = 0;
int bcast_send_counter_ TF_GUARDED_BY(mu_) = 0;
int failure_count_ TF_GUARDED_BY(mu_) = 0;
};

TEST_F(HierarchicalTreeBroadcasterTest, InitializeParams1Task8GPU) {

@ -52,7 +52,7 @@ class LocalDevice : public Device {
// computations.
static mutex global_tp_mu_;
static gtl::InlinedVector<EigenThreadPoolInfo*, 4> global_tp_info_
GUARDED_BY(global_tp_mu_);
TF_GUARDED_BY(global_tp_mu_);

friend class test::Benchmark;

@ -98,7 +98,7 @@ class MklSmallSizeAllocator : public Allocator {

private:
// Increment statistics for the allocator handling small allocations.
inline void IncrementStats(size_t alloc_size) LOCKS_EXCLUDED(mutex_) {
inline void IncrementStats(size_t alloc_size) TF_LOCKS_EXCLUDED(mutex_) {
mutex_lock l(mutex_);
++stats_.num_allocs;
stats_.bytes_in_use += alloc_size;
@ -109,7 +109,7 @@ class MklSmallSizeAllocator : public Allocator {
}

// Decrement statistics for the allocator handling small allocations.
inline void DecrementStats(size_t dealloc_size) LOCKS_EXCLUDED(mutex_) {
inline void DecrementStats(size_t dealloc_size) TF_LOCKS_EXCLUDED(mutex_) {
mutex_lock l(mutex_);
stats_.bytes_in_use -= dealloc_size;
}
@ -123,7 +123,7 @@ class MklSmallSizeAllocator : public Allocator {
string name_;

// Allocator stats for small allocs
AllocatorStats stats_ GUARDED_BY(mutex_);
AllocatorStats stats_ TF_GUARDED_BY(mutex_);
};

/// CPU allocator for MKL that wraps BFC allocator and intercepts
@ -199,19 +199,20 @@ class MklCPUAllocator : public Allocator {

inline string Name() override { return kName; }
inline bool IsSmallSizeAllocation(const void* ptr) const
LOCKS_EXCLUDED(mutex_) {
TF_LOCKS_EXCLUDED(mutex_) {
mutex_lock l(mutex_);
return large_allocations_map_.find(ptr) == large_allocations_map_.end();
}
// AddLargeAllocMap and RemoveLargeAllocMap are always called with a lock held
inline void AddLargeAllocMap(void* ptr, size_t num_bytes)
EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
if (ptr != nullptr) {
std::pair<void*, size_t> map_val(ptr, num_bytes);
large_allocations_map_.insert(map_val);
}
}
inline void RemoveLargeAllocMap(void* ptr) EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
inline void RemoveLargeAllocMap(void* ptr)
TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
auto map_iter = large_allocations_map_.find(ptr);
if (map_iter != large_allocations_map_.end()) {
large_allocations_map_.erase(map_iter);
@ -313,12 +314,12 @@ class MklCPUAllocator : public Allocator {

SubAllocator* sub_allocator_; // not owned by this class
mutable mutex mutex_;
AllocatorStats stats_ GUARDED_BY(mutex_);
AllocatorStats stats_ TF_GUARDED_BY(mutex_);

// Hash map to keep track of "BFC" allocations
// We do not use BFC allocator for small allocations.
std::unordered_map<const void*, size_t> large_allocations_map_
GUARDED_BY(mutex_);
TF_GUARDED_BY(mutex_);

// Size in bytes that defines the upper-bound for "small" allocations.
// Any allocation below this threshold is "small" allocation.

@ -81,21 +81,21 @@ class PoolAllocator : public Allocator {
// consistency with other threads is not important.

// Number of Get() requests satisfied from pool.
int64 get_from_pool_count() const NO_THREAD_SAFETY_ANALYSIS {
int64 get_from_pool_count() const TF_NO_THREAD_SAFETY_ANALYSIS {
return get_from_pool_count_;
}
// Number of Put() requests.
int64 put_count() const NO_THREAD_SAFETY_ANALYSIS { return put_count_; }
int64 put_count() const TF_NO_THREAD_SAFETY_ANALYSIS { return put_count_; }
// Number of Get() requests requiring a fresh allocation.
int64 allocated_count() const NO_THREAD_SAFETY_ANALYSIS {
int64 allocated_count() const TF_NO_THREAD_SAFETY_ANALYSIS {
return allocated_count_;
}
// Number of pool evictions.
int64 evicted_count() const NO_THREAD_SAFETY_ANALYSIS {
int64 evicted_count() const TF_NO_THREAD_SAFETY_ANALYSIS {
return evicted_count_;
}
// Current size limit.
size_t size_limit() const NO_THREAD_SAFETY_ANALYSIS {
size_t size_limit() const TF_NO_THREAD_SAFETY_ANALYSIS {
return pool_size_limit_;
}

@ -108,13 +108,13 @@ class PoolAllocator : public Allocator {
};

// Remove "pr" from the double-linked LRU list.
void RemoveFromList(PtrRecord* pr) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void RemoveFromList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

// Add "pr" to the head of the double-linked LRU list.
void AddToList(PtrRecord* pr) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void AddToList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

// Delete the least recently used record.
void EvictOne() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void EvictOne() TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

const string name_;
const bool has_size_limit_;
@ -123,13 +123,13 @@ class PoolAllocator : public Allocator {
std::unique_ptr<SubAllocator> allocator_;
std::unique_ptr<RoundUpInterface> size_rounder_;
mutex mutex_;
std::multimap<const size_t, PtrRecord*> pool_ GUARDED_BY(mutex_);
PtrRecord* lru_head_ GUARDED_BY(mutex_) = nullptr;
PtrRecord* lru_tail_ GUARDED_BY(mutex_) = nullptr;
int64 get_from_pool_count_ GUARDED_BY(mutex_) = 0;
int64 put_count_ GUARDED_BY(mutex_) = 0;
int64 allocated_count_ GUARDED_BY(mutex_) = 0;
int64 evicted_count_ GUARDED_BY(mutex_) = 0;
std::multimap<const size_t, PtrRecord*> pool_ TF_GUARDED_BY(mutex_);
PtrRecord* lru_head_ TF_GUARDED_BY(mutex_) = nullptr;
PtrRecord* lru_tail_ TF_GUARDED_BY(mutex_) = nullptr;
int64 get_from_pool_count_ TF_GUARDED_BY(mutex_) = 0;
int64 put_count_ TF_GUARDED_BY(mutex_) = 0;
int64 allocated_count_ TF_GUARDED_BY(mutex_) = 0;
int64 evicted_count_ TF_GUARDED_BY(mutex_) = 0;
};

// Do-nothing rounder. Passes through sizes unchanged.

@ -317,7 +317,7 @@ class ProcessFunctionLibraryRuntime {
|
||||
FunctionLibraryRuntime::Handle AddHandleLocked(
|
||||
const string& function_key, const string& device_name,
|
||||
FunctionLibraryRuntime::LocalHandle local_handle)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
// For a given device_name, returns a DeviceContext for copying
|
||||
// tensors to/from the device.
|
||||
@ -423,11 +423,11 @@ class ProcessFunctionLibraryRuntime {
|
||||
mutex mu_;
|
||||
|
||||
const string target_device_;
|
||||
FunctionLibraryRuntime::LocalHandle local_handle_ GUARDED_BY(mu_);
|
||||
FunctionLibraryRuntime::LocalHandle local_handle_ TF_GUARDED_BY(mu_);
|
||||
const string function_key_;
|
||||
bool is_cross_process_ GUARDED_BY(mu_) = false;
|
||||
bool init_started_ GUARDED_BY(mu_) = false;
|
||||
Status init_result_ GUARDED_BY(mu_);
|
||||
bool is_cross_process_ TF_GUARDED_BY(mu_) = false;
|
||||
bool init_started_ TF_GUARDED_BY(mu_) = false;
|
||||
Status init_result_ TF_GUARDED_BY(mu_);
|
||||
Notification init_done_;
|
||||
};
|
||||
|
||||
@ -442,22 +442,22 @@ class ProcessFunctionLibraryRuntime {
|
||||
|
||||
// Holds all the function instantiations. Maps function_keys to handles.
|
||||
std::unordered_map<string, FunctionLibraryRuntime::Handle> table_
|
||||
GUARDED_BY(mu_);
|
||||
TF_GUARDED_BY(mu_);
|
||||
|
||||
// Function data for instantiated remote functions.
|
||||
std::unordered_map<FunctionLibraryRuntime::Handle,
|
||||
std::unique_ptr<FunctionData>>
|
||||
function_data_ GUARDED_BY(mu_);
|
||||
function_data_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// Function data for instantiated multi-device functions.
|
||||
std::unordered_map<FunctionLibraryRuntime::Handle,
|
||||
std::unique_ptr<MultiDeviceFunctionData>>
|
||||
mdevice_data_ GUARDED_BY(mu_);
|
||||
mdevice_data_ TF_GUARDED_BY(mu_);
|
||||
|
||||
std::unique_ptr<
|
||||
std::unordered_map<Device*, std::unique_ptr<FunctionLibraryRuntime>>>
|
||||
flr_map_;
|
||||
int next_handle_ GUARDED_BY(mu_);
|
||||
int next_handle_ TF_GUARDED_BY(mu_);
|
||||
const SessionMetadata* const session_metadata_;
|
||||
};
|
||||
|
||||
|
@ -74,7 +74,7 @@ class TestClusterFLR : public DistributedFunctionLibraryRuntime {
|
||||
|
||||
private:
|
||||
mutex mu_;
|
||||
int next_handle_ GUARDED_BY(mu_) = 0;
|
||||
int next_handle_ TF_GUARDED_BY(mu_) = 0;
|
||||
DeviceMgr* device_mgr_;
|
||||
};
|
||||
|
||||
|
@ -98,14 +98,14 @@ class ProcessState : public ProcessStateInterface {
|
||||
|
||||
// Indexed by numa_node. If we want numa-specific allocators AND a
|
||||
// non-specific allocator, maybe should index by numa_node+1.
|
||||
std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_);
|
||||
std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ GUARDED_BY(mu_);
|
||||
std::vector<SubAllocator::Visitor> cpu_free_visitors_ GUARDED_BY(mu_);
|
||||
std::vector<Allocator*> cpu_allocators_ TF_GUARDED_BY(mu_);
|
||||
std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ TF_GUARDED_BY(mu_);
|
||||
std::vector<SubAllocator::Visitor> cpu_free_visitors_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// Optional RecordingAllocators that wrap the corresponding
|
||||
// Allocators for runtime attribute use analysis.
|
||||
MDMap mem_desc_map_;
|
||||
std::vector<Allocator*> cpu_al_ GUARDED_BY(mu_);
|
||||
std::vector<Allocator*> cpu_al_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
@ -102,8 +102,8 @@ class RingAlg : public CollectiveImplementationInterface {
|
||||
private:
|
||||
mutex pcq_mu_;
|
||||
condition_variable cv_;
|
||||
int waiter_count_ GUARDED_BY(pcq_mu_) = 0;
|
||||
std::deque<RingField*> deque_ GUARDED_BY(pcq_mu_);
|
||||
int waiter_count_ TF_GUARDED_BY(pcq_mu_) = 0;
|
||||
std::deque<RingField*> deque_ TF_GUARDED_BY(pcq_mu_);
|
||||
};
|
||||
|
||||
const CollectiveType type_;
|
||||
@ -117,7 +117,7 @@ class RingAlg : public CollectiveImplementationInterface {
|
||||
Notification group_size_tensor_ready_;
|
||||
std::unique_ptr<CollectiveAdapter> ca_;
|
||||
mutex status_mu_;
|
||||
Status status_ GUARDED_BY(status_mu_);
|
||||
Status status_ TF_GUARDED_BY(status_mu_);
|
||||
std::vector<RingField> rfv_;
|
||||
};
|
||||
|
||||
|
@ -94,7 +94,7 @@ class FailTestRMA : public CollectiveRemoteAccessLocal {
|
||||
}
|
||||
|
||||
mutex mu_;
|
||||
int fail_after_ GUARDED_BY(mu_);
|
||||
int fail_after_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
std::unique_ptr<OpKernel> GetKernel(const NodeDef& node,
|
||||
@ -528,7 +528,7 @@ class RingGathererTest : public ::testing::Test {
|
||||
std::unique_ptr<tensorflow::DeviceMgr> dev_mgr_;
|
||||
std::unique_ptr<string> gpu_ring_order_;
|
||||
mutex mu_;
|
||||
int32 gather_counter_ GUARDED_BY(mu_) = 0;
|
||||
int32 gather_counter_ TF_GUARDED_BY(mu_) = 0;
|
||||
};
|
||||
|
||||
CollectiveParams SetUpCollectiveParams(const int num_devs_per_task,
|
||||
|
@ -94,7 +94,7 @@ class FailTestRMA : public CollectiveRemoteAccessLocal {
|
||||
}
|
||||
|
||||
mutex mu_;
|
||||
int fail_after_ GUARDED_BY(mu_);
|
||||
int fail_after_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
std::unique_ptr<OpKernel> GetKernel(const NodeDef& node,
|
||||
@ -556,7 +556,7 @@ class RingReducerTest : public ::testing::Test {
|
||||
std::unique_ptr<tensorflow::DeviceMgr> dev_mgr_;
|
||||
std::unique_ptr<string> gpu_ring_order_;
|
||||
mutex mu_;
|
||||
int32 reduce_counter_ GUARDED_BY(mu_) = 0;
|
||||
int32 reduce_counter_ TF_GUARDED_BY(mu_) = 0;
|
||||
};
|
||||
|
||||
CollectiveParams SetUpCollectiveParams(const int num_devs_per_task,
|
||||
|
@ -51,7 +51,7 @@ class ScopedAllocator {
|
||||
|
||||
// Automatically deletes when last use expires, or when
|
||||
// ScopedAllocatorContainer decides to delete.
|
||||
~ScopedAllocator() LOCKS_EXCLUDED(mu_);
|
||||
~ScopedAllocator() TF_LOCKS_EXCLUDED(mu_);
|
||||
|
||||
// For debugging: returns true iff p is a pointer that could have
|
||||
// been returned by AllocateRaw.
|
||||
@ -66,8 +66,8 @@ class ScopedAllocator {
|
||||
friend class ScopedAllocatorInstance;
|
||||
// Only ScopedAllocatorInstances can call AllocateRaw and DeallocateRaw on a
|
||||
// ScopedAllocator
|
||||
void* AllocateRaw(int32 field_index, size_t num_bytes) LOCKS_EXCLUDED(mu_);
|
||||
void DeallocateRaw(void* p) LOCKS_EXCLUDED(mu_);
|
||||
void* AllocateRaw(int32 field_index, size_t num_bytes) TF_LOCKS_EXCLUDED(mu_);
|
||||
void DeallocateRaw(void* p) TF_LOCKS_EXCLUDED(mu_);
|
||||
Tensor backing_tensor_;
|
||||
TensorBuffer* tbuf_;
|
||||
int32 id_;
|
||||
@ -75,8 +75,8 @@ class ScopedAllocator {
|
||||
ScopedAllocatorContainer* container_;
|
||||
std::vector<Field> fields_;
|
||||
mutex mu_;
|
||||
int32 expected_call_count_ GUARDED_BY(mu_);
|
||||
int32 live_alloc_count_ GUARDED_BY(mu_);
|
||||
int32 expected_call_count_ TF_GUARDED_BY(mu_);
|
||||
int32 live_alloc_count_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
// An Allocator that will return a pointer into the backing buffer of
|
||||
@ -98,14 +98,14 @@ class ScopedAllocatorInstance : public Allocator {
|
||||
// When a ScopedAllocatorContainer "Drops" a scope_id, it calls DropFromTable
|
||||
// on the underlying ScopedAllocatorInstance. If this instance has already
|
||||
// deallocated the tensor slice, we can safely delete this.
|
||||
void DropFromTable() LOCKS_EXCLUDED(mu_);
|
||||
void DropFromTable() TF_LOCKS_EXCLUDED(mu_);
|
||||
void* AllocateRaw(size_t alignment, size_t num_bytes)
|
||||
LOCKS_EXCLUDED(mu_) override;
|
||||
TF_LOCKS_EXCLUDED(mu_) override;
|
||||
void* AllocateRaw(size_t alignment, size_t num_bytes,
|
||||
const AllocationAttributes& allocator_attr) override {
|
||||
return AllocateRaw(alignment, num_bytes);
|
||||
}
|
||||
void DeallocateRaw(void* p) LOCKS_EXCLUDED(mu_) override;
|
||||
void DeallocateRaw(void* p) TF_LOCKS_EXCLUDED(mu_) override;
|
||||
bool TracksAllocationSizes() const override { return false; }
|
||||
size_t RequestedSize(const void* ptr) const override { return 0; }
|
||||
size_t AllocatedSize(const void* ptr) const override { return 0; }
|
||||
@ -117,9 +117,9 @@ class ScopedAllocatorInstance : public Allocator {
|
||||
mutex mu_;
|
||||
ScopedAllocator* scoped_allocator_;
|
||||
int32 field_index_;
|
||||
bool allocated_ GUARDED_BY(mu_);
|
||||
bool deallocated_ GUARDED_BY(mu_);
|
||||
bool in_table_ GUARDED_BY(mu_);
|
||||
bool allocated_ TF_GUARDED_BY(mu_);
|
||||
bool deallocated_ TF_GUARDED_BY(mu_);
|
||||
bool in_table_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -66,7 +66,7 @@ class ScopedAllocatorContainer : public core::RefCounted {
|
||||
: field_index(ScopedAllocator::kBackingIndex),
|
||||
scoped_allocator(nullptr) {}
|
||||
};
|
||||
std::unordered_map<int32, SAField> allocators_ GUARDED_BY(mu_);
|
||||
std::unordered_map<int32, SAField> allocators_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
// At most one of these exists per device.
|
||||
@ -103,7 +103,7 @@ class ScopedAllocatorMgr {
|
||||
string device_name_;
|
||||
mutex mu_;
|
||||
std::unordered_map<int64, ScopedAllocatorContainer*> per_step_map_
|
||||
GUARDED_BY(mu_);
|
||||
TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -198,14 +198,14 @@ class StepStatsCollector : public StepStatsCollectorInterface {
|
||||
typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeStatsVector;
|
||||
typedef std::unordered_map<uint32, string> ThreadNamesMap;
|
||||
|
||||
void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
void FinalizeInternal() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||
|
||||
mutex mu_;
|
||||
bool finalized_ GUARDED_BY(mu_);
|
||||
std::unordered_map<string, NodeStatsVector> dev_stats_ GUARDED_BY(mu_);
|
||||
std::unordered_map<string, ThreadNamesMap> thread_names_ GUARDED_BY(mu_);
|
||||
StepStats* step_stats_ GUARDED_BY(mu_);
|
||||
uint64 collected_nodes_ GUARDED_BY(mu_) = 0;
|
||||
bool finalized_ TF_GUARDED_BY(mu_);
|
||||
std::unordered_map<string, NodeStatsVector> dev_stats_ TF_GUARDED_BY(mu_);
|
||||
std::unordered_map<string, ThreadNamesMap> thread_names_ TF_GUARDED_BY(mu_);
|
||||
StepStats* step_stats_ TF_GUARDED_BY(mu_);
|
||||
uint64 collected_nodes_ TF_GUARDED_BY(mu_) = 0;
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -64,8 +64,8 @@ class SYCLAllocator : public Allocator {
|
||||
|
||||
private:
|
||||
mutable mutex mu_;
|
||||
Eigen::SyclDevice* sycl_device_ GUARDED_BY(mu_); // owned
|
||||
AllocatorStats stats_ GUARDED_BY(mu_);
|
||||
Eigen::SyclDevice* sycl_device_ TF_GUARDED_BY(mu_); // owned
|
||||
AllocatorStats stats_ TF_GUARDED_BY(mu_);
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
|
||||
};
|
||||
|
@ -114,7 +114,7 @@ class TestCollectiveExecutorMgr : public CollectiveExecutorMgrInterface {
|
||||
void RetireStepId(int64 graph_key, int64 step_id) override {}
|
||||
|
||||
mutex mu_;
|
||||
gtl::FlatMap<int64, CollectiveExecutor*> table_ GUARDED_BY(mu_);
|
||||
gtl::FlatMap<int64, CollectiveExecutor*> table_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -61,7 +61,7 @@ class DebugCallbackRegistry {
|
||||
mutex mu_;
|
||||
|
||||
// Maps debug_url keys to callbacks for routing observed tensors.
|
||||
std::map<string, EventCallback> keyed_callback_ GUARDED_BY(mu_);
|
||||
std::map<string, EventCallback> keyed_callback_ TF_GUARDED_BY(mu_);
|
||||
|
||||
static DebugCallbackRegistry* instance_;
|
||||
};
|
||||
|
@ -192,8 +192,8 @@ TEST_F(GrpcDebugTest, SendMultipleDebugTensorsSynchronizedViaGrpcTest) {
|
||||
|
||||
mutex mu;
|
||||
Notification all_done;
|
||||
int tensor_count GUARDED_BY(mu) = 0;
|
||||
std::vector<Status> statuses GUARDED_BY(mu);
|
||||
int tensor_count TF_GUARDED_BY(mu) = 0;
|
||||
std::vector<Status> statuses TF_GUARDED_BY(mu);
|
||||
|
||||
const std::vector<string> urls({server_data_.url});
|
||||
|
||||
|
@ -60,9 +60,9 @@ class TestEventListenerImpl final : public EventListener::Service {
|
||||
std::atomic_bool stop_requested_;
|
||||
std::atomic_bool stopped_;
|
||||
|
||||
std::vector<DebugNodeKey> debug_node_keys_ GUARDED_BY(states_mu_);
|
||||
std::vector<DebugNodeKey> debug_node_keys_ TF_GUARDED_BY(states_mu_);
|
||||
std::vector<EventReply::DebugOpStateChange::State> new_states_
|
||||
GUARDED_BY(states_mu_);
|
||||
TF_GUARDED_BY(states_mu_);
|
||||
|
||||
std::unordered_set<DebugNodeKey> write_enabled_debug_node_keys_;
|
||||
|
||||
|
@ -357,11 +357,11 @@ TEST_F(DebugIOUtilsTest, PublishTensorConcurrentlyToPartiallyOverlappingPaths) {
|
||||
const string dump_root_base = testing::TmpDir();
|
||||
|
||||
mutex mu;
|
||||
std::vector<string> dump_roots GUARDED_BY(mu);
|
||||
std::vector<string> dump_file_paths GUARDED_BY(mu);
|
||||
std::vector<string> dump_roots TF_GUARDED_BY(mu);
|
||||
std::vector<string> dump_file_paths TF_GUARDED_BY(mu);
|
||||
|
||||
int dump_count GUARDED_BY(mu) = 0;
|
||||
int done_count GUARDED_BY(mu) = 0;
|
||||
int dump_count TF_GUARDED_BY(mu) = 0;
|
||||
int done_count TF_GUARDED_BY(mu) = 0;
|
||||
Notification all_done;
|
||||
|
||||
auto fn = [this, &dump_count, &done_count, &mu, &dump_root_base, &dump_roots,
|
||||
|
@ -103,7 +103,7 @@ class BaseRendezvousMgr : public RendezvousMgrInterface {
|
||||
const WorkerEnv* const worker_env_;
|
||||
|
||||
mutex mu_;
|
||||
Table table_ GUARDED_BY(mu_);
|
||||
Table table_ TF_GUARDED_BY(mu_);
|
||||
|
||||
BaseRemoteRendezvous* FindOrCreate(int64 step_id);
|
||||
|
||||
@ -182,9 +182,9 @@ class BaseRemoteRendezvous : public RemoteRendezvous {
|
||||
mutable mutex init_mu_;
|
||||
|
||||
// Status given by StartAbort() if any.
|
||||
Status status_ GUARDED_BY(init_mu_);
|
||||
Status status_ TF_GUARDED_BY(init_mu_);
|
||||
|
||||
WorkerSession* session_ GUARDED_BY(init_mu_); // Not owned.
|
||||
WorkerSession* session_ TF_GUARDED_BY(init_mu_); // Not owned.
|
||||
|
||||
// Data structures to handle calls when partially initialized.
|
||||
struct DeferredCall {
|
||||
@ -193,16 +193,16 @@ class BaseRemoteRendezvous : public RemoteRendezvous {
|
||||
|
||||
DeferredCall(const ParsedKey& parsed, DoneCallback done);
|
||||
};
|
||||
std::vector<DeferredCall> deferred_calls_ GUARDED_BY(init_mu_);
|
||||
std::vector<DeferredCall> deferred_calls_ TF_GUARDED_BY(init_mu_);
|
||||
|
||||
typedef std::function<void()> InactiveCallback;
|
||||
|
||||
// Active outstanding RecvTensor calls.
|
||||
mutex active_mu_;
|
||||
std::unordered_map<BaseRecvTensorCall*, InactiveCallback> active_
|
||||
GUARDED_BY(active_mu_);
|
||||
TF_GUARDED_BY(active_mu_);
|
||||
|
||||
bool is_initialized_locked() SHARED_LOCKS_REQUIRED(init_mu_) {
|
||||
bool is_initialized_locked() TF_SHARED_LOCKS_REQUIRED(init_mu_) {
|
||||
return session_ != nullptr;
|
||||
}
|
||||
|
||||
|
@ -68,10 +68,10 @@ class CallOptions {
|
||||
|
||||
private:
|
||||
mutex mu_;
|
||||
CancelFunction cancel_func_ GUARDED_BY(mu_);
|
||||
CancelFunction cancel_func_ TF_GUARDED_BY(mu_);
|
||||
|
||||
// RPC operation timeout in milliseconds.
|
||||
int64 timeout_in_ms_ GUARDED_BY(mu_) = 0;
|
||||
int64 timeout_in_ms_ TF_GUARDED_BY(mu_) = 0;
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(CallOptions);
|
||||
};
|
||||
|
@ -82,7 +82,7 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime {
|
||||
recv_keys(recv_keys) {}
|
||||
};
|
||||
|
||||
std::vector<FunctionData> function_data_ GUARDED_BY(mu_);
|
||||
std::vector<FunctionData> function_data_ TF_GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -48,11 +48,11 @@ class CollectiveParamResolverDistributed : public CollectiveParamResolverLocal {
|
||||
protected:
|
||||
// Returns true iff there's an entry for this group_key in the
|
||||
// local group_table_.
|
||||
bool GroupIsCached(int32 group_key) LOCKS_EXCLUDED(group_mu_);
|
||||
bool GroupIsCached(int32 group_key) TF_LOCKS_EXCLUDED(group_mu_);
|
||||
|
||||
// Updates group_table_ with contents of resp.
|
||||
Status UpdateGroupCache(const CompleteGroupResponse& resp)
|
||||
LOCKS_EXCLUDED(group_mu_);
|
||||
TF_LOCKS_EXCLUDED(group_mu_);
|
||||
|
||||
// Finds the GroupRec that corresponds to cp->group_key and also
|
||||
// populates cp->group from that GroupRec.
|
||||
@ -65,13 +65,13 @@ class CollectiveParamResolverDistributed : public CollectiveParamResolverLocal {
|
||||
|
||||
// Returns true iff there's an entry for this instance_key in the
|
||||
// local instance_table_.
|
||||
bool InstanceIsCached(int32 instance_key) LOCKS_EXCLUDED(instance_mu_);
|
||||
bool InstanceIsCached(int32 instance_key) TF_LOCKS_EXCLUDED(instance_mu_);
|
||||
|
||||
// Updates instance_table_ with contents of resp.
|
||||
void UpdateInstanceCache(const GroupRec* gr, CollectiveParams* cp,
|
||||
const CompleteInstanceResponse& resp,
|
||||
const StatusCallback& done)
|
||||
LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);
|
||||
TF_LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);
|
||||
|
||||
// Finish populating *cp. Semantics are like those of
|
||||
// CompleteInstanceLocal but will make a remote call to the group
|
||||
@ -80,7 +80,7 @@ class CollectiveParamResolverDistributed : public CollectiveParamResolverLocal {
|
||||
CollectiveParams* cp,
|
||||
CancellationManager* cancel_mgr,
|
||||
const StatusCallback& done)
|
||||
LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);
|
||||
TF_LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_);
|
||||
|
||||
WorkerCacheInterface* worker_cache_; // Not owned
|
||||
const string group_leader_;
|
||||
|
Some files were not shown because too many files have changed in this diff.
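For readers unfamiliar with these annotations, the following is a minimal sketch of how the renamed TF_-prefixed Clang thread-safety macros are typically applied. It is illustrative only and not part of this change: the EventCounter class is hypothetical, and the include paths and the tensorflow::mutex / mutex_lock types are assumed to follow the usual TensorFlow conventions.

// Illustrative sketch only (not from this commit): a mutex-guarded counter
// annotated with the TF_-prefixed thread-safety macros used throughout the
// diff above. Include paths and tensorflow::mutex / mutex_lock are assumed
// to follow standard TensorFlow conventions.
#include <cstdint>

#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"

class EventCounter {
 public:
  // Acquires mu_ itself, so callers must not already hold it.
  void Increment() TF_LOCKS_EXCLUDED(mu_) {
    tensorflow::mutex_lock l(mu_);
    IncrementLocked();
  }

  // Caller must already hold mu_; the analysis enforces this at compile time.
  void IncrementLocked() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { ++count_; }

  // Deliberately unchecked read, mirroring the TF_NO_THREAD_SAFETY_ANALYSIS
  // accessors on PoolAllocator in the diff above.
  int64_t count() const TF_NO_THREAD_SAFETY_ANALYSIS { return count_; }

 private:
  tensorflow::mutex mu_;
  int64_t count_ TF_GUARDED_BY(mu_) = 0;
};

With annotations like these, building with Clang's -Wthread-safety turns misuse, for example calling IncrementLocked() without holding mu_, into a compile-time warning rather than a runtime race.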