profiler::TraceMe - Improve EventQueue documentation

PiperOrigin-RevId: 247496054
This commit is contained in:
A. Unique TensorFlower 2019-05-09 14:24:22 -07:00 committed by TensorFlower Gardener
parent ecd61b9ee8
commit 94751d7ba3

View File

@ -14,21 +14,6 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/internal/traceme_recorder.h"
// To avoid unnecessary synchronization between threads, each thread has a
// ThreadLocalRecorder that independently records its events.
//
// Events are stored in an EventQueue implemented as a linked-list of blocks,
// with start and end pointers:
// [ events........ | next-]--> [ events......... | next ]
// ^start_block ^start          ^end_block ^end
//
// Record() writes at end, and then advances it, allocating a block if needed.
// Clear() takes ownership of events in the range [start, end).
// The end pointer is atomic so these can be concurrent.
//
// If a thread dies, the ThreadLocalRecorder's destructor hands its data off to
// the orphaned_events list.
#include <cstddef>
#include "tensorflow/core/platform/env.h"
@ -48,17 +33,27 @@ namespace {
// A single-producer single-consumer queue of Events.
//
// Push and Consume are lock free and each may be called from at most one
// thread. Push is only called by the owner thread. Consume is called by the
// owner thread when it shuts down, or by the tracing control thread.
// Thus, Consume might race with Push, so Consume only removes events that were
// in the queue when it was invoked. If Push is called while Consume is active,
// the new event remains in the queue. Thus, the tracing control thread should
// call Consume when tracing stops to remove events created during tracing, but
// also when tracing starts again to clear any remaining events.
// Implemented as a linked-list of blocks containing numbered slots, with start
// and end pointers:
//
// Internally, we have a linked list of blocks containing numbered slots.
// start is the first occupied slot, end is the first unoccupied slot.
// [ events........ | next-]--> [ events......... | next ]
// ^start_block_ ^start_        ^end_block_ ^end_
//
// start_ is the first occupied slot, end_ is the first unoccupied slot.
//
// Push writes at end_, and then advances it, allocating a block if needed.
// PopAll takes ownership of events in the range [start_, end_).
// The end_ pointer is atomic so Push and PopAll can be concurrent.
//
// Push and PopAll are lock free and each may be called from at most one
// thread. Push is only called by the owner thread. PopAll is called by the
// owner thread when it shuts down, or by the tracing control thread.
//
// Thus, PopAll might race with Push, so PopAll only removes events that were
// in the queue when it was invoked. If Push is called while PopAll is active,
// the new event remains in the queue. Thus, the tracing control thread should
// call PopAll when tracing stops to remove events created during tracing, but
// also when tracing starts again to clear any remaining events.
class EventQueue {
public:
EventQueue()
@ -67,13 +62,13 @@ class EventQueue {
end_block_(start_block_),
end_(start_) {}
// REQUIRES: Consume() was called since the last Push().
// REQUIRES: PopAll() was called since the last Push().
// Memory should be deallocated and trace events destroyed on destruction.
// This doesn't require global lock as this discards all the stored trace
// events, and we assume this class is destroyed only after the last
// events, and we assume this instance is destroyed only after the last
// Push() has been called.
~EventQueue() {
DCHECK_EQ(start_, end_.load()) << "EventQueue destroyed without Consume()";
DCHECK(Empty()) << "EventQueue destroyed without PopAll()";
delete end_block_;
}
@ -91,25 +86,32 @@ class EventQueue {
}
// Retrieve and remove all events in the queue at the time of invocation.
// If Push is called while Consume is active, the new event will not be
// If Push is called while PopAll is active, the new event will not be
// removed from the queue.
std::vector<TraceMeRecorder::Event> Consume() {
std::vector<TraceMeRecorder::Event> PopAll() {
// Read index before contents.
size_t end = end_.load(std::memory_order_acquire);
std::vector<TraceMeRecorder::Event> result;
result.reserve(end - start_);
while (start_ != end) {
Shift(&result);
result.emplace_back(Pop());
}
return result;
}
private:
// Shift one event off the front of the queue into *out.
void Shift(std::vector<TraceMeRecorder::Event>* out) {
// Returns true if the queue is empty at the time of invocation.
bool Empty() const {
return (start_ == end_.load(std::memory_order_acquire));
}
// Remove one event off the front of the queue and return it.
// REQUIRES: The queue must not be empty.
TraceMeRecorder::Event Pop() {
DCHECK(!Empty());
// Move the next event into the output.
auto& event = start_block_->events[start_++ - start_block_->start].event;
out->push_back(std::move(event));
TraceMeRecorder::Event out = std::move(event);
event.~Event(); // Events must be individually destroyed.
// If we reach the end of a block, we own it and should delete it.
// The next block is present: end always points to something.
@ -117,10 +119,11 @@ class EventQueue {
auto* next_block = start_block_->next;
delete start_block_;
start_block_ = next_block;
DCHECK_EQ(start_, start_block_->start);
}
return out;
}
// The number of slots in a block. Chosen so that the block fits in 64k.
struct Block {
// The number of slots in a block is chosen so the block fits in 64 KiB.
static constexpr size_t kSize = 1 << 16;
@ -151,6 +154,8 @@ class EventQueue {
} // namespace
// To avoid unnecessary synchronization between threads, each thread has a
// ThreadLocalRecorder that independently records its events.
class TraceMeRecorder::ThreadLocalRecorder {
public:
// The recorder is created the first time TraceMeRecorder::Record() is called
@ -170,7 +175,7 @@ class TraceMeRecorder::ThreadLocalRecorder {
// Clear is called from the control thread when tracing starts/stops, or from
// the owner thread when it shuts down (see destructor).
TraceMeRecorder::ThreadEvents Clear() { return {info_, queue_.Consume()}; }
TraceMeRecorder::ThreadEvents Clear() { return {info_, queue_.PopAll()}; }
private:
TraceMeRecorder::ThreadInfo info_;