[tfdbg2] Add tfdbg_run_id to metadata of data dumps
- A data dump file set generated by tfdbg2 can contain multiple subsets when there are multiple hosts involved in the instrumented TensorFlow job (e.g., TPUs and Parameter Servers). Currently, there is no bit in those subsets of files that indicates they belong to the same instrumented TF job. - This CL addresses this problem by adding a field to the metadata proto used by those files (`tfdbg_run_id`). - The DebugEventsWriter code is revised so that this new field is written to the metadata file of the file set on the writer's construction. - Also in this CL: remove the previous 1-arg `GetDebugEventsWriter(dump_root)` that creates the writer object if it doesn't exist at the specified dump_root. Replace it with `LookUpDebugEventsWriter(dump_root)` that only looks up the writer object and returns a non-OK status if such an object hasn't been created at `dump_root`. This makes the code less error-prone by keeping only the fully-explicit, 3-arg `GetDebugEventsWriter()`. PiperOrigin-RevId: 316537044 Change-Id: Id5be0b771fbf37c0fc796f1514ed858a0e6d38f0
This commit is contained in:
parent
4381963d2d
commit
a8950d70bf
|
@ -410,7 +410,8 @@ class DebugIdentityV2Op : public OpKernel {
|
|||
: OpKernel(context),
|
||||
device_name_(context->device()->name()),
|
||||
output_slot_(-1),
|
||||
tensor_debug_mode_(0) {
|
||||
tensor_debug_mode_(0),
|
||||
tfdbg_run_id_() {
|
||||
std::vector<string> debug_urls;
|
||||
OP_REQUIRES_OK(context, context->GetAttr("debug_urls", &debug_urls));
|
||||
for (const string& debug_url : debug_urls) {
|
||||
|
@ -435,14 +436,17 @@ class DebugIdentityV2Op : public OpKernel {
|
|||
circular_buffer_size_ =
|
||||
tfdbg::DebugEventsWriter::kDefaultCyclicBufferSize;
|
||||
}
|
||||
if (context->HasAttr("tfdbg_run_id")) {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("tfdbg_run_id", &tfdbg_run_id_));
|
||||
}
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
const Tensor& tensor = context->input(0);
|
||||
for (const string& dump_root : dump_roots_) {
|
||||
tfdbg::DebugEventsWriter* debug_events_writer =
|
||||
tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root,
|
||||
circular_buffer_size_);
|
||||
tfdbg::DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root, tfdbg_run_id_, circular_buffer_size_);
|
||||
OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace(
|
||||
tfdbg_context_id_, device_name_, op_name_,
|
||||
output_slot_, tensor_debug_mode_, tensor));
|
||||
|
@ -458,6 +462,7 @@ class DebugIdentityV2Op : public OpKernel {
|
|||
int32 output_slot_;
|
||||
int32 tensor_debug_mode_;
|
||||
int64 circular_buffer_size_;
|
||||
string tfdbg_run_id_;
|
||||
};
|
||||
|
||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
|
|
|
@ -91,6 +91,7 @@ REGISTER_OP("DebugIdentityV2")
|
|||
.Attr("tensor_debug_mode: int = -1")
|
||||
.Attr("debug_urls: list(string) = []")
|
||||
.Attr("circular_buffer_size: int = 1000")
|
||||
.Attr("tfdbg_run_id: string = ''")
|
||||
.SetIsStateful()
|
||||
.SetShapeFn(shape_inference::UnchangedShape);
|
||||
|
||||
|
|
|
@ -115,6 +115,12 @@ message DebugMetadata {
|
|||
// Version of the DebugEvent file format.
|
||||
// Has a format of "debug.Event:<number>", e.g., "debug.Event:1".
|
||||
string file_version = 2;
|
||||
|
||||
// A unique ID for the current run of tfdbg.
|
||||
// A run of tfdbg is defined as a TensorFlow job instrumented by tfdbg.
|
||||
// Multiple hosts in a distributed TensorFlow job instrumented by tfdbg
|
||||
// have the same ID.
|
||||
string tfdbg_run_id = 3;
|
||||
}
|
||||
|
||||
// Content of a source file involved in the execution of the debugged TensorFlow
|
||||
|
|
|
@ -122,23 +122,31 @@ DebugEventsWriter::~DebugEventsWriter() { Close().IgnoreError(); }
|
|||
|
||||
// static
|
||||
DebugEventsWriter* DebugEventsWriter::GetDebugEventsWriter(
|
||||
const string& dump_root, int64 circular_buffer_size) {
|
||||
const string& dump_root, const string& tfdbg_run_id,
|
||||
int64 circular_buffer_size) {
|
||||
mutex_lock l(DebugEventsWriter::factory_mu_);
|
||||
std::unordered_map<string, std::unique_ptr<DebugEventsWriter>>* writer_pool =
|
||||
DebugEventsWriter::GetDebugEventsWriterMap();
|
||||
if (writer_pool->find(dump_root) == writer_pool->end()) {
|
||||
std::unique_ptr<DebugEventsWriter> writer(
|
||||
new DebugEventsWriter(dump_root, circular_buffer_size));
|
||||
new DebugEventsWriter(dump_root, tfdbg_run_id, circular_buffer_size));
|
||||
writer_pool->insert(std::make_pair(dump_root, std::move(writer)));
|
||||
}
|
||||
return (*writer_pool)[dump_root].get();
|
||||
}
|
||||
|
||||
// static
|
||||
DebugEventsWriter* DebugEventsWriter::GetDebugEventsWriter(
|
||||
const string& dump_root) {
|
||||
return DebugEventsWriter::GetDebugEventsWriter(dump_root,
|
||||
kDefaultCyclicBufferSize);
|
||||
Status DebugEventsWriter::LookUpDebugEventsWriter(
|
||||
const string& dump_root, DebugEventsWriter** debug_events_writer) {
|
||||
mutex_lock l(DebugEventsWriter::factory_mu_);
|
||||
std::unordered_map<string, std::unique_ptr<DebugEventsWriter>>* writer_pool =
|
||||
DebugEventsWriter::GetDebugEventsWriterMap();
|
||||
if (writer_pool->find(dump_root) == writer_pool->end()) {
|
||||
return errors::FailedPrecondition(
|
||||
"No DebugEventsWriter has been created at dump root ", dump_root);
|
||||
}
|
||||
*debug_events_writer = (*writer_pool)[dump_root].get();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DebugEventsWriter::Init() {
|
||||
|
@ -179,6 +187,7 @@ Status DebugEventsWriter::Init() {
|
|||
metadata->set_tensorflow_version(TF_VERSION_STRING);
|
||||
metadata->set_file_version(
|
||||
strings::Printf("%s%d", kVersionPrefix, kCurrentFormatVersion));
|
||||
metadata->set_tfdbg_run_id(tfdbg_run_id_);
|
||||
TF_RETURN_IF_ERROR(SerializeAndWriteDebugEvent(&debug_event, METADATA));
|
||||
TF_RETURN_WITH_CONTEXT_IF_ERROR(
|
||||
metadata_writer_->Flush(), "Failed to flush debug event metadata writer");
|
||||
|
@ -457,9 +466,11 @@ DebugEventsWriter::GetDebugEventsWriterMap() {
|
|||
}
|
||||
|
||||
DebugEventsWriter::DebugEventsWriter(const string& dump_root,
|
||||
const string& tfdbg_run_id,
|
||||
int64 circular_buffer_size)
|
||||
: env_(Env::Default()),
|
||||
dump_root_(dump_root),
|
||||
tfdbg_run_id_(tfdbg_run_id),
|
||||
is_initialized_(false),
|
||||
initialization_mu_(),
|
||||
circular_buffer_size_(circular_buffer_size),
|
||||
|
|
|
@ -93,18 +93,27 @@ class DebugEventsWriter {
|
|||
// sets of six. The singleton pattern avoids storing multiple sets in a single
|
||||
// folder, which might cause confusion.
|
||||
//
|
||||
// If an instance of DebugEventsWriter has already been created at a
|
||||
// `dump_root`, calling this method with the same `dump_root` will return
|
||||
// the existing instance.
|
||||
//
|
||||
// Args:
|
||||
// dump_root: Dump root directory. If it doesn't exist, will be created.
|
||||
// tfdbg_run_id: Debugging run ID of the writer.
|
||||
// circular_buffer_size: Circular buffer size (in number of DebugEvent
|
||||
// protos). If set to a value <=0, will abolish the circular-buffer
|
||||
// behavior.
|
||||
// Returns:
|
||||
// A pointer to a DebugEventsWriter object: a per-dump_root singleton.
|
||||
static DebugEventsWriter* GetDebugEventsWriter(const string& dump_root,
|
||||
const string& tfdbg_run_id,
|
||||
int64 circular_buffer_size);
|
||||
// Same as the 2-arg factory method above, but uses the default circular
|
||||
// buffer size.
|
||||
static DebugEventsWriter* GetDebugEventsWriter(const string& dump_root);
|
||||
// Look up existing events writer by dump_root.
|
||||
// If no DebugEventsWriter has been created at the dump_root, a non-OK
|
||||
// Status will be returned. Else an OK status will be returned, with
|
||||
// the pointer to the existing instance provided by reference.
|
||||
static Status LookUpDebugEventsWriter(
|
||||
const string& dump_root, DebugEventsWriter** debug_events_writer);
|
||||
~DebugEventsWriter();
|
||||
|
||||
// Sets the debug event filenames and opens file for writing.
|
||||
|
@ -116,8 +125,8 @@ class DebugEventsWriter {
|
|||
// deleted by another process), this will open a new file.
|
||||
Status Init();
|
||||
|
||||
// The four DebugEvent fields below are written _without_ the circular buffer.
|
||||
// Source file contents are written to the *.source_files file.
|
||||
// The four DebugEvent fields below are written _without_ the circular
|
||||
// buffer. Source file contents are written to the *.source_files file.
|
||||
// Takes ownership of source_file.
|
||||
Status WriteSourceFile(SourceFile* source_file);
|
||||
// Stack frames are written to the *.code_locations file.
|
||||
|
@ -132,9 +141,8 @@ class DebugEventsWriter {
|
|||
|
||||
// The two DebugEvent fields below are written to the circular buffer
|
||||
// and saved to disk only at the FlushExecutionFiles() call.
|
||||
// Execution events (eager execution of an op or a tf.function) are written to
|
||||
// the *.execution file.
|
||||
// Takes ownership of execution.
|
||||
// Execution events (eager execution of an op or a tf.function) are written
|
||||
// to the *.execution file. Takes ownership of execution.
|
||||
Status WriteExecution(Execution* execution);
|
||||
// Graph execution traces (graph-internal tensor values or their summaries)
|
||||
// are written to the *.graph_execution_traces file.
|
||||
|
@ -151,8 +159,9 @@ class DebugEventsWriter {
|
|||
// which the trace concerns multiple tensors, this is an empty string.
|
||||
// output_slot: Output slot index of the op that this trace is concerned
|
||||
// with.
|
||||
// tensor_debug_mode: An integer that represents the tensor-debug mode enum.
|
||||
// tensor_value: The value of the tensor that describes the tensor(s)
|
||||
// tensor_debug_mode: An integer that represents the tensor-debug mode
|
||||
// enum.
|
||||
// tensor_value: The value of the tensor that describes the tensor(s)
|
||||
// that this trace is concerned with. The semantics of this tensor value
|
||||
// depends on the value of `tensor_debug_mode`.
|
||||
Status WriteGraphExecutionTrace(const string& tfdbg_context_id,
|
||||
|
@ -208,7 +217,8 @@ class DebugEventsWriter {
|
|||
// Guards calls to the GetDebugEventsWriter() method.
|
||||
static mutex factory_mu_;
|
||||
|
||||
DebugEventsWriter(const string& dump_root, int64 circular_buffer_size);
|
||||
DebugEventsWriter(const string& dump_root, const string& tfdbg_run_id,
|
||||
int64 circular_buffer_size);
|
||||
|
||||
// Get the path prefix. The same for all files, which differ only in the
|
||||
// suffix.
|
||||
|
@ -227,6 +237,7 @@ class DebugEventsWriter {
|
|||
|
||||
Env* env_;
|
||||
const string dump_root_;
|
||||
const string tfdbg_run_id_;
|
||||
|
||||
string file_prefix_;
|
||||
bool is_initialized_ TF_GUARDED_BY(initialization_mu_);
|
||||
|
|
|
@ -71,6 +71,7 @@ class DebugEventsWriterTest : public ::testing::Test {
|
|||
dump_root_ = io::JoinPath(
|
||||
testing::TmpDir(),
|
||||
strings::Printf("%010lld", static_cast<long long>(env()->NowMicros())));
|
||||
tfdbg_run_id_ = "test_tfdbg_run_id";
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
@ -85,14 +86,15 @@ class DebugEventsWriterTest : public ::testing::Test {
|
|||
}
|
||||
|
||||
string dump_root_;
|
||||
string tfdbg_run_id_;
|
||||
};
|
||||
|
||||
TEST_F(DebugEventsWriterTest, GetDebugEventsWriterSameRootGivesSameObject) {
|
||||
// Test the per-dump_root_ singleton pattern.
|
||||
DebugEventsWriter* writer_1 =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer_2 =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer_1 = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
DebugEventsWriter* writer_2 = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
EXPECT_EQ(writer_1, writer_2);
|
||||
}
|
||||
|
||||
|
@ -103,8 +105,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentGetDebugEventsWriterSameDumpRoot) {
|
|||
std::vector<DebugEventsWriter*> writers;
|
||||
mutex mu;
|
||||
auto fn = [this, &writers, &mu]() {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
{
|
||||
mutex_lock l(mu);
|
||||
writers.push_back(writer);
|
||||
|
@ -131,8 +133,9 @@ TEST_F(DebugEventsWriterTest, ConcurrentGetDebugEventsWriterDiffDumpRoots) {
|
|||
auto fn = [this, &counter, &writers, &mu]() {
|
||||
const string new_dump_root =
|
||||
io::JoinPath(dump_root_, strings::Printf("%ld", counter.fetch_add(1)));
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(new_dump_root);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
new_dump_root, tfdbg_run_id_,
|
||||
DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
{
|
||||
mutex_lock l(mu);
|
||||
writers.push_back(writer);
|
||||
|
@ -151,17 +154,17 @@ TEST_F(DebugEventsWriterTest, ConcurrentGetDebugEventsWriterDiffDumpRoots) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, GetDebugEventsWriterDifferentRoots) {
|
||||
// Test the DebugEventsWriters for different directories are different.
|
||||
DebugEventsWriter* writer_1 =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer_1 = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
const string dump_root_2 = io::JoinPath(dump_root_, "subdirectory");
|
||||
DebugEventsWriter* writer_2 =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_2);
|
||||
DebugEventsWriter* writer_2 = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_2, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
EXPECT_NE(writer_1, writer_2);
|
||||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
TF_ASSERT_OK(writer->Close());
|
||||
|
||||
|
@ -174,6 +177,8 @@ TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
|||
const string file_version = actuals[0].debug_metadata().file_version();
|
||||
EXPECT_EQ(file_version.find(DebugEventsWriter::kVersionPrefix), 0);
|
||||
EXPECT_GT(file_version.size(), strlen(DebugEventsWriter::kVersionPrefix));
|
||||
// Check the tfdbg run ID.
|
||||
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||
|
||||
// Verify that the .source_files file has been created and is empty.
|
||||
ReadDebugEventProtos(writer, DebugEventFileType::SOURCE_FILES, &actuals);
|
||||
|
@ -182,22 +187,22 @@ TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
|||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, CallingCloseWithoutInitIsOkay) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Close());
|
||||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, CallingCloseTwiceIsOkay) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Close());
|
||||
TF_ASSERT_OK(writer->Close());
|
||||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
||||
// Test that concurrent calls to Init() works correctly.
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
|
||||
thread::ThreadPool* thread_pool =
|
||||
new thread::ThreadPool(Env::Default(), "test_pool", 4);
|
||||
|
@ -218,6 +223,7 @@ TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
|||
const string file_version = actuals[0].debug_metadata().file_version();
|
||||
EXPECT_EQ(file_version.find(DebugEventsWriter::kVersionPrefix), 0);
|
||||
EXPECT_GT(file_version.size(), strlen(DebugEventsWriter::kVersionPrefix));
|
||||
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||
|
||||
// Verify that the .source_files file has been created and is empty.
|
||||
ReadDebugEventProtos(writer, DebugEventFileType::SOURCE_FILES, &actuals);
|
||||
|
@ -227,14 +233,15 @@ TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, InitTwiceDoesNotCreateNewMetadataFile) {
|
||||
// Test that Init() is idempotent.
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
std::vector<DebugEvent> actuals;
|
||||
ReadDebugEventProtos(writer, DebugEventFileType::METADATA, &actuals);
|
||||
EXPECT_EQ(actuals.size(), 1);
|
||||
EXPECT_GT(actuals[0].debug_metadata().tensorflow_version().length(), 0);
|
||||
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||
EXPECT_GE(actuals[0].debug_metadata().file_version().size(), 0);
|
||||
|
||||
string metadata_path_1 =
|
||||
|
@ -248,12 +255,13 @@ TEST_F(DebugEventsWriterTest, InitTwiceDoesNotCreateNewMetadataFile) {
|
|||
ReadDebugEventProtos(writer, DebugEventFileType::METADATA, &actuals);
|
||||
EXPECT_EQ(actuals.size(), 1);
|
||||
EXPECT_GT(actuals[0].debug_metadata().tensorflow_version().length(), 0);
|
||||
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||
EXPECT_GE(actuals[0].debug_metadata().file_version().size(), 0);
|
||||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, WriteSourceFile) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
SourceFile* source_file_1 = new SourceFile();
|
||||
|
@ -313,8 +321,8 @@ TEST_F(DebugEventsWriterTest, WriteSourceFile) {
|
|||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, WriteStackFramesFile) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
StackFrameWithId* stack_frame_1 = new StackFrameWithId();
|
||||
|
@ -375,8 +383,8 @@ TEST_F(DebugEventsWriterTest, WriteStackFramesFile) {
|
|||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, WriteGraphOpCreationAndDebuggedGraph) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
GraphOpCreation* graph_op_creation = new GraphOpCreation();
|
||||
|
@ -415,8 +423,8 @@ TEST_F(DebugEventsWriterTest, WriteGraphOpCreationAndDebuggedGraph) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheSameFile) {
|
||||
const size_t kConcurrentWrites = 100;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
thread::ThreadPool* thread_pool =
|
||||
|
@ -456,8 +464,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheSameFile) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, ConcurrentWriteAndFlushCallsToTheSameFile) {
|
||||
const size_t kConcurrentWrites = 100;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
thread::ThreadPool* thread_pool =
|
||||
|
@ -498,8 +506,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteAndFlushCallsToTheSameFile) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheDifferentFiles) {
|
||||
const int32 kConcurrentWrites = 30;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
thread::ThreadPool* thread_pool =
|
||||
|
@ -576,8 +584,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheDifferentFiles) {
|
|||
TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferNoFlush) {
|
||||
// Verify that no writing to disk happens until the flushing method is called.
|
||||
const size_t kCyclicBufferSize = 10;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
// First, try writing and flushing more debug events than the capacity
|
||||
|
@ -601,8 +609,8 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferNoFlush) {
|
|||
TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) {
|
||||
// Verify that writing to disk happens when the flushing method is called.
|
||||
const size_t kCyclicBufferSize = 10;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
// First, try writing and flushing more debug events than the capacity
|
||||
|
@ -673,8 +681,8 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) {
|
|||
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) {
|
||||
// Check no writing to disk happens before the flushing method is called.
|
||||
const size_t kCyclicBufferSize = 10;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
// First, try writing and flushing more debug events than the capacity
|
||||
|
@ -697,8 +705,8 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithoutPreviousInitCall) {
|
||||
const size_t kCyclicBufferSize = -1;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||
// NOTE(cais): `writer->Init()` is not called here before
|
||||
// WriteGraphExecutionTrace() is called. This test checks that this is okay
|
||||
// and the `GraphExecutionTrace` gets written correctly even without `Init()`
|
||||
|
@ -722,8 +730,8 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithoutPreviousInitCall) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) {
|
||||
const size_t kCyclicBufferSize = 10;
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
// First, try writing and flushing more debug events than the capacity
|
||||
|
@ -788,8 +796,8 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) {
|
|||
}
|
||||
|
||||
TEST_F(DebugEventsWriterTest, RegisterDeviceAndGetIdTrace) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
// Register and get some device IDs in a concurrent fashion.
|
||||
|
@ -833,8 +841,8 @@ TEST_F(DebugEventsWriterTest, RegisterDeviceAndGetIdTrace) {
|
|||
|
||||
TEST_F(DebugEventsWriterTest, DisableCyclicBufferBehavior) {
|
||||
const size_t kCyclicBufferSize = 0; // A value <= 0 disables cyclic behavior.
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||
TF_ASSERT_OK(writer->Init());
|
||||
|
||||
const size_t kNumEvents = 20;
|
||||
|
|
|
@ -29,9 +29,10 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
using namespace tensorflow::tfdbg; // NOLINT(build/namespaces)
|
||||
|
||||
m.def("Init",
|
||||
[](const std::string& dump_root, const int64 circular_buffer_size) {
|
||||
[](const std::string& dump_root, const std::string& tfdbg_run_id,
|
||||
const int64 circular_buffer_size) {
|
||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||
dump_root, circular_buffer_size);
|
||||
dump_root, tfdbg_run_id, circular_buffer_size);
|
||||
if (!writer->Init().ok()) {
|
||||
throw py::value_error(tensorflow::strings::Printf(
|
||||
"Failed to initialize debug events writer at: %s",
|
||||
|
@ -41,8 +42,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
m.def("WriteSourceFile",
|
||||
[](const std::string& dump_root, const py::object obj) {
|
||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(
|
||||
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->WriteSerializedNonExecutionDebugEvent(
|
||||
obj.attr("SerializeToString")().cast<std::string>(),
|
||||
tfdbg::DebugEventFileType::SOURCE_FILES);
|
||||
|
@ -50,8 +52,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
m.def("WriteStackFrameWithId",
|
||||
[](const std::string& dump_root, const py::object& obj) {
|
||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(
|
||||
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->WriteSerializedNonExecutionDebugEvent(
|
||||
obj.attr("SerializeToString")().cast<std::string>(),
|
||||
tfdbg::DebugEventFileType::STACK_FRAMES);
|
||||
|
@ -59,8 +62,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
m.def("WriteGraphOpCreation",
|
||||
[](const std::string& dump_root, const py::object& obj) {
|
||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(
|
||||
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->WriteSerializedNonExecutionDebugEvent(
|
||||
obj.attr("SerializeToString")().cast<std::string>(),
|
||||
tfdbg::DebugEventFileType::GRAPHS);
|
||||
|
@ -68,8 +72,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
m.def("WriteDebuggedGraph",
|
||||
[](const std::string& dump_root, const py::object& obj) {
|
||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(
|
||||
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->WriteSerializedNonExecutionDebugEvent(
|
||||
obj.attr("SerializeToString")().cast<std::string>(),
|
||||
tfdbg::DebugEventFileType::GRAPHS);
|
||||
|
@ -77,8 +82,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
m.def("WriteExecution",
|
||||
[](const std::string& dump_root, const py::object& obj) {
|
||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(
|
||||
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->WriteSerializedExecutionDebugEvent(
|
||||
obj.attr("SerializeToString")().cast<std::string>(),
|
||||
tfdbg::DebugEventFileType::EXECUTION);
|
||||
|
@ -86,31 +92,32 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
|||
m.def("WriteGraphExecutionTrace",
|
||||
[](const std::string& dump_root, const py::object& obj) {
|
||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(
|
||||
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->WriteSerializedExecutionDebugEvent(
|
||||
obj.attr("SerializeToString")().cast<std::string>(),
|
||||
tfdbg::DebugEventFileType::GRAPH_EXECUTION_TRACES);
|
||||
});
|
||||
m.def("RegisterDeviceAndGetId",
|
||||
[](const std::string& dump_root, const std::string& device_name) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
m.def("RegisterDeviceAndGetId", [](const std::string& dump_root,
|
||||
const std::string& device_name) {
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
return writer->RegisterDeviceAndGetId(device_name);
|
||||
});
|
||||
m.def("FlushNonExecutionFiles", [](const std::string& dump_root) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->FlushNonExecutionFiles();
|
||||
});
|
||||
m.def("FlushExecutionFiles", [](const std::string& dump_root) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->FlushExecutionFiles();
|
||||
});
|
||||
m.def("Close", [](const std::string& dump_root) {
|
||||
DebugEventsWriter* writer =
|
||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
||||
DebugEventsWriter* writer = nullptr;
|
||||
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||
writer->Close();
|
||||
});
|
||||
};
|
||||
|
|
|
@ -863,6 +863,7 @@ class DebugDataReader(object):
|
|||
debug_event = next(metadata_iter).debug_event
|
||||
self._starting_wall_time = debug_event.wall_time
|
||||
self._tensorflow_version = debug_event.debug_metadata.tensorflow_version
|
||||
self._tfdbg_run_id = debug_event.debug_metadata.tfdbg_run_id
|
||||
|
||||
def _load_source_files(self):
|
||||
"""Incrementally read the .source_files DebugEvent file."""
|
||||
|
@ -1071,6 +1072,10 @@ class DebugDataReader(object):
|
|||
"""
|
||||
return self._tensorflow_version
|
||||
|
||||
def tfdbg_run_id(self):
  """Return the tfdbg run ID of the debugged TensorFlow program.

  Returns:
    The value held in `self._tfdbg_run_id`.
  """
  run_id = self._tfdbg_run_id
  return run_id
|
||||
|
||||
def outermost_graphs(self):
|
||||
"""Get the outermost graphs read so far."""
|
||||
return [graph for graph in self._graph_by_id.values()
|
||||
|
|
|
@ -32,6 +32,7 @@ class DebugEventsWriter(object):
|
|||
|
||||
def __init__(self,
|
||||
dump_root,
|
||||
tfdbg_run_id,
|
||||
circular_buffer_size=DEFAULT_CIRCULAR_BUFFER_SIZE):
|
||||
"""Construct a DebugEventsWriter object.
|
||||
|
||||
|
@ -43,6 +44,7 @@ class DebugEventsWriter(object):
|
|||
Args:
|
||||
dump_root: The root directory for dumping debug data. If `dump_root` does
|
||||
not exist as a directory, it will be created.
|
||||
tfdbg_run_id: Debugger Run ID.
|
||||
circular_buffer_size: Size of the circular buffer for each of the two
|
||||
execution-related debug events files: with the following suffixes: -
|
||||
.execution - .graph_execution_traces If <= 0, the circular-buffer
|
||||
|
@ -51,7 +53,9 @@ class DebugEventsWriter(object):
|
|||
if not dump_root:
|
||||
raise ValueError("Empty or None dump root")
|
||||
self._dump_root = dump_root
|
||||
_pywrap_debug_events_writer.Init(self._dump_root, circular_buffer_size)
|
||||
self._tfdbg_run_id = tfdbg_run_id
|
||||
_pywrap_debug_events_writer.Init(self._dump_root, self._tfdbg_run_id,
|
||||
circular_buffer_size)
|
||||
|
||||
def WriteSourceFile(self, source_file):
|
||||
"""Write a SourceFile proto with the writer.
|
||||
|
|
|
@ -41,7 +41,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
|
||||
def testMultiThreadedConstructorCallWorks(self):
|
||||
def init_writer():
|
||||
debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
debug_events_writer.DebugEventsWriter(self.dump_root, self.tfdbg_run_id)
|
||||
|
||||
num_threads = 4
|
||||
threads = []
|
||||
|
@ -66,7 +66,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
self._readAndCheckMetadataFile()
|
||||
|
||||
def testWriteSourceFilesAndStackFrames(self):
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id)
|
||||
num_protos = 10
|
||||
for i in range(num_protos):
|
||||
source_file = debug_event_pb2.SourceFile()
|
||||
|
@ -99,7 +100,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
self.assertEqual(actuals[i].file_line_col.file_index, i * 10)
|
||||
|
||||
def testWriteGraphOpCreationAndDebuggedGraphs(self):
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id)
|
||||
num_op_creations = 10
|
||||
for i in range(num_op_creations):
|
||||
graph_op_creation = debug_event_pb2.GraphOpCreation()
|
||||
|
@ -122,7 +124,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
"deadbeaf")
|
||||
|
||||
def testConcurrentWritesToNonExecutionFilesWorks(self):
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id)
|
||||
|
||||
source_file_state = {"counter": 0, "lock": threading.Lock()}
|
||||
|
||||
|
@ -201,15 +204,18 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
|
||||
def testWriteAndReadMetadata(self):
  """Constructs and closes a writer, then checks the metadata read back."""
  t0 = time.time()
  # `tfdbg_run_id` is a required constructor argument, so the writer is
  # built exactly once, with both the dump root and the run ID.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id)
  writer.Close()
  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    self.assertIsInstance(reader.starting_wall_time(), float)
    self.assertGreaterEqual(reader.starting_wall_time(), t0)
    self.assertEqual(reader.tensorflow_version(), versions.__version__)
    # The reader must expose a non-empty run ID.
    self.assertTrue(reader.tfdbg_run_id())
|
||||
|
||||
def testWriteExecutionEventsWithCircularBuffer(self):
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id)
|
||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||
for i in range(num_execution_events):
|
||||
execution = debug_event_pb2.Execution()
|
||||
|
@ -232,7 +238,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
|
||||
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
|
||||
# A circular buffer size of 0 abolishes the circular buffer behavior.
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id, 0)
|
||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||
for i in range(num_execution_events):
|
||||
execution = debug_event_pb2.Execution()
|
||||
|
@ -248,7 +255,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
self.assertEqual(execution.op_type, "OpType%d" % i)
|
||||
|
||||
def testWriteGraphExecutionTraceEventsWithCircularBuffer(self):
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id)
|
||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||
for i in range(num_execution_events):
|
||||
trace = debug_event_pb2.GraphExecutionTrace()
|
||||
|
@ -272,7 +280,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
|
||||
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
|
||||
# A circular buffer size of 0 abolishes the circular buffer behavior.
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id, 0)
|
||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||
for i in range(num_execution_events):
|
||||
trace = debug_event_pb2.GraphExecutionTrace()
|
||||
|
@ -290,6 +299,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testConcurrentWritesToExecutionFiles(self):
|
||||
circular_buffer_size = 5
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id,
|
||||
circular_buffer_size)
|
||||
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
||||
graph_name="graph1")
|
||||
|
@ -345,7 +355,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
self.assertLen(op_names, len(set(op_names)))
|
||||
|
||||
def testConcurrentSourceFileRandomReads(self):
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id)
|
||||
|
||||
for i in range(100):
|
||||
source_file = debug_event_pb2.SourceFile(
|
||||
|
@ -376,6 +387,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testConcurrentExecutionUpdateAndRandomRead(self):
|
||||
circular_buffer_size = -1
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id,
|
||||
circular_buffer_size)
|
||||
|
||||
writer_state = {"counter": 0, "done": False}
|
||||
|
@ -410,6 +422,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testConcurrentExecutionRandomReads(self):
|
||||
circular_buffer_size = -1
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id,
|
||||
circular_buffer_size)
|
||||
|
||||
for i in range(100):
|
||||
|
@ -445,6 +458,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testConcurrentGraphExecutionTraceUpdateAndRandomRead(self):
|
||||
circular_buffer_size = -1
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id,
|
||||
circular_buffer_size)
|
||||
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
||||
graph_name="graph1")
|
||||
|
@ -487,6 +501,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testConcurrentGraphExecutionTraceRandomReads(self):
|
||||
circular_buffer_size = -1
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
self.tfdbg_run_id,
|
||||
circular_buffer_size)
|
||||
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
||||
graph_name="graph1")
|
||||
|
@ -534,7 +549,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testRangeReadingExecutions(self, begin, end, expected_begin,
|
||||
expected_end):
|
||||
writer = debug_events_writer.DebugEventsWriter(
|
||||
self.dump_root, circular_buffer_size=-1)
|
||||
self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
|
||||
for i in range(5):
|
||||
execution = debug_event_pb2.Execution(op_type="OpType%d" % i)
|
||||
writer.WriteExecution(execution)
|
||||
|
@ -559,7 +574,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
|||
def testRangeReadingGraphExecutionTraces(self, begin, end, expected_begin,
|
||||
expected_end):
|
||||
writer = debug_events_writer.DebugEventsWriter(
|
||||
self.dump_root, circular_buffer_size=-1)
|
||||
self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
|
||||
debugged_graph = debug_event_pb2.DebuggedGraph(
|
||||
graph_id="graph1", graph_name="graph1")
|
||||
writer.WriteDebuggedGraph(debugged_graph)
|
||||
|
|
|
@ -52,8 +52,9 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
|
|||
super(DebugIdentityV2OpTest, self).setUp()
|
||||
# Testing using a small circular-buffer size.
|
||||
self.circular_buffer_size = 4
|
||||
self.tfdbg_run_id = "test_tfdbg_run"
|
||||
self.writer = debug_events_writer.DebugEventsWriter(
|
||||
self.dump_root, self.circular_buffer_size)
|
||||
self.dump_root, self.tfdbg_run_id, self.circular_buffer_size)
|
||||
|
||||
def tearDown(self):
|
||||
self.writer.Close()
|
||||
|
@ -192,7 +193,8 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
|
|||
def testTwoDumpRoots(self):
|
||||
another_dump_root = os.path.join(self.dump_root, "another")
|
||||
another_debug_url = "file://%s" % another_dump_root
|
||||
another_writer = debug_events_writer.DebugEventsWriter(another_dump_root)
|
||||
another_writer = debug_events_writer.DebugEventsWriter(
|
||||
another_dump_root, "test_tfdbg_run")
|
||||
|
||||
@def_function.function
|
||||
def write_debug_trace(x):
|
||||
|
@ -264,6 +266,7 @@ class DebugIdentityV2OpUninitializedWriterTest(
|
|||
self.assertAllClose(
|
||||
write_debug_trace(np.array([i]).astype(np.float32)), [i**2.0])
|
||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||
"test_tfdbg_run",
|
||||
circular_buffer_size)
|
||||
writer.FlushNonExecutionFiles()
|
||||
writer.FlushExecutionFiles()
|
||||
|
|
|
@ -69,6 +69,10 @@ def _debug_identity_v2_grad(op, dy):
|
|||
return dy
|
||||
|
||||
|
||||
def _get_tfdbg_run_id():
|
||||
return str(uuid.uuid4())[:8]
|
||||
|
||||
|
||||
def _get_id():
|
||||
"""Get a short unique ID."""
|
||||
return str(uuid.uuid4())
|
||||
|
@ -88,6 +92,7 @@ class _DumpingCallback(object):
|
|||
op_regex,
|
||||
tensor_dtypes):
|
||||
self._dump_root = dump_root
|
||||
self._tfdbg_run_id = _get_tfdbg_run_id()
|
||||
self._tensor_debug_mode = tensor_debug_mode
|
||||
self._circular_buffer_size = circular_buffer_size
|
||||
self._op_regex = op_regex
|
||||
|
@ -148,6 +153,10 @@ class _DumpingCallback(object):
|
|||
self._dump_root = dump_root
|
||||
self._writer = None
|
||||
|
||||
@property
def tfdbg_run_id(self):
  """The tfdbg run ID held by this callback object (`self._tfdbg_run_id`)."""
  run_id = self._tfdbg_run_id
  return run_id
|
||||
|
||||
@property
|
||||
def tensor_debug_mode(self):
|
||||
return self._tensor_debug_mode
|
||||
|
@ -161,6 +170,7 @@ class _DumpingCallback(object):
|
|||
if not self._writer:
|
||||
self._writer = debug_events_writer.DebugEventsWriter(
|
||||
self._dump_root,
|
||||
self._tfdbg_run_id,
|
||||
circular_buffer_size=self._circular_buffer_size)
|
||||
return self._writer
|
||||
|
||||
|
@ -365,6 +375,8 @@ class _DumpingCallback(object):
|
|||
if tf_compat.forward_compatible(2020, 6, 24):
|
||||
debug_identity_op_kwargs[
|
||||
"circular_buffer_size"] = self._circular_buffer_size
|
||||
if tf_compat.forward_compatible(2020, 7, 1):
|
||||
debug_identity_op_kwargs["tfdbg_run_id"] = self._tfdbg_run_id
|
||||
if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
|
||||
if (not self._should_dump_tensor(op_type, tensor.dtype) or
|
||||
not tensor.dtype.is_numpy_compatible):
|
||||
|
@ -873,7 +885,8 @@ def disable_dump_debug_info():
|
|||
"""
|
||||
if hasattr(_state, "dumping_callback"):
|
||||
dump_root = _state.dumping_callback.dump_root
|
||||
debug_events_writer.DebugEventsWriter(dump_root).Close()
|
||||
tfdbg_run_id = _state.dumping_callback.tfdbg_run_id
|
||||
debug_events_writer.DebugEventsWriter(dump_root, tfdbg_run_id).Close()
|
||||
op_callbacks.remove_op_callback(_state.dumping_callback.callback)
|
||||
function_lib.remove_function_callback(
|
||||
_state.dumping_callback.function_callback)
|
||||
|
|
|
@ -21,6 +21,7 @@ from __future__ import print_function
|
|||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
|
||||
from tensorflow.python.debug.lib import check_numerics_callback
|
||||
from tensorflow.python.debug.lib import debug_events_reader
|
||||
|
@ -35,6 +36,7 @@ class DumpingCallbackTestBase(test_util.TensorFlowTestCase):
|
|||
def setUp(self):
  """Create a fresh temporary dump root and a random tfdbg run ID."""
  super(DumpingCallbackTestBase, self).setUp()
  # Temporary directory used as the dump root by the tests.
  temp_dir = tempfile.mkdtemp()
  self.dump_root = temp_dir
  # String form of a random UUID4, used as the run ID by the tests.
  random_uuid = uuid.uuid4()
  self.tfdbg_run_id = str(random_uuid)
|
||||
|
||||
def tearDown(self):
|
||||
if os.path.isdir(self.dump_root):
|
||||
|
|
|
@ -982,7 +982,7 @@ tf_module {
|
|||
}
|
||||
member_method {
|
||||
name: "DebugIdentityV2"
|
||||
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
|
||||
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'tfdbg_run_id\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "DebugNanCount"
|
||||
|
|
|
@ -982,7 +982,7 @@ tf_module {
|
|||
}
|
||||
member_method {
|
||||
name: "DebugIdentityV2"
|
||||
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
|
||||
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'tfdbg_run_id\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "DebugNanCount"
|
||||
|
|
Loading…
Reference in New Issue