[tfdbg2] Add tfdbg_run_id to metadata of data dumps
- A data dump file set generated by tfdbg2 can contain multiple subsets when there are multiple hosts involved in the instrumented TensorFlow job (e.g., TPUs and Parameter Servers). Currently, nothing in those subsets of files indicates that they belong to the same instrumented TF job. - This CL addresses this problem by adding a field (`tfdbg_run_id`) to the metadata proto used by those files. - The DebugEventsWriter code is revised so that this new field is supplied at the writer's construction and written to the metadata file of the file set when the writer is initialized. - Also in this CL: remove the previous 1-arg `GetDebugEventsWriter(dump_root)`, which creates the writer object if it doesn't exist at the specified `dump_root`. Replace it with `LookUpDebugEventsWriter(dump_root)`, which only looks up the writer object (returned through an output argument) and returns a non-OK status if no such object has been created at `dump_root`. This makes the code less error-prone by keeping only the fully explicit, 3-arg `GetDebugEventsWriter()`. PiperOrigin-RevId: 316537044 Change-Id: Id5be0b771fbf37c0fc796f1514ed858a0e6d38f0
This commit is contained in:
parent
4381963d2d
commit
a8950d70bf
|
@ -410,7 +410,8 @@ class DebugIdentityV2Op : public OpKernel {
|
||||||
: OpKernel(context),
|
: OpKernel(context),
|
||||||
device_name_(context->device()->name()),
|
device_name_(context->device()->name()),
|
||||||
output_slot_(-1),
|
output_slot_(-1),
|
||||||
tensor_debug_mode_(0) {
|
tensor_debug_mode_(0),
|
||||||
|
tfdbg_run_id_() {
|
||||||
std::vector<string> debug_urls;
|
std::vector<string> debug_urls;
|
||||||
OP_REQUIRES_OK(context, context->GetAttr("debug_urls", &debug_urls));
|
OP_REQUIRES_OK(context, context->GetAttr("debug_urls", &debug_urls));
|
||||||
for (const string& debug_url : debug_urls) {
|
for (const string& debug_url : debug_urls) {
|
||||||
|
@ -435,14 +436,17 @@ class DebugIdentityV2Op : public OpKernel {
|
||||||
circular_buffer_size_ =
|
circular_buffer_size_ =
|
||||||
tfdbg::DebugEventsWriter::kDefaultCyclicBufferSize;
|
tfdbg::DebugEventsWriter::kDefaultCyclicBufferSize;
|
||||||
}
|
}
|
||||||
|
if (context->HasAttr("tfdbg_run_id")) {
|
||||||
|
OP_REQUIRES_OK(context, context->GetAttr("tfdbg_run_id", &tfdbg_run_id_));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compute(OpKernelContext* context) override {
|
void Compute(OpKernelContext* context) override {
|
||||||
const Tensor& tensor = context->input(0);
|
const Tensor& tensor = context->input(0);
|
||||||
for (const string& dump_root : dump_roots_) {
|
for (const string& dump_root : dump_roots_) {
|
||||||
tfdbg::DebugEventsWriter* debug_events_writer =
|
tfdbg::DebugEventsWriter* debug_events_writer =
|
||||||
tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root,
|
tfdbg::DebugEventsWriter::GetDebugEventsWriter(
|
||||||
circular_buffer_size_);
|
dump_root, tfdbg_run_id_, circular_buffer_size_);
|
||||||
OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace(
|
OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace(
|
||||||
tfdbg_context_id_, device_name_, op_name_,
|
tfdbg_context_id_, device_name_, op_name_,
|
||||||
output_slot_, tensor_debug_mode_, tensor));
|
output_slot_, tensor_debug_mode_, tensor));
|
||||||
|
@ -458,6 +462,7 @@ class DebugIdentityV2Op : public OpKernel {
|
||||||
int32 output_slot_;
|
int32 output_slot_;
|
||||||
int32 tensor_debug_mode_;
|
int32 tensor_debug_mode_;
|
||||||
int64 circular_buffer_size_;
|
int64 circular_buffer_size_;
|
||||||
|
string tfdbg_run_id_;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||||
|
|
|
@ -91,6 +91,7 @@ REGISTER_OP("DebugIdentityV2")
|
||||||
.Attr("tensor_debug_mode: int = -1")
|
.Attr("tensor_debug_mode: int = -1")
|
||||||
.Attr("debug_urls: list(string) = []")
|
.Attr("debug_urls: list(string) = []")
|
||||||
.Attr("circular_buffer_size: int = 1000")
|
.Attr("circular_buffer_size: int = 1000")
|
||||||
|
.Attr("tfdbg_run_id: string = ''")
|
||||||
.SetIsStateful()
|
.SetIsStateful()
|
||||||
.SetShapeFn(shape_inference::UnchangedShape);
|
.SetShapeFn(shape_inference::UnchangedShape);
|
||||||
|
|
||||||
|
|
|
@ -115,6 +115,12 @@ message DebugMetadata {
|
||||||
// Version of the DebugEvent file format.
|
// Version of the DebugEvent file format.
|
||||||
// Has a format of "debug.Event:<number>", e.g., "debug.Event:1".
|
// Has a format of "debug.Event:<number>", e.g., "debug.Event:1".
|
||||||
string file_version = 2;
|
string file_version = 2;
|
||||||
|
|
||||||
|
// A unique ID for the current run of tfdbg.
|
||||||
|
// A run of tfdbg is defined as a TensorFlow job instrumented by tfdbg.
|
||||||
|
// Multiple hosts in a distributed TensorFlow job instrumented by tfdbg
|
||||||
|
// have the same ID.
|
||||||
|
string tfdbg_run_id = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Content of a source file involved in the execution of the debugged TensorFlow
|
// Content of a source file involved in the execution of the debugged TensorFlow
|
||||||
|
|
|
@ -122,23 +122,31 @@ DebugEventsWriter::~DebugEventsWriter() { Close().IgnoreError(); }
|
||||||
|
|
||||||
// static
|
// static
|
||||||
DebugEventsWriter* DebugEventsWriter::GetDebugEventsWriter(
|
DebugEventsWriter* DebugEventsWriter::GetDebugEventsWriter(
|
||||||
const string& dump_root, int64 circular_buffer_size) {
|
const string& dump_root, const string& tfdbg_run_id,
|
||||||
|
int64 circular_buffer_size) {
|
||||||
mutex_lock l(DebugEventsWriter::factory_mu_);
|
mutex_lock l(DebugEventsWriter::factory_mu_);
|
||||||
std::unordered_map<string, std::unique_ptr<DebugEventsWriter>>* writer_pool =
|
std::unordered_map<string, std::unique_ptr<DebugEventsWriter>>* writer_pool =
|
||||||
DebugEventsWriter::GetDebugEventsWriterMap();
|
DebugEventsWriter::GetDebugEventsWriterMap();
|
||||||
if (writer_pool->find(dump_root) == writer_pool->end()) {
|
if (writer_pool->find(dump_root) == writer_pool->end()) {
|
||||||
std::unique_ptr<DebugEventsWriter> writer(
|
std::unique_ptr<DebugEventsWriter> writer(
|
||||||
new DebugEventsWriter(dump_root, circular_buffer_size));
|
new DebugEventsWriter(dump_root, tfdbg_run_id, circular_buffer_size));
|
||||||
writer_pool->insert(std::make_pair(dump_root, std::move(writer)));
|
writer_pool->insert(std::make_pair(dump_root, std::move(writer)));
|
||||||
}
|
}
|
||||||
return (*writer_pool)[dump_root].get();
|
return (*writer_pool)[dump_root].get();
|
||||||
}
|
}
|
||||||
|
|
||||||
// static
|
// static
|
||||||
DebugEventsWriter* DebugEventsWriter::GetDebugEventsWriter(
|
Status DebugEventsWriter::LookUpDebugEventsWriter(
|
||||||
const string& dump_root) {
|
const string& dump_root, DebugEventsWriter** debug_events_writer) {
|
||||||
return DebugEventsWriter::GetDebugEventsWriter(dump_root,
|
mutex_lock l(DebugEventsWriter::factory_mu_);
|
||||||
kDefaultCyclicBufferSize);
|
std::unordered_map<string, std::unique_ptr<DebugEventsWriter>>* writer_pool =
|
||||||
|
DebugEventsWriter::GetDebugEventsWriterMap();
|
||||||
|
if (writer_pool->find(dump_root) == writer_pool->end()) {
|
||||||
|
return errors::FailedPrecondition(
|
||||||
|
"No DebugEventsWriter has been created at dump root ", dump_root);
|
||||||
|
}
|
||||||
|
*debug_events_writer = (*writer_pool)[dump_root].get();
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DebugEventsWriter::Init() {
|
Status DebugEventsWriter::Init() {
|
||||||
|
@ -179,6 +187,7 @@ Status DebugEventsWriter::Init() {
|
||||||
metadata->set_tensorflow_version(TF_VERSION_STRING);
|
metadata->set_tensorflow_version(TF_VERSION_STRING);
|
||||||
metadata->set_file_version(
|
metadata->set_file_version(
|
||||||
strings::Printf("%s%d", kVersionPrefix, kCurrentFormatVersion));
|
strings::Printf("%s%d", kVersionPrefix, kCurrentFormatVersion));
|
||||||
|
metadata->set_tfdbg_run_id(tfdbg_run_id_);
|
||||||
TF_RETURN_IF_ERROR(SerializeAndWriteDebugEvent(&debug_event, METADATA));
|
TF_RETURN_IF_ERROR(SerializeAndWriteDebugEvent(&debug_event, METADATA));
|
||||||
TF_RETURN_WITH_CONTEXT_IF_ERROR(
|
TF_RETURN_WITH_CONTEXT_IF_ERROR(
|
||||||
metadata_writer_->Flush(), "Failed to flush debug event metadata writer");
|
metadata_writer_->Flush(), "Failed to flush debug event metadata writer");
|
||||||
|
@ -457,9 +466,11 @@ DebugEventsWriter::GetDebugEventsWriterMap() {
|
||||||
}
|
}
|
||||||
|
|
||||||
DebugEventsWriter::DebugEventsWriter(const string& dump_root,
|
DebugEventsWriter::DebugEventsWriter(const string& dump_root,
|
||||||
|
const string& tfdbg_run_id,
|
||||||
int64 circular_buffer_size)
|
int64 circular_buffer_size)
|
||||||
: env_(Env::Default()),
|
: env_(Env::Default()),
|
||||||
dump_root_(dump_root),
|
dump_root_(dump_root),
|
||||||
|
tfdbg_run_id_(tfdbg_run_id),
|
||||||
is_initialized_(false),
|
is_initialized_(false),
|
||||||
initialization_mu_(),
|
initialization_mu_(),
|
||||||
circular_buffer_size_(circular_buffer_size),
|
circular_buffer_size_(circular_buffer_size),
|
||||||
|
|
|
@ -93,18 +93,27 @@ class DebugEventsWriter {
|
||||||
// sets of six. The singleton pattern avoids storing multiple sets in a single
|
// sets of six. The singleton pattern avoids storing multiple sets in a single
|
||||||
// folder, which might cause confusion.
|
// folder, which might cause confusion.
|
||||||
//
|
//
|
||||||
|
// If an instance of DebugEventsWriter has already been created at a
|
||||||
|
// `dump_root`, calling this method with the same `dump_root` will return
|
||||||
|
// the existing instance.
|
||||||
|
//
|
||||||
// Args:
|
// Args:
|
||||||
// dump_root: Dump root directory. If it doesn't exist, will be created.
|
// dump_root: Dump root directory. If it doesn't exist, will be created.
|
||||||
|
// tfdbg_run_id: Debugging run ID of the writer.
|
||||||
// circular_buffer_size: Circular buffer size (in number of DebugEvent
|
// circular_buffer_size: Circular buffer size (in number of DebugEvent
|
||||||
// protos). If set to a value <=0, will abolish the circular-buffer
|
// protos). If set to a value <=0, will abolish the circular-buffer
|
||||||
// behavior.
|
// behavior.
|
||||||
// Returns:
|
// Returns:
|
||||||
// A pointer to a DebugEventsWriter object: a per-dump_root singleton.
|
// A pointer to a DebugEventsWriter object: a per-dump_root singleton.
|
||||||
static DebugEventsWriter* GetDebugEventsWriter(const string& dump_root,
|
static DebugEventsWriter* GetDebugEventsWriter(const string& dump_root,
|
||||||
|
const string& tfdbg_run_id,
|
||||||
int64 circular_buffer_size);
|
int64 circular_buffer_size);
|
||||||
// Same as the 2-arg factory method above, but uses the default circular
|
// Look up existing events writer by dump_root.
|
||||||
// buffer size.
|
// If no DebugEventsWriter has been created at the dump_root, a non-OK
|
||||||
static DebugEventsWriter* GetDebugEventsWriter(const string& dump_root);
|
// Status will be returned. Else an OK status will be returned, with
|
||||||
|
// the pointer to the existing instance provided by reference.
|
||||||
|
static Status LookUpDebugEventsWriter(
|
||||||
|
const string& dump_root, DebugEventsWriter** debug_events_writer);
|
||||||
~DebugEventsWriter();
|
~DebugEventsWriter();
|
||||||
|
|
||||||
// Sets the debug event filenames and opens file for writing.
|
// Sets the debug event filenames and opens file for writing.
|
||||||
|
@ -116,8 +125,8 @@ class DebugEventsWriter {
|
||||||
// deleted by another process), this will open a new file.
|
// deleted by another process), this will open a new file.
|
||||||
Status Init();
|
Status Init();
|
||||||
|
|
||||||
// The four DebugEvent fields below are written _without_ the circular buffer.
|
// The four DebugEvent fields below are written _without_ the circular
|
||||||
// Source file contents are written to the *.source_files file.
|
// buffer.
// Source file contents are written to the *.source_files file.
|
||||||
// Takes ownership of source_file.
|
// Takes ownership of source_file.
|
||||||
Status WriteSourceFile(SourceFile* source_file);
|
Status WriteSourceFile(SourceFile* source_file);
|
||||||
// Stack frames are written to the *.code_locations file.
|
// Stack frames are written to the *.code_locations file.
|
||||||
|
@ -132,9 +141,8 @@ class DebugEventsWriter {
|
||||||
|
|
||||||
// The two DebugEvent fields below are written to the circular buffer
|
// The two DebugEvent fields below are written to the circular buffer
|
||||||
// and saved to disk only at the FlushExecutionFiles() call.
|
// and saved to disk only at the FlushExecutionFiles() call.
|
||||||
// Execution events (eager execution of an op or a tf.function) are written to
|
// Execution events (eager execution of an op or a tf.function) are written
|
||||||
// the *.execution file.
|
// to the *.execution file. Takes ownership of execution.
|
||||||
// Takes ownership of execution.
|
|
||||||
Status WriteExecution(Execution* execution);
|
Status WriteExecution(Execution* execution);
|
||||||
// Graph execution traces (graph-internal tensor values or their summaries)
|
// Graph execution traces (graph-internal tensor values or their summaries)
|
||||||
// are written to the *.graph_execution_traces file.
|
// are written to the *.graph_execution_traces file.
|
||||||
|
@ -151,8 +159,9 @@ class DebugEventsWriter {
|
||||||
// which the trace concerns multiple tensors, this is an empty string.
|
// which the trace concerns multiple tensors, this is an empty string.
|
||||||
// output_slot: Output slot index of the op that this trace is concerned
|
// output_slot: Output slot index of the op that this trace is concerned
|
||||||
// with.
|
// with.
|
||||||
// tensor_debug_mode: An integer that represents the tensor-debug mode enum.
|
// tensor_debug_mode: An integer that represents the tensor-debug mode
|
||||||
// tensor_value: The value of the tensor that describes the tensor(s)
|
// enum.
|
||||||
|
// tensor_value: The value of the tensor that describes the tensor(s)
|
||||||
// that this trace is concerned with. The semantics of this tensor value
|
// that this trace is concerned with. The semantics of this tensor value
|
||||||
// depends on the value of `tensor_debug_mode`.
|
// depends on the value of `tensor_debug_mode`.
|
||||||
Status WriteGraphExecutionTrace(const string& tfdbg_context_id,
|
Status WriteGraphExecutionTrace(const string& tfdbg_context_id,
|
||||||
|
@ -208,7 +217,8 @@ class DebugEventsWriter {
|
||||||
// Guards calls to the GetDebugEventsWriter() method.
|
// Guards calls to the GetDebugEventsWriter() method.
|
||||||
static mutex factory_mu_;
|
static mutex factory_mu_;
|
||||||
|
|
||||||
DebugEventsWriter(const string& dump_root, int64 circular_buffer_size);
|
DebugEventsWriter(const string& dump_root, const string& tfdbg_run_id,
|
||||||
|
int64 circular_buffer_size);
|
||||||
|
|
||||||
// Get the path prefix. The same for all files, which differ only in the
|
// Get the path prefix. The same for all files, which differ only in the
|
||||||
// suffix.
|
// suffix.
|
||||||
|
@ -227,6 +237,7 @@ class DebugEventsWriter {
|
||||||
|
|
||||||
Env* env_;
|
Env* env_;
|
||||||
const string dump_root_;
|
const string dump_root_;
|
||||||
|
const string tfdbg_run_id_;
|
||||||
|
|
||||||
string file_prefix_;
|
string file_prefix_;
|
||||||
bool is_initialized_ TF_GUARDED_BY(initialization_mu_);
|
bool is_initialized_ TF_GUARDED_BY(initialization_mu_);
|
||||||
|
|
|
@ -71,6 +71,7 @@ class DebugEventsWriterTest : public ::testing::Test {
|
||||||
dump_root_ = io::JoinPath(
|
dump_root_ = io::JoinPath(
|
||||||
testing::TmpDir(),
|
testing::TmpDir(),
|
||||||
strings::Printf("%010lld", static_cast<long long>(env()->NowMicros())));
|
strings::Printf("%010lld", static_cast<long long>(env()->NowMicros())));
|
||||||
|
tfdbg_run_id_ = "test_tfdbg_run_id";
|
||||||
}
|
}
|
||||||
|
|
||||||
void TearDown() override {
|
void TearDown() override {
|
||||||
|
@ -85,14 +86,15 @@ class DebugEventsWriterTest : public ::testing::Test {
|
||||||
}
|
}
|
||||||
|
|
||||||
string dump_root_;
|
string dump_root_;
|
||||||
|
string tfdbg_run_id_;
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, GetDebugEventsWriterSameRootGivesSameObject) {
|
TEST_F(DebugEventsWriterTest, GetDebugEventsWriterSameRootGivesSameObject) {
|
||||||
// Test the per-dump_root_ singleton pattern.
|
// Test the per-dump_root_ singleton pattern.
|
||||||
DebugEventsWriter* writer_1 =
|
DebugEventsWriter* writer_1 = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
DebugEventsWriter* writer_2 =
|
DebugEventsWriter* writer_2 = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
EXPECT_EQ(writer_1, writer_2);
|
EXPECT_EQ(writer_1, writer_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,8 +105,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentGetDebugEventsWriterSameDumpRoot) {
|
||||||
std::vector<DebugEventsWriter*> writers;
|
std::vector<DebugEventsWriter*> writers;
|
||||||
mutex mu;
|
mutex mu;
|
||||||
auto fn = [this, &writers, &mu]() {
|
auto fn = [this, &writers, &mu]() {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
{
|
{
|
||||||
mutex_lock l(mu);
|
mutex_lock l(mu);
|
||||||
writers.push_back(writer);
|
writers.push_back(writer);
|
||||||
|
@ -131,8 +133,9 @@ TEST_F(DebugEventsWriterTest, ConcurrentGetDebugEventsWriterDiffDumpRoots) {
|
||||||
auto fn = [this, &counter, &writers, &mu]() {
|
auto fn = [this, &counter, &writers, &mu]() {
|
||||||
const string new_dump_root =
|
const string new_dump_root =
|
||||||
io::JoinPath(dump_root_, strings::Printf("%ld", counter.fetch_add(1)));
|
io::JoinPath(dump_root_, strings::Printf("%ld", counter.fetch_add(1)));
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(new_dump_root);
|
new_dump_root, tfdbg_run_id_,
|
||||||
|
DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
{
|
{
|
||||||
mutex_lock l(mu);
|
mutex_lock l(mu);
|
||||||
writers.push_back(writer);
|
writers.push_back(writer);
|
||||||
|
@ -151,17 +154,17 @@ TEST_F(DebugEventsWriterTest, ConcurrentGetDebugEventsWriterDiffDumpRoots) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, GetDebugEventsWriterDifferentRoots) {
|
TEST_F(DebugEventsWriterTest, GetDebugEventsWriterDifferentRoots) {
|
||||||
// Test the DebugEventsWriters for different directories are different.
|
// Test the DebugEventsWriters for different directories are different.
|
||||||
DebugEventsWriter* writer_1 =
|
DebugEventsWriter* writer_1 = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
const string dump_root_2 = io::JoinPath(dump_root_, "subdirectory");
|
const string dump_root_2 = io::JoinPath(dump_root_, "subdirectory");
|
||||||
DebugEventsWriter* writer_2 =
|
DebugEventsWriter* writer_2 = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_2);
|
dump_root_2, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
EXPECT_NE(writer_1, writer_2);
|
EXPECT_NE(writer_1, writer_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
TF_ASSERT_OK(writer->Close());
|
TF_ASSERT_OK(writer->Close());
|
||||||
|
|
||||||
|
@ -174,6 +177,8 @@ TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
||||||
const string file_version = actuals[0].debug_metadata().file_version();
|
const string file_version = actuals[0].debug_metadata().file_version();
|
||||||
EXPECT_EQ(file_version.find(DebugEventsWriter::kVersionPrefix), 0);
|
EXPECT_EQ(file_version.find(DebugEventsWriter::kVersionPrefix), 0);
|
||||||
EXPECT_GT(file_version.size(), strlen(DebugEventsWriter::kVersionPrefix));
|
EXPECT_GT(file_version.size(), strlen(DebugEventsWriter::kVersionPrefix));
|
||||||
|
// Check the tfdbg run ID.
|
||||||
|
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||||
|
|
||||||
// Verify that the .source_files file has been created and is empty.
|
// Verify that the .source_files file has been created and is empty.
|
||||||
ReadDebugEventProtos(writer, DebugEventFileType::SOURCE_FILES, &actuals);
|
ReadDebugEventProtos(writer, DebugEventFileType::SOURCE_FILES, &actuals);
|
||||||
|
@ -182,22 +187,22 @@ TEST_F(DebugEventsWriterTest, GetAndInitDebugEventsWriter) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, CallingCloseWithoutInitIsOkay) {
|
TEST_F(DebugEventsWriterTest, CallingCloseWithoutInitIsOkay) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Close());
|
TF_ASSERT_OK(writer->Close());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, CallingCloseTwiceIsOkay) {
|
TEST_F(DebugEventsWriterTest, CallingCloseTwiceIsOkay) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Close());
|
TF_ASSERT_OK(writer->Close());
|
||||||
TF_ASSERT_OK(writer->Close());
|
TF_ASSERT_OK(writer->Close());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
||||||
// Test that concurrent calls to Init() works correctly.
|
// Test that concurrent calls to Init() works correctly.
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
|
|
||||||
thread::ThreadPool* thread_pool =
|
thread::ThreadPool* thread_pool =
|
||||||
new thread::ThreadPool(Env::Default(), "test_pool", 4);
|
new thread::ThreadPool(Env::Default(), "test_pool", 4);
|
||||||
|
@ -218,6 +223,7 @@ TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
||||||
const string file_version = actuals[0].debug_metadata().file_version();
|
const string file_version = actuals[0].debug_metadata().file_version();
|
||||||
EXPECT_EQ(file_version.find(DebugEventsWriter::kVersionPrefix), 0);
|
EXPECT_EQ(file_version.find(DebugEventsWriter::kVersionPrefix), 0);
|
||||||
EXPECT_GT(file_version.size(), strlen(DebugEventsWriter::kVersionPrefix));
|
EXPECT_GT(file_version.size(), strlen(DebugEventsWriter::kVersionPrefix));
|
||||||
|
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||||
|
|
||||||
// Verify that the .source_files file has been created and is empty.
|
// Verify that the .source_files file has been created and is empty.
|
||||||
ReadDebugEventProtos(writer, DebugEventFileType::SOURCE_FILES, &actuals);
|
ReadDebugEventProtos(writer, DebugEventFileType::SOURCE_FILES, &actuals);
|
||||||
|
@ -227,14 +233,15 @@ TEST_F(DebugEventsWriterTest, ConcurrentInitCalls) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, InitTwiceDoesNotCreateNewMetadataFile) {
|
TEST_F(DebugEventsWriterTest, InitTwiceDoesNotCreateNewMetadataFile) {
|
||||||
// Test that Init() is idempotent.
|
// Test that Init() is idempotent.
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
std::vector<DebugEvent> actuals;
|
std::vector<DebugEvent> actuals;
|
||||||
ReadDebugEventProtos(writer, DebugEventFileType::METADATA, &actuals);
|
ReadDebugEventProtos(writer, DebugEventFileType::METADATA, &actuals);
|
||||||
EXPECT_EQ(actuals.size(), 1);
|
EXPECT_EQ(actuals.size(), 1);
|
||||||
EXPECT_GT(actuals[0].debug_metadata().tensorflow_version().length(), 0);
|
EXPECT_GT(actuals[0].debug_metadata().tensorflow_version().length(), 0);
|
||||||
|
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||||
EXPECT_GE(actuals[0].debug_metadata().file_version().size(), 0);
|
EXPECT_GE(actuals[0].debug_metadata().file_version().size(), 0);
|
||||||
|
|
||||||
string metadata_path_1 =
|
string metadata_path_1 =
|
||||||
|
@ -248,12 +255,13 @@ TEST_F(DebugEventsWriterTest, InitTwiceDoesNotCreateNewMetadataFile) {
|
||||||
ReadDebugEventProtos(writer, DebugEventFileType::METADATA, &actuals);
|
ReadDebugEventProtos(writer, DebugEventFileType::METADATA, &actuals);
|
||||||
EXPECT_EQ(actuals.size(), 1);
|
EXPECT_EQ(actuals.size(), 1);
|
||||||
EXPECT_GT(actuals[0].debug_metadata().tensorflow_version().length(), 0);
|
EXPECT_GT(actuals[0].debug_metadata().tensorflow_version().length(), 0);
|
||||||
|
EXPECT_EQ(actuals[0].debug_metadata().tfdbg_run_id(), "test_tfdbg_run_id");
|
||||||
EXPECT_GE(actuals[0].debug_metadata().file_version().size(), 0);
|
EXPECT_GE(actuals[0].debug_metadata().file_version().size(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, WriteSourceFile) {
|
TEST_F(DebugEventsWriterTest, WriteSourceFile) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
SourceFile* source_file_1 = new SourceFile();
|
SourceFile* source_file_1 = new SourceFile();
|
||||||
|
@ -313,8 +321,8 @@ TEST_F(DebugEventsWriterTest, WriteSourceFile) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, WriteStackFramesFile) {
|
TEST_F(DebugEventsWriterTest, WriteStackFramesFile) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
StackFrameWithId* stack_frame_1 = new StackFrameWithId();
|
StackFrameWithId* stack_frame_1 = new StackFrameWithId();
|
||||||
|
@ -375,8 +383,8 @@ TEST_F(DebugEventsWriterTest, WriteStackFramesFile) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, WriteGraphOpCreationAndDebuggedGraph) {
|
TEST_F(DebugEventsWriterTest, WriteGraphOpCreationAndDebuggedGraph) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
GraphOpCreation* graph_op_creation = new GraphOpCreation();
|
GraphOpCreation* graph_op_creation = new GraphOpCreation();
|
||||||
|
@ -415,8 +423,8 @@ TEST_F(DebugEventsWriterTest, WriteGraphOpCreationAndDebuggedGraph) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheSameFile) {
|
TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheSameFile) {
|
||||||
const size_t kConcurrentWrites = 100;
|
const size_t kConcurrentWrites = 100;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
thread::ThreadPool* thread_pool =
|
thread::ThreadPool* thread_pool =
|
||||||
|
@ -456,8 +464,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheSameFile) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, ConcurrentWriteAndFlushCallsToTheSameFile) {
|
TEST_F(DebugEventsWriterTest, ConcurrentWriteAndFlushCallsToTheSameFile) {
|
||||||
const size_t kConcurrentWrites = 100;
|
const size_t kConcurrentWrites = 100;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
thread::ThreadPool* thread_pool =
|
thread::ThreadPool* thread_pool =
|
||||||
|
@ -498,8 +506,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteAndFlushCallsToTheSameFile) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheDifferentFiles) {
|
TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheDifferentFiles) {
|
||||||
const int32 kConcurrentWrites = 30;
|
const int32 kConcurrentWrites = 30;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
thread::ThreadPool* thread_pool =
|
thread::ThreadPool* thread_pool =
|
||||||
|
@ -576,8 +584,8 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheDifferentFiles) {
|
||||||
TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferNoFlush) {
|
TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferNoFlush) {
|
||||||
// Verify that no writing to disk happens until the flushing method is called.
|
// Verify that no writing to disk happens until the flushing method is called.
|
||||||
const size_t kCyclicBufferSize = 10;
|
const size_t kCyclicBufferSize = 10;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
// First, try writing and flushing more debug events than the capacity
|
// First, try writing and flushing more debug events than the capacity
|
||||||
|
@ -601,8 +609,8 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferNoFlush) {
|
||||||
TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) {
|
TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) {
|
||||||
// Verify that writing to disk happens when the flushing method is called.
|
// Verify that writing to disk happens when the flushing method is called.
|
||||||
const size_t kCyclicBufferSize = 10;
|
const size_t kCyclicBufferSize = 10;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
// First, try writing and flushing more debug events than the capacity
|
// First, try writing and flushing more debug events than the capacity
|
||||||
|
@ -673,8 +681,8 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) {
|
||||||
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) {
|
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) {
|
||||||
// Check no writing to disk happens before the flushing method is called.
|
// Check no writing to disk happens before the flushing method is called.
|
||||||
const size_t kCyclicBufferSize = 10;
|
const size_t kCyclicBufferSize = 10;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
// First, try writing and flushing more debug events than the capacity
|
// First, try writing and flushing more debug events than the capacity
|
||||||
|
@ -697,8 +705,8 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithoutPreviousInitCall) {
|
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithoutPreviousInitCall) {
|
||||||
const size_t kCyclicBufferSize = -1;
|
const size_t kCyclicBufferSize = -1;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||||
// NOTE(cais): `writer->Init()` is not called here before
|
// NOTE(cais): `writer->Init()` is not called here before
|
||||||
// WriteGraphExecutionTrace() is called. This test checks that this is okay
|
// WriteGraphExecutionTrace() is called. This test checks that this is okay
|
||||||
// and the `GraphExecutionTrace` gets written correctly even without `Init()`
|
// and the `GraphExecutionTrace` gets written correctly even without `Init()`
|
||||||
|
@ -722,8 +730,8 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithoutPreviousInitCall) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) {
|
TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) {
|
||||||
const size_t kCyclicBufferSize = 10;
|
const size_t kCyclicBufferSize = 10;
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
// First, try writing and flushing more debug events than the capacity
|
// First, try writing and flushing more debug events than the capacity
|
||||||
|
@ -788,8 +796,8 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, RegisterDeviceAndGetIdTrace) {
|
TEST_F(DebugEventsWriterTest, RegisterDeviceAndGetIdTrace) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_);
|
dump_root_, tfdbg_run_id_, DebugEventsWriter::kDefaultCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
// Register and get some device IDs in a concurrent fashion.
|
// Register and get some device IDs in a concurrent fashion.
|
||||||
|
@ -833,8 +841,8 @@ TEST_F(DebugEventsWriterTest, RegisterDeviceAndGetIdTrace) {
|
||||||
|
|
||||||
TEST_F(DebugEventsWriterTest, DisableCyclicBufferBehavior) {
|
TEST_F(DebugEventsWriterTest, DisableCyclicBufferBehavior) {
|
||||||
const size_t kCyclicBufferSize = 0; // A value <= 0 disables cyclic behavior.
|
const size_t kCyclicBufferSize = 0; // A value <= 0 disables cyclic behavior.
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize);
|
dump_root_, tfdbg_run_id_, kCyclicBufferSize);
|
||||||
TF_ASSERT_OK(writer->Init());
|
TF_ASSERT_OK(writer->Init());
|
||||||
|
|
||||||
const size_t kNumEvents = 20;
|
const size_t kNumEvents = 20;
|
||||||
|
|
|
@ -29,9 +29,10 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
using namespace tensorflow::tfdbg; // NOLINT(build/namespaces)
|
using namespace tensorflow::tfdbg; // NOLINT(build/namespaces)
|
||||||
|
|
||||||
m.def("Init",
|
m.def("Init",
|
||||||
[](const std::string& dump_root, const int64 circular_buffer_size) {
|
[](const std::string& dump_root, const std::string& tfdbg_run_id,
|
||||||
|
const int64 circular_buffer_size) {
|
||||||
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
DebugEventsWriter* writer = DebugEventsWriter::GetDebugEventsWriter(
|
||||||
dump_root, circular_buffer_size);
|
dump_root, tfdbg_run_id, circular_buffer_size);
|
||||||
if (!writer->Init().ok()) {
|
if (!writer->Init().ok()) {
|
||||||
throw py::value_error(tensorflow::strings::Printf(
|
throw py::value_error(tensorflow::strings::Printf(
|
||||||
"Failed to initialize debug events writer at: %s",
|
"Failed to initialize debug events writer at: %s",
|
||||||
|
@ -41,8 +42,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
m.def("WriteSourceFile",
|
m.def("WriteSourceFile",
|
||||||
[](const std::string& dump_root, const py::object obj) {
|
[](const std::string& dump_root, const py::object obj) {
|
||||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(
|
||||||
|
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->WriteSerializedNonExecutionDebugEvent(
|
writer->WriteSerializedNonExecutionDebugEvent(
|
||||||
obj.attr("SerializeToString")().cast<std::string>(),
|
obj.attr("SerializeToString")().cast<std::string>(),
|
||||||
tfdbg::DebugEventFileType::SOURCE_FILES);
|
tfdbg::DebugEventFileType::SOURCE_FILES);
|
||||||
|
@ -50,8 +52,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
m.def("WriteStackFrameWithId",
|
m.def("WriteStackFrameWithId",
|
||||||
[](const std::string& dump_root, const py::object& obj) {
|
[](const std::string& dump_root, const py::object& obj) {
|
||||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(
|
||||||
|
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->WriteSerializedNonExecutionDebugEvent(
|
writer->WriteSerializedNonExecutionDebugEvent(
|
||||||
obj.attr("SerializeToString")().cast<std::string>(),
|
obj.attr("SerializeToString")().cast<std::string>(),
|
||||||
tfdbg::DebugEventFileType::STACK_FRAMES);
|
tfdbg::DebugEventFileType::STACK_FRAMES);
|
||||||
|
@ -59,8 +62,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
m.def("WriteGraphOpCreation",
|
m.def("WriteGraphOpCreation",
|
||||||
[](const std::string& dump_root, const py::object& obj) {
|
[](const std::string& dump_root, const py::object& obj) {
|
||||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(
|
||||||
|
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->WriteSerializedNonExecutionDebugEvent(
|
writer->WriteSerializedNonExecutionDebugEvent(
|
||||||
obj.attr("SerializeToString")().cast<std::string>(),
|
obj.attr("SerializeToString")().cast<std::string>(),
|
||||||
tfdbg::DebugEventFileType::GRAPHS);
|
tfdbg::DebugEventFileType::GRAPHS);
|
||||||
|
@ -68,8 +72,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
m.def("WriteDebuggedGraph",
|
m.def("WriteDebuggedGraph",
|
||||||
[](const std::string& dump_root, const py::object& obj) {
|
[](const std::string& dump_root, const py::object& obj) {
|
||||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(
|
||||||
|
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->WriteSerializedNonExecutionDebugEvent(
|
writer->WriteSerializedNonExecutionDebugEvent(
|
||||||
obj.attr("SerializeToString")().cast<std::string>(),
|
obj.attr("SerializeToString")().cast<std::string>(),
|
||||||
tfdbg::DebugEventFileType::GRAPHS);
|
tfdbg::DebugEventFileType::GRAPHS);
|
||||||
|
@ -77,8 +82,9 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
m.def("WriteExecution",
|
m.def("WriteExecution",
|
||||||
[](const std::string& dump_root, const py::object& obj) {
|
[](const std::string& dump_root, const py::object& obj) {
|
||||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(
|
||||||
|
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->WriteSerializedExecutionDebugEvent(
|
writer->WriteSerializedExecutionDebugEvent(
|
||||||
obj.attr("SerializeToString")().cast<std::string>(),
|
obj.attr("SerializeToString")().cast<std::string>(),
|
||||||
tfdbg::DebugEventFileType::EXECUTION);
|
tfdbg::DebugEventFileType::EXECUTION);
|
||||||
|
@ -86,31 +92,32 @@ PYBIND11_MODULE(_pywrap_debug_events_writer, m) {
|
||||||
m.def("WriteGraphExecutionTrace",
|
m.def("WriteGraphExecutionTrace",
|
||||||
[](const std::string& dump_root, const py::object& obj) {
|
[](const std::string& dump_root, const py::object& obj) {
|
||||||
CheckProtoType(obj, "tensorflow.DebugEvent");
|
CheckProtoType(obj, "tensorflow.DebugEvent");
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(
|
||||||
|
DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->WriteSerializedExecutionDebugEvent(
|
writer->WriteSerializedExecutionDebugEvent(
|
||||||
obj.attr("SerializeToString")().cast<std::string>(),
|
obj.attr("SerializeToString")().cast<std::string>(),
|
||||||
tfdbg::DebugEventFileType::GRAPH_EXECUTION_TRACES);
|
tfdbg::DebugEventFileType::GRAPH_EXECUTION_TRACES);
|
||||||
});
|
});
|
||||||
m.def("RegisterDeviceAndGetId",
|
m.def("RegisterDeviceAndGetId", [](const std::string& dump_root,
|
||||||
[](const std::string& dump_root, const std::string& device_name) {
|
const std::string& device_name) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
return writer->RegisterDeviceAndGetId(device_name);
|
return writer->RegisterDeviceAndGetId(device_name);
|
||||||
});
|
});
|
||||||
m.def("FlushNonExecutionFiles", [](const std::string& dump_root) {
|
m.def("FlushNonExecutionFiles", [](const std::string& dump_root) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->FlushNonExecutionFiles();
|
writer->FlushNonExecutionFiles();
|
||||||
});
|
});
|
||||||
m.def("FlushExecutionFiles", [](const std::string& dump_root) {
|
m.def("FlushExecutionFiles", [](const std::string& dump_root) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->FlushExecutionFiles();
|
writer->FlushExecutionFiles();
|
||||||
});
|
});
|
||||||
m.def("Close", [](const std::string& dump_root) {
|
m.def("Close", [](const std::string& dump_root) {
|
||||||
DebugEventsWriter* writer =
|
DebugEventsWriter* writer = nullptr;
|
||||||
DebugEventsWriter::GetDebugEventsWriter(dump_root);
|
TF_CHECK_OK(DebugEventsWriter::LookUpDebugEventsWriter(dump_root, &writer));
|
||||||
writer->Close();
|
writer->Close();
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
|
@ -863,6 +863,7 @@ class DebugDataReader(object):
|
||||||
debug_event = next(metadata_iter).debug_event
|
debug_event = next(metadata_iter).debug_event
|
||||||
self._starting_wall_time = debug_event.wall_time
|
self._starting_wall_time = debug_event.wall_time
|
||||||
self._tensorflow_version = debug_event.debug_metadata.tensorflow_version
|
self._tensorflow_version = debug_event.debug_metadata.tensorflow_version
|
||||||
|
self._tfdbg_run_id = debug_event.debug_metadata.tfdbg_run_id
|
||||||
|
|
||||||
def _load_source_files(self):
|
def _load_source_files(self):
|
||||||
"""Incrementally read the .source_files DebugEvent file."""
|
"""Incrementally read the .source_files DebugEvent file."""
|
||||||
|
@ -1071,6 +1072,10 @@ class DebugDataReader(object):
|
||||||
"""
|
"""
|
||||||
return self._tensorflow_version
|
return self._tensorflow_version
|
||||||
|
|
||||||
|
def tfdbg_run_id(self):
|
||||||
|
"""Get the debugger run ID of the debugged TensorFlow program."""
|
||||||
|
return self._tfdbg_run_id
|
||||||
|
|
||||||
def outermost_graphs(self):
|
def outermost_graphs(self):
|
||||||
"""Get the number of outer most graphs read so far."""
|
"""Get the number of outer most graphs read so far."""
|
||||||
return [graph for graph in self._graph_by_id.values()
|
return [graph for graph in self._graph_by_id.values()
|
||||||
|
|
|
@ -32,6 +32,7 @@ class DebugEventsWriter(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
dump_root,
|
dump_root,
|
||||||
|
tfdbg_run_id,
|
||||||
circular_buffer_size=DEFAULT_CIRCULAR_BUFFER_SIZE):
|
circular_buffer_size=DEFAULT_CIRCULAR_BUFFER_SIZE):
|
||||||
"""Construct a DebugEventsWriter object.
|
"""Construct a DebugEventsWriter object.
|
||||||
|
|
||||||
|
@ -43,6 +44,7 @@ class DebugEventsWriter(object):
|
||||||
Args:
|
Args:
|
||||||
dump_root: The root directory for dumping debug data. If `dump_root` does
|
dump_root: The root directory for dumping debug data. If `dump_root` does
|
||||||
not exist as a directory, it will be created.
|
not exist as a directory, it will be created.
|
||||||
|
tfdbg_run_id: Debugger Run ID.
|
||||||
circular_buffer_size: Size of the circular buffer for each of the two
|
circular_buffer_size: Size of the circular buffer for each of the two
|
||||||
execution-related debug events files: with the following suffixes: -
|
execution-related debug events files: with the following suffixes: -
|
||||||
.execution - .graph_execution_traces If <= 0, the circular-buffer
|
.execution - .graph_execution_traces If <= 0, the circular-buffer
|
||||||
|
@ -51,7 +53,9 @@ class DebugEventsWriter(object):
|
||||||
if not dump_root:
|
if not dump_root:
|
||||||
raise ValueError("Empty or None dump root")
|
raise ValueError("Empty or None dump root")
|
||||||
self._dump_root = dump_root
|
self._dump_root = dump_root
|
||||||
_pywrap_debug_events_writer.Init(self._dump_root, circular_buffer_size)
|
self._tfdbg_run_id = tfdbg_run_id
|
||||||
|
_pywrap_debug_events_writer.Init(self._dump_root, self._tfdbg_run_id,
|
||||||
|
circular_buffer_size)
|
||||||
|
|
||||||
def WriteSourceFile(self, source_file):
|
def WriteSourceFile(self, source_file):
|
||||||
"""Write a SourceFile proto with the writer.
|
"""Write a SourceFile proto with the writer.
|
||||||
|
|
|
@ -41,7 +41,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
|
|
||||||
def testMultiThreadedConstructorCallWorks(self):
|
def testMultiThreadedConstructorCallWorks(self):
|
||||||
def init_writer():
|
def init_writer():
|
||||||
debug_events_writer.DebugEventsWriter(self.dump_root)
|
debug_events_writer.DebugEventsWriter(self.dump_root, self.tfdbg_run_id)
|
||||||
|
|
||||||
num_threads = 4
|
num_threads = 4
|
||||||
threads = []
|
threads = []
|
||||||
|
@ -66,7 +66,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
self._readAndCheckMetadataFile()
|
self._readAndCheckMetadataFile()
|
||||||
|
|
||||||
def testWriteSourceFilesAndStackFrames(self):
|
def testWriteSourceFilesAndStackFrames(self):
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
num_protos = 10
|
num_protos = 10
|
||||||
for i in range(num_protos):
|
for i in range(num_protos):
|
||||||
source_file = debug_event_pb2.SourceFile()
|
source_file = debug_event_pb2.SourceFile()
|
||||||
|
@ -99,7 +100,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
self.assertEqual(actuals[i].file_line_col.file_index, i * 10)
|
self.assertEqual(actuals[i].file_line_col.file_index, i * 10)
|
||||||
|
|
||||||
def testWriteGraphOpCreationAndDebuggedGraphs(self):
|
def testWriteGraphOpCreationAndDebuggedGraphs(self):
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
num_op_creations = 10
|
num_op_creations = 10
|
||||||
for i in range(num_op_creations):
|
for i in range(num_op_creations):
|
||||||
graph_op_creation = debug_event_pb2.GraphOpCreation()
|
graph_op_creation = debug_event_pb2.GraphOpCreation()
|
||||||
|
@ -122,7 +124,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
"deadbeaf")
|
"deadbeaf")
|
||||||
|
|
||||||
def testConcurrentWritesToNonExecutionFilesWorks(self):
|
def testConcurrentWritesToNonExecutionFilesWorks(self):
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
|
|
||||||
source_file_state = {"counter": 0, "lock": threading.Lock()}
|
source_file_state = {"counter": 0, "lock": threading.Lock()}
|
||||||
|
|
||||||
|
@ -201,15 +204,18 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
|
|
||||||
def testWriteAndReadMetadata(self):
|
def testWriteAndReadMetadata(self):
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
writer.Close()
|
writer.Close()
|
||||||
with debug_events_reader.DebugDataReader(self.dump_root) as reader:
|
with debug_events_reader.DebugDataReader(self.dump_root) as reader:
|
||||||
self.assertIsInstance(reader.starting_wall_time(), float)
|
self.assertIsInstance(reader.starting_wall_time(), float)
|
||||||
self.assertGreaterEqual(reader.starting_wall_time(), t0)
|
self.assertGreaterEqual(reader.starting_wall_time(), t0)
|
||||||
self.assertEqual(reader.tensorflow_version(), versions.__version__)
|
self.assertEqual(reader.tensorflow_version(), versions.__version__)
|
||||||
|
self.assertTrue(reader.tfdbg_run_id())
|
||||||
|
|
||||||
def testWriteExecutionEventsWithCircularBuffer(self):
|
def testWriteExecutionEventsWithCircularBuffer(self):
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||||
for i in range(num_execution_events):
|
for i in range(num_execution_events):
|
||||||
execution = debug_event_pb2.Execution()
|
execution = debug_event_pb2.Execution()
|
||||||
|
@ -232,7 +238,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
|
|
||||||
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
|
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
|
||||||
# A circular buffer size of 0 abolishes the circular buffer behavior.
|
# A circular buffer size of 0 abolishes the circular buffer behavior.
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id, 0)
|
||||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||||
for i in range(num_execution_events):
|
for i in range(num_execution_events):
|
||||||
execution = debug_event_pb2.Execution()
|
execution = debug_event_pb2.Execution()
|
||||||
|
@ -248,7 +255,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
self.assertEqual(execution.op_type, "OpType%d" % i)
|
self.assertEqual(execution.op_type, "OpType%d" % i)
|
||||||
|
|
||||||
def testWriteGraphExecutionTraceEventsWithCircularBuffer(self):
|
def testWriteGraphExecutionTraceEventsWithCircularBuffer(self):
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||||
for i in range(num_execution_events):
|
for i in range(num_execution_events):
|
||||||
trace = debug_event_pb2.GraphExecutionTrace()
|
trace = debug_event_pb2.GraphExecutionTrace()
|
||||||
|
@ -272,7 +280,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
|
|
||||||
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
|
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
|
||||||
# A circular buffer size of 0 abolishes the circular buffer behavior.
|
# A circular buffer size of 0 abolishes the circular buffer behavior.
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id, 0)
|
||||||
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
|
||||||
for i in range(num_execution_events):
|
for i in range(num_execution_events):
|
||||||
trace = debug_event_pb2.GraphExecutionTrace()
|
trace = debug_event_pb2.GraphExecutionTrace()
|
||||||
|
@ -290,6 +299,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testConcurrentWritesToExecutionFiles(self):
|
def testConcurrentWritesToExecutionFiles(self):
|
||||||
circular_buffer_size = 5
|
circular_buffer_size = 5
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id,
|
||||||
circular_buffer_size)
|
circular_buffer_size)
|
||||||
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
||||||
graph_name="graph1")
|
graph_name="graph1")
|
||||||
|
@ -345,7 +355,8 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
self.assertLen(op_names, len(set(op_names)))
|
self.assertLen(op_names, len(set(op_names)))
|
||||||
|
|
||||||
def testConcurrentSourceFileRandomReads(self):
|
def testConcurrentSourceFileRandomReads(self):
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root)
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id)
|
||||||
|
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
source_file = debug_event_pb2.SourceFile(
|
source_file = debug_event_pb2.SourceFile(
|
||||||
|
@ -376,6 +387,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testConcurrentExecutionUpdateAndRandomRead(self):
|
def testConcurrentExecutionUpdateAndRandomRead(self):
|
||||||
circular_buffer_size = -1
|
circular_buffer_size = -1
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id,
|
||||||
circular_buffer_size)
|
circular_buffer_size)
|
||||||
|
|
||||||
writer_state = {"counter": 0, "done": False}
|
writer_state = {"counter": 0, "done": False}
|
||||||
|
@ -410,6 +422,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testConcurrentExecutionRandomReads(self):
|
def testConcurrentExecutionRandomReads(self):
|
||||||
circular_buffer_size = -1
|
circular_buffer_size = -1
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id,
|
||||||
circular_buffer_size)
|
circular_buffer_size)
|
||||||
|
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
|
@ -445,6 +458,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testConcurrentGraphExecutionTraceUpdateAndRandomRead(self):
|
def testConcurrentGraphExecutionTraceUpdateAndRandomRead(self):
|
||||||
circular_buffer_size = -1
|
circular_buffer_size = -1
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id,
|
||||||
circular_buffer_size)
|
circular_buffer_size)
|
||||||
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
||||||
graph_name="graph1")
|
graph_name="graph1")
|
||||||
|
@ -487,6 +501,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testConcurrentGraphExecutionTraceRandomReads(self):
|
def testConcurrentGraphExecutionTraceRandomReads(self):
|
||||||
circular_buffer_size = -1
|
circular_buffer_size = -1
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
self.tfdbg_run_id,
|
||||||
circular_buffer_size)
|
circular_buffer_size)
|
||||||
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
|
||||||
graph_name="graph1")
|
graph_name="graph1")
|
||||||
|
@ -534,7 +549,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testRangeReadingExecutions(self, begin, end, expected_begin,
|
def testRangeReadingExecutions(self, begin, end, expected_begin,
|
||||||
expected_end):
|
expected_end):
|
||||||
writer = debug_events_writer.DebugEventsWriter(
|
writer = debug_events_writer.DebugEventsWriter(
|
||||||
self.dump_root, circular_buffer_size=-1)
|
self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
execution = debug_event_pb2.Execution(op_type="OpType%d" % i)
|
execution = debug_event_pb2.Execution(op_type="OpType%d" % i)
|
||||||
writer.WriteExecution(execution)
|
writer.WriteExecution(execution)
|
||||||
|
@ -559,7 +574,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase,
|
||||||
def testRangeReadingGraphExecutionTraces(self, begin, end, expected_begin,
|
def testRangeReadingGraphExecutionTraces(self, begin, end, expected_begin,
|
||||||
expected_end):
|
expected_end):
|
||||||
writer = debug_events_writer.DebugEventsWriter(
|
writer = debug_events_writer.DebugEventsWriter(
|
||||||
self.dump_root, circular_buffer_size=-1)
|
self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
|
||||||
debugged_graph = debug_event_pb2.DebuggedGraph(
|
debugged_graph = debug_event_pb2.DebuggedGraph(
|
||||||
graph_id="graph1", graph_name="graph1")
|
graph_id="graph1", graph_name="graph1")
|
||||||
writer.WriteDebuggedGraph(debugged_graph)
|
writer.WriteDebuggedGraph(debugged_graph)
|
||||||
|
|
|
@ -52,8 +52,9 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
|
||||||
super(DebugIdentityV2OpTest, self).setUp()
|
super(DebugIdentityV2OpTest, self).setUp()
|
||||||
# Testing using a small circular-buffer size.
|
# Testing using a small circular-buffer size.
|
||||||
self.circular_buffer_size = 4
|
self.circular_buffer_size = 4
|
||||||
|
self.tfdbg_run_id = "test_tfdbg_run"
|
||||||
self.writer = debug_events_writer.DebugEventsWriter(
|
self.writer = debug_events_writer.DebugEventsWriter(
|
||||||
self.dump_root, self.circular_buffer_size)
|
self.dump_root, self.tfdbg_run_id, self.circular_buffer_size)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
self.writer.Close()
|
self.writer.Close()
|
||||||
|
@ -192,7 +193,8 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
|
||||||
def testTwoDumpRoots(self):
|
def testTwoDumpRoots(self):
|
||||||
another_dump_root = os.path.join(self.dump_root, "another")
|
another_dump_root = os.path.join(self.dump_root, "another")
|
||||||
another_debug_url = "file://%s" % another_dump_root
|
another_debug_url = "file://%s" % another_dump_root
|
||||||
another_writer = debug_events_writer.DebugEventsWriter(another_dump_root)
|
another_writer = debug_events_writer.DebugEventsWriter(
|
||||||
|
another_dump_root, "test_tfdbg_run")
|
||||||
|
|
||||||
@def_function.function
|
@def_function.function
|
||||||
def write_debug_trace(x):
|
def write_debug_trace(x):
|
||||||
|
@ -264,6 +266,7 @@ class DebugIdentityV2OpUninitializedWriterTest(
|
||||||
self.assertAllClose(
|
self.assertAllClose(
|
||||||
write_debug_trace(np.array([i]).astype(np.float32)), [i**2.0])
|
write_debug_trace(np.array([i]).astype(np.float32)), [i**2.0])
|
||||||
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
writer = debug_events_writer.DebugEventsWriter(self.dump_root,
|
||||||
|
"test_tfdbg_run",
|
||||||
circular_buffer_size)
|
circular_buffer_size)
|
||||||
writer.FlushNonExecutionFiles()
|
writer.FlushNonExecutionFiles()
|
||||||
writer.FlushExecutionFiles()
|
writer.FlushExecutionFiles()
|
||||||
|
|
|
@ -69,6 +69,10 @@ def _debug_identity_v2_grad(op, dy):
|
||||||
return dy
|
return dy
|
||||||
|
|
||||||
|
|
||||||
|
def _get_tfdbg_run_id():
|
||||||
|
return str(uuid.uuid4())[:8]
|
||||||
|
|
||||||
|
|
||||||
def _get_id():
|
def _get_id():
|
||||||
"""Get a short unique ID."""
|
"""Get a short unique ID."""
|
||||||
return str(uuid.uuid4())
|
return str(uuid.uuid4())
|
||||||
|
@ -88,6 +92,7 @@ class _DumpingCallback(object):
|
||||||
op_regex,
|
op_regex,
|
||||||
tensor_dtypes):
|
tensor_dtypes):
|
||||||
self._dump_root = dump_root
|
self._dump_root = dump_root
|
||||||
|
self._tfdbg_run_id = _get_tfdbg_run_id()
|
||||||
self._tensor_debug_mode = tensor_debug_mode
|
self._tensor_debug_mode = tensor_debug_mode
|
||||||
self._circular_buffer_size = circular_buffer_size
|
self._circular_buffer_size = circular_buffer_size
|
||||||
self._op_regex = op_regex
|
self._op_regex = op_regex
|
||||||
|
@ -148,6 +153,10 @@ class _DumpingCallback(object):
|
||||||
self._dump_root = dump_root
|
self._dump_root = dump_root
|
||||||
self._writer = None
|
self._writer = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def tfdbg_run_id(self):
|
||||||
|
return self._tfdbg_run_id
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tensor_debug_mode(self):
|
def tensor_debug_mode(self):
|
||||||
return self._tensor_debug_mode
|
return self._tensor_debug_mode
|
||||||
|
@ -161,6 +170,7 @@ class _DumpingCallback(object):
|
||||||
if not self._writer:
|
if not self._writer:
|
||||||
self._writer = debug_events_writer.DebugEventsWriter(
|
self._writer = debug_events_writer.DebugEventsWriter(
|
||||||
self._dump_root,
|
self._dump_root,
|
||||||
|
self._tfdbg_run_id,
|
||||||
circular_buffer_size=self._circular_buffer_size)
|
circular_buffer_size=self._circular_buffer_size)
|
||||||
return self._writer
|
return self._writer
|
||||||
|
|
||||||
|
@ -365,6 +375,8 @@ class _DumpingCallback(object):
|
||||||
if tf_compat.forward_compatible(2020, 6, 24):
|
if tf_compat.forward_compatible(2020, 6, 24):
|
||||||
debug_identity_op_kwargs[
|
debug_identity_op_kwargs[
|
||||||
"circular_buffer_size"] = self._circular_buffer_size
|
"circular_buffer_size"] = self._circular_buffer_size
|
||||||
|
if tf_compat.forward_compatible(2020, 7, 1):
|
||||||
|
debug_identity_op_kwargs["tfdbg_run_id"] = self._tfdbg_run_id
|
||||||
if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
|
if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
|
||||||
if (not self._should_dump_tensor(op_type, tensor.dtype) or
|
if (not self._should_dump_tensor(op_type, tensor.dtype) or
|
||||||
not tensor.dtype.is_numpy_compatible):
|
not tensor.dtype.is_numpy_compatible):
|
||||||
|
@ -873,7 +885,8 @@ def disable_dump_debug_info():
|
||||||
"""
|
"""
|
||||||
if hasattr(_state, "dumping_callback"):
|
if hasattr(_state, "dumping_callback"):
|
||||||
dump_root = _state.dumping_callback.dump_root
|
dump_root = _state.dumping_callback.dump_root
|
||||||
debug_events_writer.DebugEventsWriter(dump_root).Close()
|
tfdbg_run_id = _state.dumping_callback.tfdbg_run_id
|
||||||
|
debug_events_writer.DebugEventsWriter(dump_root, tfdbg_run_id).Close()
|
||||||
op_callbacks.remove_op_callback(_state.dumping_callback.callback)
|
op_callbacks.remove_op_callback(_state.dumping_callback.callback)
|
||||||
function_lib.remove_function_callback(
|
function_lib.remove_function_callback(
|
||||||
_state.dumping_callback.function_callback)
|
_state.dumping_callback.function_callback)
|
||||||
|
|
|
@ -21,6 +21,7 @@ from __future__ import print_function
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import uuid
|
||||||
|
|
||||||
from tensorflow.python.debug.lib import check_numerics_callback
|
from tensorflow.python.debug.lib import check_numerics_callback
|
||||||
from tensorflow.python.debug.lib import debug_events_reader
|
from tensorflow.python.debug.lib import debug_events_reader
|
||||||
|
@ -35,6 +36,7 @@ class DumpingCallbackTestBase(test_util.TensorFlowTestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
super(DumpingCallbackTestBase, self).setUp()
|
super(DumpingCallbackTestBase, self).setUp()
|
||||||
self.dump_root = tempfile.mkdtemp()
|
self.dump_root = tempfile.mkdtemp()
|
||||||
|
self.tfdbg_run_id = str(uuid.uuid4())
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
if os.path.isdir(self.dump_root):
|
if os.path.isdir(self.dump_root):
|
||||||
|
|
|
@ -982,7 +982,7 @@ tf_module {
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "DebugIdentityV2"
|
name: "DebugIdentityV2"
|
||||||
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
|
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'tfdbg_run_id\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'\', \'None\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "DebugNanCount"
|
name: "DebugNanCount"
|
||||||
|
|
|
@ -982,7 +982,7 @@ tf_module {
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "DebugIdentityV2"
|
name: "DebugIdentityV2"
|
||||||
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
|
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'tfdbg_run_id\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'\', \'None\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "DebugNanCount"
|
name: "DebugNanCount"
|
||||||
|
|
Loading…
Reference in New Issue