From b76565b39dc23116d8f6c2593d213ce9e0b0d413 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Sep 2017 17:30:55 -0700 Subject: [PATCH] Some profiler fixes and cleanup. PiperOrigin-RevId: 168069346 --- tensorflow/core/profiler/README.md | 2 +- .../core/profiler/g3doc/command_line.md | 7 +- .../profiler/internal/print_model_analysis.cc | 1 - .../core/profiler/internal/tfprof_node.h | 87 ++++++++++++------- .../core/profiler/internal/tfprof_stats.cc | 36 ++++++-- .../core/profiler/internal/tfprof_timeline.cc | 8 +- .../profiler/internal/tfprof_timeline_test.cc | 4 +- tensorflow/core/profiler/tfprof_log.proto | 2 + .../profiler/internal/run_metadata_test.py | 2 + tensorflow/python/profiler/model_analyzer.py | 3 +- .../python/profiler/model_analyzer_test.py | 2 +- tensorflow/python/profiler/option_builder.py | 2 +- tensorflow/python/profiler/profile_context.py | 9 +- 13 files changed, 115 insertions(+), 50 deletions(-) diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index 2ddac0a79b7..78557994172 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -216,7 +216,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a ```shell # The following example generates a timeline. -tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile= +tfprof> graph -step -1 -max_depth 100000 -output timeline:outfile= generating trace file. diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md index fb4207c7841..d41ac7290db 100644 --- a/tensorflow/core/profiler/g3doc/command_line.md +++ b/tensorflow/core/profiler/g3doc/command_line.md @@ -14,7 +14,12 @@ ### Command Line Inputs -tfprof command line tool uses the following inputs: +tfprof command line tool uses the following input: + +--profile_path: A ProfileProto binary proto file. +See QuickStart on generating the file. + +THE OLD WAY BELOW IS DEPRECATED: --graph_path: GraphDef proto file (required). Used to build in-memory data structure of the model. For example, graph.pbtxt written by tf.Supervisor diff --git a/tensorflow/core/profiler/internal/print_model_analysis.cc b/tensorflow/core/profiler/internal/print_model_analysis.cc index ddf3c7f1f28..575ae182ee8 100644 --- a/tensorflow/core/profiler/internal/print_model_analysis.cc +++ b/tensorflow/core/profiler/internal/print_model_analysis.cc @@ -84,7 +84,6 @@ string RunProfile(const string& command, const string& options, } // namespace bool NewProfiler(const string* graph, const string* op_log) { - CHECK(!tf_stat) << "Currently only 1 living tfprof profiler is allowed"; CHECK(graph) << "graph mustn't be null"; std::unique_ptr graph_ptr(new GraphDef()); if (!graph_ptr->ParseFromString(*graph)) { diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h index 55d53f39237..95d199e5b90 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.h +++ b/tensorflow/core/profiler/internal/tfprof_node.h @@ -175,22 +175,22 @@ class ExecStep { std::map> output_memory_; }; -#define GRAPH_NODE_BYTES(type) \ - do { \ - if (execs_.empty()) { \ - return 0; \ - } \ - if (step >= 0) { \ - auto exec = execs_.find(step); \ - CHECK(exec != execs_.end()) << "unknown step " << step; \ - return exec->second.type##_bytes(); \ - } \ - \ - int64 bytes = 0; \ - for (const auto& exec : execs_) { \ - bytes += exec.second.type##_bytes(); \ - } \ - return bytes / execs_.size(); \ +#define GRAPH_NODE_BYTES(type) \ + do { \ + if (execs_.empty()) { \ + return 0; \ + } \ + if (step >= 0) { \ + auto exec = execs_.find(step); \ + if (exec == execs_.end()) return 0; \ + return exec->second.type##_bytes(); \ + } \ + \ + int64 bytes = 0; \ + for (const auto& exec : execs_) { \ + bytes += exec.second.type##_bytes(); \ + } \ + return bytes / execs_.size(); \ } while (0) class TFGraphNode { @@ -372,7 +372,9 @@ class TFGraphNode { } if (step >= 0) { auto exec = execs_.find(step); - CHECK(exec != execs_.end()); + if (exec == execs_.end()) { + return 0; + } return exec->second.run_count(); } int64 total_run_count = 0; @@ -390,7 +392,9 @@ class TFGraphNode { } if (step >= 0) { auto exec = execs_.find(step); - CHECK(exec != execs_.end()); + if (exec == execs_.end()) { + return 0; + } return exec->second.exec_micros(); } @@ -410,7 +414,9 @@ class TFGraphNode { } if (step >= 0) { auto exec = execs_.find(step); - CHECK(exec != execs_.end()); + if (exec == execs_.end()) { + return 0; + } return exec->second.accelerator_exec_micros(); } @@ -430,7 +436,9 @@ class TFGraphNode { } if (step >= 0) { auto exec = execs_.find(step); - CHECK(exec != execs_.end()); + if (exec == execs_.end()) { + return 0; + } return exec->second.cpu_exec_micros(); } @@ -448,20 +456,26 @@ class TFGraphNode { int64 all_start_micros(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.all_start_micros(); } int64 latest_end_micros(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.latest_end_micros(); } const std::map>>& op_execs( int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return empty_op_execs_; + } return exec->second.op_execs(); } @@ -469,33 +483,45 @@ class TFGraphNode { int64 accelerator_temp_bytes(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.accelerator_temp_bytes(); } int64 host_temp_bytes(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.host_temp_bytes(); } int64 accelerator_persistent_bytes(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.accelerator_persistent_bytes(); } int64 host_persistent_bytes(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.host_persistent_bytes(); } const std::map>& output_memory( int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return empty_output_memory_; + } return exec->second.output_memory(); } int64 allocator_bytes_in_use(int64 step) const { auto exec = execs_.find(step); - CHECK(exec != execs_.end()) << "unknown step " << step; + if (exec == execs_.end()) { + return 0; + } return exec->second.allocator_bytes_in_use(); } @@ -566,6 +592,9 @@ class TFGraphNode { std::set op_types_; std::map execs_; + + std::map> empty_output_memory_; + std::map>> empty_op_execs_; }; class TFMultiGraphNode { diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc index 5b549583446..eb84bada135 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.cc +++ b/tensorflow/core/profiler/internal/tfprof_stats.cc @@ -88,6 +88,9 @@ TFStats::TFStats(const string& filename, node_pb.second.name(), std::move(node))); } has_code_traces_ = profile.has_trace(); + for (int64 s : profile.steps()) { + steps_.insert(s); + } } void TFStats::BuildView(const string& cmd) { @@ -136,6 +139,14 @@ const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd, if (cmd == kCmds[0]) { return scope_view_->Show(opts); } else if (cmd == kCmds[1]) { + if (opts.step < 0 && opts.output_type == kOutput[0]) { + for (int64 step : steps_) { + Options nopts = opts; + nopts.step = step; + graph_view_->Show(nopts); + } + return empty_graph_node_; + } return graph_view_->Show(opts); } else { fprintf(stderr, "Unknown command: %s\n", cmd.c_str()); @@ -148,7 +159,11 @@ const MultiGraphNodeProto& TFStats::ShowMultiGraphNode( if (!Validate(opts)) { return empty_multi_graph_node_; } - if (cmd == kCmds[2] && has_code_traces()) { + if (cmd == kCmds[2]) { + if (!has_code_traces()) { + fprintf(stderr, "No code trace information\n"); + return empty_multi_graph_node_; + } return code_view_->Show(opts); } else if (cmd == kCmds[3]) { return op_view_->Show(opts); @@ -212,7 +227,9 @@ void TFStats::AddOpLogProto(std::unique_ptr op_log) { } if (entry.has_code_def()) { has_code_traces_ = true; - node->second->AddCode(entry.code_def()); + if (node->second->code().traces_size() == 0) { + node->second->AddCode(entry.code_def()); + } } } } @@ -258,9 +275,11 @@ void TFStats::WriteProfile(const string& filename) { } (*profile.mutable_nodes())[it->second->id()].MergeFrom( it->second->ToProto(nodes_map_)); - if (it->second->code().traces_size() > 0) { - profile.set_has_trace(true); - } + } + + profile.set_has_trace(has_code_traces_); + for (int64 s : steps_) { + profile.add_steps(s); } Status s = WriteStringToFile(Env::Default(), filename, profile.SerializeAsString()); @@ -271,7 +290,12 @@ void TFStats::WriteProfile(const string& filename) { bool TFStats::Validate(const Options& opts) const { if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) { - fprintf(stderr, "Options -step=%lld not found\n", opts.step); + fprintf(stderr, + "Options -step=%lld not found.\nAvailable steps: ", opts.step); + for (int64 s : steps_) { + fprintf(stderr, "%lld ", s); + } + fprintf(stderr, "\n"); return false; } return true; diff --git a/tensorflow/core/profiler/internal/tfprof_timeline.cc b/tensorflow/core/profiler/internal/tfprof_timeline.cc index f3934860d9a..1732574cc41 100644 --- a/tensorflow/core/profiler/internal/tfprof_timeline.cc +++ b/tensorflow/core/profiler/internal/tfprof_timeline.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/profiler/internal/tfprof_utils.h" namespace tensorflow { @@ -303,11 +304,12 @@ void Timeline::GenerateCodeTimeline(const CodeNode* node) { } void Timeline::OutputTimeline() { + string outfile = strings::Printf("%s_%lld", outfile_.c_str(), step()); Status s = - WriteStringToFile(Env::Default(), outfile_, chrome_formatter_.Format()); + WriteStringToFile(Env::Default(), outfile, chrome_formatter_.Format()); if (!s.ok()) { fprintf(stderr, "Failed to write timeline file: %s\nError: %s\n", - outfile_.c_str(), s.ToString().c_str()); + outfile.c_str(), s.ToString().c_str()); return; } fprintf(stdout, "\n******************************************************\n"); @@ -315,7 +317,7 @@ void Timeline::OutputTimeline() { "Timeline file is written to %s.\n" "Open a Chrome browser, enter URL chrome://tracing and " "load the timeline file.", - outfile_.c_str()); + outfile.c_str()); fprintf(stdout, "\n******************************************************\n"); fflush(stdout); } diff --git a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc index 5dd440e9a26..babae395bad 100644 --- a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc +++ b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc @@ -70,7 +70,7 @@ TEST_F(TFProfTimelineTest, GraphView) { tf_stats_->ShowGraphNode("graph", opts); string dump_str; - TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str)); + TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file + "_0", &dump_str)); EXPECT_EQ(1754536562981488144ull, Hash64(dump_str)); } @@ -84,7 +84,7 @@ TEST_F(TFProfTimelineTest, ScopeView) { tf_stats_->ShowGraphNode("scope", opts); string dump_str; - TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str)); + TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file + "_0", &dump_str)); EXPECT_EQ(17545174915963890413ull, Hash64(dump_str)); } diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto index 1ce5a5eecf7..ae571e2540b 100644 --- a/tensorflow/core/profiler/tfprof_log.proto +++ b/tensorflow/core/profiler/tfprof_log.proto @@ -42,6 +42,8 @@ message ProfileProto { map nodes = 1; // Whether or not has code traces. bool has_trace = 2; + // Traced steps. + repeated int64 steps = 3; } message ProfileNode { diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py index 1e26a9897eb..c0de08cad6b 100644 --- a/tensorflow/python/profiler/internal/run_metadata_test.py +++ b/tensorflow/python/profiler/internal/run_metadata_test.py @@ -70,6 +70,7 @@ def _run_model(): opts = builder.time_and_memory() opts['min_micros'] = 0 opts['min_bytes'] = 0 + opts['order_by'] = 'name' opts['output'] = 'none' _ = sess.run(y, options=config_pb2.RunOptions( @@ -95,6 +96,7 @@ def _run_loop_model(): run_metadata=run_meta) opts = builder.time_and_memory() + opts['order_by'] = 'name' opts['output'] = 'none' tfprof_node = model_analyzer.profile( diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index 98d3e58f2af..2071325c7bb 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -180,8 +180,7 @@ class Profiler(object): """ # pylint: disable=protected-access op_log = tfprof_logger._merge_default_with_oplog( - self._graph, run_meta=run_meta, add_trace=False, - add_trainable_var=False) + self._graph, run_meta=run_meta) # pylint: enable=protected-access # TODO(xpan): P1: Better to find the current graph. print_mdl.AddStep( diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index dcdda1ffa25..494ba2e2a0d 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -305,7 +305,7 @@ class PrintModelAnalysisTest(test.TestCase): _ = model_analyzer.profile( sess.graph, run_meta, cmd='graph', options=opts) - with gfile.Open(outfile, 'r') as f: + with gfile.Open(outfile + '_0', 'r') as f: # Test that a json file is created. # TODO(xpan): tfprof Timeline isn't quite correct on Windows. # Investigate why. diff --git a/tensorflow/python/profiler/option_builder.py b/tensorflow/python/profiler/option_builder.py index 641895ffe54..13942ad6a2a 100644 --- a/tensorflow/python/profiler/option_builder.py +++ b/tensorflow/python/profiler/option_builder.py @@ -177,7 +177,7 @@ class ProfileOptionBuilder(object): 'min_params': 0, 'min_float_ops': 0, 'min_occurrence': 0, - 'order_by': 'name', + 'order_by': 'micros', 'account_type_regexes': ['.*'], 'start_name_regexes': ['.*'], 'trim_name_regexes': [], diff --git a/tensorflow/python/profiler/profile_context.py b/tensorflow/python/profiler/profile_context.py index 07adcb9c3f5..49fa22e3479 100644 --- a/tensorflow/python/profiler/profile_context.py +++ b/tensorflow/python/profiler/profile_context.py @@ -20,6 +20,7 @@ from __future__ import print_function import contextlib import os +import threading from tensorflow.core.protobuf import config_pb2 from tensorflow.python import pywrap_tensorflow as print_mdl @@ -163,6 +164,7 @@ class ProfileContext(object): self._traced_steps = 0 self._auto_profiles = [] self._profiler = None + self._lock = threading.Lock() def add_auto_profiling(self, cmd, options, profile_steps): """Traces and profiles at some session run steps. @@ -181,9 +183,10 @@ class ProfileContext(object): @property def profiler(self): """Returns the current profiler object.""" - if not self._profiler: - self._profiler = model_analyzer.Profiler(ops.get_default_graph()) - return self._profiler + with self._lock: + if not self._profiler: + self._profiler = model_analyzer.Profiler(ops.get_default_graph()) + return self._profiler def trace_next_step(self): """Enables tracing and add traces to profiler at next step."""