Add support to generate pprof results to tf.profiler

A fun thing is, it can profile not only time and memory,
but also parameters, etc.

PiperOrigin-RevId: 163767517
This commit is contained in:
A. Unique TensorFlower 2017-07-31 16:41:46 -07:00 committed by TensorFlower Gardener
parent 78a90370ef
commit edac90c7c7
20 changed files with 667 additions and 147 deletions

View File

@ -181,6 +181,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a
### Visualize time and memory.
```
# The following example generates a timeline.
tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile=<filename>
generating trace file.
@ -191,9 +192,29 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
******************************************************
```
<left>
[CodeTimeline](g3doc/graph_timeline.png)
[Timeline](g3doc/graph_timeline.png)
</left>
```
# The following example generates a pprof graph (only supported by code view).
# Since TensorFlow runs the graph instead of Python code, the pprof graph
# doesn't profile the statistics of Python, but the TensorFlow graph
# nodes created by the Python call stack.
# Nevertheless, it surfaces the critical Python code paths for us.
#
# `-trim_name_regexes` trims the python call stack, which are always the same
# for the leaves.
# `-select accelerator_micros` picks accelerator time for the pprof graph. Users
# can also generate a memory profile using `-select bytes`.
tfprof> code -max_depth 100 -trim_name_regexes '^ops.py.*' -select accelerator_micros -output pprof:outfile=<filename>
# Use pprof to visualize the generated file.
pprof -png --nodecount=20 --sample_index=1 <filename>
```
<left>
[PprofGraph](g3doc/pprof.jpg)
</left>
### Feature Request and Bug Report

Binary file not shown.

After

Width:  |  Height:  |  Size: 261 KiB

View File

@ -21,6 +21,8 @@ limitations under the License.
#include "tensorflow/c/c_api.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/io/zlib_compression_options.h"
#include "tensorflow/core/lib/io/zlib_outputbuffer.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
@ -46,66 +48,335 @@ string GetTraceString(const CodeDef::Trace& trace) {
}
return ntrace;
}
// StringTable interns strings, assigning each distinct string a stable id.
class StringTable {
 public:
  StringTable() {
    // pprof requires the first entry of string_table to be ''.
    string_id_[""] = 0;
    all_strings_.push_back("");
  }

  // Returns the id of 'str'. Unseen strings are inserted with the next
  // sequential id (== their position in all_strings_).
  uint64 GetIndex(const string& str) {
    auto result = string_id_.insert(
        std::pair<string, int64>(str, all_strings_.size()));
    if (result.second) {
      all_strings_.push_back(str);
    }
    return result.first->second;
  }

  // All interned strings, ordered by id.
  const std::vector<string>& strings() const { return all_strings_; }

 private:
  std::map<string, uint64> string_id_;
  std::vector<string> all_strings_;
};
// FunctionTable maps each function to an id, emitting one pprof::Function
// proto per distinct (file, name, start-line) triple. Names and paths are
// interned through the shared StringTable.
class FunctionTable {
public:
explicit FunctionTable(StringTable* string_table)
: string_table_(string_table) {}
// Returns the index of a function. If not found, adds a function proto
// and returns the function index.
uint64 GetIndex(const string& file_path, const string& func_name,
uint64 func_start_line) {
auto key = std::tuple<string, string, uint64>(file_path, func_name,
func_start_line);
auto idx = function_table_.find(key);
if (idx != function_table_.end()) {
return idx->second.id();
}
// operator[] inserts the new entry before size() is read below.
pprof::Function* func_pb = &function_table_[key];
// function index should start from 1: size() now counts the new entry,
// so the first function gets id 1.
func_pb->set_id(function_table_.size());
func_pb->set_name(string_table_->GetIndex(func_name));
func_pb->set_filename(string_table_->GetIndex(file_path));
func_pb->set_start_line(func_start_line);
return func_pb->id();
}
// All registered functions, keyed by (file, name, start-line).
const std::map<std::tuple<string, string, uint64>, pprof::Function>&
functions() const {
return function_table_;
}
private:
StringTable* string_table_;  // Not owned.
std::map<std::tuple<string, string, uint64>, pprof::Function> function_table_;
};
// LocationTable maps each function call site to an id, emitting one
// pprof::Location (with a single Line) per distinct call site.
class LocationTable {
public:
explicit LocationTable(FunctionTable* function_table)
: function_table_(function_table) {}
// Returns the index of a function call location. If not found, adds a
// location proto and returns the location index.
uint64 GetIndex(const string& file_path, uint64 line_number,
const string& called_function_name,
const string& called_file_path,
uint64 called_func_start_line) {
// NOTE(review): the key omits called_file_path and called_func_start_line,
// so two same-named callees invoked from the same file/line would share a
// single location entry — confirm this is intended.
auto key = std::tuple<string, string, uint64>(
file_path, called_function_name, line_number);
auto idx = location_table_.find(key);
if (idx != location_table_.end()) {
return idx->second.id();
}
// operator[] inserts before size() is read, so ids start from 1 as
// pprof expects.
pprof::Location* location_pb = &location_table_[key];
location_pb->set_id(location_table_.size());
pprof::Line* line_pb = location_pb->add_line();
line_pb->set_function_id(function_table_->GetIndex(
called_file_path, called_function_name, called_func_start_line));
line_pb->set_line(line_number);
return location_pb->id();
}
// All registered call sites, keyed by (caller file, callee name, line).
const std::map<std::tuple<string, string, uint64>, pprof::Location>&
locations() const {
return location_table_;
}
private:
FunctionTable* function_table_;  // Not owned.
std::map<std::tuple<string, string, uint64>, pprof::Location> location_table_;
};
// Samples stores samples of all calls. A sample is a single call trace,
// that is, the call path from top caller to the leaf callee.
class Samples {
public:
explicit Samples(StringTable* string_table, const Options* opts)
: string_table_(string_table), opts_(opts) {}
// 'node' is the leaf of the displayed trace. It includes all graph nodes
// created by it. 'location_ids' contains
// the call stack, from callee to caller.
// This method adds the statistics of graph nodes created by the python
// call.
void Add(const CodeNode* node, const std::vector<uint64>& location_ids) {
// displayed leaf might not be true leaf. Retrieve the true leaves for
// stats.
std::vector<const CodeNode*> all_leaf = FetchAllLeaf(node);
CHECK(!all_leaf.empty()) << node->name();
for (const CodeNode* cn : all_leaf) {
// NOTE(review): iterating by value copies each map entry; 'const auto&'
// would avoid the copy.
for (auto gn_it : cn->node->graph_nodes()) {
const TFGraphNode* gn = gn_it.second;
// One pprof sample per graph node, keyed by node name. If the same
// name is added twice, location_ids and labels accumulate — presumed
// unique per profile; TODO confirm.
pprof::Sample* sample_pb = &sample_table_[gn->name()];
for (uint64 id : location_ids) {
sample_pb->mutable_location_id()->Add(id);
}
pprof::Label* label_pb = sample_pb->mutable_label()->Add();
label_pb->set_key(string_table_->GetIndex("node_name"));
label_pb->set_str(string_table_->GetIndex(gn->name()));
// First value: sample count. Second value: the selected attribute,
// dispatched on the single entry of opts_->select.
sample_pb->mutable_value()->Add(1);
string type = *opts_->select.begin();
if (type == kShown[1]) {
sample_pb->mutable_value()->Add(gn->exec_micros(node->node->step()));
} else if (type == kShown[9]) {
sample_pb->mutable_value()->Add(
gn->accelerator_exec_micros(node->node->step()));
} else if (type == kShown[10]) {
sample_pb->mutable_value()->Add(
gn->cpu_exec_micros(node->node->step()));
} else if (type == kShown[0]) {
sample_pb->mutable_value()->Add(
gn->requested_bytes(node->node->step()));
} else if (type == kShown[2]) {
sample_pb->mutable_value()->Add(gn->parameters());
} else if (type == kShown[3]) {
sample_pb->mutable_value()->Add(gn->float_ops(node->node->step()));
} else {
fprintf(stderr, "pprof doesn't support -select=%s\n", type.c_str());
}
}
}
}
// All accumulated samples, keyed by graph node name.
const std::map<string, pprof::Sample>& samples() const {
return sample_table_;
}
private:
// Returns the true leaves (nodes with no children) under 'root';
// returns {root} itself when root has no children.
std::vector<const CodeNode*> FetchAllLeaf(const CodeNode* root) {
if (root->children.empty()) {
return {root};
}
std::vector<const CodeNode*> ret;
for (auto& n : root->children) {
std::vector<const CodeNode*> nodes = FetchAllLeaf(n);
ret.insert(ret.end(), nodes.begin(), nodes.end());
}
return ret;
}
StringTable* string_table_;  // Not owned.
const Options* opts_;        // Not owned.
std::map<string, pprof::Sample> sample_table_;
};
// Accumulates code-view call stacks and writes them out as a
// gzip-compressed pprof Profile proto.
class PprofProfileImpl : public PprofProfile {
 public:
  explicit PprofProfileImpl(const Options* opts)
      : opts_(opts),
        func_table_(new FunctionTable(&string_table_)),
        loc_table_(new LocationTable(func_table_.get())),
        samples_(new Samples(&string_table_, opts)) {}

  // Registers the caller->callee call site and returns its location id.
  uint64 AddLocation(const CodeNode* callee, const CodeNode* caller) override {
    const string& file_path = caller->trace->file();
    uint64 lineno = caller->trace->lineno();
    const string& callee_file_path = callee->trace->file();
    const string& callee_function = callee->trace->function();
    uint64 callee_func_start_line = callee->trace->func_start_line();

    return loc_table_->GetIndex(file_path, lineno, callee_function,
                                callee_file_path, callee_func_start_line);
  }

  // Records one sample ending at 'leaf'. 'call_ids' arrives ordered
  // caller->callee; pprof expects leaf-first, so it is reversed here.
  void AddSample(const CodeNode* leaf, std::vector<uint64>* call_ids) override {
    std::vector<uint64> reversed_call_ids;
    std::reverse_copy(call_ids->begin(), call_ids->end(),
                      std::back_inserter(reversed_call_ids));
    samples_->Add(leaf, reversed_call_ids);
  }

  // Serializes the profile, gzip-compresses it and writes it to 'filename'.
  Status WritePprofProfile(const string& filename) override {
    pprof::Profile profile_pb;
    Build(&profile_pb);

    std::unique_ptr<WritableFile> file;
    Status s = Env::Default()->NewWritableFile(filename, &file);
    if (!s.ok()) return s;

    int32 buf_size = 1024 * 1024;
    // Own the buffer with unique_ptr so it is released on every exit path;
    // the previous raw 'new' leaked it even on success.
    std::unique_ptr<io::ZlibOutputBuffer> zlib_output_buffer(
        new io::ZlibOutputBuffer(file.get(), buf_size, buf_size,
                                 io::ZlibCompressionOptions::GZIP()));
    s = zlib_output_buffer->Init();
    if (!s.ok()) return s;
    s = zlib_output_buffer->Append(profile_pb.SerializeAsString());
    if (!s.ok()) return s;
    s = zlib_output_buffer->Close();
    if (!s.ok()) return s;
    fprintf(stdout, "\nRun pprof -png --nodecount=20 --sample_index=1 <%s>\n",
            filename.c_str());
    return s;
  }

 private:
  // Fills 'profile_pb' from the accumulated string/function/location/sample
  // tables. The first sample value is always a count; the second is the
  // attribute selected in opts_->select.
  void Build(pprof::Profile* profile_pb) {
    string sample_type_description = "count";
    auto sample_type = profile_pb->mutable_sample_type()->Add();
    sample_type->set_type(string_table_.GetIndex(sample_type_description));
    sample_type->set_unit(string_table_.GetIndex("count"));

    string type = *opts_->select.begin();
    sample_type_description = type;
    sample_type = profile_pb->mutable_sample_type()->Add();
    sample_type->set_type(string_table_.GetIndex(sample_type_description));
    if (type == kShown[1] || type == kShown[9] || type == kShown[10]) {
      sample_type->set_unit(string_table_.GetIndex("microseconds"));
      if (type == kShown[1]) {
        profile_pb->mutable_comment()->Add(string_table_.GetIndex(
            "Sum of accelerator execution time and cpu execution time."));
      } else if (type == kShown[9]) {
        profile_pb->mutable_comment()->Add(
            string_table_.GetIndex("Accelerator execution time."));
      } else if (type == kShown[10]) {
        profile_pb->mutable_comment()->Add(
            string_table_.GetIndex("CPU execution time."));
      }
    } else if (type == kShown[0]) {
      sample_type->set_unit(string_table_.GetIndex("bytes"));
      profile_pb->mutable_comment()->Add(
          string_table_.GetIndex("Sum of operation output memory."));
    } else if (type == kShown[2]) {
      sample_type->set_unit(string_table_.GetIndex("count"));
      profile_pb->mutable_comment()->Add(
          string_table_.GetIndex("Model parameters."));
    } else if (type == kShown[3]) {
      sample_type->set_unit(string_table_.GetIndex("count"));
      profile_pb->mutable_comment()->Add(string_table_.GetIndex(
          "Model float operations (Only available if defined)."));
    } else {
      fprintf(stderr, "pprof doesn't support selecting: %s\n", type.c_str());
    }

    for (const string& str : string_table_.strings()) {
      *profile_pb->mutable_string_table()->Add() = str;
    }
    for (const auto& sample_it : samples_->samples()) {
      // TODO(xpan): Consider swap.
      profile_pb->mutable_sample()->Add()->MergeFrom(sample_it.second);
    }
    for (const auto& function_it : func_table_->functions()) {
      profile_pb->mutable_function()->Add()->MergeFrom(function_it.second);
    }
    for (const auto& location_it : loc_table_->locations()) {
      profile_pb->mutable_location()->Add()->MergeFrom(location_it.second);
    }
  }

  const Options* opts_;  // Not owned.
  StringTable string_table_;
  std::unique_ptr<FunctionTable> func_table_;
  std::unique_ptr<LocationTable> loc_table_;
  std::unique_ptr<Samples> samples_;
};
} // namespace
// Inserts 'node' into the code-trace tree: each frame of the node's
// recorded Python call stack becomes one level of the tree, and the graph
// node itself is attached at the innermost (last) frame.
void TFCode::AddNode(TFGraphNode* node) {
if (node->code().traces_size() == 0) {
return;
}
TFMultiGraphNode* pre_trace_node = nullptr;
// Lazily create the shared root on the first node added.
if (!root_) {
graph_root_.reset(new TFMultiGraphNode(kTFProfRoot));
root_.reset(new CodeNode(graph_root_.get(), nullptr));
}
CodeNode* pre_code_node = root_.get();
// TODO(xpan): Consider to release CodeDef after TFCode is built. It
// takes a lot of memory.
for (int i = 0; i < node->code().traces_size(); ++i) {
// Unlike op name, which is globally unique, trace name is only unique
// w.r.t. it's parent.
const string& trace = GetTraceString(node->code().traces(i));
if (i == 0) {
// The outermost frame is shared by every node and becomes trace_root_.
if (!trace_root_) {
trace_root_.reset(new TFMultiGraphNode(trace));
}
CHECK(trace_root_->name() == trace) << "Different trace root";
pre_trace_node = trace_root_.get();
continue;
}
// AddChildren is a no-op if a child with this name already exists.
pre_trace_node->AddChildren(trace);
TFMultiGraphNode* trace_node = pre_trace_node->children().at(trace).get();
pre_code_node = pre_code_node->AddChildren(trace, &node->code().traces(i));
// Attach the graph node at the innermost frame only.
if (i == node->code().traces_size() - 1) {
trace_node->AddGraphNode(node);
pre_code_node->node->AddGraphNode(node);
}
pre_trace_node = trace_node;
}
}
// Assembles the CodeNode tree from the accumulated trace tree. Idempotent:
// a second call returns immediately once root_ exists.
void TFCode::Build() {
  if (root_) return;
  tfprof_trace_root_.reset(new TFMultiGraphNode(kTFProfRoot));
  root_.reset(new CodeNode(tfprof_trace_root_.get()));
  if (!trace_root_) return;
  code_root_ = BuildCodeNodes(trace_root_.get());
  root_->children.push_back(code_root_);
}
// Recursively mirrors the TFMultiGraphNode tree with CodeNodes. Ownership
// stays in code_nodes_; the returned tree links raw pointers.
CodeNode* TFCode::BuildCodeNodes(TFMultiGraphNode* root) {
  std::unique_ptr<CodeNode> owned(new CodeNode(root));
  CodeNode* node = owned.get();
  code_nodes_.insert(std::move(owned));
  for (const auto& child : root->children()) {
    node->children.push_back(BuildCodeNodes(child.second.get()));
  }
  return node;
}
const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
Timeline* timeline) {
std::vector<CodeNode*> roots = Account(root_->children, opts);
root_->ResetTotalStats();
if (opts.output_type == kOutput[3]) {
if (opts.select.size() != 1) {
fprintf(stderr, "Can only select 1 attribute for pprof output.\n");
return root_.get();
}
string select = *opts.select.begin();
if (select != kShown[0] && select != kShown[1] && select != kShown[2] &&
select != kShown[3] && select != kShown[9] && select != kShown[10]) {
fprintf(stderr, "pprof doesn't support -select=%s\n", select.c_str());
return root_.get();
}
}
std::vector<CodeNode*> roots = Account(root_->children, opts);
root_->show_children.clear();
for (CodeNode* n : roots) {
root_->AggregateTotalStats(n);
@ -121,21 +392,46 @@ const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
CodeNode* root = PrintScope({root_.get()}, opts, 1, 0)[0];
root->formatted_str = FormatLegend(opts) + root->formatted_str;
Format(root->show_children, &root->formatted_str, root->mutable_proto());
if (timeline) {
timeline->GenerateCodeTimeline(root);
if (opts.output_type == kOutput[3]) {
std::vector<uint64> call_ids;
pprof_profile_.reset(new PprofProfileImpl(&opts));
Format(root, root->show_children, opts, &root->formatted_str,
root->mutable_proto(), &call_ids);
Status s = pprof_profile_->WritePprofProfile(
opts.output_options.at(kPprofOpts[0]));
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
} else {
Format(root, root->show_children, opts, &root->formatted_str,
root->mutable_proto(), nullptr);
if (timeline) {
timeline->GenerateCodeTimeline(root);
}
}
return root;
}
void TFCode::Format(const std::vector<CodeNode*> roots, string* display_str,
MultiGraphNodeProto* proto) {
for (CodeNode* node : roots) {
// Recursively renders 'nodes' (children of 'root') into display_str/proto.
// For pprof output (kOutput[3] == "pprof"), also maintains 'call_ids' as the
// current call stack: push the caller->callee location id before descending,
// pop it after, and emit a sample whenever a displayed leaf is reached.
void TFCode::Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
const Options& opts, string* display_str,
MultiGraphNodeProto* proto, std::vector<uint64>* call_ids) {
// A displayed leaf ends one call path: record accumulated ids as a sample.
if (nodes.empty() && root->trace && opts.output_type == kOutput[3]) {
pprof_profile_->AddSample(root, call_ids);
}
for (CodeNode* node : nodes) {
// root->trace is null only for the synthetic root, which has no call site.
if (root->trace && opts.output_type == kOutput[3]) {
uint64 loc_id = pprof_profile_->AddLocation(node, root);
call_ids->push_back(loc_id);
}
display_str->append(node->formatted_str);
MultiGraphNodeProto* child = proto->add_children();
child->MergeFrom(node->proto());
Format(node, node->show_children, opts, display_str, child, call_ids);
// Restore the call stack to its state before descending into 'node'.
if (root->trace && opts.output_type == kOutput[3]) {
call_ids->pop_back();
}
}
}
@ -170,14 +466,15 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots,
std::vector<CodeNode*> show_nodes;
for (CodeNode* node : roots) {
if (ShouldTrim(node, opts.trim_name_regexes) || depth > opts.max_depth) {
continue;
}
int ident = last_ident;
bool show = ShouldShow(node, opts, depth);
if (show) ident += 2;
std::vector<CodeNode*> show_cnodes;
if (!ShouldTrim(node, opts.trim_name_regexes) && depth <= opts.max_depth) {
show_cnodes = PrintScope(node->show_children, opts, depth + 1, ident);
}
std::vector<CodeNode*> show_cnodes =
PrintScope(node->show_children, opts, depth + 1, ident);
if (show) {
node->show_children.clear();
if (opts.account_displayed_op_only) {

View File

@ -32,15 +32,29 @@ limitations under the License.
#include "tensorflow/core/profiler/internal/tfprof_show_multi.h"
#include "tensorflow/core/profiler/internal/tfprof_timeline.h"
#include "tensorflow/core/profiler/internal/tfprof_utils.h"
#include "tensorflow/core/profiler/profile.pb.h"
#include "tensorflow/core/profiler/tfprof_log.pb.h"
#include "tensorflow/core/profiler/tfprof_output.pb.h"
namespace tensorflow {
namespace tfprof {
// Interface for building and writing a code-view profile in pprof format.
class PprofProfile {
public:
virtual ~PprofProfile() {}
// Registers the call site 'caller' -> 'callee' and returns its pprof
// location id.
virtual uint64 AddLocation(const CodeNode* callee,
const CodeNode* caller) = 0;
// Records one sample for the call stack in 'call_ids' ending at 'leaf'.
virtual void AddSample(const CodeNode* leaf,
std::vector<uint64>* call_ids) = 0;
// Serializes the accumulated profile to 'filename'.
virtual Status WritePprofProfile(const string& filename) = 0;
};
class TFCode : public TFMultiShow {
public:
explicit TFCode() : code_root_(nullptr), trace_root_(nullptr) {}
TFCode() {}
~TFCode() override {}
void AddNode(TFGraphNode* node) override;
@ -48,8 +62,6 @@ class TFCode : public TFMultiShow {
void Build() override;
private:
CodeNode* BuildCodeNodes(TFMultiGraphNode* root);
const ShowMultiNode* ShowInternal(const Options& opts,
Timeline* timeline) override;
@ -63,16 +75,15 @@ class TFCode : public TFMultiShow {
std::vector<CodeNode*> Account(const std::vector<CodeNode*>& roots,
const Options& opts);
void Format(const std::vector<CodeNode*> roots, string* display_str,
MultiGraphNodeProto* proto);
void Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
const Options& opts, string* display_str,
MultiGraphNodeProto* proto, std::vector<uint64>* call_ids);
string FormatNode(CodeNode* node, const Options& opts, int64 indent);
std::unique_ptr<CodeNode> root_;
CodeNode* code_root_;
std::unique_ptr<TFMultiGraphNode> trace_root_;
std::unique_ptr<TFMultiGraphNode> tfprof_trace_root_;
std::set<std::unique_ptr<CodeNode>> code_nodes_;
std::unique_ptr<TFMultiGraphNode> graph_root_;
std::unique_ptr<PprofProfile> pprof_profile_;
};
} // namespace tfprof
} // namespace tensorflow

View File

@ -72,6 +72,11 @@ void TFGraph::Build() {
const ShowNode* TFGraph::ShowInternal(const Options& opts, Timeline* timeline) {
root_->ResetTotalStats();
root_->show_children.clear();
if (opts.output_type == kOutput[3]) {
fprintf(stderr, "Only 'code' view supports pprof output now.\n");
return root_;
}
if (timeline && timeline->step() < 0) {
// TODO(xpan): Maybe pick a default step for users.
fprintf(stderr,

View File

@ -340,6 +340,27 @@ class TFGraphNode {
return exec->second.allocator_bytes_in_use();
}
// Number of parameters of this node: the product of its shape dimensions.
// Returns 0 when the shape is unknown or any dimension is negative
// (i.e. the shape is incomplete).
int64 parameters() const {
  if (shape().empty()) {
    return 0;
  }
  int64 params = 1;
  for (int64 d : shape()) {
    // Sometimes parameters could be <0 when a dim is unknown.
    if (d < 0) {
      fprintf(stderr, "Incomplete shape.\n");
      return 0;
    }
    params *= d;
  }
  return params;
}
int64 float_ops(int64 step) const {
// If not run, return static analysis.
if (execs_.empty()) {
@ -400,12 +421,14 @@ class TFMultiGraphNode {
public:
TFMultiGraphNode(const string& name)
: name_(name),
step_(-1),
run_count_(0),
exec_micros_(0),
accelerator_exec_micros_(0),
cpu_exec_micros_(0),
requested_bytes_(0),
float_ops_(0) {}
float_ops_(0),
parameters_(0) {}
bool SnapshotNodes(int64 step, const std::vector<string>& type_regexes) {
run_count_ = 0;
@ -415,11 +438,13 @@ class TFMultiGraphNode {
requested_bytes_ = 0;
float_ops_ = 0;
parameters_ = 0;
op_types_.clear();
shapes_.clear();
devices_.clear();
snapshot_nodes_.clear();
step_ = step;
std::vector<const TFGraphNode*> nodes = pick_nodes(type_regexes);
if (nodes.empty()) {
@ -436,6 +461,7 @@ class TFMultiGraphNode {
requested_bytes_ += node->requested_bytes(step);
float_ops_ += node->float_ops(step);
parameters_ += node->parameters();
if (node->shape().size() > 0) {
shapes_.push_back(node->shape());
}
@ -445,6 +471,8 @@ class TFMultiGraphNode {
return true;
}
int64 step() const { return step_; }
void AddGraphNode(const TFGraphNode* node) {
if (nodes_.find(node->name()) != nodes_.end()) {
return;
@ -456,16 +484,6 @@ class TFMultiGraphNode {
return snapshot_nodes_;
}
void AddChildren(const string& name) {
if (children_.find(name) != children_.end()) {
return;
}
children_[name].reset(new TFMultiGraphNode(name));
}
const std::map<string, std::unique_ptr<TFMultiGraphNode>>& children() const {
return children_;
}
const string& name() const { return name_; }
int64 run_count() const { return run_count_; }
@ -477,6 +495,8 @@ class TFMultiGraphNode {
int64 float_ops() const { return float_ops_; }
int64 parameters() const { return parameters_; }
const std::set<string>& devices() const { return devices_; }
const std::set<string>& op_types() const { return op_types_; }
@ -511,6 +531,7 @@ class TFMultiGraphNode {
}
const string name_;
int64 step_;
// Snapshot based on type_regexes
std::set<string> op_types_;
int64 run_count_;
@ -520,13 +541,13 @@ class TFMultiGraphNode {
int64 requested_bytes_;
int64 float_ops_;
int64 parameters_;
std::set<string> devices_;
std::vector<std::vector<int64>> shapes_;
std::map<string, const TFGraphNode*> snapshot_nodes_;
// Overall data held by the TFMultiGraphNode.
std::map<string, const TFGraphNode*> nodes_;
std::map<string, std::unique_ptr<TFMultiGraphNode>> children_;
};
bool IsPlacedOnAccelerator(const string& device);

View File

@ -45,25 +45,7 @@ void ShowNode::ReInit(int64 step) {
(*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom(
VecToShapeProto(inp.second));
}
proto_.clear_parameters();
if (!node->shape().empty()) {
int64 params = 1;
bool complete_shape = true;
for (int64 d : node->shape()) {
// Sometimes parameters could be <0 when a dim is unknown.
if (d < 0) {
complete_shape = false;
break;
}
params *= d;
}
if (complete_shape) {
mutable_proto()->set_parameters(proto_.parameters() + params);
} else {
fprintf(stderr, "Incomplete shape.");
}
}
proto_.set_parameters(node->parameters());
}
GraphNodeProto* ShowNode::mutable_proto() { return &proto_; }
@ -114,6 +96,8 @@ void ShowNode::AddSelfToTotalStats() {
}
void ShowNode::ResetTotalStats() {
formatted_str.clear();
mutable_proto()->set_total_definition_count(0);
mutable_proto()->set_total_run_count(0);
mutable_proto()->set_total_exec_micros(0);
@ -153,26 +137,7 @@ bool ShowMultiNode::ReInit(int64 step,
mutable_proto()->set_requested_bytes(node->requested_bytes());
mutable_proto()->set_float_ops(node->float_ops());
mutable_proto()->clear_parameters();
if (!node->shapes().empty()) {
for (const std::vector<int64>& shape : node->shapes()) {
int64 params = 1;
bool complete_shape = true;
for (int64 d : shape) {
// Sometimes parameters could be <0 when a dim is unknown.
if (d < 0) {
complete_shape = false;
break;
}
params *= d;
}
if (complete_shape) {
mutable_proto()->set_parameters(proto().parameters() + params);
} else {
fprintf(stderr, "Incomplete shape.");
}
}
}
mutable_proto()->set_parameters(node->parameters());
return has_matched_type;
}
@ -216,6 +181,7 @@ void ShowMultiNode::AddSelfToTotalStats() {
}
void ShowMultiNode::ResetTotalStats() {
formatted_str.clear();
mutable_proto()->set_total_exec_micros(0);
mutable_proto()->set_total_accelerator_exec_micros(0);
mutable_proto()->set_total_cpu_exec_micros(0);

View File

@ -111,11 +111,31 @@ class ShowMultiNode {
// CodeNode is a node of the code-view tree: one Python stack frame, backed
// by a TFMultiGraphNode that aggregates the graph nodes created under it.
class CodeNode : public ShowMultiNode {
public:
// NOTE(review): this constructor leaves the raw-pointer member 'trace'
// uninitialized — confirm callers never read it on such nodes.
explicit CodeNode(TFMultiGraphNode* node) : ShowMultiNode(node) {}
explicit CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace)
: ShowMultiNode(node), trace(trace) {}
~CodeNode() override {}
// Returns the child named 'name', creating it (and its backing
// TFMultiGraphNode) on first use. 'trace' is only consulted on creation.
CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace) {
auto it = children_.find(name);
if (it != children_.end()) {
return it->second.get();
}
graph_children_.push_back(
std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(name)));
auto child = &children_[name];
child->reset(new CodeNode(graph_children_.back().get(), trace));
children.push_back(child->get());
return child->get();
}
const CodeDef::Trace* trace;           // Call-site info; not owned.
std::vector<CodeNode*> children;       // All children, insertion order.
std::vector<CodeNode*> show_children;  // Children selected for display.
private:
std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_;
std::map<string, std::unique_ptr<CodeNode>> children_;
};
class OpNode : public ShowMultiNode {

View File

@ -103,8 +103,13 @@ void TFOp::Build() {
}
const ShowMultiNode* TFOp::ShowInternal(const Options& opts,
Timeline* timeline) {
Timeline* timeline) {
root_->ResetTotalStats();
if (opts.output_type == kOutput[3]) {
fprintf(stderr, "Only 'code' view supports pprof output now.\n");
return root_.get();
}
if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) {
root_->formatted_str = FormatNode(root_.get(), root_.get(), opts);
}

View File

@ -84,6 +84,13 @@ tensorflow::Status ParseOutput(const string& output_opt, string* output_type,
required_options.insert(kFileRequiredOpts,
kFileRequiredOpts + sizeof(kFileRequiredOpts) /
sizeof(*kFileRequiredOpts));
} else if (*output_type == kOutput[3]) {
valid_options.insert(kPprofOpts,
kPprofOpts + sizeof(kPprofOpts) / sizeof(*kPprofOpts));
required_options.insert(
kPprofRequiredOpts,
kPprofRequiredOpts +
sizeof(kPprofRequiredOpts) / sizeof(*kPprofRequiredOpts));
}
for (const string& kv_str : kv_split) {

View File

@ -62,7 +62,8 @@ static const char* const kCmds[] = {
"scope", "graph", "code", "op", "advise", "set", "help",
};
static const char* const kOutput[] = {"timeline", "stdout", "file", "none"};
static const char* const kOutput[] = {"timeline", "stdout", "file", "pprof",
"none"};
static const char* const kTimelineOpts[] = {
"outfile",
@ -78,6 +79,14 @@ static const char* const kFileRequiredOpts[] = {
"outfile",
};
static const char* const kPprofOpts[] = {
"outfile",
};
static const char* const kPprofRequiredOpts[] = {
"outfile",
};
struct Options {
public:
static tensorflow::Status FromProtoStr(const string& opts_proto_str,

View File

@ -78,8 +78,13 @@ void TFScope::Build() {
}
const ShowNode* TFScope::ShowInternal(const Options& opts, Timeline* timeline) {
std::vector<ScopeNode*> roots = Account(root_->children, opts);
root_->ResetTotalStats();
if (opts.output_type == kOutput[3]) {
fprintf(stderr, "Only 'code' view supports pprof output now.\n");
return root_;
}
std::vector<ScopeNode*> roots = Account(root_->children, opts);
root_->show_children.clear();
for (ScopeNode* n : roots) {
root_->AggregateTotalStats(n);

View File

@ -26,25 +26,27 @@ namespace tensorflow {
namespace tfprof {
const GraphNodeProto& TFShow::Show(const Options& opts) {
if (opts.output_type == kOutput[3]) {
return ShowInternal(opts, nullptr)->proto();
} else if (opts.output_type == kOutput[0]) {
if (opts.output_type == kOutput[0]) {
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
return ShowInternal(opts, &timeline)->proto();
} else if (opts.output_type == kOutput[2]) {
const ShowNode* root = ShowInternal(opts, nullptr);
Status s =
WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
root->formatted_str);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
return root->proto();
} else {
const ShowNode* root = ShowInternal(opts, nullptr);
printf("%s", root->formatted_str.c_str());
fflush(stdout);
return root->proto();
const ShowNode* ret = ShowInternal(opts, nullptr);
if (opts.output_type == kOutput[1]) {
printf("%s", ret->formatted_str.c_str());
fflush(stdout);
} else if (opts.output_type == kOutput[2]) {
Status s = WriteStringToFile(Env::Default(),
opts.output_options.at(kFileOpts[0]),
ret->formatted_str);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
} else if (opts.output_type == kOutput[3] ||
opts.output_type == kOutput[4]) {
} else {
fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
}
return ret->proto();
}
}

View File

@ -28,25 +28,27 @@ namespace tensorflow {
namespace tfprof {
const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) {
if (opts.output_type == kOutput[3]) {
return ShowInternal(opts, nullptr)->proto();
} else if (opts.output_type == kOutput[0]) {
if (opts.output_type == kOutput[0]) {
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
return ShowInternal(opts, &timeline)->proto();
} else if (opts.output_type == kOutput[2]) {
const ShowMultiNode* root = ShowInternal(opts, nullptr);
Status s =
WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
root->formatted_str);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
return root->proto();
} else {
const ShowMultiNode* root = ShowInternal(opts, nullptr);
printf("%s", root->formatted_str.c_str());
fflush(stdout);
return root->proto();
const ShowMultiNode* ret = ShowInternal(opts, nullptr);
if (opts.output_type == kOutput[1]) {
printf("%s", ret->formatted_str.c_str());
fflush(stdout);
} else if (opts.output_type == kOutput[2]) {
Status s = WriteStringToFile(Env::Default(),
opts.output_options.at(kFileOpts[0]),
ret->formatted_str);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
} else if (opts.output_type == kOutput[3] ||
opts.output_type == kOutput[4]) {
} else {
fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
}
return ret->proto();
}
}

View File

@ -0,0 +1,71 @@
// This proto intends to match format expected by pprof tool.
syntax = "proto3";
package tensorflow.tfprof.pprof;
// Top-level profile. All int64 "string" fields (type, unit, name, filename,
// comment, ...) are indices into string_table; see how tfprof_code.cc fills
// them via StringTable::GetIndex.
message Profile {
repeated ValueType sample_type = 1;
repeated Sample sample = 2;
repeated Mapping mapping = 3;
repeated Location location = 4;
repeated Function function = 5;
repeated string string_table = 6;
int64 drop_frames = 7;
int64 keep_frames = 8;
int64 time_nanos = 9;
int64 duration_nanos = 10;
ValueType period_type = 11;
int64 period = 12;
repeated int64 comment = 13;
int64 default_sample_type = 14;
}
// Describes one sample value: its type and unit (string_table indices).
message ValueType {
int64 type = 1;
int64 unit = 2;
}
// One sampled call stack: location ids (leaf first) plus one value per
// sample_type entry.
message Sample {
repeated uint64 location_id = 1;
repeated int64 value = 2;
repeated Label label = 3;
}
// Key/value annotation on a sample; key and str index string_table.
message Label {
int64 key = 1;
int64 str = 2;
int64 num = 3;
}
// Binary/mapping info; unused by tfprof's code view in this file.
message Mapping {
uint64 id = 1;
uint64 memory_start = 2;
uint64 memory_limit = 3;
uint64 file_offset = 4;
int64 filename = 5;
int64 build_id = 6;
bool has_functions = 7;
bool has_filenames = 8;
bool has_line_numbers = 9;
bool has_inline_frames = 10;
}
// A call site; its line entries reference Function ids.
message Location {
uint64 id = 1;
uint64 mapping_id = 2;
uint64 address = 3;
repeated Line line = 4;
}
message Line {
uint64 function_id = 1;
int64 line = 2;
}
// A function; name and filename index string_table.
message Function {
uint64 id = 1;
int64 name = 2;
int64 system_name = 3;
int64 filename = 4;
int64 start_line = 5;
}

View File

@ -10,6 +10,7 @@ message CodeDef {
int32 lineno = 2;
string function = 3;
string line = 4;
int32 func_start_line = 5;
}
}

View File

@ -17,7 +17,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import io
import os
from tensorflow.core.profiler import profile_pb2
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session
@ -200,7 +204,6 @@ class PrintModelAnalysisTest(test.TestCase):
def testTimeline(self):
ops.reset_default_graph()
opts = builder.trainable_variables_parameter()
outfile = os.path.join(test.get_temp_dir(), 'timeline')
opts = (builder(builder.trainable_variables_parameter())
.with_max_depth(100000)
@ -312,6 +315,61 @@ class PrintModelAnalysisTest(test.TestCase):
checker = advice_pb.checkers['ExpensiveOperationChecker']
self.assertGreater(len(checker.reports), 0)
def pprof_test_helper(self, attribute, should_fail=False):
  """Profiles a small model and validates the generated pprof gzip file.

  Args:
    attribute: profiler attribute to select (e.g. 'micros', 'bytes').
    should_fail: if True, expect pprof generation to be rejected so that
      no output file is written.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
  opts = (builder(builder.time_and_memory())
          .select([attribute])
          .with_max_depth(100000)
          .with_node_names(trim_name_regexes=['ops.py.*'])
          .with_pprof_output(outfile).build())

  with session.Session() as sess:
    x = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(
        x,
        options=config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE),
        run_metadata=run_meta)

    _ = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    if should_fail:
      self.assertFalse(gfile.Exists(outfile))
      return

    # The profile is written gzip-compressed; decompress and parse it.
    profile_pb = profile_pb2.Profile()
    with gfile.Open(outfile, 'rb') as f:
      with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
        profile_pb.ParseFromString(gzipf.read())

    self.assertGreater(len(profile_pb.sample), 10)
    self.assertGreater(len(profile_pb.location), 10)
    self.assertGreater(len(profile_pb.function), 10)
    self.assertGreater(len(profile_pb.string_table), 30)

    # The call stacks should include rnn and while-loop frames, and the
    # trimmed 'ops.py.*' frames must not appear.
    has_rnn = False
    has_loop = False
    for s in profile_pb.string_table:
      if s.find('rnn') > 0:
        has_rnn = True
      if s.find('while') > 0:
        has_loop = True
      self.assertFalse(s.startswith('ops.py'))
    self.assertTrue(has_rnn)
    self.assertTrue(has_loop)
def testPprof(self):
  """Exercises pprof output for supported and unsupported attributes."""
  supported = ['micros', 'bytes', 'accelerator_micros', 'cpu_micros',
               'params', 'float_ops']
  unsupported = ['op_types', 'device', 'input_shapes']
  for attr in supported:
    self.pprof_test_helper(attr)
  for attr in unsupported:
    self.pprof_test_helper(attr, True)
if __name__ == '__main__':
test.main()

View File

@ -353,6 +353,20 @@ class ProfileOptionBuilder(object):
self._options['output'] = 'timeline:outfile=%s' % timeline_file
return self
def with_pprof_output(self, pprof_file):
  """Generate a pprof profile gzip file.

  To use the pprof file:
    pprof -png --nodecount=20 --sample_index=1 <pprof_file>

  Args:
    pprof_file: filename for output, usually suffixed with .pb.gz.
  Returns:
    self.
  """
  option_value = 'pprof:outfile=%s' % pprof_file
  self._options['output'] = option_value
  return self
def order_by(self, attribute):
# pylint: disable=line-too-long
"""Order the displayed profiler nodes based on a attribute.

View File

@ -98,12 +98,13 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
add_entry = True
if add_trace:
for tb in op.traceback:
for tb in op.traceback_with_start_lines:
trace = entry.code_def.traces.add()
trace.file = tb[0] if tb[0] else 'none'
trace.lineno = tb[1] if tb[1] else -1
trace.function = tb[2] if tb[2] else 'none'
trace.line = tb[3] if tb[3] else 'none'
trace.func_start_line = tb[4] if tb[4] else -1
add_entry = True
if add_entry:

View File

@ -74,6 +74,10 @@ tf_class {
name: "with_node_names"
argspec: "args=[\'self\', \'start_name_regexes\', \'show_name_regexes\', \'hide_name_regexes\', \'trim_name_regexes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
}
member_method {
name: "with_pprof_output"
argspec: "args=[\'self\', \'pprof_file\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "with_stdout_output"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"