Add support to generate pprof results to tf.profiler
A nice aspect is that it can profile not only time and memory, but also parameters, etc. PiperOrigin-RevId: 163767517
This commit is contained in:
parent
78a90370ef
commit
edac90c7c7
@ -181,6 +181,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a
|
||||
|
||||
### Visualize time and memory.
|
||||
```
|
||||
# The following example generates a timeline.
|
||||
tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile=<filename>
|
||||
|
||||
generating trace file.
|
||||
@ -191,9 +192,29 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
|
||||
******************************************************
|
||||
```
|
||||
<left>
|
||||
[CodeTimeline](g3doc/graph_timeline.png)
|
||||
[Timeline](g3doc/graph_timeline.png)
|
||||
</left>
|
||||
|
||||
```
|
||||
# The following example generates a pprof graph (only supported by code view).
|
||||
# Since TensorFlow runs the graph instead of Python code, the pprof graph
|
||||
# doesn't profile the statistics of Python, but the TensorFlow graph
|
||||
# nodes created by the Python call stack.
|
||||
# Nevertheless, it surfaces the critical Python code paths for us.
|
||||
#
|
||||
# `-trim_name_regexes` trims the python call stack, which are always the same
|
||||
# for the leaves.
|
||||
# `-select accelerator_micros` pick accelerator time for pprof graph. User
|
||||
# can also generate memory profile using `-select bytes`
|
||||
tfprof> code -max_depth 100 -trim_name_regexes '^ops.py.*' -select accelerator_micros -output pprof:outfile=<filename>
|
||||
|
||||
# Use pprof to visualize the generated file.
|
||||
pprof -png --nodecount=20 --sample_index=1 <filename>
|
||||
```
|
||||
|
||||
<left>
|
||||
[PprofGraph](g3doc/pprof.jpg)
|
||||
</left>
|
||||
|
||||
### Feature Request and Bug Report
|
||||
|
||||
|
BIN
tensorflow/core/profiler/g3doc/pprof.jpg
Normal file
BIN
tensorflow/core/profiler/g3doc/pprof.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 261 KiB |
@ -21,6 +21,8 @@ limitations under the License.
|
||||
#include "tensorflow/c/c_api.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/lib/io/zlib_compression_options.h"
|
||||
#include "tensorflow/core/lib/io/zlib_outputbuffer.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
@ -46,66 +48,335 @@ string GetTraceString(const CodeDef::Trace& trace) {
|
||||
}
|
||||
return ntrace;
|
||||
}
|
||||
|
||||
// StringTable maps each string to an id.
|
||||
class StringTable {
|
||||
public:
|
||||
StringTable() {
|
||||
// Pprof requires first entry in string_table to be ''.
|
||||
string_id_[""] = 0;
|
||||
all_strings_.push_back("");
|
||||
}
|
||||
|
||||
// Returns the index of a string. If not found, inserts the string and
|
||||
// return the inserted index.
|
||||
uint64 GetIndex(const string& str) {
|
||||
auto idx = string_id_.find(str);
|
||||
if (idx != string_id_.end()) {
|
||||
return idx->second;
|
||||
}
|
||||
all_strings_.push_back(str);
|
||||
return string_id_.insert(std::pair<string, int64>(str, string_id_.size()))
|
||||
.first->second;
|
||||
}
|
||||
|
||||
const std::vector<string>& strings() const { return all_strings_; }
|
||||
|
||||
private:
|
||||
std::map<string, uint64> string_id_;
|
||||
std::vector<string> all_strings_;
|
||||
};
|
||||
|
||||
// FunctionTable maps each function to an id.
|
||||
class FunctionTable {
|
||||
public:
|
||||
explicit FunctionTable(StringTable* string_table)
|
||||
: string_table_(string_table) {}
|
||||
|
||||
// Returns the index of a function. If not found, adds a function proto
|
||||
// and returns the function index.
|
||||
uint64 GetIndex(const string& file_path, const string& func_name,
|
||||
uint64 func_start_line) {
|
||||
auto key = std::tuple<string, string, uint64>(file_path, func_name,
|
||||
func_start_line);
|
||||
auto idx = function_table_.find(key);
|
||||
if (idx != function_table_.end()) {
|
||||
return idx->second.id();
|
||||
}
|
||||
pprof::Function* func_pb = &function_table_[key];
|
||||
// function index should start from 1.
|
||||
func_pb->set_id(function_table_.size());
|
||||
func_pb->set_name(string_table_->GetIndex(func_name));
|
||||
func_pb->set_filename(string_table_->GetIndex(file_path));
|
||||
func_pb->set_start_line(func_start_line);
|
||||
return func_pb->id();
|
||||
}
|
||||
|
||||
const std::map<std::tuple<string, string, uint64>, pprof::Function>&
|
||||
functions() const {
|
||||
return function_table_;
|
||||
}
|
||||
|
||||
private:
|
||||
StringTable* string_table_;
|
||||
std::map<std::tuple<string, string, uint64>, pprof::Function> function_table_;
|
||||
};
|
||||
|
||||
// LocationTable maps each function call to an id.
|
||||
class LocationTable {
|
||||
public:
|
||||
explicit LocationTable(FunctionTable* function_table)
|
||||
: function_table_(function_table) {}
|
||||
|
||||
// Returns the index of a function call localtion. If not found, adds a
|
||||
// location proto and returns the location index.
|
||||
uint64 GetIndex(const string& file_path, uint64 line_number,
|
||||
const string& called_function_name,
|
||||
const string& called_file_path,
|
||||
uint64 called_func_start_line) {
|
||||
auto key = std::tuple<string, string, uint64>(
|
||||
file_path, called_function_name, line_number);
|
||||
auto idx = location_table_.find(key);
|
||||
if (idx != location_table_.end()) {
|
||||
return idx->second.id();
|
||||
}
|
||||
pprof::Location* location_pb = &location_table_[key];
|
||||
location_pb->set_id(location_table_.size());
|
||||
pprof::Line* line_pb = location_pb->add_line();
|
||||
line_pb->set_function_id(function_table_->GetIndex(
|
||||
called_file_path, called_function_name, called_func_start_line));
|
||||
line_pb->set_line(line_number);
|
||||
return location_pb->id();
|
||||
}
|
||||
|
||||
const std::map<std::tuple<string, string, uint64>, pprof::Location>&
|
||||
locations() const {
|
||||
return location_table_;
|
||||
}
|
||||
|
||||
private:
|
||||
FunctionTable* function_table_;
|
||||
std::map<std::tuple<string, string, uint64>, pprof::Location> location_table_;
|
||||
};
|
||||
|
||||
// Samples stores samples of all calls. A sample is a single call trace,
|
||||
// that is, the call path from top caller to the leaf callee.
|
||||
class Samples {
|
||||
public:
|
||||
explicit Samples(StringTable* string_table, const Options* opts)
|
||||
: string_table_(string_table), opts_(opts) {}
|
||||
|
||||
// 'node' is the leaf of the displayed trace. It includes all graph nodes
|
||||
// created by it. 'location_ids' contains
|
||||
// the call stack, from callee to caller.
|
||||
// This method adds the statistics of graph nodes created by the python
|
||||
// call.
|
||||
void Add(const CodeNode* node, const std::vector<uint64>& location_ids) {
|
||||
// displayed leaf might not be true leaf. Retrive the true leaves for
|
||||
// stats.
|
||||
std::vector<const CodeNode*> all_leaf = FetchAllLeaf(node);
|
||||
CHECK(!all_leaf.empty()) << node->name();
|
||||
|
||||
for (const CodeNode* cn : all_leaf) {
|
||||
for (auto gn_it : cn->node->graph_nodes()) {
|
||||
const TFGraphNode* gn = gn_it.second;
|
||||
pprof::Sample* sample_pb = &sample_table_[gn->name()];
|
||||
for (uint64 id : location_ids) {
|
||||
sample_pb->mutable_location_id()->Add(id);
|
||||
}
|
||||
pprof::Label* label_pb = sample_pb->mutable_label()->Add();
|
||||
label_pb->set_key(string_table_->GetIndex("node_name"));
|
||||
label_pb->set_str(string_table_->GetIndex(gn->name()));
|
||||
|
||||
sample_pb->mutable_value()->Add(1);
|
||||
string type = *opts_->select.begin();
|
||||
if (type == kShown[1]) {
|
||||
sample_pb->mutable_value()->Add(gn->exec_micros(node->node->step()));
|
||||
} else if (type == kShown[9]) {
|
||||
sample_pb->mutable_value()->Add(
|
||||
gn->accelerator_exec_micros(node->node->step()));
|
||||
} else if (type == kShown[10]) {
|
||||
sample_pb->mutable_value()->Add(
|
||||
gn->cpu_exec_micros(node->node->step()));
|
||||
} else if (type == kShown[0]) {
|
||||
sample_pb->mutable_value()->Add(
|
||||
gn->requested_bytes(node->node->step()));
|
||||
} else if (type == kShown[2]) {
|
||||
sample_pb->mutable_value()->Add(gn->parameters());
|
||||
} else if (type == kShown[3]) {
|
||||
sample_pb->mutable_value()->Add(gn->float_ops(node->node->step()));
|
||||
} else {
|
||||
fprintf(stderr, "pprof doesn't support -select=%s\n", type.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const std::map<string, pprof::Sample>& samples() const {
|
||||
return sample_table_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<const CodeNode*> FetchAllLeaf(const CodeNode* root) {
|
||||
if (root->children.empty()) {
|
||||
return {root};
|
||||
}
|
||||
std::vector<const CodeNode*> ret;
|
||||
for (auto& n : root->children) {
|
||||
std::vector<const CodeNode*> nodes = FetchAllLeaf(n);
|
||||
ret.insert(ret.end(), nodes.begin(), nodes.end());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
StringTable* string_table_;
|
||||
const Options* opts_;
|
||||
std::map<string, pprof::Sample> sample_table_;
|
||||
};
|
||||
|
||||
class PprofProfileImpl : public PprofProfile {
|
||||
public:
|
||||
explicit PprofProfileImpl(const Options* opts)
|
||||
: opts_(opts),
|
||||
func_table_(new FunctionTable(&string_table_)),
|
||||
loc_table_(new LocationTable(func_table_.get())),
|
||||
samples_(new Samples(&string_table_, opts)) {}
|
||||
|
||||
uint64 AddLocation(const CodeNode* callee, const CodeNode* caller) override {
|
||||
const string& file_path = caller->trace->file();
|
||||
uint64 lineno = caller->trace->lineno();
|
||||
const string& callee_file_path = callee->trace->file();
|
||||
const string& callee_function = callee->trace->function();
|
||||
uint64 callee_func_start_line = callee->trace->func_start_line();
|
||||
|
||||
return loc_table_->GetIndex(file_path, lineno, callee_function,
|
||||
callee_file_path, callee_func_start_line);
|
||||
}
|
||||
|
||||
void AddSample(const CodeNode* leaf, std::vector<uint64>* call_ids) override {
|
||||
std::vector<uint64> reversed_call_ids;
|
||||
std::reverse_copy(call_ids->begin(), call_ids->end(),
|
||||
std::back_inserter(reversed_call_ids));
|
||||
samples_->Add(leaf, reversed_call_ids);
|
||||
}
|
||||
|
||||
Status WritePprofProfile(const string& filename) override {
|
||||
pprof::Profile profile_pb;
|
||||
Build(&profile_pb);
|
||||
|
||||
std::unique_ptr<WritableFile> file;
|
||||
Status s = Env::Default()->NewWritableFile(filename, &file);
|
||||
if (!s.ok()) return s;
|
||||
|
||||
int32 buf_size = 1024 * 1024;
|
||||
io::ZlibOutputBuffer* zlib_output_buffer = new io::ZlibOutputBuffer(
|
||||
file.get(), buf_size, buf_size, io::ZlibCompressionOptions::GZIP());
|
||||
s = zlib_output_buffer->Init();
|
||||
if (!s.ok()) return s;
|
||||
s = zlib_output_buffer->Append(profile_pb.SerializeAsString());
|
||||
if (!s.ok()) return s;
|
||||
s = zlib_output_buffer->Close();
|
||||
if (!s.ok()) return s;
|
||||
fprintf(stdout, "\nRun pprof -png --nodecount=20 --sample_index=1 <%s>\n",
|
||||
filename.c_str());
|
||||
return s;
|
||||
}
|
||||
|
||||
private:
|
||||
void Build(pprof::Profile* profile_pb) {
|
||||
string sample_type_description = "count";
|
||||
auto sample_type = profile_pb->mutable_sample_type()->Add();
|
||||
sample_type->set_type(string_table_.GetIndex(sample_type_description));
|
||||
sample_type->set_unit(string_table_.GetIndex("count"));
|
||||
|
||||
string type = *opts_->select.begin();
|
||||
sample_type_description = type;
|
||||
sample_type = profile_pb->mutable_sample_type()->Add();
|
||||
sample_type->set_type(string_table_.GetIndex(sample_type_description));
|
||||
if (type == kShown[1] || type == kShown[9] || type == kShown[10]) {
|
||||
sample_type->set_unit(string_table_.GetIndex("microseconds"));
|
||||
if (type == kShown[1]) {
|
||||
profile_pb->mutable_comment()->Add(string_table_.GetIndex(
|
||||
"Sum of accelerator execution time and cpu execution time."));
|
||||
} else if (type == kShown[9]) {
|
||||
profile_pb->mutable_comment()->Add(
|
||||
string_table_.GetIndex("Accelerator execution time."));
|
||||
} else if (type == kShown[10]) {
|
||||
profile_pb->mutable_comment()->Add(
|
||||
string_table_.GetIndex("CPU execution time."));
|
||||
}
|
||||
} else if (type == kShown[0]) {
|
||||
sample_type->set_unit(string_table_.GetIndex("bytes"));
|
||||
profile_pb->mutable_comment()->Add(
|
||||
string_table_.GetIndex("Sum of operation output memory."));
|
||||
} else if (type == kShown[2]) {
|
||||
sample_type->set_unit(string_table_.GetIndex("count"));
|
||||
profile_pb->mutable_comment()->Add(
|
||||
string_table_.GetIndex("Model parameters."));
|
||||
} else if (type == kShown[3]) {
|
||||
sample_type->set_unit(string_table_.GetIndex("count"));
|
||||
profile_pb->mutable_comment()->Add(string_table_.GetIndex(
|
||||
"Model float operations (Only available if defined)."));
|
||||
} else {
|
||||
fprintf(stderr, "pprof doesn't support selecting: %s\n", type.c_str());
|
||||
}
|
||||
|
||||
for (const string& str : string_table_.strings()) {
|
||||
*profile_pb->mutable_string_table()->Add() = str;
|
||||
}
|
||||
for (const auto& sample_it : samples_->samples()) {
|
||||
// TODO(xpan): Consider swap.
|
||||
profile_pb->mutable_sample()->Add()->MergeFrom(sample_it.second);
|
||||
}
|
||||
for (const auto& function_it : func_table_->functions()) {
|
||||
profile_pb->mutable_function()->Add()->MergeFrom(function_it.second);
|
||||
}
|
||||
for (const auto& location_it : loc_table_->locations()) {
|
||||
profile_pb->mutable_location()->Add()->MergeFrom(location_it.second);
|
||||
}
|
||||
}
|
||||
|
||||
const Options* opts_;
|
||||
StringTable string_table_;
|
||||
std::unique_ptr<FunctionTable> func_table_;
|
||||
std::unique_ptr<LocationTable> loc_table_;
|
||||
std::unique_ptr<Samples> samples_;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void TFCode::AddNode(TFGraphNode* node) {
|
||||
if (node->code().traces_size() == 0) {
|
||||
return;
|
||||
}
|
||||
TFMultiGraphNode* pre_trace_node = nullptr;
|
||||
if (!root_) {
|
||||
graph_root_.reset(new TFMultiGraphNode(kTFProfRoot));
|
||||
root_.reset(new CodeNode(graph_root_.get(), nullptr));
|
||||
}
|
||||
|
||||
CodeNode* pre_code_node = root_.get();
|
||||
// TODO(xpan): Consider to release CodeDef after TFCode is built. It
|
||||
// takes a lot of memory.
|
||||
for (int i = 0; i < node->code().traces_size(); ++i) {
|
||||
// Unlike op name, which is globally unique, trace name is only unique
|
||||
// w.r.t. it's parent.
|
||||
const string& trace = GetTraceString(node->code().traces(i));
|
||||
if (i == 0) {
|
||||
if (!trace_root_) {
|
||||
trace_root_.reset(new TFMultiGraphNode(trace));
|
||||
}
|
||||
CHECK(trace_root_->name() == trace) << "Different trace root";
|
||||
pre_trace_node = trace_root_.get();
|
||||
continue;
|
||||
}
|
||||
pre_trace_node->AddChildren(trace);
|
||||
TFMultiGraphNode* trace_node = pre_trace_node->children().at(trace).get();
|
||||
|
||||
pre_code_node = pre_code_node->AddChildren(trace, &node->code().traces(i));
|
||||
if (i == node->code().traces_size() - 1) {
|
||||
trace_node->AddGraphNode(node);
|
||||
pre_code_node->node->AddGraphNode(node);
|
||||
}
|
||||
pre_trace_node = trace_node;
|
||||
}
|
||||
}
|
||||
|
||||
void TFCode::Build() {
|
||||
if (root_) {
|
||||
return;
|
||||
}
|
||||
tfprof_trace_root_.reset(new TFMultiGraphNode(kTFProfRoot));
|
||||
root_.reset(new CodeNode(tfprof_trace_root_.get()));
|
||||
|
||||
if (trace_root_) {
|
||||
code_root_ = BuildCodeNodes(trace_root_.get());
|
||||
root_->children.push_back(code_root_);
|
||||
}
|
||||
}
|
||||
|
||||
CodeNode* TFCode::BuildCodeNodes(TFMultiGraphNode* root) {
|
||||
auto code_root = std::unique_ptr<CodeNode>(new CodeNode(root));
|
||||
CodeNode* code_root_ptr = code_root.get();
|
||||
code_nodes_.insert(std::move(code_root));
|
||||
|
||||
for (auto it = root->children().cbegin(); it != root->children().cend();
|
||||
++it) {
|
||||
code_root_ptr->children.push_back(BuildCodeNodes(it->second.get()));
|
||||
}
|
||||
return code_root_ptr;
|
||||
}
|
||||
|
||||
const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
|
||||
Timeline* timeline) {
|
||||
std::vector<CodeNode*> roots = Account(root_->children, opts);
|
||||
root_->ResetTotalStats();
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
if (opts.select.size() != 1) {
|
||||
fprintf(stderr, "Can only select 1 attribute for pprof output.\n");
|
||||
return root_.get();
|
||||
}
|
||||
string select = *opts.select.begin();
|
||||
if (select != kShown[0] && select != kShown[1] && select != kShown[2] &&
|
||||
select != kShown[3] && select != kShown[9] && select != kShown[10]) {
|
||||
fprintf(stderr, "pprof doesn't support -select=%s\n", select.c_str());
|
||||
return root_.get();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<CodeNode*> roots = Account(root_->children, opts);
|
||||
root_->show_children.clear();
|
||||
for (CodeNode* n : roots) {
|
||||
root_->AggregateTotalStats(n);
|
||||
@ -121,21 +392,46 @@ const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
|
||||
CodeNode* root = PrintScope({root_.get()}, opts, 1, 0)[0];
|
||||
|
||||
root->formatted_str = FormatLegend(opts) + root->formatted_str;
|
||||
Format(root->show_children, &root->formatted_str, root->mutable_proto());
|
||||
|
||||
if (timeline) {
|
||||
timeline->GenerateCodeTimeline(root);
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
std::vector<uint64> call_ids;
|
||||
pprof_profile_.reset(new PprofProfileImpl(&opts));
|
||||
Format(root, root->show_children, opts, &root->formatted_str,
|
||||
root->mutable_proto(), &call_ids);
|
||||
Status s = pprof_profile_->WritePprofProfile(
|
||||
opts.output_options.at(kPprofOpts[0]));
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
}
|
||||
} else {
|
||||
Format(root, root->show_children, opts, &root->formatted_str,
|
||||
root->mutable_proto(), nullptr);
|
||||
if (timeline) {
|
||||
timeline->GenerateCodeTimeline(root);
|
||||
}
|
||||
}
|
||||
return root;
|
||||
}
|
||||
|
||||
void TFCode::Format(const std::vector<CodeNode*> roots, string* display_str,
|
||||
MultiGraphNodeProto* proto) {
|
||||
for (CodeNode* node : roots) {
|
||||
void TFCode::Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
|
||||
const Options& opts, string* display_str,
|
||||
MultiGraphNodeProto* proto, std::vector<uint64>* call_ids) {
|
||||
if (nodes.empty() && root->trace && opts.output_type == kOutput[3]) {
|
||||
pprof_profile_->AddSample(root, call_ids);
|
||||
}
|
||||
|
||||
for (CodeNode* node : nodes) {
|
||||
if (root->trace && opts.output_type == kOutput[3]) {
|
||||
uint64 loc_id = pprof_profile_->AddLocation(node, root);
|
||||
call_ids->push_back(loc_id);
|
||||
}
|
||||
display_str->append(node->formatted_str);
|
||||
MultiGraphNodeProto* child = proto->add_children();
|
||||
child->MergeFrom(node->proto());
|
||||
Format(node->show_children, display_str, child);
|
||||
Format(node, node->show_children, opts, display_str, child, call_ids);
|
||||
if (root->trace && opts.output_type == kOutput[3]) {
|
||||
call_ids->pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -170,14 +466,15 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots,
|
||||
std::vector<CodeNode*> show_nodes;
|
||||
|
||||
for (CodeNode* node : roots) {
|
||||
if (ShouldTrim(node, opts.trim_name_regexes) || depth > opts.max_depth) {
|
||||
continue;
|
||||
}
|
||||
int ident = last_ident;
|
||||
bool show = ShouldShow(node, opts, depth);
|
||||
if (show) ident += 2;
|
||||
|
||||
std::vector<CodeNode*> show_cnodes;
|
||||
if (!ShouldTrim(node, opts.trim_name_regexes) && depth <= opts.max_depth) {
|
||||
show_cnodes = PrintScope(node->show_children, opts, depth + 1, ident);
|
||||
}
|
||||
std::vector<CodeNode*> show_cnodes =
|
||||
PrintScope(node->show_children, opts, depth + 1, ident);
|
||||
if (show) {
|
||||
node->show_children.clear();
|
||||
if (opts.account_displayed_op_only) {
|
||||
|
@ -32,15 +32,29 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/internal/tfprof_show_multi.h"
|
||||
#include "tensorflow/core/profiler/internal/tfprof_timeline.h"
|
||||
#include "tensorflow/core/profiler/internal/tfprof_utils.h"
|
||||
#include "tensorflow/core/profiler/profile.pb.h"
|
||||
#include "tensorflow/core/profiler/tfprof_log.pb.h"
|
||||
#include "tensorflow/core/profiler/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
class PprofProfile {
|
||||
public:
|
||||
virtual ~PprofProfile() {}
|
||||
|
||||
virtual uint64 AddLocation(const CodeNode* callee,
|
||||
const CodeNode* caller) = 0;
|
||||
|
||||
virtual void AddSample(const CodeNode* leaf,
|
||||
std::vector<uint64>* call_ids) = 0;
|
||||
|
||||
virtual Status WritePprofProfile(const string& filename) = 0;
|
||||
};
|
||||
|
||||
class TFCode : public TFMultiShow {
|
||||
public:
|
||||
explicit TFCode() : code_root_(nullptr), trace_root_(nullptr) {}
|
||||
TFCode() {}
|
||||
~TFCode() override {}
|
||||
|
||||
void AddNode(TFGraphNode* node) override;
|
||||
@ -48,8 +62,6 @@ class TFCode : public TFMultiShow {
|
||||
void Build() override;
|
||||
|
||||
private:
|
||||
CodeNode* BuildCodeNodes(TFMultiGraphNode* root);
|
||||
|
||||
const ShowMultiNode* ShowInternal(const Options& opts,
|
||||
Timeline* timeline) override;
|
||||
|
||||
@ -63,16 +75,15 @@ class TFCode : public TFMultiShow {
|
||||
std::vector<CodeNode*> Account(const std::vector<CodeNode*>& roots,
|
||||
const Options& opts);
|
||||
|
||||
void Format(const std::vector<CodeNode*> roots, string* display_str,
|
||||
MultiGraphNodeProto* proto);
|
||||
void Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
|
||||
const Options& opts, string* display_str,
|
||||
MultiGraphNodeProto* proto, std::vector<uint64>* call_ids);
|
||||
|
||||
string FormatNode(CodeNode* node, const Options& opts, int64 indent);
|
||||
|
||||
std::unique_ptr<CodeNode> root_;
|
||||
CodeNode* code_root_;
|
||||
std::unique_ptr<TFMultiGraphNode> trace_root_;
|
||||
std::unique_ptr<TFMultiGraphNode> tfprof_trace_root_;
|
||||
std::set<std::unique_ptr<CodeNode>> code_nodes_;
|
||||
std::unique_ptr<TFMultiGraphNode> graph_root_;
|
||||
std::unique_ptr<PprofProfile> pprof_profile_;
|
||||
};
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
@ -72,6 +72,11 @@ void TFGraph::Build() {
|
||||
const ShowNode* TFGraph::ShowInternal(const Options& opts, Timeline* timeline) {
|
||||
root_->ResetTotalStats();
|
||||
root_->show_children.clear();
|
||||
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
fprintf(stderr, "Only 'code' view supports pprof output now.\n");
|
||||
return root_;
|
||||
}
|
||||
if (timeline && timeline->step() < 0) {
|
||||
// TODO(xpan): Maybe pick a default step for users.
|
||||
fprintf(stderr,
|
||||
|
@ -340,6 +340,27 @@ class TFGraphNode {
|
||||
return exec->second.allocator_bytes_in_use();
|
||||
}
|
||||
|
||||
// Returns the number of parameters implied by shape(), or 0 when the shape
// is empty or contains an unknown (negative) dimension.
int64 parameters() const {
  if (shape().empty()) {
    return 0;
  }
  int64 param_count = 1;
  for (int64 dim : shape()) {
    // Sometimes parameters could be <0 when a dim is unknown.
    if (dim < 0) {
      fprintf(stderr, "Incomplete shape.\n");
      return 0;
    }
    param_count *= dim;
  }
  return param_count;
}
|
||||
|
||||
int64 float_ops(int64 step) const {
|
||||
// If not run, return static analysis.
|
||||
if (execs_.empty()) {
|
||||
@ -400,12 +421,14 @@ class TFMultiGraphNode {
|
||||
public:
|
||||
TFMultiGraphNode(const string& name)
|
||||
: name_(name),
|
||||
step_(-1),
|
||||
run_count_(0),
|
||||
exec_micros_(0),
|
||||
accelerator_exec_micros_(0),
|
||||
cpu_exec_micros_(0),
|
||||
requested_bytes_(0),
|
||||
float_ops_(0) {}
|
||||
float_ops_(0),
|
||||
parameters_(0) {}
|
||||
|
||||
bool SnapshotNodes(int64 step, const std::vector<string>& type_regexes) {
|
||||
run_count_ = 0;
|
||||
@ -415,11 +438,13 @@ class TFMultiGraphNode {
|
||||
|
||||
requested_bytes_ = 0;
|
||||
float_ops_ = 0;
|
||||
parameters_ = 0;
|
||||
op_types_.clear();
|
||||
shapes_.clear();
|
||||
devices_.clear();
|
||||
snapshot_nodes_.clear();
|
||||
|
||||
step_ = step;
|
||||
std::vector<const TFGraphNode*> nodes = pick_nodes(type_regexes);
|
||||
|
||||
if (nodes.empty()) {
|
||||
@ -436,6 +461,7 @@ class TFMultiGraphNode {
|
||||
|
||||
requested_bytes_ += node->requested_bytes(step);
|
||||
float_ops_ += node->float_ops(step);
|
||||
parameters_ += node->parameters();
|
||||
if (node->shape().size() > 0) {
|
||||
shapes_.push_back(node->shape());
|
||||
}
|
||||
@ -445,6 +471,8 @@ class TFMultiGraphNode {
|
||||
return true;
|
||||
}
|
||||
|
||||
int64 step() const { return step_; }
|
||||
|
||||
void AddGraphNode(const TFGraphNode* node) {
|
||||
if (nodes_.find(node->name()) != nodes_.end()) {
|
||||
return;
|
||||
@ -456,16 +484,6 @@ class TFMultiGraphNode {
|
||||
return snapshot_nodes_;
|
||||
}
|
||||
|
||||
void AddChildren(const string& name) {
|
||||
if (children_.find(name) != children_.end()) {
|
||||
return;
|
||||
}
|
||||
children_[name].reset(new TFMultiGraphNode(name));
|
||||
}
|
||||
const std::map<string, std::unique_ptr<TFMultiGraphNode>>& children() const {
|
||||
return children_;
|
||||
}
|
||||
|
||||
const string& name() const { return name_; }
|
||||
|
||||
int64 run_count() const { return run_count_; }
|
||||
@ -477,6 +495,8 @@ class TFMultiGraphNode {
|
||||
|
||||
int64 float_ops() const { return float_ops_; }
|
||||
|
||||
int64 parameters() const { return parameters_; }
|
||||
|
||||
const std::set<string>& devices() const { return devices_; }
|
||||
|
||||
const std::set<string>& op_types() const { return op_types_; }
|
||||
@ -511,6 +531,7 @@ class TFMultiGraphNode {
|
||||
}
|
||||
|
||||
const string name_;
|
||||
int64 step_;
|
||||
// Snapshot based on type_regexes
|
||||
std::set<string> op_types_;
|
||||
int64 run_count_;
|
||||
@ -520,13 +541,13 @@ class TFMultiGraphNode {
|
||||
|
||||
int64 requested_bytes_;
|
||||
int64 float_ops_;
|
||||
int64 parameters_;
|
||||
std::set<string> devices_;
|
||||
std::vector<std::vector<int64>> shapes_;
|
||||
std::map<string, const TFGraphNode*> snapshot_nodes_;
|
||||
|
||||
// Overall data held by the TFMultiGraphNode.
|
||||
std::map<string, const TFGraphNode*> nodes_;
|
||||
std::map<string, std::unique_ptr<TFMultiGraphNode>> children_;
|
||||
};
|
||||
|
||||
bool IsPlacedOnAccelerator(const string& device);
|
||||
|
@ -45,25 +45,7 @@ void ShowNode::ReInit(int64 step) {
|
||||
(*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom(
|
||||
VecToShapeProto(inp.second));
|
||||
}
|
||||
|
||||
proto_.clear_parameters();
|
||||
if (!node->shape().empty()) {
|
||||
int64 params = 1;
|
||||
bool complete_shape = true;
|
||||
for (int64 d : node->shape()) {
|
||||
// Sometimes parameters could be <0 when a dim is unknown.
|
||||
if (d < 0) {
|
||||
complete_shape = false;
|
||||
break;
|
||||
}
|
||||
params *= d;
|
||||
}
|
||||
if (complete_shape) {
|
||||
mutable_proto()->set_parameters(proto_.parameters() + params);
|
||||
} else {
|
||||
fprintf(stderr, "Incomplete shape.");
|
||||
}
|
||||
}
|
||||
proto_.set_parameters(node->parameters());
|
||||
}
|
||||
|
||||
GraphNodeProto* ShowNode::mutable_proto() { return &proto_; }
|
||||
@ -114,6 +96,8 @@ void ShowNode::AddSelfToTotalStats() {
|
||||
}
|
||||
|
||||
void ShowNode::ResetTotalStats() {
|
||||
formatted_str.clear();
|
||||
|
||||
mutable_proto()->set_total_definition_count(0);
|
||||
mutable_proto()->set_total_run_count(0);
|
||||
mutable_proto()->set_total_exec_micros(0);
|
||||
@ -153,26 +137,7 @@ bool ShowMultiNode::ReInit(int64 step,
|
||||
mutable_proto()->set_requested_bytes(node->requested_bytes());
|
||||
mutable_proto()->set_float_ops(node->float_ops());
|
||||
|
||||
mutable_proto()->clear_parameters();
|
||||
if (!node->shapes().empty()) {
|
||||
for (const std::vector<int64>& shape : node->shapes()) {
|
||||
int64 params = 1;
|
||||
bool complete_shape = true;
|
||||
for (int64 d : shape) {
|
||||
// Sometimes parameters could be <0 when a dim is unknown.
|
||||
if (d < 0) {
|
||||
complete_shape = false;
|
||||
break;
|
||||
}
|
||||
params *= d;
|
||||
}
|
||||
if (complete_shape) {
|
||||
mutable_proto()->set_parameters(proto().parameters() + params);
|
||||
} else {
|
||||
fprintf(stderr, "Incomplete shape.");
|
||||
}
|
||||
}
|
||||
}
|
||||
mutable_proto()->set_parameters(node->parameters());
|
||||
return has_matched_type;
|
||||
}
|
||||
|
||||
@ -216,6 +181,7 @@ void ShowMultiNode::AddSelfToTotalStats() {
|
||||
}
|
||||
|
||||
void ShowMultiNode::ResetTotalStats() {
|
||||
formatted_str.clear();
|
||||
mutable_proto()->set_total_exec_micros(0);
|
||||
mutable_proto()->set_total_accelerator_exec_micros(0);
|
||||
mutable_proto()->set_total_cpu_exec_micros(0);
|
||||
|
@ -111,11 +111,31 @@ class ShowMultiNode {
|
||||
|
||||
class CodeNode : public ShowMultiNode {
|
||||
public:
|
||||
explicit CodeNode(TFMultiGraphNode* node) : ShowMultiNode(node) {}
|
||||
explicit CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace)
|
||||
: ShowMultiNode(node), trace(trace) {}
|
||||
~CodeNode() override {}
|
||||
|
||||
CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace) {
|
||||
auto it = children_.find(name);
|
||||
if (it != children_.end()) {
|
||||
return it->second.get();
|
||||
}
|
||||
|
||||
graph_children_.push_back(
|
||||
std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(name)));
|
||||
auto child = &children_[name];
|
||||
child->reset(new CodeNode(graph_children_.back().get(), trace));
|
||||
children.push_back(child->get());
|
||||
return child->get();
|
||||
}
|
||||
|
||||
const CodeDef::Trace* trace;
|
||||
std::vector<CodeNode*> children;
|
||||
std::vector<CodeNode*> show_children;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_;
|
||||
std::map<string, std::unique_ptr<CodeNode>> children_;
|
||||
};
|
||||
|
||||
class OpNode : public ShowMultiNode {
|
||||
|
@ -103,8 +103,13 @@ void TFOp::Build() {
|
||||
}
|
||||
|
||||
const ShowMultiNode* TFOp::ShowInternal(const Options& opts,
|
||||
Timeline* timeline) {
|
||||
Timeline* timeline) {
|
||||
root_->ResetTotalStats();
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
fprintf(stderr, "Only 'code' view supports pprof output now.\n");
|
||||
return root_.get();
|
||||
}
|
||||
|
||||
if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) {
|
||||
root_->formatted_str = FormatNode(root_.get(), root_.get(), opts);
|
||||
}
|
||||
|
@ -84,6 +84,13 @@ tensorflow::Status ParseOutput(const string& output_opt, string* output_type,
|
||||
required_options.insert(kFileRequiredOpts,
|
||||
kFileRequiredOpts + sizeof(kFileRequiredOpts) /
|
||||
sizeof(*kFileRequiredOpts));
|
||||
} else if (*output_type == kOutput[3]) {
|
||||
valid_options.insert(kPprofOpts,
|
||||
kPprofOpts + sizeof(kPprofOpts) / sizeof(*kPprofOpts));
|
||||
required_options.insert(
|
||||
kPprofRequiredOpts,
|
||||
kPprofRequiredOpts +
|
||||
sizeof(kPprofRequiredOpts) / sizeof(*kPprofRequiredOpts));
|
||||
}
|
||||
|
||||
for (const string& kv_str : kv_split) {
|
||||
|
@ -62,7 +62,8 @@ static const char* const kCmds[] = {
|
||||
"scope", "graph", "code", "op", "advise", "set", "help",
|
||||
};
|
||||
|
||||
static const char* const kOutput[] = {"timeline", "stdout", "file", "none"};
|
||||
static const char* const kOutput[] = {"timeline", "stdout", "file", "pprof",
|
||||
"none"};
|
||||
|
||||
static const char* const kTimelineOpts[] = {
|
||||
"outfile",
|
||||
@ -78,6 +79,14 @@ static const char* const kFileRequiredOpts[] = {
|
||||
"outfile",
|
||||
};
|
||||
|
||||
static const char* const kPprofOpts[] = {
|
||||
"outfile",
|
||||
};
|
||||
|
||||
static const char* const kPprofRequiredOpts[] = {
|
||||
"outfile",
|
||||
};
|
||||
|
||||
struct Options {
|
||||
public:
|
||||
static tensorflow::Status FromProtoStr(const string& opts_proto_str,
|
||||
|
@ -78,8 +78,13 @@ void TFScope::Build() {
|
||||
}
|
||||
|
||||
const ShowNode* TFScope::ShowInternal(const Options& opts, Timeline* timeline) {
|
||||
std::vector<ScopeNode*> roots = Account(root_->children, opts);
|
||||
root_->ResetTotalStats();
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
fprintf(stderr, "Only 'code' view supports pprof output now.\n");
|
||||
return root_;
|
||||
}
|
||||
|
||||
std::vector<ScopeNode*> roots = Account(root_->children, opts);
|
||||
root_->show_children.clear();
|
||||
for (ScopeNode* n : roots) {
|
||||
root_->AggregateTotalStats(n);
|
||||
|
@ -26,25 +26,27 @@ namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
const GraphNodeProto& TFShow::Show(const Options& opts) {
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
return ShowInternal(opts, nullptr)->proto();
|
||||
} else if (opts.output_type == kOutput[0]) {
|
||||
if (opts.output_type == kOutput[0]) {
|
||||
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
|
||||
return ShowInternal(opts, &timeline)->proto();
|
||||
} else if (opts.output_type == kOutput[2]) {
|
||||
const ShowNode* root = ShowInternal(opts, nullptr);
|
||||
Status s =
|
||||
WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
|
||||
root->formatted_str);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
}
|
||||
return root->proto();
|
||||
} else {
|
||||
const ShowNode* root = ShowInternal(opts, nullptr);
|
||||
printf("%s", root->formatted_str.c_str());
|
||||
fflush(stdout);
|
||||
return root->proto();
|
||||
const ShowNode* ret = ShowInternal(opts, nullptr);
|
||||
if (opts.output_type == kOutput[1]) {
|
||||
printf("%s", ret->formatted_str.c_str());
|
||||
fflush(stdout);
|
||||
} else if (opts.output_type == kOutput[2]) {
|
||||
Status s = WriteStringToFile(Env::Default(),
|
||||
opts.output_options.at(kFileOpts[0]),
|
||||
ret->formatted_str);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
}
|
||||
} else if (opts.output_type == kOutput[3] ||
|
||||
opts.output_type == kOutput[4]) {
|
||||
} else {
|
||||
fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
|
||||
}
|
||||
return ret->proto();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -28,25 +28,27 @@ namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) {
|
||||
if (opts.output_type == kOutput[3]) {
|
||||
return ShowInternal(opts, nullptr)->proto();
|
||||
} else if (opts.output_type == kOutput[0]) {
|
||||
if (opts.output_type == kOutput[0]) {
|
||||
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
|
||||
return ShowInternal(opts, &timeline)->proto();
|
||||
} else if (opts.output_type == kOutput[2]) {
|
||||
const ShowMultiNode* root = ShowInternal(opts, nullptr);
|
||||
Status s =
|
||||
WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
|
||||
root->formatted_str);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
}
|
||||
return root->proto();
|
||||
} else {
|
||||
const ShowMultiNode* root = ShowInternal(opts, nullptr);
|
||||
printf("%s", root->formatted_str.c_str());
|
||||
fflush(stdout);
|
||||
return root->proto();
|
||||
const ShowMultiNode* ret = ShowInternal(opts, nullptr);
|
||||
if (opts.output_type == kOutput[1]) {
|
||||
printf("%s", ret->formatted_str.c_str());
|
||||
fflush(stdout);
|
||||
} else if (opts.output_type == kOutput[2]) {
|
||||
Status s = WriteStringToFile(Env::Default(),
|
||||
opts.output_options.at(kFileOpts[0]),
|
||||
ret->formatted_str);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
}
|
||||
} else if (opts.output_type == kOutput[3] ||
|
||||
opts.output_type == kOutput[4]) {
|
||||
} else {
|
||||
fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
|
||||
}
|
||||
return ret->proto();
|
||||
}
|
||||
}
|
||||
|
||||
|
71
tensorflow/core/profiler/profile.proto
Normal file
71
tensorflow/core/profiler/profile.proto
Normal file
@ -0,0 +1,71 @@
|
||||
// This proto intends to match format expected by pprof tool.
|
||||
syntax = "proto3";
|
||||
|
||||
package tensorflow.tfprof.pprof;
|
||||
|
||||
message Profile {
|
||||
repeated ValueType sample_type = 1;
|
||||
repeated Sample sample = 2;
|
||||
repeated Mapping mapping = 3;
|
||||
repeated Location location = 4;
|
||||
repeated Function function = 5;
|
||||
repeated string string_table = 6;
|
||||
int64 drop_frames = 7;
|
||||
int64 keep_frames = 8;
|
||||
int64 time_nanos = 9;
|
||||
int64 duration_nanos = 10;
|
||||
ValueType period_type = 11;
|
||||
int64 period = 12;
|
||||
repeated int64 comment = 13;
|
||||
int64 default_sample_type = 14;
|
||||
}
|
||||
|
||||
message ValueType {
|
||||
int64 type = 1;
|
||||
int64 unit = 2;
|
||||
}
|
||||
|
||||
message Sample {
|
||||
repeated uint64 location_id = 1;
|
||||
repeated int64 value = 2;
|
||||
repeated Label label = 3;
|
||||
}
|
||||
|
||||
message Label {
|
||||
int64 key = 1;
|
||||
int64 str = 2;
|
||||
int64 num = 3;
|
||||
}
|
||||
|
||||
message Mapping {
|
||||
uint64 id = 1;
|
||||
uint64 memory_start = 2;
|
||||
uint64 memory_limit = 3;
|
||||
uint64 file_offset = 4;
|
||||
int64 filename = 5;
|
||||
int64 build_id = 6;
|
||||
bool has_functions = 7;
|
||||
bool has_filenames = 8;
|
||||
bool has_line_numbers = 9;
|
||||
bool has_inline_frames = 10;
|
||||
}
|
||||
|
||||
message Location {
|
||||
uint64 id = 1;
|
||||
uint64 mapping_id = 2;
|
||||
uint64 address = 3;
|
||||
repeated Line line = 4;
|
||||
}
|
||||
|
||||
message Line {
|
||||
uint64 function_id = 1;
|
||||
int64 line = 2;
|
||||
}
|
||||
|
||||
message Function {
|
||||
uint64 id = 1;
|
||||
int64 name = 2;
|
||||
int64 system_name = 3;
|
||||
int64 filename = 4;
|
||||
int64 start_line = 5;
|
||||
}
|
@ -10,6 +10,7 @@ message CodeDef {
|
||||
int32 lineno = 2;
|
||||
string function = 3;
|
||||
string line = 4;
|
||||
int32 func_start_line = 5;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,11 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import gzip
|
||||
import io
|
||||
import os
|
||||
|
||||
from tensorflow.core.profiler import profile_pb2
|
||||
from tensorflow.core.protobuf import config_pb2
|
||||
from tensorflow.core.protobuf import rewriter_config_pb2
|
||||
from tensorflow.python.client import session
|
||||
@ -200,7 +204,6 @@ class PrintModelAnalysisTest(test.TestCase):
|
||||
|
||||
def testTimeline(self):
|
||||
ops.reset_default_graph()
|
||||
opts = builder.trainable_variables_parameter()
|
||||
outfile = os.path.join(test.get_temp_dir(), 'timeline')
|
||||
opts = (builder(builder.trainable_variables_parameter())
|
||||
.with_max_depth(100000)
|
||||
@ -312,6 +315,61 @@ class PrintModelAnalysisTest(test.TestCase):
|
||||
checker = advice_pb.checkers['ExpensiveOperationChecker']
|
||||
self.assertGreater(len(checker.reports), 0)
|
||||
|
||||
def pprof_test_helper(self, attribute, should_fail=False):
|
||||
ops.reset_default_graph()
|
||||
outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
|
||||
opts = (builder(builder.time_and_memory())
|
||||
.select([attribute])
|
||||
.with_max_depth(100000)
|
||||
.with_node_names(trim_name_regexes=['ops.py.*'])
|
||||
.with_pprof_output(outfile).build())
|
||||
|
||||
with session.Session() as sess:
|
||||
x = lib.BuildFullModel()
|
||||
|
||||
sess.run(variables.global_variables_initializer())
|
||||
run_meta = config_pb2.RunMetadata()
|
||||
_ = sess.run(
|
||||
x,
|
||||
options=config_pb2.RunOptions(
|
||||
trace_level=config_pb2.RunOptions.FULL_TRACE),
|
||||
run_metadata=run_meta)
|
||||
|
||||
_ = model_analyzer.profile(
|
||||
sess.graph, run_meta, cmd='code', options=opts)
|
||||
|
||||
if should_fail:
|
||||
self.assertFalse(gfile.Exists(outfile))
|
||||
return
|
||||
|
||||
profile_pb = profile_pb2.Profile()
|
||||
with gfile.Open(outfile, 'rb') as f:
|
||||
with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
|
||||
profile_pb.ParseFromString(gzipf.read())
|
||||
|
||||
self.assertGreater(len(profile_pb.sample), 10)
|
||||
self.assertGreater(len(profile_pb.location), 10)
|
||||
self.assertGreater(len(profile_pb.function), 10)
|
||||
self.assertGreater(len(profile_pb.string_table), 30)
|
||||
|
||||
has_rnn = False
|
||||
has_loop = False
|
||||
for s in profile_pb.string_table:
|
||||
if s.find('rnn') > 0:
|
||||
has_rnn = True
|
||||
if s.find('while') > 0:
|
||||
has_loop = True
|
||||
self.assertFalse(s.startswith('ops.py'))
|
||||
self.assertTrue(has_rnn)
|
||||
self.assertTrue(has_loop)
|
||||
|
||||
def testPprof(self):
|
||||
for attr in ['micros', 'bytes', 'accelerator_micros', 'cpu_micros',
|
||||
'params', 'float_ops']:
|
||||
self.pprof_test_helper(attr)
|
||||
for attr in ['op_types', 'device', 'input_shapes']:
|
||||
self.pprof_test_helper(attr, True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test.main()
|
||||
|
@ -353,6 +353,20 @@ class ProfileOptionBuilder(object):
|
||||
self._options['output'] = 'timeline:outfile=%s' % timeline_file
|
||||
return self
|
||||
|
||||
def with_pprof_output(self, pprof_file):
|
||||
"""Generate a pprof profile gzip file.
|
||||
|
||||
To use the pprof file:
|
||||
pprof -png --nodecount=20 --sample_index=1 <pprof_file>
|
||||
|
||||
Args:
|
||||
pprof_file: filename for output, usually suffixed with .pb.gz.
|
||||
Returns:
|
||||
self.
|
||||
"""
|
||||
self._options['output'] = 'pprof:outfile=%s' % pprof_file
|
||||
return self
|
||||
|
||||
def order_by(self, attribute):
|
||||
# pylint: disable=line-too-long
|
||||
"""Order the displayed profiler nodes based on a attribute.
|
||||
|
@ -98,12 +98,13 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
|
||||
add_entry = True
|
||||
|
||||
if add_trace:
|
||||
for tb in op.traceback:
|
||||
for tb in op.traceback_with_start_lines:
|
||||
trace = entry.code_def.traces.add()
|
||||
trace.file = tb[0] if tb[0] else 'none'
|
||||
trace.lineno = tb[1] if tb[1] else -1
|
||||
trace.function = tb[2] if tb[2] else 'none'
|
||||
trace.line = tb[3] if tb[3] else 'none'
|
||||
trace.func_start_line = tb[4] if tb[4] else -1
|
||||
add_entry = True
|
||||
|
||||
if add_entry:
|
||||
|
@ -74,6 +74,10 @@ tf_class {
|
||||
name: "with_node_names"
|
||||
argspec: "args=[\'self\', \'start_name_regexes\', \'show_name_regexes\', \'hide_name_regexes\', \'trim_name_regexes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "with_pprof_output"
|
||||
argspec: "args=[\'self\', \'pprof_file\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "with_stdout_output"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
|
Loading…
Reference in New Issue
Block a user