STT-tensorflow/tensorflow/tools/tfprof/internal/tfprof_show_code.cc
A. Unique TensorFlower 1e59f00c48 Extend tfprof to associate op stats with Python codes.
It's backward compatible. Stats of a source code line
are aggregated from all ops created by that line.

An example.
_TFProfRoot (0us/22.44ms)
  model_analyzer_test.py:149:run_filename_as_m...:none (0us/22.44ms)
    model_analyzer_test.py:33:_run_code_in_main:none (0us/22.44ms)
      model_analyzer_test.py:208:<module>:test.main() (0us/22.44ms)
        model_analyzer_test.py:132:testComplexCodeView:x = lib.BuildFull... (0us/22.44ms)
          model_analyzer_testlib.py:63:BuildFullModel:return sgd_op.min... (0us/21.83ms)
          model_analyzer_testlib.py:54:BuildFullModel:seq.append(array_... (0us/254us)
            model_analyzer_testlib.py:42:BuildSmallModel:x = nn_ops.conv2d... (0us/134us)
            ...
          model_analyzer_testlib.py:61:BuildFullModel:loss = nn_ops.l2_... (0us/28us)
        model_analyzer_test.py:134:testComplexCodeView:sess.run(variable... (0us/0us)
Change: 155258346
2017-05-05 17:30:49 -07:00

276 lines
9.1 KiB
C++

/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/tools/tfprof/internal/tfprof_show_code.h"
#include <memory>
#include <set>
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/regexp.h"
#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
namespace tensorflow {
namespace tfprof {
// Builds the display node that wraps one code node.
//
// For every graph node attached to `node`, a ScopeNode is created, its own
// stats are added to its totals, and its proto is appended to this node's
// graph_nodes. The node's name, execution micros, requested bytes and float
// ops are then copied into the proto, and the parameter counts of all
// fully-known shapes are summed into `parameters`.
ShowCodeNode::ShowCodeNode(const TFCodeNode* node)
    : node(node), account(true) {
  // Bind by const reference so each entry is not copied on every iteration,
  // and work on the ScopeNode directly instead of a copy parked in a vector.
  for (const auto& it : node->graph_nodes()) {
    ScopeNode snode(it.second);
    snode.AddSelfToTotalStats();
    *mutable_proto()->mutable_graph_nodes()->Add() = snode.proto();
  }

  mutable_proto()->set_name(name());
  mutable_proto()->set_exec_micros(node->kernel_compute_micros());
  mutable_proto()->set_requested_bytes(node->requested_bytes());
  mutable_proto()->set_float_ops(node->float_ops());

  // Iterating an empty container is a no-op, so no emptiness guard is needed.
  for (const std::vector<int64>& shape : node->shapes()) {
    int64 params = 1;
    bool complete_shape = true;
    for (int64 d : shape) {
      // A negative dimension means the dimension is unknown, so the
      // parameter count for this shape cannot be computed.
      if (d < 0) {
        complete_shape = false;
        break;
      }
      params *= d;
    }
    if (complete_shape) {
      mutable_proto()->set_parameters(proto().parameters() + params);
    } else {
      // Terminate the line so consecutive warnings do not run together.
      fprintf(stderr, "Incomplete shape.\n");
    }
  }
}
// Returns the display string for this node: the bare name when nothing is
// selected in `opts`, otherwise "<name> (<metadata>)".
string ShowCodeNode::Format(const Options& opts) {
  if (!opts.select.empty()) {
    return strings::Printf("%s (%s)", name().c_str(),
                           FormatMeta(opts).c_str());
  }
  return name();
}
// Builds the ", "-separated metadata string for this node.
//
// Each section is emitted only when its kShown key is present in
// `opts.select`. Numeric sections are printed as "<self>/<total>"; the self
// part is replaced by "--" when this node is not accounted.
string ShowCodeNode::FormatMeta(const Options& opts) {
  std::vector<string> fields;

  // kShown[2]: shapes (joined by "|") followed by parameter counts.
  if (opts.select.count(kShown[2]) > 0) {
    std::vector<string> shape_strs;
    for (const std::vector<int64>& dims : node->shapes()) {
      if (dims.empty()) continue;
      shape_strs.push_back(FormatShapes(dims));
    }
    if (!shape_strs.empty()) {
      fields.push_back(str_util::Join(shape_strs, "|"));
    }
    const string total = FormatNumber(proto().total_parameters()) + " params";
    const string self =
        account ? FormatNumber(proto().parameters()) : string("--");
    fields.push_back(self + "/" + total);
  }

  // kShown[3]: float operations.
  if (opts.select.count(kShown[3]) > 0) {
    const string total = FormatNumber(proto().total_float_ops()) + " flops";
    const string self =
        account ? FormatNumber(proto().float_ops()) : string("--");
    fields.push_back(self + "/" + total);
  }

  // kShown[0]: requested bytes.
  if (opts.select.count(kShown[0]) > 0) {
    const string total = FormatMemory(proto().total_requested_bytes());
    const string self =
        account ? FormatMemory(proto().requested_bytes()) : string("--");
    fields.push_back(self + "/" + total);
  }

  // kShown[1]: execution time.
  if (opts.select.count(kShown[1]) > 0) {
    const string total = FormatTime(proto().total_exec_micros());
    const string self =
        account ? FormatTime(proto().exec_micros()) : string("--");
    fields.push_back(self + "/" + total);
  }

  // kShown[6]: devices, omitted entirely when the node has none.
  if (opts.select.count(kShown[6]) > 0 && !node->devices().empty()) {
    fields.push_back(str_util::Join(node->devices(), "|"));
  }

  // kShown[7]: op types. Devices are treated as types as well.
  if (opts.select.count(kShown[7]) > 0) {
    std::set<string> types = node->op_types();
    const auto& devices = node->devices();
    types.insert(devices.cbegin(), devices.cend());
    fields.push_back(str_util::Join(types, "|"));
  }

  return str_util::Join(fields, ", ");
}
// Returns a mutable pointer to the proto owned by this node.
TFCodeNodeProto* ShowCodeNode::mutable_proto() {
  return &proto_;
}
// Returns a read-only reference to the proto owned by this node.
const TFCodeNodeProto& ShowCodeNode::proto() const {
  return proto_;
}
// Adds the total_* statistics of `node` (exec micros, requested bytes,
// parameters, float ops) onto this node's corresponding totals.
void ShowCodeNode::AggregateTotalStats(ShowCodeNode* node) {
  const TFCodeNodeProto& other = node->proto();
  TFCodeNodeProto* self = mutable_proto();

  self->set_total_exec_micros(self->total_exec_micros() +
                              other.total_exec_micros());
  self->set_total_requested_bytes(self->total_requested_bytes() +
                                  other.total_requested_bytes());
  self->set_total_parameters(self->total_parameters() +
                             other.total_parameters());
  self->set_total_float_ops(self->total_float_ops() +
                            other.total_float_ops());
}
// Adds this node's own statistics (exec micros, requested bytes, parameters,
// float ops) onto its total_* counterparts.
void ShowCodeNode::AddSelfToTotalStats() {
  auto* pb = mutable_proto();

  pb->set_total_exec_micros(pb->total_exec_micros() + pb->exec_micros());
  pb->set_total_requested_bytes(pb->total_requested_bytes() +
                                pb->requested_bytes());
  pb->set_total_parameters(pb->total_parameters() + pb->parameters());
  pb->set_total_float_ops(pb->total_float_ops() + pb->float_ops());
}
// Zeroes every total_* statistic on this node and clears the proto's list of
// children.
void ShowCodeNode::ResetTotalStats() {
  auto* pb = mutable_proto();

  pb->mutable_children()->Clear();
  pb->set_total_float_ops(0);
  pb->set_total_parameters(0);
  pb->set_total_requested_bytes(0);
  pb->set_total_exec_micros(0);
}
// Runs ShowInternal() with `opts` and emits the root's formatted string:
// to stdout when `opts.dump_to_file` is empty, otherwise to that file.
// A failed file write is reported on stderr and does not abort.
// Returns the root node's proto.
const TFCodeNodeProto& TFShowCode::Show(const Options& opts) {
  const ShowCodeNode* root = ShowInternal(opts);

  if (!opts.dump_to_file.empty()) {
    Status write_status = WriteStringToFile(Env::Default(), opts.dump_to_file,
                                            root->formatted_str);
    if (!write_status.ok()) {
      fprintf(stderr, "%s\n", write_status.ToString().c_str());
    }
    return root->proto();
  }

  printf("%s", root->formatted_str.c_str());
  fflush(stdout);
  return root->proto();
}
// Decides whether `node` is displayed at the given `depth`.
//
// The kTFProfRoot node is always shown. Any other node must be accounted,
// meet every min_* threshold on its total stats, be within max_depth, pass
// ShouldShowIfExtra(), match the device and show-name regexes, and not match
// any hide-name regex.
bool TFShowCode::ShouldShow(ShowCodeNode* node,
                            const Options& opts,
                            int depth) {
  // Always show kTFProfRoot.
  if (node->name() == kTFProfRoot) return true;

  if (!node->account) return false;

  // TODO(xpan): Think more carefully about node filtering in code view.
  // In scope/graph view users want to see the exact leaf op. In code view
  // they mostly want to see the intermediate code traces (i.e. their own
  // code) rather than the TensorFlow-internal code traces.
  const auto& stats = node->proto();
  if (stats.total_requested_bytes() < opts.min_bytes) return false;
  if (stats.total_exec_micros() < opts.min_micros) return false;
  if (stats.total_parameters() < opts.min_params) return false;
  if (stats.total_float_ops() < opts.min_float_ops) return false;
  if (depth > opts.max_depth) return false;
  if (!ShouldShowIfExtra(node, opts, depth)) return false;

  // Device filter. A single ".*" regex accepts everything without matching.
  const bool any_device =
      opts.device_regexes.size() == 1 && opts.device_regexes[0] == ".*";
  if (!any_device) {
    bool device_matched = false;
    for (const string& pattern : opts.device_regexes) {
      for (const string& device : node->node->devices()) {
        if (RE2::FullMatch(device, pattern)) {
          device_matched = true;
          break;
        }
      }
      if (device_matched) break;
    }
    // Don't show if device_regexes don't cover it.
    if (!device_matched) return false;
  }

  // Name filter. A single ".*" regex accepts everything without matching.
  const bool any_name = opts.show_name_regexes.size() == 1 &&
                        opts.show_name_regexes[0] == ".*";
  if (!any_name) {
    bool name_matched = false;
    for (const string& pattern : opts.show_name_regexes) {
      if (RE2::FullMatch(node->name(), pattern)) {
        name_matched = true;
        break;
      }
    }
    // Don't show if show_name_regexes don't cover it.
    if (!name_matched) return false;
  }

  // Don't show if hide_name_regexes cover it.
  for (const string& pattern : opts.hide_name_regexes) {
    if (RE2::FullMatch(node->name(), pattern)) return false;
  }
  return true;
}
// Returns true when the name of `node` fully matches at least one of
// `regexes`; returns false for an empty list.
bool TFShowCode::ShouldTrim(ShowCodeNode* node,
                            const std::vector<string>& regexes) {
  auto pattern = regexes.begin();
  // Advance to the first pattern that matches the node name, if any.
  while (pattern != regexes.end() &&
         !RE2::FullMatch(node->name(), *pattern)) {
    ++pattern;
  }
  return pattern != regexes.end();
}
// Returns true when `node` should be accounted: either account_type_regexes
// is the single catch-all ".*", or one of the regexes fully matches one of
// the node's op types or devices.
bool TFShowCode::ShouldAccount(ShowCodeNode* node, const Options& opts) {
  const auto& patterns = opts.account_type_regexes;

  const bool accept_all = patterns.size() == 1 && patterns[0] == ".*";
  if (accept_all) return true;

  for (const string& pattern : patterns) {
    for (const string& op_type : node->node->op_types()) {
      if (RE2::FullMatch(op_type, pattern)) return true;
    }
    // Devices are matched against the same regexes as op types.
    for (const string& device_name : node->node->devices()) {
      if (RE2::FullMatch(device_name, pattern)) return true;
    }
  }
  return false;
}
} // namespace tfprof
} // namespace tensorflow