Refactor StatSummarizer: extract common functionality without proto dependencies.

PiperOrigin-RevId: 197816405
This commit is contained in:
Shashi Shekhar 2018-05-23 17:14:39 -07:00 committed by TensorFlower Gardener
parent dac1f12402
commit 2307db76a2
13 changed files with 942 additions and 434 deletions

View File

@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/contrib/android/jni/run_stats_jni.h" #include "tensorflow/contrib/android/jni/run_stats_jni.h"
#include <jni.h> #include <jni.h>
#include <sstream> #include <sstream>
#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/protobuf/config.pb.h"
@ -73,7 +74,8 @@ JNIEXPORT jstring RUN_STATS_METHOD(summary)(JNIEnv* env, jclass clazz,
StatSummarizer* s = requireHandle(env, handle); StatSummarizer* s = requireHandle(env, handle);
if (s == nullptr) return nullptr; if (s == nullptr) return nullptr;
std::stringstream ret; std::stringstream ret;
ret << s->GetStatsByMetric("Top 10 CPU", StatSummarizer::BY_TIME, 10) ret << s->GetStatsByMetric("Top 10 CPU", tensorflow::StatsCalculator::BY_TIME,
10)
<< s->GetStatsByNodeType() << s->ShortSummary(); << s->GetStatsByNodeType() << s->ShortSummary();
return env->NewStringUTF(ret.str().c_str()); return env->NewStringUTF(ret.str().c_str());
} }

View File

@ -31,6 +31,33 @@ cc_library(
copts = common_copts, copts = common_copts,
) )
# Library target for the profile summarizer (profile_summarizer.cc/.h).
# Its only TensorFlow-core dependency is the stats_calculator_portable target.
cc_library(
name = "profile_summarizer",
srcs = ["profile_summarizer.cc"],
hdrs = ["profile_summarizer.h"],
deps = [
":profiler",
"//tensorflow/contrib/lite:framework",
"//tensorflow/contrib/lite/schema:schema_fbs",
"//tensorflow/core:stats_calculator_portable",
],
)
# Unit test target for :profile_summarizer.
cc_test(
name = "profile_summarizer_test",
srcs = ["profile_summarizer_test.cc"],
deps = [
":profile_summarizer",
"//tensorflow/contrib/lite:framework",
"//tensorflow/contrib/lite:schema_fbs_version",
"//tensorflow/contrib/lite/kernels:builtin_ops",
"//tensorflow/contrib/lite/kernels:kernel_util",
"//tensorflow/contrib/lite/kernels:test_util",
"//tensorflow/contrib/lite/testing:util",
# NOTE(review): plain gtest with no gtest_main target is listed here, so
# presumably the test source provides its own main() - confirm.
"@com_google_googletest//:gtest",
],
)
cc_test( cc_test(
name = "profile_buffer_test", name = "profile_buffer_test",
srcs = ["profile_buffer_test.cc"], srcs = ["profile_buffer_test.cc"],

View File

@ -0,0 +1,140 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
#include <sstream>
#include "tensorflow/contrib/lite/schema/schema_generated.h"
namespace tflite {
namespace profiling {
namespace {
using Detail = tensorflow::StatsCalculator::Detail;
// Describes one interpreter node for reporting purposes.
struct OperatorDetails {
// Operator name.
string name;
// Names of the node's input tensors.
std::vector<string> inputs;
// Names of the node's output tensors.
std::vector<string> outputs;
};
// Returns the name of the tensor at `tensor_index` in `interpreter`.
// Falls back to "Unknown" when the interpreter yields no tensor for that index
// or the tensor carries no name.
string GetTensorName(const tflite::Interpreter& interpreter, int tensor_index) {
  const auto tensor = interpreter.tensor(tensor_index);
  const bool has_name = tensor != nullptr && tensor->name != nullptr;
  if (has_name) {
    return tensor->name;
  }
  return "Unknown";
}
// Resolves every index in `tensor_indices` to a tensor name, preserving the
// order of the indices. `tensor_indices` must not be null.
std::vector<string> GetTensorNames(const tflite::Interpreter& interpreter,
                                   const TfLiteIntArray* tensor_indices) {
  const int count = tensor_indices->size;
  std::vector<string> names;
  names.reserve(count);
  for (int pos = 0; pos < count; ++pos) {
    const int tensor_index = tensor_indices->data[pos];
    names.push_back(GetTensorName(interpreter, tensor_index));
  }
  return names;
}
// Renders `str_vector` as a bracketed, comma-separated list, e.g. "[a, b]".
// An empty vector yields "[]".
string ToString(const std::vector<string>& str_vector) {
  std::stringstream joined;
  joined << "[";
  // Empty before the first element, ", " before every later one.
  const char* separator = "";
  for (const auto& item : str_vector) {
    joined << separator << item;
    separator = ", ";
  }
  joined << "]";
  return joined.str();
}
// Builds the OperatorDetails for the node at `node_index` in `interpreter`.
//
// The operator name is the registration's custom name for custom ops
// ("UnknownCustomOp" when none is set) and the builtin operator's enum name
// otherwise. Input and output tensor indices are resolved to tensor names.
//
// When the interpreter has no node for `node_index`, the result has the name
// "Unknown" and empty input/output lists instead of dereferencing null.
OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
                                   int node_index) {
  OperatorDetails details;
  const auto node_reg = interpreter.node_and_registration(node_index);
  // NOTE(review): assumes node_and_registration() signals an invalid index by
  // returning null - confirm against interpreter.h.
  if (node_reg == nullptr) {
    details.name = "Unknown";
    return details;
  }

  const int code = node_reg->second.builtin_code;
  const char* op_name = nullptr;
  if (code == tflite::BuiltinOperator_CUSTOM) {
    const char* custom_name = node_reg->second.custom_name;
    op_name = custom_name ? custom_name : "UnknownCustomOp";
  } else {
    op_name = tflite::EnumNamesBuiltinOperator()[code];
  }

  details.name = op_name;
  details.inputs = GetTensorNames(interpreter, node_reg->first.inputs);
  details.outputs = GetTensorNames(interpreter, node_reg->first.outputs);
  return details;
}
} // namespace
// Creates a summarizer that owns a StatsCalculator constructed from
// default-constructed StatSummarizerOptions.
ProfileSummarizer::ProfileSummarizer()
: stats_calculator_(new ::tensorflow::StatsCalculator(
tensorflow::StatSummarizerOptions())) {}
// Folds the operator-invoke events in `profile_stats` into the stats
// calculator.
//
// Events are grouped by the bracketed list of the node's output tensor names.
// For each group this records the start offset relative to the earliest event,
// the execution time, the run order of the first occurrence, and the number of
// invocations. The per-node details and the summed execution time of all
// events are then handed to the stats calculator. Does nothing when no event
// survives filtering.
//
// `interpreter` is used to look up operator and tensor names; each event's
// `event_metadata` is passed to it as the node index.
void ProfileSummarizer::ProcessProfiles(
    const std::vector<const ProfileEvent*>& profile_stats,
    const tflite::Interpreter& interpreter) {
  // Keep only well-formed operator invocations: null entries, other event
  // types, and events whose end precedes their begin are dropped.
  std::vector<const ProfileEvent*> events;
  std::copy_if(profile_stats.begin(), profile_stats.end(),
               std::back_inserter(events), [](const ProfileEvent* e) {
                 return e != nullptr &&
                        e->event_type ==
                            ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
                        e->end_timestamp_us >= e->begin_timestamp_us;
               });
  if (events.empty()) {
    return;
  }

  // Order by begin time. A stable sort keeps the incoming order for events
  // that share a begin timestamp, so run_order is deterministic.
  std::stable_sort(
      events.begin(), events.end(),
      [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
        return a->begin_timestamp_us < b->begin_timestamp_us;
      });

  int64_t base_start_us = events[0]->begin_timestamp_us;
  int node_num = 0;
  int64_t curr_total_us = 0;
  std::map<std::string, Detail> details;
  for (auto event : events) {
    auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
    auto node_name = ToString(op_details.outputs);
    auto result = details.emplace(node_name, Detail());
    Detail* detail = &(result.first->second);

    detail->start_us.UpdateStat(event->begin_timestamp_us - base_start_us);
    int64_t node_exec_time =
        event->end_timestamp_us - event->begin_timestamp_us;
    detail->rel_end_us.UpdateStat(node_exec_time);
    curr_total_us += node_exec_time;
    ++node_num;

    // result.second is true only when this node name was newly inserted.
    if (result.second) {
      detail->name = node_name;
      detail->type = op_details.name;
      detail->run_order = node_num;
      detail->times_called = 0;
    }
    ++detail->times_called;
  }
  stats_calculator_->UpdateDetails(details);
  stats_calculator_->UpdateRunTotalUs(curr_total_us);
}
} // namespace profiling
} // namespace tflite

View File

@ -0,0 +1,58 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
#include <vector>
#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/profiling/profiler.h"
#include "tensorflow/core/util/stats_calculator.h"
namespace tflite {
namespace profiling {
// Summarizes operator invocations in an interpreter. All statistics are kept
// in, and rendered by, an owned tensorflow::StatsCalculator.
class ProfileSummarizer {
 public:
  ProfileSummarizer();
  virtual ~ProfileSummarizer() = default;

  // Updates the operator-invocation statistics from `profile_stats`, using
  // `interpreter` to describe the nodes the events refer to.
  void ProcessProfiles(const std::vector<const ProfileEvent*>& profile_stats,
                       const tflite::Interpreter& interpreter);

  // Returns the accumulated runtime stats as a tab-separated report that can
  // be pasted into a spreadsheet for further analysis.
  std::string GetOutputString() const {
    return stats_calculator_->GetOutputString();
  }

  // Returns the stats calculator's short summary.
  std::string GetShortSummary() const {
    return stats_calculator_->GetShortSummary();
  }

  // Prints the string returned by GetOutputString().
  void PrintStepStats() const { stats_calculator_->PrintStepStats(); }

 private:
  std::unique_ptr<tensorflow::StatsCalculator> stats_calculator_;
};
} // namespace profiling
} // namespace tflite
#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_

View File

@ -0,0 +1,116 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/kernels/kernel_util.h"
#include "tensorflow/contrib/lite/kernels/test_util.h"
#include "tensorflow/contrib/lite/model.h"
#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
#include "tensorflow/contrib/lite/testing/util.h"
#include "tensorflow/contrib/lite/version.h"
namespace tflite {
namespace profiling {
namespace {
// Eval function of the test op: stores the sum of the first int32 element of
// input 0 and input 1 into the first int32 element of output 0.
TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* lhs = tflite::GetInput(context, node, /*index=*/0);
  const TfLiteTensor* rhs = tflite::GetInput(context, node, /*index=*/1);
  TfLiteTensor* result = GetOutput(context, node, /*index=*/0);

  const int32_t sum = *(lhs->data.i32) + *(rhs->data.i32);
  *(result->data.i32) = sum;
  return kTfLiteOk;
}
// Returns the registration for the test's custom op. The registration object
// has static storage, so every call returns the same address and the pointer
// stays valid after the call.
TfLiteRegistration* RegisterSimpleOp() {
// Positional aggregate initialization: the values must stay in the order in
// which TfLiteRegistration declares its fields.
// NOTE(review): presumably init, free, prepare, invoke, builtin_code,
// custom_name, version - confirm against context.h.
static TfLiteRegistration registration = {nullptr,
nullptr,
nullptr,
SimpleOpEval,
tflite::BuiltinOperator_CUSTOM,
"SimpleOpEval",
1};
return &registration;
}
// Single-op test model with two int32 inputs and one int32 output.
class SimpleOpModel : public SingleOpModel {
 public:
  // Adds the tensors and the custom op, then builds the interpreter.
  void Init();

  // Non-owning access to the interpreter held by the base class.
  tflite::Interpreter* GetInterpreter() { return interpreter_.get(); }

  // Writes `x` into the first input tensor and `y` into the second.
  void SetInputs(int32_t x, int32_t y) {
    PopulateTensor(inputs_[0], {x});
    PopulateTensor(inputs_[1], {y});
  }

  // Returns the first element of the output tensor.
  int32_t GetOutput() {
    const auto values = ExtractVector<int32_t>(output_);
    return values[0];
  }

 private:
  int inputs_[2];  // Tensor indices returned by AddInput().
  int output_;     // Tensor index returned by AddOutput().
};
void SimpleOpModel::Init() {
inputs_[0] = AddInput({TensorType_INT32, {1}});
inputs_[1] = AddInput({TensorType_INT32, {1}});
output_ = AddOutput({TensorType_INT32, {}});
SetCustomOp("SimpleAdd", {}, RegisterSimpleOp);
BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])});
}
// A summarizer that has processed no profiles still produces non-empty output.
TEST(ProfileSummarizerTest, Empty) {
  ProfileSummarizer summarizer;
  std::string output = summarizer.GetOutputString();
  // Checked via empty() rather than comparing size() with a signed literal,
  // which mixes signed and unsigned operands.
  EXPECT_FALSE(output.empty());
}
#ifdef TFLITE_PROFILING_ENABLED
// Runs the single-op model once under the profiler and checks that the
// summarizer's output mentions the custom op.
TEST(ProfileSummarizerTest, Interpreter) {
  Profiler profiler;
  SimpleOpModel m;
  m.Init();
  auto interpreter = m.GetInterpreter();
  interpreter->SetProfiler(&profiler);
  profiler.StartProfiling();
  m.SetInputs(1, 2);
  m.Invoke();
  // 3 = 1 + 2
  EXPECT_EQ(m.GetOutput(), 3);
  profiler.StopProfiling();

  ProfileSummarizer summarizer;
  const auto events = profiler.GetProfileEvents();
  // Unsigned literal avoids a signed/unsigned comparison with size().
  EXPECT_EQ(1u, events.size());
  // Summarize exactly the events that were just inspected.
  summarizer.ProcessProfiles(events, *interpreter);
  auto output = summarizer.GetOutputString();
  // TODO(shashishekhar): Add a better test here.
  ASSERT_TRUE(output.find("SimpleOp") != std::string::npos) << output;
}
#endif
} // namespace
} // namespace profiling
} // namespace tflite
// Test entry point: calls ::tflite::LogToStderr(), initializes googletest
// from the command line, and returns the result of running all tests.
int main(int argc, char** argv) {
  ::tflite::LogToStderr();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

View File

@ -827,6 +827,8 @@ tf_cuda_library(
"util/sparse/group_iterator.h", "util/sparse/group_iterator.h",
"util/sparse/sparse_tensor.h", "util/sparse/sparse_tensor.h",
"util/stat_summarizer.h", "util/stat_summarizer.h",
"util/stat_summarizer_options.h",
"util/stats_calculator.h",
"util/stream_executor_util.h", "util/stream_executor_util.h",
"util/strided_slice_op.h", "util/strided_slice_op.h",
"util/tensor_format.h", "util/tensor_format.h",
@ -851,6 +853,16 @@ tf_cuda_library(
deps = [":framework_internal"], deps = [":framework_internal"],
) )
# The stats calculator source and its two headers packaged as a stand-alone
# library; its only dependency is :platform_base.
cc_library(
name = "stats_calculator_portable",
srcs = ["util/stats_calculator.cc"],
hdrs = [
"util/stat_summarizer_options.h",
"util/stats_calculator.h",
],
deps = [":platform_base"],
)
cc_library( cc_library(
name = "overflow", name = "overflow",
hdrs = ["util/overflow.h"], hdrs = ["util/overflow.h"],

View File

@ -31,26 +31,22 @@ limitations under the License.
namespace tensorflow { namespace tensorflow {
using Detail = StatsCalculator::Detail;
StatSummarizer::StatSummarizer(const StatSummarizerOptions& options) StatSummarizer::StatSummarizer(const StatSummarizerOptions& options)
: options_(options) {} : stats_calculator_(new StatsCalculator(options)) {}
StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph)
: StatSummarizer(StatSummarizerOptions()) {} : stats_calculator_(new StatsCalculator(StatSummarizerOptions())) {}
StatSummarizer::~StatSummarizer() {} StatSummarizer::~StatSummarizer() {}
void StatSummarizer::Reset() { void StatSummarizer::Validate(const std::vector<TensorDescription>* outputs,
run_total_us_.Reset();
memory_.Reset();
details_.clear();
}
void StatSummarizer::Validate(const Detail* detail,
const NodeExecStats& ns) const { const NodeExecStats& ns) const {
if (detail->outputs.size() != ns.output_size()) { if (outputs->size() != ns.output_size()) {
LOG(WARNING) << "Number of outputs changed between runs for '" LOG(WARNING) << "Number of outputs changed between runs for '"
<< ns.node_name() << "' - was " << detail->outputs.size() << ns.node_name() << "' - was " << outputs->size() << ", now "
<< ", now " << ns.output_size(); << ns.output_size();
} else { } else {
for (const auto& output : ns.output()) { for (const auto& output : ns.output()) {
const int32 slot = output.slot(); const int32 slot = output.slot();
@ -58,7 +54,7 @@ void StatSummarizer::Validate(const Detail* detail,
// This is not a hard error for Switch ops, so just pass. // This is not a hard error for Switch ops, so just pass.
continue; continue;
} }
const auto& stored = detail->outputs[slot]; const auto& stored = (*outputs)[slot];
const auto& current = output.tensor_description(); const auto& current = output.tensor_description();
bool do_tensors_match = bool do_tensors_match =
@ -129,6 +125,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
int64 first_node_start_us = int64 first_node_start_us =
step_stats.dev_stats(0).node_stats(0).all_start_micros(); step_stats.dev_stats(0).node_stats(0).all_start_micros();
std::map<std::string, Detail> details;
int node_num = 0; int node_num = 0;
for (const auto& ds : step_stats.dev_stats()) { for (const auto& ds : step_stats.dev_stats()) {
@ -172,7 +169,10 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
++node_num; ++node_num;
const int64 curr_time = ns.all_end_rel_micros(); const int64 curr_time = ns.all_end_rel_micros();
curr_total_us += curr_time; curr_total_us += curr_time;
auto result = details_.emplace(name, Detail()); auto result = details.emplace(name, Detail());
auto output_result =
outputs_.emplace(name, std::vector<TensorDescription>());
std::vector<TensorDescription>* outputs = &(output_result.first->second);
Detail* detail = &(result.first->second); Detail* detail = &(result.first->second);
detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us); detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us);
@ -185,16 +185,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
detail->run_order = node_num; detail->run_order = node_num;
detail->outputs.resize(ns.output_size()); outputs->resize(ns.output_size());
for (const auto& output : ns.output()) { for (const auto& output : ns.output()) {
const int32 slot = output.slot(); const int32 slot = output.slot();
if ((slot < 0) || (slot >= ns.output_size())) { if ((slot < 0) || (slot >= ns.output_size())) {
// This is not a hard error for Switch ops, so just pass. // This is not a hard error for Switch ops, so just pass.
continue; continue;
} }
detail->outputs[slot] = output.tensor_description(); (*outputs)[slot] = output.tensor_description();
} }
detail->times_called = 0; detail->times_called = 0;
} }
@ -207,273 +206,22 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
mem_total += curr_node_mem; mem_total += curr_node_mem;
++detail->times_called; ++detail->times_called;
stats_calculator_->UpdateDetails(details);
Validate(detail, ns); Validate(outputs, ns);
} }
} }
run_total_us_.UpdateStat(curr_total_us); stats_calculator_->UpdateRunTotalUs(curr_total_us);
memory_.UpdateStat(mem_total); stats_calculator_->UpdateMemoryUsed(mem_total);
} }
// Returns a three-line summary: the per-run total time stats, the memory
// stats, and the number of distinct nodes observed.
std::string StatSummarizer::ShortSummary() const {
  std::stringstream summary;

  summary << "Timings (microseconds): ";
  run_total_us_.OutputToStream(&summary);
  summary << std::endl;

  summary << "Memory (bytes): ";
  memory_.OutputToStream(&summary);
  summary << std::endl;

  summary << details_.size() << " nodes observed" << std::endl;
  return summary.str();
}
// Starts a new tab-separated column on `stream`: writes a tab, then configures
// right alignment, fixed-point notation with three decimals, and a field width
// of `width` for the next value written. Returns `stream` to allow chaining.
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";
  stream.setf(std::ios_base::right, std::ios_base::adjustfield);
  stream.width(width);
  stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
  stream.precision(3);
  return stream;
}
// Returns the banner line carrying `title`, followed by the column-header line
// of the per-node table. The header line is not newline-terminated.
std::string StatSummarizer::HeaderString(const string& title) const {
  struct Column {
    int width;
    const char* label;
  };
  const Column columns[] = {
      {24, "[node type]"}, {9, "[start]"},   {9, "[first]"},
      {9, "[avg ms]"},     {8, "[%]"},       {8, "[cdf%]"},
      {10, "[mem KB]"},    {9, "[times called]"},
  };

  std::stringstream header;
  header << "============================== " << title
         << " ==============================" << std::endl;
  for (const Column& column : columns) {
    InitField(header, column.width) << column.label;
  }
  // The name column is last and is not padded.
  header << "\t"
         << "[Name]";
  return header.str();
}
std::string StatSummarizer::ColumnString(const Detail& detail,
const int64 cumulative_stat_on_node,
const Stat<int64>& stat) const {
const double start_ms = detail.start_us.avg() / 1000.0;
const double first_time_ms = detail.rel_end_us.first() / 1000.0;
const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
const int64 times_called = detail.times_called / num_runs();
std::stringstream stream;
InitField(stream, 24) << detail.type;
InitField(stream, 9) << start_ms;
InitField(stream, 9) << first_time_ms;
InitField(stream, 9) << avg_time_ms;
InitField(stream, 7) << percentage << "%";
InitField(stream, 7) << cdf_percentage << "%";
InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
InitField(stream, 9) << times_called;
stream << "\t" << detail.name;
return stream.str();
}
// Appends pointers to all recorded node details to `details`, ordered by
// `metric`.
//
// Each node gets a textual sort key (right-aligned to width 20, fixed notation
// with 10 decimals) and nodes are emitted from the largest key to the
// smallest. For BY_RUN_ORDER the key is `num_nodes - run_order`, so earlier
// nodes carry the larger value.
void StatSummarizer::OrderNodesByMetric(
    SortingMetric metric, std::vector<const Detail*>* details) const {
  const int num_nodes = details_.size();

  // Renders the sort key of one node for the requested metric.
  const auto sort_key = [metric, num_nodes](const Detail& node) {
    std::stringstream key;
    key << std::setw(20) << std::right << std::setprecision(10) << std::fixed;
    if (metric == BY_NAME) {
      key << node.name;
    } else if (metric == BY_RUN_ORDER) {
      key << num_nodes - node.run_order;
    } else if (metric == BY_TIME) {
      key << node.rel_end_us.avg();
    } else if (metric == BY_MEMORY) {
      key << node.mem_used.avg();
    } else if (metric == BY_TYPE) {
      key << node.type;
    } else {
      key << "";
    }
    return key.str();
  };

  std::priority_queue<std::pair<string, const Detail*>> ranked;
  for (const auto& entry : details_) {
    ranked.emplace(sort_key(entry.second), &(entry.second));
  }

  // Drain the max-heap: largest key first.
  for (; !ranked.empty(); ranked.pop()) {
    details->push_back(ranked.top().second);
  }
}
void StatSummarizer::ComputeStatsByType(
std::map<string, int64>* node_type_map_count,
std::map<string, int64>* node_type_map_time,
std::map<string, int64>* node_type_map_memory,
std::map<string, int64>* node_type_map_times_called,
int64* accumulated_us) const {
int64 run_count = run_total_us_.count();
for (const auto& det : details_) {
const string node_name = det.first;
const Detail& detail = det.second;
int64 curr_time_val =
static_cast<int64>(detail.rel_end_us.sum() / run_count);
*accumulated_us += curr_time_val;
int64 curr_memory_val = detail.mem_used.newest();
const string& node_type = detail.type;
(*node_type_map_count)[node_type] += 1;
(*node_type_map_time)[node_type] += curr_time_val;
(*node_type_map_memory)[node_type] += curr_memory_val;
(*node_type_map_times_called)[node_type] += detail.times_called / run_count;
}
}
std::string StatSummarizer::GetStatsByNodeType() const {
std::stringstream stream;
stream << "============================== Summary by node type "
"=============================="
<< std::endl;
LOG(INFO) << "Number of nodes executed: " << details_.size();
std::map<string, int64> node_type_map_count;
std::map<string, int64> node_type_map_time;
std::map<string, int64> node_type_map_memory;
std::map<string, int64> node_type_map_times_called;
int64 accumulated_us = 0;
ComputeStatsByType(&node_type_map_count, &node_type_map_time,
&node_type_map_memory, &node_type_map_times_called,
&accumulated_us);
// Sort them.
std::priority_queue<std::pair<int64, std::pair<string, int64>>> timings;
for (const auto& node_type : node_type_map_time) {
const int64 mem_used = node_type_map_memory[node_type.first];
timings.emplace(node_type.second,
std::pair<string, int64>(node_type.first, mem_used));
}
InitField(stream, 24) << "[Node type]";
InitField(stream, 9) << "[count]";
InitField(stream, 10) << "[avg ms]";
InitField(stream, 11) << "[avg %]";
InitField(stream, 11) << "[cdf %]";
InitField(stream, 10) << "[mem KB]";
InitField(stream, 10) << "[times called]";
stream << std::endl;
float cdf = 0.0f;
while (!timings.empty()) {
auto entry = timings.top();
timings.pop();
const string node_type = entry.second.first;
const float memory = entry.second.second / 1000.0f;
const int64 node_type_total_us = entry.first;
const float time_per_run_ms = node_type_total_us / 1000.0f;
const float percentage =
((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
cdf += percentage;
InitField(stream, 24) << node_type;
InitField(stream, 9) << node_type_map_count[node_type];
InitField(stream, 10) << time_per_run_ms;
InitField(stream, 10) << percentage << "%";
InitField(stream, 10) << cdf << "%";
InitField(stream, 10) << memory;
InitField(stream, 9) << node_type_map_times_called[node_type];
stream << std::endl;
}
stream << std::endl;
return stream.str();
}
std::string StatSummarizer::GetStatsByMetric(const string& title,
SortingMetric sorting_metric,
int num_stats) const {
std::vector<const Detail*> details;
OrderNodesByMetric(sorting_metric, &details);
double cumulative_stat_on_node = 0;
std::stringstream stream;
stream << HeaderString(title) << std::endl;
int stat_num = 0;
for (auto detail : details) {
++stat_num;
if (num_stats > 0 && stat_num > num_stats) {
break;
}
// TODO(andrewharp): Make this keep track of the particular metric for cdf.
cumulative_stat_on_node += detail->rel_end_us.sum();
stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
<< std::endl;
}
stream << std::endl;
return stream.str();
}
std::string StatSummarizer::GetOutputString() const {
std::stringstream stream;
if (options_.show_run_order) {
stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
options_.run_order_limit);
}
if (options_.show_time) {
stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
options_.time_limit);
}
if (options_.show_memory) {
stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
options_.memory_limit);
}
if (options_.show_type) {
stream << GetStatsByNodeType();
}
if (options_.show_summary) {
stream << ShortSummary() << std::endl;
}
return stream.str();
}
void StatSummarizer::PrintStepStats() const {
string output = GetOutputString();
std::istringstream iss(output);
for (std::string line; std::getline(iss, line);) {
LOG(INFO) << line;
}
}
void StatSummarizer::PrintOutputs() const { void StatSummarizer::PrintOutputs() const {
std::priority_queue< std::priority_queue<
std::pair<int64, const std::pair<const std::string, Detail>*>> std::pair<int64, const std::pair<const std::string, Detail>*>>
timings; timings;
for (const auto& entry : details_) { for (const auto& entry : stats_calculator_->GetDetails()) {
timings.emplace(-entry.second.start_us.avg(), &entry); timings.emplace(-entry.second.start_us.avg(), &entry);
} }
@ -481,10 +229,10 @@ void StatSummarizer::PrintOutputs() const {
while (!timings.empty()) { while (!timings.empty()) {
auto entry = timings.top(); auto entry = timings.top();
timings.pop(); timings.pop();
const Detail& detail = entry.second->second;
std::stringstream stream; std::stringstream stream;
stream << entry.second->first << "\t" << detail.outputs.size(); const auto detail_outputs = outputs_.at(entry.second->first);
for (const auto& tensor : detail.outputs) { stream << entry.second->first << "\t" << detail_outputs.size();
for (const auto& tensor : detail_outputs) {
stream << "\t" << DataTypeString(tensor.dtype()); stream << "\t" << DataTypeString(tensor.dtype());
stream << "\t" << tensor.shape().dim_size(); stream << "\t" << tensor.shape().dim_size();
for (const auto& d : tensor.shape().dim()) { for (const auto& d : tensor.shape().dim()) {

View File

@ -13,20 +13,23 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==============================================================================*/ ==============================================================================*/
#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ #ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ #define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
#include <stdlib.h> #include <stdlib.h>
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include <map> #include <map>
#include <memory>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/stat_summarizer_options.h"
#include "tensorflow/core/util/stats_calculator.h"
namespace tensorflow { namespace tensorflow {
@ -34,103 +37,6 @@ class GraphDef;
class StepStats; class StepStats;
class NodeExecStats; class NodeExecStats;
// Accumulates summary statistics (first, newest, min, max, count, sum and sum
// of squares) over a sequence of samples.
//
// ValueType is the type of the samples. HighPrecisionValueType is used for the
// sum of squares and for the average.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Records one sample.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Returns the object to its freshly-constructed state.
  void Reset() { *this = Stat(); }

  bool empty() const { return count_ == 0; }

  ValueType first() const { return first_; }

  ValueType newest() const { return newest_; }

  ValueType max() const { return max_; }

  ValueType min() const { return min_; }

  int64 count() const { return count_; }

  ValueType sum() const { return sum_; }

  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when no sample, or only one distinct value, has been recorded.
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Mean of the samples. With no samples this yields ValueType's quiet_NaN(),
  // which is zero for integer ValueTypes.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Population standard deviation, converted to ValueType. Zero when all
  // samples are equal.
  ValueType std_deviation() const {
    if (all_same()) {
      return 0;
    }
    const HighPrecisionValueType mean = avg();
    const HighPrecisionValueType variance = squared_sum_ / count_ - mean * mean;
    // Rounding can push the computed variance slightly below zero; clamp so
    // the square root never produces NaN.
    return variance > 0 ? std::sqrt(variance) : 0;
  }

  // Writes a one-line description of the statistics to `*stream`; the amount
  // of detail depends on whether the stat is empty, constant, or varying.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Takes the current specialization (both template arguments), so it also
  // works when HighPrecisionValueType is not the default.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest() rather than min(): for floating-point types min() is the smallest
  // positive value, which would beat every negative sample.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64 count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
// Controls which sections the statistics summarizer emits and how many rows
// each limited section may contain.
class StatSummarizerOptions {
 public:
  // All sections are enabled by default; see the member initializers below
  // for the default limits.
  StatSummarizerOptions() {}

  bool show_run_order = true;
  int run_order_limit = 0;
  bool show_time = true;
  int time_limit = 10;
  bool show_memory = true;
  int memory_limit = 10;
  bool show_type = true;
  bool show_summary = true;
};
// A StatSummarizer assists in performance analysis of Graph executions. // A StatSummarizer assists in performance analysis of Graph executions.
// //
// It summarizes time spent executing (on GPU/CPU), memory used etc. across // It summarizes time spent executing (on GPU/CPU), memory used etc. across
@ -140,14 +46,6 @@ class StatSummarizerOptions {
// See tensorflow/tools/benchmark/benchmark_model.cc for an example usage. // See tensorflow/tools/benchmark/benchmark_model.cc for an example usage.
class StatSummarizer { class StatSummarizer {
public: public:
enum SortingMetric {
BY_NAME,
BY_RUN_ORDER,
BY_TIME,
BY_MEMORY,
BY_TYPE,
};
explicit StatSummarizer(const StatSummarizerOptions& options); explicit StatSummarizer(const StatSummarizerOptions& options);
// Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The // Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The
@ -161,65 +59,51 @@ class StatSummarizer {
// Returns a string detailing the accumulated runtime stats in a tab-separated // Returns a string detailing the accumulated runtime stats in a tab-separated
// format which can be pasted into a spreadsheet for further analysis. // format which can be pasted into a spreadsheet for further analysis.
std::string GetOutputString() const; std::string GetOutputString() const {
return stats_calculator_->GetOutputString();
}
std::string ShortSummary() const; std::string ShortSummary() const {
return stats_calculator_->GetShortSummary();
}
// Prints the string returned by GetOutputString(). // Prints the string returned by GetOutputString().
void PrintStepStats() const; void PrintStepStats() const { stats_calculator_->PrintStepStats(); }
// Prints the output tensor sizes and types for each node. // Prints the output tensor sizes and types for each node.
void PrintOutputs() const; void PrintOutputs() const;
void ComputeStatsByType(std::map<string, int64>* node_type_map_count, void ComputeStatsByType(
std::map<string, int64>* node_type_map_time, std::map<std::string, int64_t>* node_type_map_count,
std::map<string, int64>* node_type_map_memory, std::map<std::string, int64_t>* node_type_map_time,
std::map<string, int64>* node_type_map_times_called, std::map<std::string, int64_t>* node_type_map_memory,
int64* accumulated_us) const; std::map<std::string, int64_t>* node_type_map_times_called,
int64_t* accumulated_us) const {
stats_calculator_->ComputeStatsByType(
node_type_map_count, node_type_map_time, node_type_map_memory,
node_type_map_times_called, accumulated_us);
}
std::string GetStatsByNodeType() const; std::string GetStatsByNodeType() const {
return stats_calculator_->GetStatsByNodeType();
}
std::string GetStatsByMetric(const string& title, std::string GetStatsByMetric(const string& title,
SortingMetric sorting_metric, StatsCalculator::SortingMetric sorting_metric,
int num_stats) const; int num_stats) const {
return stats_calculator_->GetStatsByMetric(title, sorting_metric,
void Reset(); num_stats);
}
// Returns number of runs.
int num_runs() const { return static_cast<int>(run_total_us_.count()); }
// Returns stats of total microseconds spent by all nodes in each run.
const Stat<int64>& run_total_us() const { return run_total_us_; }
private: private:
struct Detail { void Validate(const std::vector<TensorDescription>* outputs,
string name; const NodeExecStats& ns) const;
string type;
int64 run_order;
Stat<int64> start_us;
Stat<int64> rel_end_us;
Stat<int64> mem_used;
std::vector<TensorDescription> outputs;
int64 times_called;
};
void Validate(const Detail* detail, const NodeExecStats& ns) const; std::map<std::string, std::vector<TensorDescription> > outputs_;
void OrderNodesByMetric(SortingMetric sorting_metric, std::unique_ptr<StatsCalculator> stats_calculator_;
std::vector<const Detail*>* details) const;
std::string HeaderString(const string& title) const;
std::string ColumnString(const Detail& detail,
const int64 cumulative_stat_on_node,
const Stat<int64>& stat) const;
Stat<int64> run_total_us_;
Stat<int64> memory_;
std::map<std::string, Detail> details_;
StatSummarizerOptions options_;
}; };
} // namespace tensorflow } // namespace tensorflow
#endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ #endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_

View File

@ -0,0 +1,43 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
namespace tensorflow {
// Controls which sections the statistics summarizer emits and how many rows
// each per-node section may contain.
class StatSummarizerOptions {
 public:
  // Emit the "Run Order" table, capped at run_order_limit rows.
  // A limit that is not positive disables the cap.
  bool show_run_order;
  int run_order_limit;

  // Emit the "Top by Computation Time" table, capped at time_limit rows.
  bool show_time;
  int time_limit;

  // Emit the "Top by Memory Use" table, capped at memory_limit rows.
  bool show_memory;
  int memory_limit;

  // Emit the per-node-type summary.
  bool show_type;

  // Emit the short timing/memory summary.
  bool show_summary;

  // By default every section is enabled; the run-order table is uncapped and
  // the time/memory tables show their top ten entries.
  StatSummarizerOptions()
      : show_run_order(true),
        run_order_limit(0),
        show_time(true),
        time_limit(10),
        show_memory(true),
        memory_limit(10),
        show_type(true),
        show_summary(true) {}
};
} // namespace tensorflow
#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_

View File

@ -0,0 +1,289 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/util/stats_calculator.h"
#include <iomanip>
#include <map>
#include <queue>
#include <sstream>
#include <string>
#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
: options_(options) {}
// Returns a three-line summary: per-run total time, memory, and the number of
// distinct nodes recorded so far.
std::string StatsCalculator::GetShortSummary() const {
  std::stringstream summary;

  summary << "Timings (microseconds): ";
  run_total_us_.OutputToStream(&summary);

  summary << std::endl << "Memory (bytes): ";
  memory_.OutputToStream(&summary);

  summary << std::endl
          << details_.size() << " nodes observed" << std::endl;
  return summary.str();
}
// Starts a new tab-separated column on `stream`: writes the tab, then arms the
// stream so the next value is right-aligned in `width` characters and floating
// point values print in fixed notation with three decimals.
// Returns `stream` so the value can be chained directly onto the call.
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";
  stream.setf(std::ios_base::right, std::ios_base::adjustfield);
  stream.width(width);  // Applies to the next inserted value only.
  stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
  stream.precision(3);
  return stream;
}
// Builds the banner line for `title` followed by the column captions of the
// per-node table. The caption row is not newline-terminated.
std::string StatsCalculator::HeaderString(const std::string& title) const {
  // Column width paired with its caption, in output order.
  struct Column {
    int width;
    const char* caption;
  };
  const Column columns[] = {
      {24, "[node type]"}, {9, "[start]"},  {9, "[first]"},
      {9, "[avg ms]"},     {8, "[%]"},      {8, "[cdf%]"},
      {10, "[mem KB]"},    {9, "[times called]"},
  };

  std::stringstream header;
  header << "============================== " << title
         << " ==============================" << std::endl;
  for (const Column& column : columns) {
    InitField(header, column.width) << column.caption;
  }
  // The node name is the last column and is not padded.
  header << "\t"
         << "[Name]";
  return header.str();
}
// Formats one row of the per-node table for `detail`.
//
// detail:                  the node being reported.
// cumulative_stat_on_node: running total (microseconds) of all rows emitted so
//                          far including this one; drives the cdf column.
// stat:                    the denominator for the percentage columns; its
//                          sum() is treated as 100%.
//
// Times are converted from microseconds to milliseconds and memory is divided
// by 1000 for the "[mem KB]" column.
std::string StatsCalculator::ColumnString(const Detail& detail,
                                          const int64_t cumulative_stat_on_node,
                                          const Stat<int64_t>& stat) const {
  const double start_ms = detail.start_us.avg() / 1000.0;
  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
  // Stay in double like `percentage` above; multiplying by 100.0f would first
  // round a large microsecond total to float precision.
  const double cdf_percentage = (cumulative_stat_on_node * 100.0) / stat.sum();
  // times_called is a total over all runs. Guard the integer division so that
  // formatting a node before any run total was recorded cannot divide by zero.
  const int runs = num_runs();
  const int64_t times_called = runs > 0 ? detail.times_called / runs : 0;

  std::stringstream stream;
  InitField(stream, 24) << detail.type;
  InitField(stream, 9) << start_ms;
  InitField(stream, 9) << first_time_ms;
  InitField(stream, 9) << avg_time_ms;
  InitField(stream, 7) << percentage << "%";
  InitField(stream, 7) << cdf_percentage << "%";
  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
  InitField(stream, 9) << times_called;
  stream << "\t" << detail.name;

  return stream.str();
}
// Appends a pointer to every recorded node to `details`, ordered by `metric`.
//
// Each node is given a textual sort key (right-aligned in 20 characters,
// fixed notation, 10 decimals) and nodes are emitted from the largest key to
// the smallest. For BY_RUN_ORDER the key is inverted so that the node with the
// smallest run_order comes out first.
void StatsCalculator::OrderNodesByMetric(
    SortingMetric metric, std::vector<const Detail*>* details) const {
  const int node_total = details_.size();

  // Renders the sort key of one node for the requested metric.
  const auto make_key = [metric, node_total](const Detail& node) {
    std::stringstream key;
    key << std::setw(20) << std::right << std::setprecision(10) << std::fixed;
    switch (metric) {
      case BY_NAME:
        key << node.name;
        break;
      case BY_RUN_ORDER:
        key << node_total - node.run_order;
        break;
      case BY_TIME:
        key << node.rel_end_us.avg();
        break;
      case BY_MEMORY:
        key << node.mem_used.avg();
        break;
      case BY_TYPE:
        key << node.type;
        break;
      default:
        key << "";
        break;
    }
    return key.str();
  };

  // Max-heap on (key, node pointer): popping yields the largest key first.
  std::priority_queue<std::pair<string, const Detail*>> heap;
  for (const auto& named_detail : details_) {
    heap.emplace(make_key(named_detail.second), &named_detail.second);
  }

  for (; !heap.empty(); heap.pop()) {
    details->push_back(heap.top().second);
  }
}
// Aggregates the recorded per-node statistics by node type.
//
// Every output is *added to*, never reset: the maps gain (or increase) one
// entry per node type and `*accumulated_us` grows by the per-run time of every
// node. Callers must therefore pass maps in the state they want extended and
// an `accumulated_us` that has been initialised (normally to 0).
//
// node_type_map_count:        number of distinct nodes of each type.
// node_type_map_time:         per-run microseconds spent in each type.
// node_type_map_memory:       most recently recorded memory of each type.
// node_type_map_times_called: per-run invocation count of each type.
// accumulated_us:             per-run microseconds summed over all nodes.
//
// If no run total has been recorded yet there is nothing to average over, so
// the outputs are left untouched instead of dividing by zero.
void StatsCalculator::ComputeStatsByType(
    std::map<std::string, int64_t>* node_type_map_count,
    std::map<std::string, int64_t>* node_type_map_time,
    std::map<std::string, int64_t>* node_type_map_memory,
    std::map<std::string, int64_t>* node_type_map_times_called,
    int64_t* accumulated_us) const {
  const int64_t run_count = run_total_us_.count();
  if (run_count <= 0) {
    return;
  }

  for (const auto& det : details_) {
    const Detail& detail = det.second;

    // Totals are accumulated over all runs; report them per run.
    const int64_t curr_time_val = detail.rel_end_us.sum() / run_count;
    *accumulated_us += curr_time_val;

    const int64_t curr_memory_val = detail.mem_used.newest();

    const std::string& node_type = detail.type;

    (*node_type_map_count)[node_type] += 1;
    (*node_type_map_time)[node_type] += curr_time_val;
    (*node_type_map_memory)[node_type] += curr_memory_val;
    (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
  }
}
std::string StatsCalculator::GetStatsByNodeType() const {
std::stringstream stream;
stream << "============================== Summary by node type "
"=============================="
<< std::endl;
LOG(INFO) << "Number of nodes executed: " << details_.size();
std::map<std::string, int64_t> node_type_map_count;
std::map<std::string, int64_t> node_type_map_time;
std::map<std::string, int64_t> node_type_map_memory;
std::map<std::string, int64_t> node_type_map_times_called;
int64_t accumulated_us = 0;
ComputeStatsByType(&node_type_map_count, &node_type_map_time,
&node_type_map_memory, &node_type_map_times_called,
&accumulated_us);
// Sort them.
std::priority_queue<std::pair<int64_t, std::pair<string, int64_t>>> timings;
for (const auto& node_type : node_type_map_time) {
const int64_t mem_used = node_type_map_memory[node_type.first];
timings.emplace(node_type.second,
std::pair<string, int64_t>(node_type.first, mem_used));
}
InitField(stream, 24) << "[Node type]";
InitField(stream, 9) << "[count]";
InitField(stream, 10) << "[avg ms]";
InitField(stream, 11) << "[avg %]";
InitField(stream, 11) << "[cdf %]";
InitField(stream, 10) << "[mem KB]";
InitField(stream, 10) << "[times called]";
stream << std::endl;
float cdf = 0.0f;
while (!timings.empty()) {
auto entry = timings.top();
timings.pop();
const string node_type = entry.second.first;
const float memory = entry.second.second / 1000.0f;
const int64_t node_type_total_us = entry.first;
const float time_per_run_ms = node_type_total_us / 1000.0f;
const float percentage =
((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
cdf += percentage;
InitField(stream, 24) << node_type;
InitField(stream, 9) << node_type_map_count[node_type];
InitField(stream, 10) << time_per_run_ms;
InitField(stream, 10) << percentage << "%";
InitField(stream, 10) << cdf << "%";
InitField(stream, 10) << memory;
InitField(stream, 9) << node_type_map_times_called[node_type];
stream << std::endl;
}
stream << std::endl;
return stream.str();
}
std::string StatsCalculator::GetStatsByMetric(const std::string& title,
SortingMetric sorting_metric,
int num_stats) const {
std::vector<const Detail*> details;
OrderNodesByMetric(sorting_metric, &details);
double cumulative_stat_on_node = 0;
std::stringstream stream;
stream << HeaderString(title) << std::endl;
int stat_num = 0;
for (auto detail : details) {
++stat_num;
if (num_stats > 0 && stat_num > num_stats) {
break;
}
// TODO(andrewharp): Make this keep track of the particular metric for cdf.
cumulative_stat_on_node += detail->rel_end_us.sum();
stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
<< std::endl;
}
stream << std::endl;
return stream.str();
}
std::string StatsCalculator::GetOutputString() const {
std::stringstream stream;
if (options_.show_run_order) {
stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
options_.run_order_limit);
}
if (options_.show_time) {
stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
options_.time_limit);
}
if (options_.show_memory) {
stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
options_.memory_limit);
}
if (options_.show_type) {
stream << GetStatsByNodeType();
}
if (options_.show_summary) {
stream << GetShortSummary() << std::endl;
}
return stream.str();
}
void StatsCalculator::PrintStepStats() const {
string output = GetOutputString();
std::istringstream iss(output);
for (std::string line; std::getline(iss, line);) {
LOG(INFO) << line;
}
}
void StatsCalculator::UpdateDetails(
const std::map<std::string, Detail>& details) {
details_.insert(details.begin(), details.end());
}
} // namespace tensorflow

View File

@ -0,0 +1,189 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
#include <stdlib.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "tensorflow/core/util/stat_summarizer_options.h"
namespace tensorflow {
// Running statistics over a stream of samples.
//
// ValueType is the sample type; HighPrecisionValueType is the type used for
// the average and for the sum of squares. Only the aggregates are stored, not
// the samples themselves.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Records one sample.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    // Widen before multiplying so the square is formed in the wide type.
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Discards all samples, returning to the default-constructed state.
  void Reset() { *this = Stat(); }

  bool empty() const { return count_ == 0; }

  ValueType first() const { return first_; }

  ValueType newest() const { return newest_; }

  ValueType max() const { return max_; }

  ValueType min() const { return min_; }

  int64_t count() const { return count_; }

  ValueType sum() const { return sum_; }

  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when no two recorded samples differ (including the empty case).
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Mean of the samples. With no samples this yields ValueType's quiet NaN,
  // which for integral ValueType is 0.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Population standard deviation, converted to ValueType.
  ValueType std_deviation() const {
    if (all_same()) {
      return 0;
    }
    const HighPrecisionValueType mean = avg();
    const HighPrecisionValueType variance =
        squared_sum_ / count_ - mean * mean;
    // Rounding can push a tiny variance below zero; clamp so we never take
    // the square root of a negative number.
    return variance > 0 ? static_cast<ValueType>(std::sqrt(variance))
                        : static_cast<ValueType>(0);
  }

  // Writes a one-line human readable summary to `stream`.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Declared on this exact specialisation so that it also works when
  // HighPrecisionValueType is not the default.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest() rather than min(): for floating point types min() is the
  // smallest positive value, which would hide negative maxima.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
// A StatsCalculator assists in performance analysis of Graph executions.
//
// It summarizes time spent executing (on GPU/CPU), memory used etc for
// graph execution.
//
// For example usage see StatSummarizer.
class StatsCalculator {
public:
// Key by which GetStatsByMetric() orders the per-node rows.
enum SortingMetric {
BY_NAME,
BY_RUN_ORDER,
BY_TIME,
BY_MEMORY,
BY_TYPE,
};
// Keeps a copy of `options`; they select the sections of GetOutputString().
explicit StatsCalculator(const StatSummarizerOptions& options);
// Returns a string detailing the accumulated runtime stats in a tab-separated
// format which can be pasted into a spreadsheet for further analysis.
std::string GetOutputString() const;
// Returns the total-time and memory statistics plus the number of nodes
// observed.
std::string GetShortSummary() const;
// Prints the string returned by GetOutputString().
void PrintStepStats() const;
// Aggregates the per-node details by node type. All outputs are added to
// (operator += on the map entries and on *accumulated_us), so the caller must
// initialise *accumulated_us before the call.
void ComputeStatsByType(
std::map<std::string, int64_t>* node_type_map_count,
std::map<std::string, int64_t>* node_type_map_time,
std::map<std::string, int64_t>* node_type_map_memory,
std::map<std::string, int64_t>* node_type_map_times_called,
int64_t* accumulated_us) const;
// Returns a table with one row per node type, most time-consuming type first.
std::string GetStatsByNodeType() const;
// Returns the per-node table titled `title`, ordered by `sorting_metric`.
// At most `num_stats` rows are emitted; a non-positive value means no limit.
std::string GetStatsByMetric(const std::string& title,
SortingMetric sorting_metric,
int num_stats) const;
// Returns number of runs.
int num_runs() const { return static_cast<int>(run_total_us_.count()); }
// Returns stats of total microseconds spent by all nodes in each run.
const Stat<int64_t>& run_total_us() const { return run_total_us_; }
// Records the total microseconds of one run; each call counts as one run.
void UpdateRunTotalUs(int64_t run_total_us) {
run_total_us_.UpdateStat(run_total_us);
}
// Records one memory sample (reported as bytes by GetShortSummary()).
void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }
// Statistics recorded for a single node.
struct Detail {
std::string name;
std::string type;
// Position used for BY_RUN_ORDER sorting; smaller values are listed first.
int64_t run_order;
// Start time samples in microseconds.
// NOTE(review): the reference point of the start time is not visible here.
Stat<int64_t> start_us;
// Samples in microseconds; reported as the node's execution time.
Stat<int64_t> rel_end_us;
// Memory samples; the newest one is what the reports show.
Stat<int64_t> mem_used;
// Total invocations over all runs; reports divide this by num_runs().
int64_t times_called;
};
// Returns the recorded details, keyed by the string used at insertion.
const std::map<std::string, Detail>& GetDetails() const { return details_; }
// Inserts the entries of `details`. Keys that are already present keep their
// existing Detail; the incoming value for such a key is ignored.
void UpdateDetails(const std::map<std::string, Detail>& details);
private:
// Appends pointers to all recorded details to `details`, ordered by
// `sorting_metric`.
void OrderNodesByMetric(SortingMetric sorting_metric,
std::vector<const Detail*>* details) const;
// Returns the banner for `title` followed by the column captions.
std::string HeaderString(const std::string& title) const;
// Returns one formatted table row for `detail`; percentages are relative to
// stat.sum().
std::string ColumnString(const Detail& detail,
const int64_t cumulative_stat_on_node,
const Stat<int64_t>& stat) const;
Stat<int64_t> run_total_us_;
Stat<int64_t> memory_;
std::map<std::string, Detail> details_;
StatSummarizerOptions options_;
};
} // namespace tensorflow
#endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_

View File

@ -73,7 +73,7 @@ void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
return ss; return ss;
} }
} }
%include "tensorflow/core/util/stat_summarizer_options.h"
%include "tensorflow/core/util/stat_summarizer.h" %include "tensorflow/core/util/stat_summarizer.h"
%unignoreall %unignoreall

View File

@ -667,12 +667,12 @@ int Main(int argc, char** argv) {
output_prefix, benchmark_name, "meta-init-plus-first-inference", 1, output_prefix, benchmark_name, "meta-init-plus-first-inference", 1,
initialization_time_s + (warmup_time_us / 1000000.0) / warmup_runs); initialization_time_s + (warmup_time_us / 1000000.0) / warmup_runs);
std::map<string, int64> node_type_map_count; std::map<std::string, int64_t> node_type_map_count;
std::map<string, int64> node_type_map_time; std::map<std::string, int64_t> node_type_map_time;
std::map<string, int64> node_type_map_memory; std::map<std::string, int64_t> node_type_map_memory;
std::map<string, int64> node_type_map_times_called; std::map<std::string, int64_t> node_type_map_times_called;
int64 accumulated_us; int64_t accumulated_us;
stats->ComputeStatsByType(&node_type_map_count, &node_type_map_time, stats->ComputeStatsByType(&node_type_map_count, &node_type_map_time,
&node_type_map_memory, &node_type_map_memory,
&node_type_map_times_called, &accumulated_us); &node_type_map_times_called, &accumulated_us);