Refactor StatSummarizer: extract common functionality without proto dependencies.

PiperOrigin-RevId: 197816405

commit 2307db76a2
parent dac1f12402
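For context, the proto-free path added by this commit is exercised by the new TFLite profiling test below. The following is only a minimal usage sketch derived from that test, assuming profiling is compiled in (TFLITE_PROFILING_ENABLED) and that a fully built tflite::Interpreter is available; the helper name SummarizeOneRun is hypothetical and not part of this change.

```cpp
#include <iostream>

#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
#include "tensorflow/contrib/lite/profiling/profiler.h"

// Sketch only: `interpreter` is assumed to be an already-built interpreter.
void SummarizeOneRun(tflite::Interpreter* interpreter) {
  tflite::profiling::Profiler profiler;
  interpreter->SetProfiler(&profiler);  // requires TFLITE_PROFILING_ENABLED

  profiler.StartProfiling();
  interpreter->Invoke();                // run the model once
  profiler.StopProfiling();

  // ProfileSummarizer feeds the collected operator-invoke events into the
  // new proto-free tensorflow::StatsCalculator and formats the result.
  tflite::profiling::ProfileSummarizer summarizer;
  summarizer.ProcessProfiles(profiler.GetProfileEvents(), *interpreter);
  std::cout << summarizer.GetOutputString() << std::endl;
}
```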
tensorflow/contrib/android/jni/run_stats_jni.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/contrib/android/jni/run_stats_jni.h"

 #include <jni.h>

 #include <sstream>

 #include "tensorflow/core/protobuf/config.pb.h"
@@ -73,7 +74,8 @@ JNIEXPORT jstring RUN_STATS_METHOD(summary)(JNIEnv* env, jclass clazz,
   StatSummarizer* s = requireHandle(env, handle);
   if (s == nullptr) return nullptr;
   std::stringstream ret;
-  ret << s->GetStatsByMetric("Top 10 CPU", StatSummarizer::BY_TIME, 10)
+  ret << s->GetStatsByMetric("Top 10 CPU", tensorflow::StatsCalculator::BY_TIME,
+                             10)
       << s->GetStatsByNodeType() << s->ShortSummary();
   return env->NewStringUTF(ret.str().c_str());
 }
tensorflow/contrib/lite/profiling/BUILD
@@ -31,6 +31,33 @@ cc_library(
     copts = common_copts,
 )

+cc_library(
+    name = "profile_summarizer",
+    srcs = ["profile_summarizer.cc"],
+    hdrs = ["profile_summarizer.h"],
+    deps = [
+        ":profiler",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
+        "//tensorflow/core:stats_calculator_portable",
+    ],
+)
+
+cc_test(
+    name = "profile_summarizer_test",
+    srcs = ["profile_summarizer_test.cc"],
+    deps = [
+        ":profile_summarizer",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:schema_fbs_version",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:kernel_util",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "//tensorflow/contrib/lite/testing:util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "profile_buffer_test",
     srcs = ["profile_buffer_test.cc"],
tensorflow/contrib/lite/profiling/profile_summarizer.cc (new file, 140 lines)
@@ -0,0 +1,140 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"

#include <sstream>

#include "tensorflow/contrib/lite/schema/schema_generated.h"

namespace tflite {
namespace profiling {
namespace {

using Detail = tensorflow::StatsCalculator::Detail;

struct OperatorDetails {
  string name;
  std::vector<string> inputs;
  std::vector<string> outputs;
};

string GetTensorName(const tflite::Interpreter& interpreter, int tensor_index) {
  const auto tensor = interpreter.tensor(tensor_index);
  if (tensor == nullptr || tensor->name == nullptr) {
    return "Unknown";
  }
  return tensor->name;
}

std::vector<string> GetTensorNames(const tflite::Interpreter& interpreter,
                                   const TfLiteIntArray* tensor_indices) {
  std::vector<string> tensors;
  tensors.reserve(tensor_indices->size);
  for (int i = 0; i < tensor_indices->size; i++) {
    tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
  }
  return tensors;
}

string ToString(const std::vector<string>& str_vector) {
  std::stringstream stream;
  stream << "[";
  bool first = true;
  for (const auto& s : str_vector) {
    if (!first) {
      stream << ", ";
    } else {
      first = false;
    }
    stream << s;
  }
  stream << "]";
  return stream.str();
}

OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
                                   int node_index) {
  auto node_reg = interpreter.node_and_registration(node_index);
  auto inputs = node_reg->first.inputs;
  auto outputs = node_reg->first.outputs;
  int code = node_reg->second.builtin_code;
  const char* op_name = nullptr;
  if (code == tflite::BuiltinOperator_CUSTOM) {
    const char* custom_name = node_reg->second.custom_name;
    op_name = custom_name ? custom_name : "UnknownCustomOp";
  } else {
    op_name = tflite::EnumNamesBuiltinOperator()[code];
  }
  OperatorDetails details;
  details.name = op_name;
  details.inputs = GetTensorNames(interpreter, inputs);
  details.outputs = GetTensorNames(interpreter, outputs);
  return details;
}

}  // namespace

ProfileSummarizer::ProfileSummarizer()
    : stats_calculator_(new ::tensorflow::StatsCalculator(
          tensorflow::StatSummarizerOptions())) {}

void ProfileSummarizer::ProcessProfiles(
    const std::vector<const ProfileEvent*>& profile_stats,
    const tflite::Interpreter& interpreter) {
  std::vector<const ProfileEvent*> events;
  std::copy_if(profile_stats.begin(), profile_stats.end(),
               std::back_inserter(events), [](const ProfileEvent* e) {
                 return e->event_type ==
                            ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
                        e->end_timestamp_us >= e->begin_timestamp_us;
               });
  // Sort with begin_time.
  std::sort(events.begin(), events.end(),
            [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
              return a->begin_timestamp_us < b->begin_timestamp_us;
            });
  if (events.empty()) {
    return;
  }

  int64_t base_start_us = events[0]->begin_timestamp_us;
  int node_num = 0;
  int64_t curr_total_us = 0;
  std::map<std::string, Detail> details;
  for (auto event : events) {
    auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
    auto node_name = ToString(op_details.outputs);
    auto result = details.emplace(node_name, Detail());
    Detail* detail = &(result.first->second);
    detail->start_us.UpdateStat(event->begin_timestamp_us - base_start_us);
    int64_t node_exec_time =
        event->end_timestamp_us - event->begin_timestamp_us;
    detail->rel_end_us.UpdateStat(node_exec_time);
    curr_total_us += node_exec_time;
    ++node_num;

    if (result.second) {
      detail->name = node_name;
      detail->type = op_details.name;
      detail->run_order = node_num;
      detail->times_called = 0;
    }
    ++detail->times_called;
  }
  stats_calculator_->UpdateDetails(details);
  stats_calculator_->UpdateRunTotalUs(curr_total_us);
}
}  // namespace profiling
}  // namespace tflite
tensorflow/contrib/lite/profiling/profile_summarizer.h (new file, 58 lines)
@@ -0,0 +1,58 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_

#include <vector>

#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/profiling/profiler.h"
#include "tensorflow/core/util/stats_calculator.h"

namespace tflite {
namespace profiling {

// Creates a summary of operator invocations in the interpreter.
class ProfileSummarizer {
 public:
  ProfileSummarizer();
  virtual ~ProfileSummarizer() {}

  // Process profile events to update statistics for operator invocations.
  void ProcessProfiles(const std::vector<const ProfileEvent*>& profile_stats,
                       const tflite::Interpreter& interpreter);

  // Returns a string detailing the accumulated runtime stats in a tab-separated
  // format which can be pasted into a spreadsheet for further analysis.
  std::string GetOutputString() const {
    return stats_calculator_->GetOutputString();
  }

  std::string GetShortSummary() const {
    return stats_calculator_->GetShortSummary();
  }

  // Prints the string returned by GetOutputString().
  void PrintStepStats() const { stats_calculator_->PrintStepStats(); }

 private:
  std::unique_ptr<tensorflow::StatsCalculator> stats_calculator_;
};

}  // namespace profiling
}  // namespace tflite

#endif  // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
tensorflow/contrib/lite/profiling/profile_summarizer_test.cc (new file, 116 lines)
@@ -0,0 +1,116 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/kernels/kernel_util.h"
#include "tensorflow/contrib/lite/kernels/test_util.h"
#include "tensorflow/contrib/lite/model.h"
#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
#include "tensorflow/contrib/lite/testing/util.h"
#include "tensorflow/contrib/lite/version.h"

namespace tflite {
namespace profiling {

namespace {

TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = tflite::GetInput(context, node, /*index=*/0);
  const TfLiteTensor* input2 = tflite::GetInput(context, node, /*index=*/1);

  TfLiteTensor* output = GetOutput(context, node, /*index=*/0);

  int32_t* output_data = output->data.i32;
  *output_data = *(input1->data.i32) + *(input2->data.i32);
  return kTfLiteOk;
}

TfLiteRegistration* RegisterSimpleOp() {
  static TfLiteRegistration registration = {nullptr,
                                            nullptr,
                                            nullptr,
                                            SimpleOpEval,
                                            tflite::BuiltinOperator_CUSTOM,
                                            "SimpleOpEval",
                                            1};
  return &registration;
}

class SimpleOpModel : public SingleOpModel {
 public:
  void Init();
  tflite::Interpreter* GetInterpreter() { return interpreter_.get(); }
  void SetInputs(int32_t x, int32_t y) {
    PopulateTensor(inputs_[0], {x});
    PopulateTensor(inputs_[1], {y});
  }
  int32_t GetOutput() { return ExtractVector<int32_t>(output_)[0]; }

 private:
  int inputs_[2];
  int output_;
};

void SimpleOpModel::Init() {
  inputs_[0] = AddInput({TensorType_INT32, {1}});
  inputs_[1] = AddInput({TensorType_INT32, {1}});
  output_ = AddOutput({TensorType_INT32, {}});
  SetCustomOp("SimpleAdd", {}, RegisterSimpleOp);
  BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])});
}

TEST(ProfileSummarizerTest, Empty) {
  ProfileSummarizer summarizer;
  std::string output = summarizer.GetOutputString();
  EXPECT_GT(output.size(), 0);
}

#ifdef TFLITE_PROFILING_ENABLED
TEST(ProfileSummarizerTest, Interpreter) {
  Profiler profiler;
  SimpleOpModel m;
  m.Init();
  auto interpreter = m.GetInterpreter();
  interpreter->SetProfiler(&profiler);
  profiler.StartProfiling();
  m.SetInputs(1, 2);
  m.Invoke();
  // 3 = 1 + 2
  EXPECT_EQ(m.GetOutput(), 3);
  profiler.StopProfiling();
  ProfileSummarizer summarizer;
  auto events = profiler.GetProfileEvents();
  EXPECT_EQ(1, events.size());
  summarizer.ProcessProfiles(profiler.GetProfileEvents(), *interpreter);
  auto output = summarizer.GetOutputString();
  // TODO(shashishekhar): Add a better test here.
  ASSERT_TRUE(output.find("SimpleOp") != std::string::npos) << output;
}
#endif

}  // namespace
}  // namespace profiling
}  // namespace tflite

int main(int argc, char** argv) {
  ::tflite::LogToStderr();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
tensorflow/core/BUILD
@@ -827,6 +827,8 @@ tf_cuda_library(
         "util/sparse/group_iterator.h",
         "util/sparse/sparse_tensor.h",
         "util/stat_summarizer.h",
+        "util/stat_summarizer_options.h",
+        "util/stats_calculator.h",
         "util/stream_executor_util.h",
         "util/strided_slice_op.h",
         "util/tensor_format.h",
@@ -851,6 +853,16 @@ tf_cuda_library(
     deps = [":framework_internal"],
 )

+cc_library(
+    name = "stats_calculator_portable",
+    srcs = ["util/stats_calculator.cc"],
+    hdrs = [
+        "util/stat_summarizer_options.h",
+        "util/stats_calculator.h",
+    ],
+    deps = [":platform_base"],
+)
+
 cc_library(
     name = "overflow",
     hdrs = ["util/overflow.h"],
tensorflow/core/util/stat_summarizer.cc
@@ -31,26 +31,22 @@ limitations under the License.

 namespace tensorflow {

+using Detail = StatsCalculator::Detail;
+
 StatSummarizer::StatSummarizer(const StatSummarizerOptions& options)
-    : options_(options) {}
+    : stats_calculator_(new StatsCalculator(options)) {}

 StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph)
-    : StatSummarizer(StatSummarizerOptions()) {}
+    : stats_calculator_(new StatsCalculator(StatSummarizerOptions())) {}

 StatSummarizer::~StatSummarizer() {}

-void StatSummarizer::Reset() {
-  run_total_us_.Reset();
-  memory_.Reset();
-  details_.clear();
-}
-
-void StatSummarizer::Validate(const Detail* detail,
+void StatSummarizer::Validate(const std::vector<TensorDescription>* outputs,
                               const NodeExecStats& ns) const {
-  if (detail->outputs.size() != ns.output_size()) {
+  if (outputs->size() != ns.output_size()) {
     LOG(WARNING) << "Number of outputs changed between runs for '"
-                 << ns.node_name() << "' - was " << detail->outputs.size()
-                 << ", now " << ns.output_size();
+                 << ns.node_name() << "' - was " << outputs->size() << ", now "
+                 << ns.output_size();
   } else {
     for (const auto& output : ns.output()) {
       const int32 slot = output.slot();
@@ -58,7 +54,7 @@ void StatSummarizer::Validate(const Detail* detail,
         // This is not a hard error for Switch ops, so just pass.
         continue;
       }
-      const auto& stored = detail->outputs[slot];
+      const auto& stored = (*outputs)[slot];
       const auto& current = output.tensor_description();

       bool do_tensors_match =
@@ -129,6 +125,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {

   int64 first_node_start_us =
       step_stats.dev_stats(0).node_stats(0).all_start_micros();
+  std::map<std::string, Detail> details;

   int node_num = 0;
   for (const auto& ds : step_stats.dev_stats()) {
@@ -172,7 +169,10 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
       ++node_num;
       const int64 curr_time = ns.all_end_rel_micros();
       curr_total_us += curr_time;
-      auto result = details_.emplace(name, Detail());
+      auto result = details.emplace(name, Detail());
+      auto output_result =
+          outputs_.emplace(name, std::vector<TensorDescription>());
+      std::vector<TensorDescription>* outputs = &(output_result.first->second);
       Detail* detail = &(result.first->second);

       detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us);
@@ -185,16 +185,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {

         detail->run_order = node_num;

-        detail->outputs.resize(ns.output_size());
+        outputs->resize(ns.output_size());
         for (const auto& output : ns.output()) {
           const int32 slot = output.slot();
           if ((slot < 0) || (slot >= ns.output_size())) {
             // This is not a hard error for Switch ops, so just pass.
             continue;
           }
-          detail->outputs[slot] = output.tensor_description();
+          (*outputs)[slot] = output.tensor_description();
         }

         detail->times_called = 0;
       }

@@ -207,273 +206,22 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
       mem_total += curr_node_mem;

       ++detail->times_called;
+      stats_calculator_->UpdateDetails(details);

-      Validate(detail, ns);
+      Validate(outputs, ns);
     }
   }

-  run_total_us_.UpdateStat(curr_total_us);
-  memory_.UpdateStat(mem_total);
+  stats_calculator_->UpdateRunTotalUs(curr_total_us);
+  stats_calculator_->UpdateMemoryUsed(mem_total);
 }

-std::string StatSummarizer::ShortSummary() const {
-  std::stringstream stream;
-  stream << "Timings (microseconds): ";
-  run_total_us_.OutputToStream(&stream);
-  stream << std::endl;
-
-  stream << "Memory (bytes): ";
-  memory_.OutputToStream(&stream);
-  stream << std::endl;
-
-  stream << details_.size() << " nodes observed" << std::endl;
-  return stream.str();
-}
-
-std::ostream& InitField(std::ostream& stream, int width) {
-  stream << "\t" << std::right << std::setw(width) << std::fixed
-         << std::setprecision(3);
-  return stream;
-}
-
-std::string StatSummarizer::HeaderString(const string& title) const {
-  std::stringstream stream;
-
-  stream << "============================== " << title
-         << " ==============================" << std::endl;
-
-  InitField(stream, 24) << "[node type]";
-  InitField(stream, 9) << "[start]";
-  InitField(stream, 9) << "[first]";
-  InitField(stream, 9) << "[avg ms]";
-  InitField(stream, 8) << "[%]";
-  InitField(stream, 8) << "[cdf%]";
-  InitField(stream, 10) << "[mem KB]";
-  InitField(stream, 9) << "[times called]";
-  stream << "\t"
-         << "[Name]";
-  return stream.str();
-}
-
-std::string StatSummarizer::ColumnString(const Detail& detail,
-                                         const int64 cumulative_stat_on_node,
-                                         const Stat<int64>& stat) const {
-  const double start_ms = detail.start_us.avg() / 1000.0;
-  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
-  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
-  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
-  const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
-  const int64 times_called = detail.times_called / num_runs();
-
-  std::stringstream stream;
-  InitField(stream, 24) << detail.type;
-  InitField(stream, 9) << start_ms;
-  InitField(stream, 9) << first_time_ms;
-  InitField(stream, 9) << avg_time_ms;
-  InitField(stream, 7) << percentage << "%";
-  InitField(stream, 7) << cdf_percentage << "%";
-  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
-  InitField(stream, 9) << times_called;
-  stream << "\t" << detail.name;
-
-  return stream.str();
-}
-
-void StatSummarizer::OrderNodesByMetric(
-    SortingMetric metric, std::vector<const Detail*>* details) const {
-  std::priority_queue<std::pair<string, const Detail*>> sorted_list;
-  const int num_nodes = details_.size();
-
-  for (const auto& det : details_) {
-    const Detail* detail = &(det.second);
-    std::stringstream stream;
-    stream << std::setw(20) << std::right << std::setprecision(10)
-           << std::fixed;
-
-    switch (metric) {
-      case BY_NAME:
-        stream << detail->name;
-        break;
-      case BY_RUN_ORDER:
-        stream << num_nodes - detail->run_order;
-        break;
-      case BY_TIME:
-        stream << detail->rel_end_us.avg();
-        break;
-      case BY_MEMORY:
-        stream << detail->mem_used.avg();
-        break;
-      case BY_TYPE:
-        stream << detail->type;
-        break;
-      default:
-        stream << "";
-        break;
-    }
-
-    sorted_list.emplace(stream.str(), detail);
-  }
-
-  while (!sorted_list.empty()) {
-    auto entry = sorted_list.top();
-    sorted_list.pop();
-    details->push_back(entry.second);
-  }
-}
-
-void StatSummarizer::ComputeStatsByType(
-    std::map<string, int64>* node_type_map_count,
-    std::map<string, int64>* node_type_map_time,
-    std::map<string, int64>* node_type_map_memory,
-    std::map<string, int64>* node_type_map_times_called,
-    int64* accumulated_us) const {
-  int64 run_count = run_total_us_.count();
-
-  for (const auto& det : details_) {
-    const string node_name = det.first;
-    const Detail& detail = det.second;
-
-    int64 curr_time_val =
-        static_cast<int64>(detail.rel_end_us.sum() / run_count);
-    *accumulated_us += curr_time_val;
-
-    int64 curr_memory_val = detail.mem_used.newest();
-
-    const string& node_type = detail.type;
-
-    (*node_type_map_count)[node_type] += 1;
-    (*node_type_map_time)[node_type] += curr_time_val;
-    (*node_type_map_memory)[node_type] += curr_memory_val;
-    (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
-  }
-}
-
-std::string StatSummarizer::GetStatsByNodeType() const {
-  std::stringstream stream;
-
-  stream << "============================== Summary by node type "
-            "=============================="
-         << std::endl;
-
-  LOG(INFO) << "Number of nodes executed: " << details_.size();
-
-  std::map<string, int64> node_type_map_count;
-  std::map<string, int64> node_type_map_time;
-  std::map<string, int64> node_type_map_memory;
-  std::map<string, int64> node_type_map_times_called;
-  int64 accumulated_us = 0;
-
-  ComputeStatsByType(&node_type_map_count, &node_type_map_time,
-                     &node_type_map_memory, &node_type_map_times_called,
-                     &accumulated_us);
-
-  // Sort them.
-  std::priority_queue<std::pair<int64, std::pair<string, int64>>> timings;
-  for (const auto& node_type : node_type_map_time) {
-    const int64 mem_used = node_type_map_memory[node_type.first];
-    timings.emplace(node_type.second,
-                    std::pair<string, int64>(node_type.first, mem_used));
-  }
-
-  InitField(stream, 24) << "[Node type]";
-  InitField(stream, 9) << "[count]";
-  InitField(stream, 10) << "[avg ms]";
-  InitField(stream, 11) << "[avg %]";
-  InitField(stream, 11) << "[cdf %]";
-  InitField(stream, 10) << "[mem KB]";
-  InitField(stream, 10) << "[times called]";
-  stream << std::endl;
-
-  float cdf = 0.0f;
-  while (!timings.empty()) {
-    auto entry = timings.top();
-    timings.pop();
-
-    const string node_type = entry.second.first;
-    const float memory = entry.second.second / 1000.0f;
-
-    const int64 node_type_total_us = entry.first;
-    const float time_per_run_ms = node_type_total_us / 1000.0f;
-
-    const float percentage =
-        ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
-    cdf += percentage;
-
-    InitField(stream, 24) << node_type;
-    InitField(stream, 9) << node_type_map_count[node_type];
-    InitField(stream, 10) << time_per_run_ms;
-    InitField(stream, 10) << percentage << "%";
-    InitField(stream, 10) << cdf << "%";
-    InitField(stream, 10) << memory;
-    InitField(stream, 9) << node_type_map_times_called[node_type];
-    stream << std::endl;
-  }
-  stream << std::endl;
-  return stream.str();
-}
-
-std::string StatSummarizer::GetStatsByMetric(const string& title,
-                                             SortingMetric sorting_metric,
-                                             int num_stats) const {
-  std::vector<const Detail*> details;
-  OrderNodesByMetric(sorting_metric, &details);
-
-  double cumulative_stat_on_node = 0;
-
-  std::stringstream stream;
-  stream << HeaderString(title) << std::endl;
-  int stat_num = 0;
-  for (auto detail : details) {
-    ++stat_num;
-    if (num_stats > 0 && stat_num > num_stats) {
-      break;
-    }
-
-    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
-    cumulative_stat_on_node += detail->rel_end_us.sum();
-    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
-           << std::endl;
-  }
-  stream << std::endl;
-  return stream.str();
-}
-
-std::string StatSummarizer::GetOutputString() const {
-  std::stringstream stream;
-  if (options_.show_run_order) {
-    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
-                               options_.run_order_limit);
-  }
-  if (options_.show_time) {
-    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
-                               options_.time_limit);
-  }
-  if (options_.show_memory) {
-    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
-                               options_.memory_limit);
-  }
-  if (options_.show_type) {
-    stream << GetStatsByNodeType();
-  }
-  if (options_.show_summary) {
-    stream << ShortSummary() << std::endl;
-  }
-  return stream.str();
-}
-
-void StatSummarizer::PrintStepStats() const {
-  string output = GetOutputString();
-  std::istringstream iss(output);
-  for (std::string line; std::getline(iss, line);) {
-    LOG(INFO) << line;
-  }
-}
-
 void StatSummarizer::PrintOutputs() const {
   std::priority_queue<
       std::pair<int64, const std::pair<const std::string, Detail>*>>
       timings;
-  for (const auto& entry : details_) {
+  for (const auto& entry : stats_calculator_->GetDetails()) {
     timings.emplace(-entry.second.start_us.avg(), &entry);
   }

@@ -481,10 +229,10 @@ void StatSummarizer::PrintOutputs() const {
   while (!timings.empty()) {
     auto entry = timings.top();
     timings.pop();
-    const Detail& detail = entry.second->second;
     std::stringstream stream;
-    stream << entry.second->first << "\t" << detail.outputs.size();
-    for (const auto& tensor : detail.outputs) {
+    const auto detail_outputs = outputs_.at(entry.second->first);
+    stream << entry.second->first << "\t" << detail_outputs.size();
+    for (const auto& tensor : detail_outputs) {
       stream << "\t" << DataTypeString(tensor.dtype());
       stream << "\t" << tensor.shape().dim_size();
       for (const auto& d : tensor.shape().dim()) {
tensorflow/core/util/stat_summarizer.h
@@ -13,20 +13,23 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
-#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
+#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
+#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_

 #include <stdlib.h>

 #include <cmath>
 #include <limits>
 #include <map>
+#include <memory>
 #include <sstream>
 #include <string>

 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/stat_summarizer_options.h"
+#include "tensorflow/core/util/stats_calculator.h"

 namespace tensorflow {

@@ -34,103 +37,6 @@ class GraphDef;
 class StepStats;
 class NodeExecStats;

-template <typename ValueType, typename HighPrecisionValueType = double>
-class Stat {
- public:
-  void UpdateStat(ValueType v) {
-    if (count_ == 0) {
-      first_ = v;
-    }
-
-    newest_ = v;
-    max_ = std::max(v, max_);
-    min_ = std::min(v, min_);
-    ++count_;
-    sum_ += v;
-    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
-  }
-
-  void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); }
-
-  bool empty() const { return count_ == 0; }
-
-  ValueType first() const { return first_; }
-
-  ValueType newest() const { return newest_; }
-
-  ValueType max() const { return max_; }
-
-  ValueType min() const { return min_; }
-
-  int64 count() const { return count_; }
-
-  ValueType sum() const { return sum_; }
-
-  HighPrecisionValueType squared_sum() const { return squared_sum_; }
-
-  bool all_same() const { return (count_ == 0 || min_ == max_); }
-
-  HighPrecisionValueType avg() const {
-    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
-                   : static_cast<HighPrecisionValueType>(sum_) / count_;
-  }
-
-  ValueType std_deviation() const {
-    return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg());
-  }
-
-  void OutputToStream(std::ostream* stream) const {
-    if (empty()) {
-      *stream << "count=0";
-    } else if (all_same()) {
-      *stream << "count=" << count_ << " curr=" << newest_;
-      if (count_ > 1) *stream << "(all same)";
-    } else {
-      *stream << "count=" << count_ << " first=" << first_
-              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
-              << " avg=" << avg() << " std=" << std_deviation();
-    }
-  }
-
-  friend std::ostream& operator<<(std::ostream& stream,
-                                  const Stat<ValueType>& stat) {
-    stat.OutputToStream(&stream);
-    return stream;
-  }
-
- private:
-  ValueType first_ = 0;
-  ValueType newest_ = 0;
-  ValueType max_ = std::numeric_limits<ValueType>::min();
-  ValueType min_ = std::numeric_limits<ValueType>::max();
-  int64 count_ = 0;
-  ValueType sum_ = 0;
-  HighPrecisionValueType squared_sum_ = 0;
-};
-
-// Used to control the output of the statistics summarizer;
-class StatSummarizerOptions {
- public:
-  StatSummarizerOptions()
-      : show_run_order(true),
-        run_order_limit(0),
-        show_time(true),
-        time_limit(10),
-        show_memory(true),
-        memory_limit(10),
-        show_type(true),
-        show_summary(true) {}
-
-  bool show_run_order;
-  int run_order_limit;
-  bool show_time;
-  int time_limit;
-  bool show_memory;
-  int memory_limit;
-  bool show_type;
-  bool show_summary;
-};
-
 // A StatSummarizer assists in performance analysis of Graph executions.
 //
 // It summarizes time spent executing (on GPU/CPU), memory used etc. across
@@ -140,14 +46,6 @@ class StatSummarizerOptions {
 // See tensorflow/tools/benchmark/benchmark_model.cc for an example usage.
 class StatSummarizer {
  public:
-  enum SortingMetric {
-    BY_NAME,
-    BY_RUN_ORDER,
-    BY_TIME,
-    BY_MEMORY,
-    BY_TYPE,
-  };
-
   explicit StatSummarizer(const StatSummarizerOptions& options);

   // Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The
@@ -161,65 +59,51 @@ class StatSummarizer {

   // Returns a string detailing the accumulated runtime stats in a tab-separated
   // format which can be pasted into a spreadsheet for further analysis.
-  std::string GetOutputString() const;
+  std::string GetOutputString() const {
+    return stats_calculator_->GetOutputString();
+  }

-  std::string ShortSummary() const;
+  std::string ShortSummary() const {
+    return stats_calculator_->GetShortSummary();
+  }

   // Prints the string returned by GetOutputString().
-  void PrintStepStats() const;
+  void PrintStepStats() const { stats_calculator_->PrintStepStats(); }

   // Prints the output tensor sizes and types for each node.
   void PrintOutputs() const;

-  void ComputeStatsByType(std::map<string, int64>* node_type_map_count,
-                          std::map<string, int64>* node_type_map_time,
-                          std::map<string, int64>* node_type_map_memory,
-                          std::map<string, int64>* node_type_map_times_called,
-                          int64* accumulated_us) const;
+  void ComputeStatsByType(
+      std::map<std::string, int64_t>* node_type_map_count,
+      std::map<std::string, int64_t>* node_type_map_time,
+      std::map<std::string, int64_t>* node_type_map_memory,
+      std::map<std::string, int64_t>* node_type_map_times_called,
+      int64_t* accumulated_us) const {
+    stats_calculator_->ComputeStatsByType(
+        node_type_map_count, node_type_map_time, node_type_map_memory,
+        node_type_map_times_called, accumulated_us);
+  }

-  std::string GetStatsByNodeType() const;
+  std::string GetStatsByNodeType() const {
+    return stats_calculator_->GetStatsByNodeType();
+  }

   std::string GetStatsByMetric(const string& title,
-                               SortingMetric sorting_metric,
-                               int num_stats) const;
-
-  void Reset();
-
-  // Returns number of runs.
-  int num_runs() const { return static_cast<int>(run_total_us_.count()); }
-
-  // Returns stats of total microseconds spent by all nodes in each run.
-  const Stat<int64>& run_total_us() const { return run_total_us_; }
+                               StatsCalculator::SortingMetric sorting_metric,
+                               int num_stats) const {
+    return stats_calculator_->GetStatsByMetric(title, sorting_metric,
+                                               num_stats);
+  }

  private:
-  struct Detail {
-    string name;
-    string type;
-    int64 run_order;
-    Stat<int64> start_us;
-    Stat<int64> rel_end_us;
-    Stat<int64> mem_used;
-    std::vector<TensorDescription> outputs;
-    int64 times_called;
-  };
-
-  void Validate(const Detail* detail, const NodeExecStats& ns) const;
-
-  void OrderNodesByMetric(SortingMetric sorting_metric,
-                          std::vector<const Detail*>* details) const;
-
-  std::string HeaderString(const string& title) const;
-  std::string ColumnString(const Detail& detail,
-                           const int64 cumulative_stat_on_node,
-                           const Stat<int64>& stat) const;
-
-  Stat<int64> run_total_us_;
-  Stat<int64> memory_;
-
-  std::map<std::string, Detail> details_;
-  StatSummarizerOptions options_;
+  void Validate(const std::vector<TensorDescription>* outputs,
+                const NodeExecStats& ns) const;
+
+  std::map<std::string, std::vector<TensorDescription> > outputs_;
+
+  std::unique_ptr<StatsCalculator> stats_calculator_;
 };

 }  // namespace tensorflow

-#endif  // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
+#endif  // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
tensorflow/core/util/stat_summarizer_options.h (new file, 43 lines)
@@ -0,0 +1,43 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
namespace tensorflow {
// Used to control the output of the statistics summarizer;
class StatSummarizerOptions {
 public:
  StatSummarizerOptions()
      : show_run_order(true),
        run_order_limit(0),
        show_time(true),
        time_limit(10),
        show_memory(true),
        memory_limit(10),
        show_type(true),
        show_summary(true) {}

  bool show_run_order;
  int run_order_limit;
  bool show_time;
  int time_limit;
  bool show_memory;
  int memory_limit;
  bool show_type;
  bool show_summary;
};
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
289
tensorflow/core/util/stats_calculator.cc
Normal file
289
tensorflow/core/util/stats_calculator.cc
Normal file
@ -0,0 +1,289 @@
|
|||||||
|
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "tensorflow/core/util/stats_calculator.h"
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
#include <map>
|
||||||
|
#include <queue>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "tensorflow/core/platform/logging.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
|
||||||
|
StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
|
||||||
|
: options_(options) {}
|
||||||
|
|
||||||
|
std::string StatsCalculator::GetShortSummary() const {
|
||||||
|
std::stringstream stream;
|
||||||
|
stream << "Timings (microseconds): ";
|
||||||
|
run_total_us_.OutputToStream(&stream);
|
||||||
|
stream << std::endl;
|
||||||
|
|
||||||
|
stream << "Memory (bytes): ";
|
||||||
|
memory_.OutputToStream(&stream);
|
||||||
|
stream << std::endl;
|
||||||
|
|
||||||
|
stream << details_.size() << " nodes observed" << std::endl;
|
||||||
|
return stream.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostream& InitField(std::ostream& stream, int width) {
|
||||||
|
stream << "\t" << std::right << std::setw(width) << std::fixed
|
||||||
|
<< std::setprecision(3);
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string StatsCalculator::HeaderString(const std::string& title) const {
|
||||||
|
std::stringstream stream;
|
||||||
|
|
||||||
|
stream << "============================== " << title
|
||||||
|
<< " ==============================" << std::endl;
|
||||||
|
|
||||||
|
InitField(stream, 24) << "[node type]";
|
||||||
|
InitField(stream, 9) << "[start]";
|
||||||
|
InitField(stream, 9) << "[first]";
|
||||||
|
InitField(stream, 9) << "[avg ms]";
|
||||||
|
InitField(stream, 8) << "[%]";
|
||||||
|
InitField(stream, 8) << "[cdf%]";
|
||||||
|
InitField(stream, 10) << "[mem KB]";
|
||||||
|
InitField(stream, 9) << "[times called]";
|
||||||
|
stream << "\t"
|
||||||
|
<< "[Name]";
|
||||||
|
return stream.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string StatsCalculator::ColumnString(const Detail& detail,
|
||||||
|
const int64_t cumulative_stat_on_node,
|
||||||
|
const Stat<int64_t>& stat) const {
|
||||||
|
const double start_ms = detail.start_us.avg() / 1000.0;
|
||||||
|
const double first_time_ms = detail.rel_end_us.first() / 1000.0;
|
||||||
|
const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
|
||||||
|
const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
|
||||||
|
const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
|
||||||
|
const int64_t times_called = detail.times_called / num_runs();
|
||||||
|
|
||||||
|
std::stringstream stream;
|
||||||
|
InitField(stream, 24) << detail.type;
|
||||||
|
InitField(stream, 9) << start_ms;
|
||||||
|
InitField(stream, 9) << first_time_ms;
|
||||||
|
InitField(stream, 9) << avg_time_ms;
|
||||||
|
InitField(stream, 7) << percentage << "%";
|
||||||
|
InitField(stream, 7) << cdf_percentage << "%";
|
||||||
|
InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
|
||||||
|
InitField(stream, 9) << times_called;
|
||||||
|
stream << "\t" << detail.name;
|
||||||
|
|
||||||
|
return stream.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void StatsCalculator::OrderNodesByMetric(
|
||||||
|
SortingMetric metric, std::vector<const Detail*>* details) const {
|
||||||
|
std::priority_queue<std::pair<string, const Detail*>> sorted_list;
|
||||||
|
const int num_nodes = details_.size();
|
||||||
|
|
||||||
|
for (const auto& det : details_) {
|
||||||
|
const Detail* detail = &(det.second);
|
||||||
|
std::stringstream stream;
|
||||||
|
stream << std::setw(20) << std::right << std::setprecision(10)
|
||||||
|
<< std::fixed;
|
||||||
|
|
||||||
|
switch (metric) {
|
||||||
|
case BY_NAME:
|
||||||
|
stream << detail->name;
|
||||||
|
break;
|
||||||
|
case BY_RUN_ORDER:
|
||||||
|
stream << num_nodes - detail->run_order;
|
||||||
|
break;
|
||||||
|
case BY_TIME:
|
||||||
|
stream << detail->rel_end_us.avg();
|
||||||
|
break;
|
||||||
|
case BY_MEMORY:
|
||||||
|
stream << detail->mem_used.avg();
|
||||||
|
break;
|
||||||
|
case BY_TYPE:
|
||||||
|
stream << detail->type;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
stream << "";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
sorted_list.emplace(stream.str(), detail);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!sorted_list.empty()) {
|
||||||
|
auto entry = sorted_list.top();
|
||||||
|
sorted_list.pop();
|
||||||
|
details->push_back(entry.second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void StatsCalculator::ComputeStatsByType(
|
||||||
|
std::map<std::string, int64_t>* node_type_map_count,
|
||||||
|
std::map<std::string, int64_t>* node_type_map_time,
|
||||||
|
std::map<std::string, int64_t>* node_type_map_memory,
|
||||||
|
std::map<std::string, int64_t>* node_type_map_times_called,
|
||||||
|
int64_t* accumulated_us) const {
|
||||||
|
int64_t run_count = run_total_us_.count();
|
||||||
|
|
||||||
|
for (const auto& det : details_) {
|
||||||
|
const string node_name = det.first;
|
||||||
|
const Detail& detail = det.second;
|
||||||
|
|
||||||
|
int64_t curr_time_val =
|
||||||
|
static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
|
||||||
|
*accumulated_us += curr_time_val;
|
||||||
|
|
||||||
|
int64_t curr_memory_val = detail.mem_used.newest();
|
||||||
|
|
||||||
|
const string& node_type = detail.type;
|
||||||
|
|
||||||
|
(*node_type_map_count)[node_type] += 1;
|
||||||
|
(*node_type_map_time)[node_type] += curr_time_val;
|
||||||
|
(*node_type_map_memory)[node_type] += curr_memory_val;
|
||||||
|
(*node_type_map_times_called)[node_type] += detail.times_called / run_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string StatsCalculator::GetStatsByNodeType() const {
|
||||||
|
std::stringstream stream;
|
||||||
|
|
||||||
|
stream << "============================== Summary by node type "
|
||||||
|
"=============================="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
LOG(INFO) << "Number of nodes executed: " << details_.size();
|
||||||
|
|
||||||
|
std::map<std::string, int64_t> node_type_map_count;
|
||||||
|
std::map<std::string, int64_t> node_type_map_time;
|
  std::map<std::string, int64_t> node_type_map_memory;
  std::map<std::string, int64_t> node_type_map_times_called;
  int64_t accumulated_us = 0;

  ComputeStatsByType(&node_type_map_count, &node_type_map_time,
                     &node_type_map_memory, &node_type_map_times_called,
                     &accumulated_us);

  // Sort them.
  std::priority_queue<std::pair<int64_t, std::pair<string, int64_t>>> timings;
  for (const auto& node_type : node_type_map_time) {
    const int64_t mem_used = node_type_map_memory[node_type.first];
    timings.emplace(node_type.second,
                    std::pair<string, int64_t>(node_type.first, mem_used));
  }

  InitField(stream, 24) << "[Node type]";
  InitField(stream, 9) << "[count]";
  InitField(stream, 10) << "[avg ms]";
  InitField(stream, 11) << "[avg %]";
  InitField(stream, 11) << "[cdf %]";
  InitField(stream, 10) << "[mem KB]";
  InitField(stream, 10) << "[times called]";
  stream << std::endl;

  float cdf = 0.0f;
  while (!timings.empty()) {
    auto entry = timings.top();
    timings.pop();

    const string node_type = entry.second.first;
    const float memory = entry.second.second / 1000.0f;

    const int64_t node_type_total_us = entry.first;
    const float time_per_run_ms = node_type_total_us / 1000.0f;

    const float percentage =
        ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
    cdf += percentage;

    InitField(stream, 24) << node_type;
    InitField(stream, 9) << node_type_map_count[node_type];
    InitField(stream, 10) << time_per_run_ms;
    InitField(stream, 10) << percentage << "%";
    InitField(stream, 10) << cdf << "%";
    InitField(stream, 10) << memory;
    InitField(stream, 9) << node_type_map_times_called[node_type];
    stream << std::endl;
  }
  stream << std::endl;
  return stream.str();
}

std::string StatsCalculator::GetStatsByMetric(const std::string& title,
                                              SortingMetric sorting_metric,
                                              int num_stats) const {
  std::vector<const Detail*> details;
  OrderNodesByMetric(sorting_metric, &details);

  double cumulative_stat_on_node = 0;

  std::stringstream stream;
  stream << HeaderString(title) << std::endl;
  int stat_num = 0;
  for (auto detail : details) {
    ++stat_num;
    if (num_stats > 0 && stat_num > num_stats) {
      break;
    }

    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
    cumulative_stat_on_node += detail->rel_end_us.sum();
    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
           << std::endl;
  }
  stream << std::endl;
  return stream.str();
}

std::string StatsCalculator::GetOutputString() const {
  std::stringstream stream;
  if (options_.show_run_order) {
    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
                               options_.run_order_limit);
  }
  if (options_.show_time) {
    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
                               options_.time_limit);
  }
  if (options_.show_memory) {
    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
                               options_.memory_limit);
  }
  if (options_.show_type) {
    stream << GetStatsByNodeType();
  }
  if (options_.show_summary) {
    stream << GetShortSummary() << std::endl;
  }
  return stream.str();
}

void StatsCalculator::PrintStepStats() const {
  string output = GetOutputString();
  std::istringstream iss(output);
  for (std::string line; std::getline(iss, line);) {
    LOG(INFO) << line;
  }
}

void StatsCalculator::UpdateDetails(
    const std::map<std::string, Detail>& details) {
  details_.insert(details.begin(), details.end());
}

}  // namespace tensorflow
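(Illustrative only, not part of this change.) Assuming a tensorflow::StatsCalculator that has already been populated with per-node Details, the reporting methods implemented above could be driven roughly as follows; `calc` and `PrintReports` are hypothetical names introduced for this sketch:

// Sketch: print the two tables produced by the functions defined above.
#include <iostream>

#include "tensorflow/core/util/stats_calculator.h"

void PrintReports(const tensorflow::StatsCalculator& calc) {
  // Ten slowest nodes, formatted by GetStatsByMetric().
  std::cout << calc.GetStatsByMetric("Top by Computation Time",
                                     tensorflow::StatsCalculator::BY_TIME, 10);
  // Aggregated per-op-type table from GetStatsByNodeType().
  std::cout << calc.GetStatsByNodeType();
}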
189
tensorflow/core/util/stats_calculator.h
Normal file
@ -0,0 +1,189 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_

#include <stdlib.h>

#include <cmath>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "tensorflow/core/util/stat_summarizer_options.h"

namespace tensorflow {

template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); }

  bool empty() const { return count_ == 0; }

  ValueType first() const { return first_; }

  ValueType newest() const { return newest_; }

  ValueType max() const { return max_; }

  ValueType min() const { return min_; }

  int64_t count() const { return count_; }

  ValueType sum() const { return sum_; }

  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  bool all_same() const { return (count_ == 0 || min_ == max_); }

  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  ValueType std_deviation() const {
    return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg());
  }

  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  friend std::ostream& operator<<(std::ostream& stream,
                                  const Stat<ValueType>& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  ValueType max_ = std::numeric_limits<ValueType>::min();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
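(Illustrative only, not part of the diff.) A minimal sketch of how the Stat template above behaves, assuming stats_calculator.h and <iostream> are included; the latency values are made up:

// Sketch: accumulate three made-up per-run latencies (in microseconds) and
// read back the aggregates maintained by UpdateStat().
tensorflow::Stat<int64_t> latency_us;
latency_us.UpdateStat(1200);
latency_us.UpdateStat(900);
latency_us.UpdateStat(1500);
// count() == 3, min() == 900, max() == 1500, avg() == 1200.0
// (avg() is computed in the double HighPrecisionValueType default).
std::cout << latency_us << std::endl;  // operator<< calls OutputToStream()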
// A StatsCalculator assists in performance analysis of Graph executions.
//
// It summarizes time spent executing (on GPU/CPU), memory used etc for
// graph execution.
//
// For example usage see StatsSummarizer.
class StatsCalculator {
 public:
  enum SortingMetric {
    BY_NAME,
    BY_RUN_ORDER,
    BY_TIME,
    BY_MEMORY,
    BY_TYPE,
  };

  explicit StatsCalculator(const StatSummarizerOptions& options);

  // Returns a string detailing the accumulated runtime stats in a tab-separated
  // format which can be pasted into a spreadsheet for further analysis.
  std::string GetOutputString() const;

  std::string GetShortSummary() const;

  // Prints the string returned by GetOutputString().
  void PrintStepStats() const;

  void ComputeStatsByType(
      std::map<std::string, int64_t>* node_type_map_count,
      std::map<std::string, int64_t>* node_type_map_time,
      std::map<std::string, int64_t>* node_type_map_memory,
      std::map<std::string, int64_t>* node_type_map_times_called,
      int64_t* accumulated_us) const;

  std::string GetStatsByNodeType() const;

  std::string GetStatsByMetric(const std::string& title,
                               SortingMetric sorting_metric,
                               int num_stats) const;

  // Returns number of runs.
  int num_runs() const { return static_cast<int>(run_total_us_.count()); }

  // Returns stats of total microseconds spent by all nodes in each run.
  const Stat<int64_t>& run_total_us() const { return run_total_us_; }

  void UpdateRunTotalUs(int64_t run_total_us) {
    run_total_us_.UpdateStat(run_total_us);
  }

  void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }

  struct Detail {
    std::string name;
    std::string type;
    int64_t run_order;
    Stat<int64_t> start_us;
    Stat<int64_t> rel_end_us;
    Stat<int64_t> mem_used;
    int64_t times_called;
  };

  const std::map<std::string, Detail>& GetDetails() const { return details_; }
  void UpdateDetails(const std::map<std::string, Detail>& details);

 private:
  void OrderNodesByMetric(SortingMetric sorting_metric,
                          std::vector<const Detail*>* details) const;

  std::string HeaderString(const std::string& title) const;
  std::string ColumnString(const Detail& detail,
                           const int64_t cumulative_stat_on_node,
                           const Stat<int64_t>& stat) const;

  Stat<int64_t> run_total_us_;
  Stat<int64_t> memory_;

  std::map<std::string, Detail> details_;
  StatSummarizerOptions options_;
};

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
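(Illustrative only, not part of the diff.) A client of this class might fill a StatsCalculator roughly as below before asking for a report; the node name, type, and timings are invented for the sketch:

// Sketch: record one run containing a single hypothetical node, then log
// the summary tables via PrintStepStats().
#include <map>
#include <string>

#include "tensorflow/core/util/stats_calculator.h"

void ExampleUsage() {
  tensorflow::StatsCalculator calc{tensorflow::StatSummarizerOptions()};

  std::map<std::string, tensorflow::StatsCalculator::Detail> details;
  tensorflow::StatsCalculator::Detail& conv = details["conv_1"];
  conv.name = "conv_1";
  conv.type = "CONV_2D";
  conv.run_order = 0;
  conv.start_us.UpdateStat(0);
  conv.rel_end_us.UpdateStat(1800);  // this node took 1.8 ms in the run
  conv.times_called = 1;

  calc.UpdateDetails(details);
  calc.UpdateRunTotalUs(1800);  // total microseconds for the run
  calc.PrintStepStats();        // logs GetOutputString() line by line
}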
@ -73,7 +73,7 @@ void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
  return ss;
}
}
%include "tensorflow/core/util/stat_summarizer_options.h"
%include "tensorflow/core/util/stat_summarizer.h"
%unignoreall
@ -667,12 +667,12 @@ int Main(int argc, char** argv) {
        output_prefix, benchmark_name, "meta-init-plus-first-inference", 1,
        initialization_time_s + (warmup_time_us / 1000000.0) / warmup_runs);

  std::map<string, int64> node_type_map_count;
  std::map<std::string, int64_t> node_type_map_count;
  std::map<string, int64> node_type_map_time;
  std::map<std::string, int64_t> node_type_map_time;
  std::map<string, int64> node_type_map_memory;
  std::map<std::string, int64_t> node_type_map_memory;
  std::map<string, int64> node_type_map_times_called;
  std::map<std::string, int64_t> node_type_map_times_called;

  int64 accumulated_us;
  int64_t accumulated_us;
  stats->ComputeStatsByType(&node_type_map_count, &node_type_map_time,
                            &node_type_map_memory,
                            &node_type_map_times_called, &accumulated_us);