Add init op and target node support to benchmark

PiperOrigin-RevId: 188335233
This commit is contained in:
A. Unique TensorFlower 2018-03-08 08:23:31 -08:00 committed by TensorFlower Gardener
parent 6a619489c6
commit 16a6666c1c
3 changed files with 93 additions and 56 deletions

View File

@ -48,33 +48,14 @@ limitations under the License.
namespace tensorflow {
namespace benchmark_model {
Status InitializeSession(int num_threads, const string& graph,
std::unique_ptr<Session>* session,
std::unique_ptr<GraphDef>* graph_def) {
LOG(INFO) << "Loading TensorFlow.";
namespace {
tensorflow::SessionOptions options;
tensorflow::ConfigProto& config = options.config;
if (num_threads > 0) {
config.set_intra_op_parallelism_threads(num_threads);
Status InitializeVariables(Session* session,
const std::vector<string>& init_ops) {
LOG(INFO) << "Initializing graph variables";
for (const string& init_op : init_ops) {
TF_RETURN_IF_ERROR(session->Run({}, {}, {init_op}, nullptr));
}
LOG(INFO) << "Got config, " << config.device_count_size() << " devices";
session->reset(tensorflow::NewSession(options));
graph_def->reset(new GraphDef());
tensorflow::GraphDef tensorflow_graph;
Status s = ReadBinaryProto(Env::Default(), graph, graph_def->get());
if (!s.ok()) {
LOG(ERROR) << "Could not create TensorFlow Graph: " << s;
return s;
}
s = (*session)->Create(*(graph_def->get()));
if (!s.ok()) {
LOG(ERROR) << "Could not create TensorFlow Session: " << s;
return s;
}
return Status::OK();
}
@ -247,8 +228,56 @@ void RecordBenchmarkEntry(const string& output_prefix,
TF_QCHECK_OK(node_reporter.Close());
}
// Blocks the calling thread for `sleep_seconds` seconds. Fractional values
// are honored on POSIX via nanosleep; on Windows the value is rounded to
// milliseconds for Sleep(). Non-positive values return immediately.
void SleepSeconds(double sleep_seconds) {
if (sleep_seconds <= 0.0) {
return;
}
#ifdef PLATFORM_WINDOWS
Sleep(sleep_seconds * 1000);
#else
// Split the double into the whole-second and nanosecond fields that
// nanosleep's timespec expects.
timespec req;
req.tv_sec = static_cast<time_t>(sleep_seconds);
req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000;
nanosleep(&req, nullptr);
#endif
}
} // namespace
// Creates a new TensorFlow session and loads the model into it.
//
// Args:
//   num_threads: intra-op parallelism thread count; values <= 0 keep the
//     runtime default.
//   graph: path to a binary-serialized GraphDef file.
//   session: output; on success holds the newly created session.
//   graph_def: output; on success holds the GraphDef read from `graph`.
//
// Returns a non-OK status (already logged) if the graph file cannot be read
// or the session cannot be created from it.
Status InitializeSession(int num_threads, const string& graph,
                         std::unique_ptr<Session>* session,
                         std::unique_ptr<GraphDef>* graph_def) {
  LOG(INFO) << "Loading TensorFlow.";
  tensorflow::SessionOptions options;
  tensorflow::ConfigProto& config = options.config;
  if (num_threads > 0) {
    config.set_intra_op_parallelism_threads(num_threads);
  }
  LOG(INFO) << "Got config, " << config.device_count_size() << " devices";
  session->reset(tensorflow::NewSession(options));
  graph_def->reset(new GraphDef());
  // NOTE: the unused local `tensorflow::GraphDef tensorflow_graph;` that used
  // to live here was dead code and has been removed; the proto is read
  // directly into *graph_def.
  Status s = ReadBinaryProto(Env::Default(), graph, graph_def->get());
  if (!s.ok()) {
    LOG(ERROR) << "Could not create TensorFlow Graph: " << s;
    return s;
  }
  s = (*session)->Create(*(graph_def->get()));
  if (!s.ok()) {
    LOG(ERROR) << "Could not create TensorFlow Session: " << s;
    return s;
  }
  return Status::OK();
}
Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
const std::vector<string>& outputs, Session* session,
const std::vector<string>& outputs,
const std::vector<string>& targets, Session* session,
StatSummarizer* stats, int64* inference_time_us) {
std::vector<std::pair<string, tensorflow::Tensor> > input_tensors;
CreateTensorsFromInputInfo(inputs, &input_tensors);
@ -264,8 +293,8 @@ Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
RunMetadata run_metadata;
const int64 start_time = Env::Default()->NowMicros();
s = session->Run(run_options, input_tensors, outputs, {}, &output_tensors,
&run_metadata);
s = session->Run(run_options, input_tensors, outputs, targets,
&output_tensors, &run_metadata);
const int64 end_time = Env::Default()->NowMicros();
*inference_time_us = end_time - start_time;
@ -283,24 +312,10 @@ Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
return s;
}
// Pauses the current thread for the given (possibly fractional) number of
// seconds. A non-positive duration is a no-op.
void SleepSeconds(double sleep_seconds) {
  if (sleep_seconds <= 0.0) {
    return;
  }
#ifdef PLATFORM_WINDOWS
  Sleep(sleep_seconds * 1000);
#else
  // nanosleep() takes a timespec, so break the duration into whole seconds
  // plus the leftover expressed in nanoseconds.
  timespec duration;
  duration.tv_sec = static_cast<time_t>(sleep_seconds);
  duration.tv_nsec = (sleep_seconds - duration.tv_sec) * 1000000000;
  nanosleep(&duration, nullptr);
#endif
}
Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
const std::vector<InputLayerInfo>& inputs,
const std::vector<string>& outputs, Session* session,
const std::vector<string>& outputs,
const std::vector<string>& targets, Session* session,
StatSummarizer* stats, int64* total_time_us,
int64* actual_num_runs) {
*total_time_us = 0;
@ -315,7 +330,8 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
const bool until_max_time = num_runs <= 0;
for (int i = 0; until_max_time || i < num_runs; ++i) {
int64 time;
Status run_status = RunBenchmark(inputs, outputs, session, stats, &time);
Status run_status =
RunBenchmark(inputs, outputs, targets, session, stats, &time);
stat.UpdateStat(time);
(*total_time_us) += time;
++(*actual_num_runs);
@ -345,11 +361,13 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
int Main(int argc, char** argv) {
string graph = "/data/local/tmp/tensorflow_inception_graph.pb";
string init_ops_string = "";
string input_layer_string = "input:0";
string input_layer_shape_string = "1,224,224,3";
string input_layer_type_string = "float";
string input_layer_values_string = "";
string output_layer_string = "output:0";
string target_layer_string = "";
int max_num_runs = 1000;
string max_time = "10.0";
string inference_delay = "-1.0";
@ -371,12 +389,14 @@ int Main(int argc, char** argv) {
std::vector<Flag> flag_list = {
Flag("graph", &graph, "graph file name"),
Flag("init_ops", &init_ops_string, "init ops"),
Flag("input_layer", &input_layer_string, "input layer names"),
Flag("input_layer_shape", &input_layer_shape_string, "input layer shape"),
Flag("input_layer_type", &input_layer_type_string, "input layer type"),
Flag("input_layer_values", &input_layer_values_string,
"values to initialize the inputs with"),
Flag("output_layer", &output_layer_string, "output layer name"),
Flag("target_layer", &target_layer_string, "target layer name"),
Flag("max_num_runs", &max_num_runs, "number of runs max"),
Flag("max_time", &max_time, "length to run max"),
Flag("inference_delay", &inference_delay,
@ -410,6 +430,7 @@ int Main(int argc, char** argv) {
return -1;
}
std::vector<string> init_ops = str_util::Split(init_ops_string, ',');
std::vector<string> input_layers = str_util::Split(input_layer_string, ',');
std::vector<string> input_layer_shapes =
str_util::Split(input_layer_shape_string, ':');
@ -418,6 +439,7 @@ int Main(int argc, char** argv) {
std::vector<string> input_layer_values =
str_util::Split(input_layer_values_string, ':');
std::vector<string> output_layers = str_util::Split(output_layer_string, ',');
std::vector<string> target_layers = str_util::Split(target_layer_string, ',');
if ((input_layers.size() != input_layer_shapes.size()) ||
(input_layers.size() != input_layer_types.size())) {
LOG(ERROR) << "There must be the same number of items in --input_layer,"
@ -441,10 +463,12 @@ int Main(int argc, char** argv) {
}
LOG(INFO) << "Graph: [" << graph << "]";
LOG(INFO) << "Init ops:" << init_ops_string;
LOG(INFO) << "Input layers: [" << input_layer_string << "]";
LOG(INFO) << "Input shapes: [" << input_layer_shape_string << "]";
LOG(INFO) << "Input types: [" << input_layer_type_string << "]";
LOG(INFO) << "Output layers: [" << output_layer_string << "]";
LOG(INFO) << "Target layers: [" << target_layer_string << "]";
LOG(INFO) << "Num runs: [" << max_num_runs << "]";
LOG(INFO) << "Inter-inference delay (seconds): [" << inference_delay << "]";
LOG(INFO) << "Inter-benchmark delay (seconds): [" << inter_benchmark_delay
@ -470,6 +494,16 @@ int Main(int argc, char** argv) {
return -1;
}
if (!init_ops.empty()) {
Status initialize_variables_status =
InitializeVariables(session.get(), init_ops);
if (!initialize_variables_status.ok()) {
LOG(ERROR) << "Graph variables initialization failed with "
<< initialize_variables_status;
return -1;
}
}
StatSummarizerOptions stats_options;
stats_options.show_run_order = show_run_order;
stats_options.run_order_limit = run_order_limit;
@ -520,9 +554,10 @@ int Main(int argc, char** argv) {
int64 warmup_time_us = 0;
int64 num_warmup_runs = 0;
if (warmup_runs > 0) {
Status warmup_time_status = TimeMultipleRuns(
inter_inference_sleep_seconds, warmup_runs, -1.0, inputs, output_layers,
session.get(), nullptr, &warmup_time_us, &num_warmup_runs);
Status warmup_time_status =
TimeMultipleRuns(inter_inference_sleep_seconds, warmup_runs, -1.0,
inputs, output_layers, target_layers, session.get(),
nullptr, &warmup_time_us, &num_warmup_runs);
if (!warmup_time_status.ok()) {
LOG(ERROR) << "Timing failed with " << warmup_time_status;
return -1;
@ -536,8 +571,8 @@ int Main(int argc, char** argv) {
int64 no_stat_num_runs = 0;
Status no_stat_time_status = TimeMultipleRuns(
inter_inference_sleep_seconds, max_num_runs, max_benchmark_time_seconds,
inputs, output_layers, session.get(), nullptr, &no_stat_time_us,
&no_stat_num_runs);
inputs, output_layers, target_layers, session.get(), nullptr,
&no_stat_time_us, &no_stat_num_runs);
const double no_stat_wall_time = no_stat_time_us / 1000000.0;
if (!no_stat_time_status.ok()) {
LOG(ERROR) << "Timing failed with " << no_stat_time_status;
@ -551,8 +586,8 @@ int Main(int argc, char** argv) {
int64 stat_num_runs = 0;
Status stat_time_status = TimeMultipleRuns(
inter_inference_sleep_seconds, max_num_runs, max_benchmark_time_seconds,
inputs, output_layers, session.get(), stats.get(), &stat_time_us,
&stat_num_runs);
inputs, output_layers, target_layers, session.get(), stats.get(),
&stat_time_us, &stat_num_runs);
if (!stat_time_status.ok()) {
LOG(ERROR) << "Timing failed with " << stat_time_status;
return -1;

View File

@ -37,13 +37,15 @@ Status InitializeSession(int num_threads, const string& graph,
// Does a single run of the model that's been loaded into the given session.
Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
const std::vector<string>& outputs, Session* session,
const std::vector<string>& outputs,
const std::vector<string>& targets, Session* session,
StatSummarizer* stats, int64* inference_time_us);
// Runs the model multiple time, keeping track of timing information.
Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
const std::vector<InputLayerInfo>& inputs,
const std::vector<string>& outputs, Session* session,
const std::vector<string>& outputs,
const std::vector<string>& targets, Session* session,
StatSummarizer* stats, int64* total_time_us,
int64* actual_num_runs);

View File

@ -64,8 +64,8 @@ TEST(BenchmarkModelTest, InitializeAndRun) {
int64 time;
int64 num_runs = 0;
TF_ASSERT_OK(benchmark_model::TimeMultipleRuns(
0.0, 10, 0.0, {input}, {output_name}, session.get(), stats.get(), &time,
&num_runs));
0.0, 10, 0.0, {input}, {output_name}, {}, session.get(), stats.get(),
&time, &num_runs));
ASSERT_EQ(num_runs, 10);
}