Add init op and target node support to benchmark
PiperOrigin-RevId: 188335233
This commit is contained in:
parent
6a619489c6
commit
16a6666c1c
@ -48,33 +48,14 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace benchmark_model {
|
||||
|
||||
Status InitializeSession(int num_threads, const string& graph,
|
||||
std::unique_ptr<Session>* session,
|
||||
std::unique_ptr<GraphDef>* graph_def) {
|
||||
LOG(INFO) << "Loading TensorFlow.";
|
||||
namespace {
|
||||
|
||||
tensorflow::SessionOptions options;
|
||||
tensorflow::ConfigProto& config = options.config;
|
||||
if (num_threads > 0) {
|
||||
config.set_intra_op_parallelism_threads(num_threads);
|
||||
Status InitializeVariables(Session* session,
|
||||
const std::vector<string>& init_ops) {
|
||||
LOG(INFO) << "Initializing graph variables";
|
||||
for (const string& init_op : init_ops) {
|
||||
TF_RETURN_IF_ERROR(session->Run({}, {}, {init_op}, nullptr));
|
||||
}
|
||||
LOG(INFO) << "Got config, " << config.device_count_size() << " devices";
|
||||
|
||||
session->reset(tensorflow::NewSession(options));
|
||||
graph_def->reset(new GraphDef());
|
||||
tensorflow::GraphDef tensorflow_graph;
|
||||
Status s = ReadBinaryProto(Env::Default(), graph, graph_def->get());
|
||||
if (!s.ok()) {
|
||||
LOG(ERROR) << "Could not create TensorFlow Graph: " << s;
|
||||
return s;
|
||||
}
|
||||
|
||||
s = (*session)->Create(*(graph_def->get()));
|
||||
if (!s.ok()) {
|
||||
LOG(ERROR) << "Could not create TensorFlow Session: " << s;
|
||||
return s;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -247,8 +228,56 @@ void RecordBenchmarkEntry(const string& output_prefix,
|
||||
TF_QCHECK_OK(node_reporter.Close());
|
||||
}
|
||||
|
||||
void SleepSeconds(double sleep_seconds) {
|
||||
if (sleep_seconds <= 0.0) {
|
||||
return;
|
||||
}
|
||||
#ifdef PLATFORM_WINDOWS
|
||||
Sleep(sleep_seconds * 1000);
|
||||
#else
|
||||
// Convert the inference_delay string into a timespec.
|
||||
timespec req;
|
||||
req.tv_sec = static_cast<time_t>(sleep_seconds);
|
||||
req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000;
|
||||
nanosleep(&req, nullptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Status InitializeSession(int num_threads, const string& graph,
|
||||
std::unique_ptr<Session>* session,
|
||||
std::unique_ptr<GraphDef>* graph_def) {
|
||||
LOG(INFO) << "Loading TensorFlow.";
|
||||
|
||||
tensorflow::SessionOptions options;
|
||||
tensorflow::ConfigProto& config = options.config;
|
||||
if (num_threads > 0) {
|
||||
config.set_intra_op_parallelism_threads(num_threads);
|
||||
}
|
||||
LOG(INFO) << "Got config, " << config.device_count_size() << " devices";
|
||||
|
||||
session->reset(tensorflow::NewSession(options));
|
||||
graph_def->reset(new GraphDef());
|
||||
tensorflow::GraphDef tensorflow_graph;
|
||||
Status s = ReadBinaryProto(Env::Default(), graph, graph_def->get());
|
||||
if (!s.ok()) {
|
||||
LOG(ERROR) << "Could not create TensorFlow Graph: " << s;
|
||||
return s;
|
||||
}
|
||||
|
||||
s = (*session)->Create(*(graph_def->get()));
|
||||
if (!s.ok()) {
|
||||
LOG(ERROR) << "Could not create TensorFlow Session: " << s;
|
||||
return s;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
|
||||
const std::vector<string>& outputs, Session* session,
|
||||
const std::vector<string>& outputs,
|
||||
const std::vector<string>& targets, Session* session,
|
||||
StatSummarizer* stats, int64* inference_time_us) {
|
||||
std::vector<std::pair<string, tensorflow::Tensor> > input_tensors;
|
||||
CreateTensorsFromInputInfo(inputs, &input_tensors);
|
||||
@ -264,8 +293,8 @@ Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
|
||||
|
||||
RunMetadata run_metadata;
|
||||
const int64 start_time = Env::Default()->NowMicros();
|
||||
s = session->Run(run_options, input_tensors, outputs, {}, &output_tensors,
|
||||
&run_metadata);
|
||||
s = session->Run(run_options, input_tensors, outputs, targets,
|
||||
&output_tensors, &run_metadata);
|
||||
const int64 end_time = Env::Default()->NowMicros();
|
||||
*inference_time_us = end_time - start_time;
|
||||
|
||||
@ -283,24 +312,10 @@ Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
|
||||
return s;
|
||||
}
|
||||
|
||||
void SleepSeconds(double sleep_seconds) {
|
||||
if (sleep_seconds <= 0.0) {
|
||||
return;
|
||||
}
|
||||
#ifdef PLATFORM_WINDOWS
|
||||
Sleep(sleep_seconds * 1000);
|
||||
#else
|
||||
// Convert the inference_delay string into a timespec.
|
||||
timespec req;
|
||||
req.tv_sec = static_cast<time_t>(sleep_seconds);
|
||||
req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000;
|
||||
nanosleep(&req, nullptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
|
||||
const std::vector<InputLayerInfo>& inputs,
|
||||
const std::vector<string>& outputs, Session* session,
|
||||
const std::vector<string>& outputs,
|
||||
const std::vector<string>& targets, Session* session,
|
||||
StatSummarizer* stats, int64* total_time_us,
|
||||
int64* actual_num_runs) {
|
||||
*total_time_us = 0;
|
||||
@ -315,7 +330,8 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
|
||||
const bool until_max_time = num_runs <= 0;
|
||||
for (int i = 0; until_max_time || i < num_runs; ++i) {
|
||||
int64 time;
|
||||
Status run_status = RunBenchmark(inputs, outputs, session, stats, &time);
|
||||
Status run_status =
|
||||
RunBenchmark(inputs, outputs, targets, session, stats, &time);
|
||||
stat.UpdateStat(time);
|
||||
(*total_time_us) += time;
|
||||
++(*actual_num_runs);
|
||||
@ -345,11 +361,13 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
|
||||
|
||||
int Main(int argc, char** argv) {
|
||||
string graph = "/data/local/tmp/tensorflow_inception_graph.pb";
|
||||
string init_ops_string = "";
|
||||
string input_layer_string = "input:0";
|
||||
string input_layer_shape_string = "1,224,224,3";
|
||||
string input_layer_type_string = "float";
|
||||
string input_layer_values_string = "";
|
||||
string output_layer_string = "output:0";
|
||||
string target_layer_string = "";
|
||||
int max_num_runs = 1000;
|
||||
string max_time = "10.0";
|
||||
string inference_delay = "-1.0";
|
||||
@ -371,12 +389,14 @@ int Main(int argc, char** argv) {
|
||||
|
||||
std::vector<Flag> flag_list = {
|
||||
Flag("graph", &graph, "graph file name"),
|
||||
Flag("init_ops", &init_ops_string, "init ops"),
|
||||
Flag("input_layer", &input_layer_string, "input layer names"),
|
||||
Flag("input_layer_shape", &input_layer_shape_string, "input layer shape"),
|
||||
Flag("input_layer_type", &input_layer_type_string, "input layer type"),
|
||||
Flag("input_layer_values", &input_layer_values_string,
|
||||
"values to initialize the inputs with"),
|
||||
Flag("output_layer", &output_layer_string, "output layer name"),
|
||||
Flag("target_layer", &target_layer_string, "target layer name"),
|
||||
Flag("max_num_runs", &max_num_runs, "number of runs max"),
|
||||
Flag("max_time", &max_time, "length to run max"),
|
||||
Flag("inference_delay", &inference_delay,
|
||||
@ -410,6 +430,7 @@ int Main(int argc, char** argv) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<string> init_ops = str_util::Split(init_ops_string, ',');
|
||||
std::vector<string> input_layers = str_util::Split(input_layer_string, ',');
|
||||
std::vector<string> input_layer_shapes =
|
||||
str_util::Split(input_layer_shape_string, ':');
|
||||
@ -418,6 +439,7 @@ int Main(int argc, char** argv) {
|
||||
std::vector<string> input_layer_values =
|
||||
str_util::Split(input_layer_values_string, ':');
|
||||
std::vector<string> output_layers = str_util::Split(output_layer_string, ',');
|
||||
std::vector<string> target_layers = str_util::Split(target_layer_string, ',');
|
||||
if ((input_layers.size() != input_layer_shapes.size()) ||
|
||||
(input_layers.size() != input_layer_types.size())) {
|
||||
LOG(ERROR) << "There must be the same number of items in --input_layer,"
|
||||
@ -441,10 +463,12 @@ int Main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
LOG(INFO) << "Graph: [" << graph << "]";
|
||||
LOG(INFO) << "Init ops:" << init_ops_string;
|
||||
LOG(INFO) << "Input layers: [" << input_layer_string << "]";
|
||||
LOG(INFO) << "Input shapes: [" << input_layer_shape_string << "]";
|
||||
LOG(INFO) << "Input types: [" << input_layer_type_string << "]";
|
||||
LOG(INFO) << "Output layers: [" << output_layer_string << "]";
|
||||
LOG(INFO) << "Target layers: [" << target_layer_string << "]";
|
||||
LOG(INFO) << "Num runs: [" << max_num_runs << "]";
|
||||
LOG(INFO) << "Inter-inference delay (seconds): [" << inference_delay << "]";
|
||||
LOG(INFO) << "Inter-benchmark delay (seconds): [" << inter_benchmark_delay
|
||||
@ -470,6 +494,16 @@ int Main(int argc, char** argv) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!init_ops.empty()) {
|
||||
Status initialize_variables_status =
|
||||
InitializeVariables(session.get(), init_ops);
|
||||
if (!initialize_variables_status.ok()) {
|
||||
LOG(ERROR) << "Graph variables initialization failed with "
|
||||
<< initialize_variables_status;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
StatSummarizerOptions stats_options;
|
||||
stats_options.show_run_order = show_run_order;
|
||||
stats_options.run_order_limit = run_order_limit;
|
||||
@ -520,9 +554,10 @@ int Main(int argc, char** argv) {
|
||||
int64 warmup_time_us = 0;
|
||||
int64 num_warmup_runs = 0;
|
||||
if (warmup_runs > 0) {
|
||||
Status warmup_time_status = TimeMultipleRuns(
|
||||
inter_inference_sleep_seconds, warmup_runs, -1.0, inputs, output_layers,
|
||||
session.get(), nullptr, &warmup_time_us, &num_warmup_runs);
|
||||
Status warmup_time_status =
|
||||
TimeMultipleRuns(inter_inference_sleep_seconds, warmup_runs, -1.0,
|
||||
inputs, output_layers, target_layers, session.get(),
|
||||
nullptr, &warmup_time_us, &num_warmup_runs);
|
||||
if (!warmup_time_status.ok()) {
|
||||
LOG(ERROR) << "Timing failed with " << warmup_time_status;
|
||||
return -1;
|
||||
@ -536,8 +571,8 @@ int Main(int argc, char** argv) {
|
||||
int64 no_stat_num_runs = 0;
|
||||
Status no_stat_time_status = TimeMultipleRuns(
|
||||
inter_inference_sleep_seconds, max_num_runs, max_benchmark_time_seconds,
|
||||
inputs, output_layers, session.get(), nullptr, &no_stat_time_us,
|
||||
&no_stat_num_runs);
|
||||
inputs, output_layers, target_layers, session.get(), nullptr,
|
||||
&no_stat_time_us, &no_stat_num_runs);
|
||||
const double no_stat_wall_time = no_stat_time_us / 1000000.0;
|
||||
if (!no_stat_time_status.ok()) {
|
||||
LOG(ERROR) << "Timing failed with " << no_stat_time_status;
|
||||
@ -551,8 +586,8 @@ int Main(int argc, char** argv) {
|
||||
int64 stat_num_runs = 0;
|
||||
Status stat_time_status = TimeMultipleRuns(
|
||||
inter_inference_sleep_seconds, max_num_runs, max_benchmark_time_seconds,
|
||||
inputs, output_layers, session.get(), stats.get(), &stat_time_us,
|
||||
&stat_num_runs);
|
||||
inputs, output_layers, target_layers, session.get(), stats.get(),
|
||||
&stat_time_us, &stat_num_runs);
|
||||
if (!stat_time_status.ok()) {
|
||||
LOG(ERROR) << "Timing failed with " << stat_time_status;
|
||||
return -1;
|
||||
|
@ -37,13 +37,15 @@ Status InitializeSession(int num_threads, const string& graph,
|
||||
|
||||
// Does a single run of the model that's been loaded into the given session.
|
||||
Status RunBenchmark(const std::vector<InputLayerInfo>& inputs,
|
||||
const std::vector<string>& outputs, Session* session,
|
||||
const std::vector<string>& outputs,
|
||||
const std::vector<string>& targets, Session* session,
|
||||
StatSummarizer* stats, int64* inference_time_us);
|
||||
|
||||
// Runs the model multiple time, keeping track of timing information.
|
||||
Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s,
|
||||
const std::vector<InputLayerInfo>& inputs,
|
||||
const std::vector<string>& outputs, Session* session,
|
||||
const std::vector<string>& outputs,
|
||||
const std::vector<string>& targets, Session* session,
|
||||
StatSummarizer* stats, int64* total_time_us,
|
||||
int64* actual_num_runs);
|
||||
|
||||
|
@ -64,8 +64,8 @@ TEST(BenchmarkModelTest, InitializeAndRun) {
|
||||
int64 time;
|
||||
int64 num_runs = 0;
|
||||
TF_ASSERT_OK(benchmark_model::TimeMultipleRuns(
|
||||
0.0, 10, 0.0, {input}, {output_name}, session.get(), stats.get(), &time,
|
||||
&num_runs));
|
||||
0.0, 10, 0.0, {input}, {output_name}, {}, session.get(), stats.get(),
|
||||
&time, &num_runs));
|
||||
ASSERT_EQ(num_runs, 10);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user