Improve tfprof doc. Support binary GraphDef.

Change: 138711410
This commit is contained in:
A. Unique TensorFlower 2016-11-09 18:16:12 -08:00 committed by TensorFlower Gardener
parent e01b641fc2
commit f5bd8e1c34
9 changed files with 182 additions and 95 deletions

View File

@ -1,17 +1,11 @@
# tfprof: A Profiling Tool for TensorFlow Models # tfprof: A Profiling Tool for TensorFlow Models
Internal User Please Use: go/tfprof # Full Document in tensorflow/tools/tfprof/README.md
Author: Xin Pan (xpan@google.com, github: panyx0718) Author: Xin Pan (xpan@google.com, github: panyx0718)
Consultants: Jon Shlens, Pete Warden Consultants: Jon Shlens, Pete Warden
## Introduction
tfprof is a profiling tool for TensorFlow that analyzes model architectures
and measures system performance.
### Major Features ### Major Features
1. Measure model parameters, float operations, tensor shapes. 1. Measure model parameters, float operations, tensor shapes.
@ -20,9 +14,63 @@ and measures system performance.
4. Explore model based on name scope or graph structure. 4. Explore model based on name scope or graph structure.
5. Selectively grouping/filtering/accounting/ordering ops. 5. Selectively grouping/filtering/accounting/ordering ops.
tfprof can be used as CommandLine Interface (CLI) and Python API. tfprof can be used as Python API, Interactive CLI and One-shot Script.
CLI locates in tensorflow/tools/tfprof.
Python API locates in tensorflow/contrib/tfprof.
Tutorial locates in tensorflow/tools/tfprof/README.md
Enjoy! ## Python API Tutorials
tfprof is part of TensorFlow core. Simply ```import tensorflow as tf```.
### Examine the shapes and sizes of all trainable Variables.
```python
# Print trainable variable parameter statistics to stdout.
param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
tfprof_options=tf.contrib.tfprof.model_analyzer.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
# param_stats is tensorflow.tfprof.TFProfNode proto. It organizes the statistics
# of each graph node in tree structure. Let's print the root below.
sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
```
### Examine the number of floating point operations
``` python
# Print to stdout an analysis of the number of floating point operations in the
# model broken down by individual operations.
#
# Note: Only Ops with RegisterStatistics('flops') defined have flop stats. It
# also requires complete shape information. It is common that shape is unknown
# statically. To complete the shape, provide run-time shape information with
# tf.RunMetadata to the API (See next example on how to provide RunMetadata).
tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
```
### Examine the timing and memory usage
You will first need to run the following set up in your model in order to
compute the memory and timing statistics.
```python
# Generate the meta information for the model that contains the memory usage
# and timing information.
run_metadata = tf.RunMetadata()
with tf.Session() as sess:
_ = sess.run(train_op,
options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
run_metadata=run_metadata)
```
Finally, you may run `print_model_analysis` to explore the timing and memory
demands of the model.
``` python
# Print to stdout an analysis of the memory usage and the timing information
# from running the graph broken down by operations.
tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
run_meta=run_metadata,
tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)
```
Users can change ```tfprof_options``` to fully leverage tfprof's power.

View File

@ -71,6 +71,7 @@ def _get_logged_ops(graph, run_meta=None):
if run_meta: if run_meta:
graph = _fill_missing_graph_shape(graph, run_meta) graph = _fill_missing_graph_shape(graph, run_meta)
op_missing_shape = 0
logged_ops = {} logged_ops = {}
graph_def = graph.as_graph_def() graph_def = graph.as_graph_def()
for node in graph_def.node: for node in graph_def.node:
@ -78,6 +79,7 @@ def _get_logged_ops(graph, run_meta=None):
stats = ops.get_stats_for_node_def(graph, node, REGISTERED_FLOP_STATS) stats = ops.get_stats_for_node_def(graph, node, REGISTERED_FLOP_STATS)
except ValueError: except ValueError:
# Catch Exception When shape is incomplete. Skip it. # Catch Exception When shape is incomplete. Skip it.
op_missing_shape += 1
stats = None stats = None
if not stats or not stats.value: if not stats or not stats.value:
@ -96,6 +98,11 @@ def _get_logged_ops(graph, run_meta=None):
logged_ops[entry.name] = entry logged_ops[entry.name] = entry
else: else:
logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES) logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)
if op_missing_shape > 0 and not run_meta:
sys.stderr.write(
'%d ops no flops stats due to incomplete shapes. '
'Consider passing run_meta to use run_time shapes.\n' %
op_missing_shape)
return logged_ops return logged_ops

View File

@ -1,17 +1,10 @@
# tfprof: A Profiling Tool for TensorFlow Models # tfprof: A Profiling Tool for TensorFlow Models
Internal User Please Use: go/tfprof
Author: Xin Pan (xpan@google.com, github: panyx0718) Author: Xin Pan (xpan@google.com, github: panyx0718)
Consultants: Jon Shlens, Pete Warden Consultants: Jon Shlens, Pete Warden
## Introduction
tfprof is a profiling tool for TensorFlow that analyzes model architectures
and measures system performance.
### Major Features ### Major Features
1. Measure model parameters, float operations, tensor shapes. 1. Measure model parameters, float operations, tensor shapes.
@ -20,17 +13,83 @@ and measures system performance.
4. Explore model based on name scope or graph structure. 4. Explore model based on name scope or graph structure.
5. Selectively grouping/filtering/accounting/ordering ops. 5. Selectively grouping/filtering/accounting/ordering ops.
### Interfaces [Python API Tutorials](#python-api-tutorials): It can be called directly from
Python codes. Results are either printed
to stdout or dumped to file. tensorflow.tfprof.TFProfNode proto is returned from
the API to allow users to perform further analysis.
[CLI Tutorials](#cli-tutorials): [CLI Tutorials](#cli-tutorials):
It supports interactive mode for exploration and single-shot mode for It supports interactive mode for exploration and single-shot mode for
scripts. Outputs can be dumped to files or printed in terminal. scripts. Outputs can be dumped to files or printed in terminal.
Python API Tutorials: Python API is not released yet. [Options](#options):
tfprof supports many options to selectively account/display/order ops and
statistics.
## Python API Tutorials
tfprof is part of TensorFlow core. Simply ```import tensorflow as tf```.
### Examine the shapes and sizes of all trainable Variables.
```python
# Print trainable variable parameter statistics to stdout.
param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
tfprof_options=tf.contrib.tfprof.model_analyzer.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
# param_stats is tensorflow.tfprof.TFProfNode proto. It organizes the statistics
# of each graph node in tree structure. Let's print the root below.
sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
```
### Examine the number of floating point operations
``` python
# Print to stdout an analysis of the number of floating point operations in the
# model broken down by individual operations.
#
# Note: Only Ops with RegisterStatistics('flops') defined have flop stats. It
# also requires complete shape information. It is common that shape is unknown
# statically. To complete the shape, provide run-time shape information with
# tf.RunMetadata to the API (See next example on how to provide RunMetadata).
tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
```
### Examine the timing and memory usage
You will first need to run the following set up in your model in order to
compute the memory and timing statistics.
```python
# Generate the meta information for the model that contains the memory usage
# and timing information.
run_metadata = tf.RunMetadata()
with tf.Session() as sess:
_ = sess.run(train_op,
options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
run_metadata=run_metadata)
```
Finally, you may run `print_model_analysis` to explore the timing and memory
demands of the model.
``` python
# Print to stdout an analysis of the memory usage and the timing information
# from running the graph broken down by operations.
tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
run_meta=run_metadata,
tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)
```
Users can change ```tfprof_options``` to fully leverage tfprof's power.
## CLI Tutorials ## CLI Tutorials
Tutorials are based on a 32-layer ResNet. Tutorials below are based on a 32-layer ResNet.
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download. TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
### Examples ### Examples
@ -44,6 +103,12 @@ bazel build -c opt tensorflow/tools/tfprof/...
# Help information, including detail 'option' instructions. # Help information, including detail 'option' instructions.
bazel-bin/tensorflow/tools/tfprof/tfprof help bazel-bin/tensorflow/tools/tfprof/tfprof help
# #
# The following command start tfprof in one-shot mode.
#
bazel-bin/tensorflow/tools/tfprof/tfprof scope \
--graph_path=graph.pbtxt \
--max_depth=3
#
# The following commands will start tfprof interactive mode. # The following commands will start tfprof interactive mode.
# #
# Profile model shapes and parameters only. # Profile model shapes and parameters only.
@ -168,7 +233,8 @@ _TFProfRoot (0us/2.29sec)
Note: float operations calculation depends on Note: float operations calculation depends on
1) op.RegisterStatistics. If an op doesn't 1) op.RegisterStatistics. If an op doesn't
have RegisterStatistics defined, its float operations cannot be counted. have RegisterStatistics defined, its float operations cannot be counted.
2) fully defined shape is also necessary in order to calculate flops. 2) fully defined shape is also necessary in order to calculate flops. Sometimes
full shape is not available statically. Use RunMetadata to get run-time shape.
float operations number is provided by tensorflow::tfprof::OpLog logged from float operations number is provided by tensorflow::tfprof::OpLog logged from
Python API. Python API.
@ -276,6 +342,10 @@ Second, call write_op_log to write the OpLog proto.
```python ```python
tf.contrib.tfprof.tfprof_logger.write_op_log( tf.contrib.tfprof.tfprof_logger.write_op_log(
sess.graph, /tmp/my_op_log_dir, op_log) sess.graph, /tmp/my_op_log_dir, op_log)
# Get run-time shape information in order to fill shapes and get flops.
tf.contrib.tfprof.tfprof_logger.write_op_log(
sess.graph, /tmp/my_op_log_dir, op_log, run_meta)
``` ```
Third, when starting the tfprof tool, specify Third, when starting the tfprof tool, specify
@ -372,84 +442,43 @@ TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
provides checkpointed tensors' values. provides checkpointed tensors' values.
## Design ##Options
`-max_depth`: Show ops that are at most this number of hops from starting op in the tree/graph structure.
### In-memory representation `-min_bytes`: Show ops that request at least this number of bytes.
<b>Scope:</b> This representation organizes ops based on name scope hierarchy, `-min_micros`: Show ops that spend at least this number of microseconds to run.
similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
For example op1 with name “name1/name2” is a child of op2 with name “name1”.
<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is `-min_params`: Show ops that contains at least this number of parameters.
a graph structure. The graph is a “directed acyclic graph” (hopefully), with
direction from “output to input”. The direction is designed this way so that users
can trace from “result” to its “sources”.
### Command line options `-min_float_ops`: Show ops that contain at least this number of float operations. Only available if an op has op.RegisterStatistics() defined and OpLog is provided
tfprof's major goals are to measure system performance and quickly analyze `-device_regexes`: Show ops that are placed on the specified devices. regexes are comma-separated.
model architectures. Hence, its commands and options should allow users to achieve
these 2 goals easily.
<b>graph:</b> It is expected that users will mostly use graph representation to `-order_by`: Order the results by [name|depth|bytes|micros|params|float_ops]
debug system performance. Hence, tfprof supports graph command, which pulls the
graph in-memory representation described above.
<b>scope:</b> It is expected that some users might want to explore their model `-account_type_regexes`: Account and display the ops whose types match one of the type regexes specified. tfprof allows users to define extra op types for ops through tensorflow.tfprof.OpLog proto. regexes are comma-separated.
statistics using the name scope information they defined in the Python codes.
Hence, tfprof supports “scope” command, which pulls the tree in-memory
representation.
<b>set:</b> It is used to store the options so that user doesn't need to `-start_name_regexes`: Show ops starting from the ops that match the regexes, recursively. regexes are comma-separated.
re-type the same option again and again in the follow-up command line. Note that
tfprof has traditional terminal's history and auto-complete support.
<b>help:</b> print help information. `-trim_name_regexes`: Hide ops starting from the ops that match the regexes, recursively. regexes are comma-separated.
<b>Options:</b> Run “tfprof help” to get detailed explanations. `-show_name_regexes`: Show ops that match the regexes. regexes are comma-separated.
```python `-hide_name_regexes`: Hide ops that match the regexes. regexes are comma-separated.
"-max_depth",
"-min_bytes",
"-min_micros",
"-min_params",
"-min_float_ops",
"-order_by",
"-account_type_regexes",
"-start_name_regexes",
"-trim_name_regexes",
"-show_name_regexes",
"-hide_name_regexes",
"-account_displayed_op_only",
"-select",
"-viz", # Only supported for graph command.
"-dump_to_file",
```
A key design is that stats are aggregated from descendants up to ancestors. Notes: For each op, `-account_type_regexes` is first evaluated, only ops with
`-account_type_regexes` is used to decide which op's stat is accounted. It makes types matching the specified regexes are accounted and selected for display.
decision based on op type. Usually set it to `.*` if no extra type information `-start/trim/show/hide_name_regexes` are used to further filter ops for display.
is added to the ops using OpLog. Intuitively, only accounted ops are displayed. `-start_name_regexes` is evaluated first to search the starting ops to display.
`-min/max` and `-show/hide/trim/start` options are only used to optionally Descendants of starting ops are then evaluated against `-show/hide_name_regexes`
display or hide ops based on op's name and stats. However, they don't prevent to make display decision. If an op matches trim_name_regexes, all its
tfprof from accounting stats of hidden ops. Hence, the stat of an op can be descendants are hidden. Ops statistics are *accounted even if they are hidden*
aggregated by its parent even if it is hidden. `-account_displayed_op_only` is as long as they match the `-account_xxx` options.
an option to break this rule. When it is set, only displayed ops are accounted.
Regexes are all comma-separated, for example `-show_name_regexes` `-account_displayed_op_only`: If True, only account the statistics of ops eventually displayed. If False, account all op statistics matching -account_type_regexes recursively.
`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
is not expected to show up in op names.
`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy `-select`: Comma-separated list of metrics to show: [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types].
(notice the indent printed) are sorted according to order_by.
## Future Work `-dump_to_file`: Dump the output to a file, instead of terminal.
* Load SummaryWriter event logs so that it can show the latest summary value.
* Better sorting and aggregation of outputs. Easier comprehension.
* Currently, shape information is based on `graph.pbtxt`. When the shape
information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
and `Checkpoint` to complete shape information.

View File

@ -38,7 +38,7 @@ class TFProfShowTest : public ::testing::Test {
io::JoinPath(testing::TensorFlowSrcRoot(), io::JoinPath(testing::TensorFlowSrcRoot(),
"tools/tfprof/internal/testdata/graph.pbtxt"); "tools/tfprof/internal/testdata/graph.pbtxt");
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef()); std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get())); TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb( std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
new tensorflow::RunMetadata()); new tensorflow::RunMetadata());

View File

@ -39,7 +39,7 @@ class TFProfStatsTest : public ::testing::Test {
io::JoinPath(testing::TensorFlowSrcRoot(), io::JoinPath(testing::TensorFlowSrcRoot(),
"tools/tfprof/internal/testdata/graph.pbtxt"); "tools/tfprof/internal/testdata/graph.pbtxt");
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef()); std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get())); TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb( std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
new tensorflow::RunMetadata()); new tensorflow::RunMetadata());

View File

@ -34,7 +34,7 @@ class TFProfTensorTest : public ::testing::Test {
io::JoinPath(testing::TensorFlowSrcRoot(), io::JoinPath(testing::TensorFlowSrcRoot(),
"tools/tfprof/internal/testdata/graph.pbtxt"); "tools/tfprof/internal/testdata/graph.pbtxt");
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef()); std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get())); TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb; std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
std::unique_ptr<OpLog> op_log_pb; std::unique_ptr<OpLog> op_log_pb;

View File

@ -72,12 +72,15 @@ string StringReplace(const string& str, const string& oldsub,
return out; return out;
} }
Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def) { Status ReadGraphDef(Env* env, const string& fname, GraphDef* graph_def) {
string out; string out;
Status s = ReadFileToString(env, fname, &out); Status s = ReadFileToString(env, fname, &out);
if (!s.ok()) return s; if (!s.ok()) return s;
if (protobuf::TextFormat::ParseFromString(out, graph_def)) { if (protobuf::TextFormat::ParseFromString(out, graph_def)) {
return Status(); return Status();
} else if (ReadBinaryProto(tensorflow::Env::Default(), fname, graph_def)
.ok()) {
return Status();
} }
return errors::InvalidArgument("Cannot parse proto string."); return errors::InvalidArgument("Cannot parse proto string.");
} }

View File

@ -40,7 +40,7 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd,
string StringReplace(const string& str, const string& oldsub, string StringReplace(const string& str, const string& oldsub,
const string& newsub); const string& newsub);
Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def); Status ReadGraphDef(Env* env, const string& fname, GraphDef* graph_def);
void PrintHelp(); void PrintHelp();

View File

@ -172,8 +172,8 @@ int main(int argc, char** argv) {
printf("Reading Files...\n"); printf("Reading Files...\n");
std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef()); std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef());
TF_CHECK_OK(tensorflow::tfprof::ReadGraphDefText( TF_CHECK_OK(tensorflow::tfprof::ReadGraphDef(tensorflow::Env::Default(),
tensorflow::Env::Default(), FLAGS_graph_path, graph.get())); FLAGS_graph_path, graph.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta( std::unique_ptr<tensorflow::RunMetadata> run_meta(
new tensorflow::RunMetadata()); new tensorflow::RunMetadata());