Add tfprof python API to tf.contrib and move tfprof CLI to tensorflow/tools.
Change: 137207286
This commit is contained in:
parent
d97c2ad2b6
commit
289ddb1cb6
@ -121,8 +121,6 @@ filegroup(
|
||||
"//tensorflow/contrib/tensorboard:all_files",
|
||||
"//tensorflow/contrib/testing:all_files",
|
||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:all_files",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
|
||||
"//tensorflow/contrib/training:all_files",
|
||||
"//tensorflow/contrib/util:all_files",
|
||||
"//tensorflow/core:all_files",
|
||||
@ -180,6 +178,8 @@ filegroup(
|
||||
"//tensorflow/tools/proto_text:all_files",
|
||||
"//tensorflow/tools/quantization:all_files",
|
||||
"//tensorflow/tools/test:all_files",
|
||||
"//tensorflow/tools/tfprof:all_files",
|
||||
"//tensorflow/tools/tfprof/internal:all_files",
|
||||
"//tensorflow/user_ops:all_files",
|
||||
"//third_party/hadoop:all_files",
|
||||
],
|
||||
|
@ -12,6 +12,7 @@ py_library(
|
||||
srcs_version = "PY2AND3",
|
||||
visibility = ["//tensorflow:__subpackages__"],
|
||||
deps = [
|
||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer",
|
||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
|
||||
],
|
||||
)
|
||||
|
@ -20,434 +20,9 @@ and measures system performance.
|
||||
4. Explore model based on name scope or graph structure.
|
||||
5. Selectively grouping/filtering/accounting/ordering ops.
|
||||
|
||||
### Interfaces
|
||||
tfprof can be used as CommandLine Interface (CLI) and Python API.
|
||||
The CLI is located in tensorflow/tools/tfprof.
|
||||
The Python API is located in tensorflow/contrib/tfprof.
|
||||
The tutorial is located in tensorflow/tools/tfprof/README.md.
|
||||
|
||||
[CLI Tutorials](#cli-tutorials):
|
||||
It supports interactive mode for exploration and single-shot mode for
|
||||
scripts. Outputs can be dumped to files or printed in terminal.
|
||||
|
||||
Python API Tutorials: Python API is not released yet.
|
||||
|
||||
## CLI Tutorials
|
||||
|
||||
Tutorials are based on a 32-layer ResNet.
|
||||
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
|
||||
|
||||
### Examples
|
||||
|
||||
1) Start `tfprof` command line tool
|
||||
|
||||
```shell
|
||||
# Build the tool.
|
||||
bazel build -c opt tensorflow/contrib/tfprof/...
|
||||
|
||||
# Help information, including detail 'option' instructions.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
|
||||
#
|
||||
# The following commands will start tfprof interactive mode.
|
||||
#
|
||||
# Profile model shapes and parameters only.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=/graph.pbtxt
|
||||
#
|
||||
# Additionally profile checkpoint statistics and values.
|
||||
# Use '-account_type_regexes _checkpoint_variables' to select
|
||||
# checkpoint tensors.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--checkpoint_path=model.ckpt
|
||||
#
|
||||
# Additionally profile ops requested memory and timing.
|
||||
# See CLI Input Files section on generating run_meta file.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--run_meta_path=run_meta \
|
||||
--checkpoint_path=model.ckpt
|
||||
#
|
||||
# tfprof_log is used to define customized op types and float ops.
|
||||
# Use tfprof_logger.write_op_log() to create tfprof_log.
|
||||
# See 11) in Examples section on generating tfprof_log file.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--run_meta_path=run_meta \
|
||||
--op_log_path=tfprof_log \
|
||||
--checkpoint_path=model.ckpt
|
||||
```
|
||||
Note that `graph.pbtxt` is an ASCII text format.
|
||||
|
||||
2) Press enter to show the default options
|
||||
|
||||
```shell
|
||||
tfprof>
|
||||
tfprof>
|
||||
-max_depth 4
|
||||
-min_bytes 0
|
||||
-min_micros 0
|
||||
-min_params 0
|
||||
-min_float_ops 0
|
||||
-device_regexes .*
|
||||
-order_by name
|
||||
-account_type_regexes Variable
|
||||
-start_name_regexes .*
|
||||
-trim_name_regexes
|
||||
-show_name_regexes .*
|
||||
-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
|
||||
-account_displayed_op_only false
|
||||
# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
|
||||
# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
|
||||
-select params
|
||||
-viz false
|
||||
-dump_to_file
|
||||
```
|
||||
|
||||
3) I want to see the `BatchNorm`'s gamma value in checkpoint.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --checkpoint_path.
|
||||
tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
|
||||
_TFProfRoot ()
|
||||
unit_1_0/shared_activation/init_bn/gamma ()
|
||||
[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
|
||||
unit_1_0/sub2/bn2/gamma ()
|
||||
[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
|
||||
```
|
||||
|
||||
4) I want to see my checkpoint tensors shape and number of parameters.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --checkpoint_path.
|
||||
# Increase -max_depth to see all tensors.
|
||||
tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (0/0 params)
|
||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
pool_logit/biases/Momentum (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/128 params)
|
||||
unit_last/final_bn/gamma (64, 64/128 params)
|
||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||
```
|
||||
|
||||
5) I defined an op named ‘cost’ to calculate the loss. I want to know which of the ops
|
||||
it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
|
||||
graph dependencies.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --run_meta_path.
|
||||
tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
|
||||
_TFProfRoot (0us/3.61sec)
|
||||
init/init_conv/Conv2D (11.75ms/3.10sec)
|
||||
random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
|
||||
unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
|
||||
unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
|
||||
unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
|
||||
unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
|
||||
```
|
||||
|
||||
6) I want to know the expensive operations during the back propagation.
|
||||
Hint: tensorflow prepends ‘gradient’ to your defined name scopes. Use the ‘scope’
|
||||
command to explore based on name scope hierarchies.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --run_meta_path.
|
||||
tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
|
||||
_TFProfRoot (0us/2.29sec)
|
||||
gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
|
||||
gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
|
||||
gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
|
||||
gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
|
||||
gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
|
||||
gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
|
||||
...
|
||||
```
|
||||
|
||||
7) Show the number of float operations in the model.
|
||||
Note: float operations calculation depends on
|
||||
1) op.RegisterStatistics. If an op doesn’t
|
||||
have RegisterStatistics defined, its float operations cannot be counted.
|
||||
2) fully defined shape is also necessary in order to calculate flops.
|
||||
float operations number is provided by tensorflow::tfprof::OpLog logged from
|
||||
Python API.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --op_log_path.
|
||||
tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
|
||||
_TFProfRoot (0/17.63b flops)
|
||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
|
||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
|
||||
init/init_conv/Conv2D (113.25m/113.25m flops)
|
||||
pool_logit/xw_plus_b (1.28k/165.12k flops)
|
||||
pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
|
||||
unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||
...
|
||||
```
|
||||
|
||||
8) Show the number of parameters of all `tf.trainable_variables()` in the model.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path --op_log_path.
|
||||
# store option for future commands.
|
||||
tfprof> set -account_type_regexes _trainable_variables
|
||||
tfprof> scope -max_depth 4 -select params
|
||||
_TFProfRoot (--/464.15k params)
|
||||
init/init_conv/DW (3x3x3x16, 432/432 params)
|
||||
pool_logit/DW (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/64 params)
|
||||
unit_last/final_bn/gamma (64, 64/64 params)
|
||||
```
|
||||
|
||||
Where does “_trainable_variables” come from? It is from the OpLog file
|
||||
generated by write_op_log() Python API. write_op_log() helps users create some
|
||||
common op types implicitly. Users can define their own op types and log them
|
||||
through the write_op_log() API.
|
||||
|
||||
9) What if I’m lazy and don’t want to define op type? I have given my ops
|
||||
well-defined names in my model’s code. And want to use names to select a group
|
||||
of ops. Let’s try it!
|
||||
|
||||
```shell
|
||||
tfprof> set -account_type_regexes .*
|
||||
tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
|
||||
_TFProfRoot (0/18.43k params)
|
||||
unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
|
||||
unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
|
||||
```
|
||||
|
||||
The above command allows you to filter ops that match specific names.
|
||||
`-account_displayed_op_only` asks tfprof to only account ops displayed
|
||||
in terminal. Otherwise, tfprof accounts all ops matched by
|
||||
`-account_type_regexes` recursively even if they are hidden due to some
|
||||
options such as -max_depth.
|
||||
|
||||
10) TensorFlow has built-in op types. For example, built-in op type `Variable`
|
||||
seems to include the `Variable`s created by your model. However, be careful when
|
||||
depending on it because TensorFlow creates extra `Variable` ops implicitly and
|
||||
the implicitly created ops can have the same prefix as the `Variable`s you
|
||||
defined.
|
||||
|
||||
In the following example, extra `Variables` are created and “/Momentum” is
|
||||
appended to their names. This might cause your “model capacity” calculation
|
||||
to be wrong.
|
||||
|
||||
```shell
|
||||
tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (1/1 params)
|
||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
pool_logit/biases/Momentum (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/128 params)
|
||||
unit_last/final_bn/gamma (64, 64/128 params)
|
||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||
```
|
||||
|
||||
|
||||
11) An example of defining an extra op type for ops using `OpLog`
|
||||
|
||||
First, in Python code, create an `OpLog` proto and add op type
|
||||
information to it:
|
||||
|
||||
```python
|
||||
|
||||
op_log = tfprof_log_pb2.OpLog()
|
||||
entry = op_log.log_entries.add()
|
||||
entry.name = 'pool_logit/DW'
|
||||
entry.types.append('pool_logit')
|
||||
entry = op_log.log_entries.add()
|
||||
entry.name = 'pool_logit/biases'
|
||||
# Alternatively:
|
||||
# var = tf.get_variable(xxx)
|
||||
# entry.name = var.op.name
|
||||
entry.types.append('pool_logit')
|
||||
```
|
||||
|
||||
Second, call write_op_log to write the OpLog proto.
|
||||
|
||||
```python
|
||||
tf.contrib.tfprof.tfprof_logger.write_op_log(sess.graph, '/tmp/my_op_log_dir', op_log)
|
||||
```
|
||||
|
||||
Third, when starting the tfprof tool, specify
|
||||
"--op_log_path /tmp/my_op_log_dir/op_log"
|
||||
|
||||
```shell
|
||||
tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
|
||||
_TFProfRoot (--/650 params)
|
||||
pool_logit/DW (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/10 params)
|
||||
```
|
||||
|
||||
Note that when you call
|
||||
`tf.contrib.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all `Variables`
|
||||
inside `tf.trainable_variables()` to `_trainable_variables`.
|
||||
|
||||
12) Run tfprof in one-shot mode and dump result to file.
|
||||
|
||||
```shell
|
||||
# Printed to stdout if --dump_to_file is not set.
|
||||
tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt \
|
||||
--max_depth 3 \
|
||||
--dump_to_file "/tmp/dump"
|
||||
Reading Files...
|
||||
Parsing GraphDef...
|
||||
Preparing Views...
|
||||
|
||||
cat /tmp/dump
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (0/0 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
```
|
||||
|
||||
13) Analyze how balanced Variables are on parameter servers.
|
||||
|
||||
In this tutorial, I'm going to use a seq2seq model, which is split
|
||||
on several gpus at workers and several parameter servers.
|
||||
|
||||
In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
|
||||
gpu0, they share an op_type called 'gpu0'.
|
||||
|
||||
```shell
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path ~/tfprof/textsum/graph.pbtxt \
|
||||
--run_meta_path ~/tfprof/textsum/run_meta
|
||||
|
||||
# Looks like ps task 1 is holding more than twice as many parameters as task 0.
|
||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
|
||||
_TFProfRoot (--/25.81m params)
|
||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
|
||||
_TFProfRoot (--/58.84m params)
|
||||
```
|
||||
|
||||
### CLI Input Files
|
||||
|
||||
tfprof command line interface (CLI) loads dumped files from a tensorflow model.
|
||||
It converts them into in-memory data structures. To use it, users need to specify
|
||||
the locations of the dumped files. The following are the dumped files loaded
|
||||
by tfprof:
|
||||
|
||||
<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
|
||||
representation of the model. For example, graph.pbtxt written by tf.Supervisor
|
||||
is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
|
||||
using tf.Graph.as_graph_def() or other API.
|
||||
|
||||
<b>--run_meta_path:</b> tensorflow::RunMetadata.
|
||||
Used to get the memory and time consumption of
|
||||
each op of the model. Users need to enable it. For example, the following code
|
||||
snippet writes a RunMetadata file:
|
||||
|
||||
```python
|
||||
run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
|
||||
run_metadata = config_pb2.RunMetadata()
|
||||
# Once in a while, call it to get the RunMeta.
|
||||
_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
|
||||
with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
|
||||
f.write(run_metadata.SerializeToString())
|
||||
```
|
||||
|
||||
<b>--op_log_path:</b>
|
||||
tensorflow::tfprof::OpLog. A proto used to provide extra op information
|
||||
for ops. By giving a group of ops a type name, users can easily aggregate the
|
||||
statistics for those ops without accidentally missing or including extra ops.
|
||||
tfprof exposes the following Python API to add op information and logging.
|
||||
|
||||
```python
|
||||
tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
|
||||
```
|
||||
|
||||
<b>--checkpoint_path:</b>
|
||||
TensorFlow checkpoint. It defines the _checkpoint_variables op type. It also
|
||||
provides checkpointed tensors' values.
|
||||
|
||||
|
||||
## Design
|
||||
|
||||
|
||||
### In-memory representation
|
||||
|
||||
<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
|
||||
similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
|
||||
For example op1 with name “name1/name2” is a child of op2 with name “name1”.
|
||||
|
||||
<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
|
||||
a graph structure. The graph is a “directed acyclic graph” (hopefully), with
|
||||
direction from “output to input”. The direction is designed this way so that users
|
||||
can trace from “result” to its “sources”.
|
||||
|
||||
### Command line options
|
||||
|
||||
tfprof’s major goals are to measure system performance and quickly analyze
|
||||
model architectures. Hence, its commands and options should allow users to achieve
|
||||
these 2 goals easily.
|
||||
|
||||
<b>graph:</b> It is expected that users will mostly use graph representation to
|
||||
debug system performance. Hence, tfprof supports graph command, which pulls the
|
||||
graph in-memory representation described above.
|
||||
|
||||
<b>scope:</b> It is expected that some users might want to explore their model
|
||||
statistics using the name scope information they defined in the Python codes.
|
||||
Hence, tfprof supports “scope” command, which pulls the tree in-memory
|
||||
representation.
|
||||
|
||||
<b>set:</b> It is used to store the options so that user doesn’t need to
|
||||
re-type the same option again and again in the follow up command line. Note that
|
||||
tfprof has traditional terminal’s history and auto-complete support.
|
||||
|
||||
<b>help:</b> print help information.
|
||||
|
||||
<b>Options:</b> Run “tfprof help” to get detailed explanations.
|
||||
|
||||
```python
|
||||
"-max_depth",
|
||||
"-min_bytes",
|
||||
"-min_micros",
|
||||
"-min_params",
|
||||
"-min_float_ops",
|
||||
"-order_by",
|
||||
"-account_type_regexes",
|
||||
"-start_name_regexes",
|
||||
"-trim_name_regexes",
|
||||
"-show_name_regexes",
|
||||
"-hide_name_regexes",
|
||||
"-account_displayed_op_only",
|
||||
"-select",
|
||||
"-viz", # Only supported for graph command.
|
||||
"-dump_to_file",
|
||||
```
|
||||
|
||||
A key design is that stats are aggregated from descendants up to ancestors.
|
||||
`-account_type_regexes` is used to decide which ops' stats are accounted. It makes the
|
||||
decision based on op type. Usually set it to `.*` if no extra type information
|
||||
is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
|
||||
`-min/max` and `-show/hide/trim/start` options are only used to optionally
|
||||
display or hide ops based on ops’ names and stats. However, they don’t prevent
|
||||
tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
|
||||
aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
|
||||
an option to break this rule. When it is set, only displayed ops are accounted.
|
||||
|
||||
Regexes are all comma-separated, for example `-show_name_regexes`
|
||||
`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
|
||||
is not expected to show up in op names.
|
||||
|
||||
`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
|
||||
(notice the indent printed) are sorted according to order_by.
|
||||
|
||||
## Future Work
|
||||
|
||||
* Load SummaryWriter event logs so that it can show the latest summary value.
|
||||
|
||||
* Better sorting and aggregation of outputs. Easier comprehension.
|
||||
|
||||
* Currently, shape information is based on `graph.pbtxt`. When the shape
|
||||
information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
|
||||
and `Checkpoint` to complete shape information.
|
||||
Enjoy!
|
@ -17,5 +17,6 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer
|
||||
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
|
||||
from tensorflow.python.util.all_util import make_all
|
||||
|
@ -3,14 +3,36 @@ licenses(["notice"]) # Apache 2.0
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "tf_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
|
||||
|
||||
py_library(
|
||||
name = "model_analyzer",
|
||||
srcs = ["model_analyzer.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib",
|
||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
|
||||
"//tensorflow/tools/tfprof:protos_all_py",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "model_analyzer_test",
|
||||
srcs = ["model_analyzer_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":model_analyzer",
|
||||
"//tensorflow:tensorflow_py",
|
||||
],
|
||||
)
|
||||
|
||||
py_library(
|
||||
name = "tfprof_logger",
|
||||
srcs = ["tfprof_logger.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
|
||||
"//tensorflow/python:framework_for_generated_wrappers",
|
||||
"//tensorflow/tools/tfprof:protos_all_py",
|
||||
],
|
||||
)
|
||||
|
||||
@ -20,7 +42,34 @@ tf_py_test(
|
||||
additional_deps = [
|
||||
":tfprof_logger",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
|
||||
"//tensorflow/tools/tfprof:protos_all_py",
|
||||
],
|
||||
)
|
||||
|
||||
tf_py_wrap_cc(
|
||||
name = "pywrap_tensorflow_print_model_analysis_lib",
|
||||
srcs = ["pywrap_tensorflow_print_model_analysis.i"],
|
||||
swig_includes = [
|
||||
"//tensorflow/python:lib/core/strings.i",
|
||||
"//tensorflow/python:platform/base.i",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
"//tensorflow/tools/tfprof/internal:print_model_analysis_hdr",
|
||||
"//util/python:python_headers",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "print_model_analysis_test",
|
||||
srcs = ["print_model_analysis_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":pywrap_tensorflow_print_model_analysis_lib",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
"//tensorflow/python:platform_test",
|
||||
"//tensorflow/tools/tfprof:protos_all_py",
|
||||
],
|
||||
)
|
||||
|
||||
|
187
tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
Normal file
187
tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
Normal file
@ -0,0 +1,187 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Model Analyzer.
|
||||
|
||||
Analyze model, including shape, params, time, memory, structure, etc.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
|
||||
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
|
||||
from tensorflow.tools.tfprof import tfprof_options_pb2
|
||||
from tensorflow.tools.tfprof import tfprof_output_pb2
|
||||
|
||||
# pylint: disable=bad-whitespace
|
||||
# pylint: disable=bad-continuation
|
||||
# Example tfprof_options for print_model_analysis API.
|
||||
#
|
||||
# Show the parameter statistics of trainable variables.
|
||||
TRAINABLE_VARS_PARAMS_STAT_OPTIONS = dict(
    # Thresholds: very deep traversal and no minimum-size filters.
    max_depth=10000,
    min_bytes=0,
    min_micros=0,
    min_params=0,
    min_float_ops=0,
    device_regexes=['.*'],
    order_by='name',
    # Account only ops tagged with the trainable-variables op type.
    account_type_regexes=[tfprof_logger.TRAINABLE_VARIABLES],
    # Name filters: start anywhere, trim nothing, show all, hide nothing.
    start_name_regexes=['.*'],
    trim_name_regexes=[],
    show_name_regexes=['.*'],
    hide_name_regexes=[],
    account_displayed_op_only=True,
    # Output: report parameter counts; no visualization, no dump file.
    select=['params'],
    viz=False,
    dump_to_file='',
)
|
||||
|
||||
# Show the number of float operations.
|
||||
FLOAT_OPS_OPTIONS = dict(
    max_depth=10000,
    min_bytes=0,
    min_micros=0,
    min_params=0,
    # Require at least one float op and sort by the float op count.
    min_float_ops=1,
    device_regexes=['.*'],
    order_by='float_ops',
    # Account every op type; name filters show everything.
    account_type_regexes=['.*'],
    start_name_regexes=['.*'],
    trim_name_regexes=[],
    show_name_regexes=['.*'],
    hide_name_regexes=[],
    account_displayed_op_only=True,
    # Output: report float op counts; no visualization, no dump file.
    select=['float_ops'],
    viz=False,
    dump_to_file='',
)
|
||||
|
||||
# Show number of parameters on parameter server 0.
|
||||
# It is recommended to provide the `run_meta` argument
|
||||
# to have complete device placement info.
|
||||
PRINT_PARAMS_ON_DEVICE = dict(
    # Depth 1 only; no minimum-size filters.
    max_depth=1,
    min_bytes=0,
    min_micros=0,
    min_params=0,
    min_float_ops=0,
    device_regexes=['.*'],
    order_by='name',
    # Account ops whose type matches the "ps ... task:0" pattern.
    account_type_regexes=['.*ps.*task:0.*'],
    start_name_regexes=['.*'],
    trim_name_regexes=[],
    show_name_regexes=['.*'],
    hide_name_regexes=[],
    # Ops that are not displayed are still accounted.
    account_displayed_op_only=False,
    # Output: report device and parameter count; no visualization, no dump.
    select=['device', 'params'],
    viz=False,
    dump_to_file='',
)
|
||||
|
||||
# Show the timing stats and memory demands.
|
||||
PRINT_ALL_TIMING_MEMORY = dict(
    max_depth=10000,
    # Only ops with >= 1 byte and >= 1 microsecond.
    min_bytes=1,
    min_micros=1,
    min_params=0,
    min_float_ops=0,
    device_regexes=['.*'],
    order_by='name',
    # Account every op type; name filters show everything.
    account_type_regexes=['.*'],
    start_name_regexes=['.*'],
    trim_name_regexes=[],
    show_name_regexes=['.*'],
    hide_name_regexes=[],
    account_displayed_op_only=True,
    # Output: report timing and memory; no visualization, no dump file.
    select=['micros', 'bytes'],
    viz=False,
    dump_to_file='',
)
|
||||
|
||||
# pylint: enable=bad-whitespace
|
||||
# pylint: enable=bad-continuation
|
||||
|
||||
|
||||
def print_model_analysis(graph,
                         run_meta=None,
                         op_log=None,
                         tfprof_cmd='scope',
                         tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS):
  """Print model statistics.

  Prints the model statistics to stdout. Also returns the results
  in a TFProfNode proto. See go/tfprof or run tfprof tool:
  'bazel run third_party/tensorflow/tools/tfprof help'

  Examples:
    Show the parameter/shape statistics of tf.trainable_variables().
      print_model_analysis(sess.graph).

    Show number of float ops. Only ops with RegisterStatistics defined
    are counted.
      show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS
      print_model_analysis(sess.graph, tfprof_options=show_float_op_opts)

  Args:
    graph: tf.Graph.
    run_meta: tensorflow::RunMetadata proto. When provided, also shows valid
              timing and memory information when 'select' option contains
              'micros' and 'bytes'.
    op_log: tensorflow::tfprof::OpLog proto. users can use this proto to
            group together ops and use a op_type to select the group.
    tfprof_cmd: string. Either 'scope' or 'graph'. 'scope' view organize
                ops using their name scopes. 'graph' view organize ops using
                their graph inputs.
    tfprof_options: dict of options. Every key used by the example option
                    dicts in this module is read, so all of them must be
                    present. The dict is only read, never modified. See
                    'tfprof help' for details.
  Returns:
    TFProfNode proto. Side effect: a formatted output to stdout.
  """
  # pylint: disable=protected-access
  op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta)
  # pylint: enable=protected-access

  # Copy the option dict into an OptionsProto. Scalar fields are assigned
  # directly; repeated fields are filled with extend().
  opts = tfprof_options_pb2.OptionsProto()
  opts.max_depth = tfprof_options['max_depth']
  opts.min_bytes = tfprof_options['min_bytes']
  opts.min_micros = tfprof_options['min_micros']
  opts.min_params = tfprof_options['min_params']
  opts.min_float_ops = tfprof_options['min_float_ops']
  opts.device_regexes.extend(tfprof_options['device_regexes'])
  opts.order_by = tfprof_options['order_by']
  opts.account_type_regexes.extend(tfprof_options['account_type_regexes'])
  opts.start_name_regexes.extend(tfprof_options['start_name_regexes'])
  opts.trim_name_regexes.extend(tfprof_options['trim_name_regexes'])
  opts.show_name_regexes.extend(tfprof_options['show_name_regexes'])
  opts.hide_name_regexes.extend(tfprof_options['hide_name_regexes'])
  opts.account_displayed_op_only = tfprof_options['account_displayed_op_only']
  opts.select.extend(tfprof_options['select'])
  opts.viz = tfprof_options['viz']
  opts.dump_to_file = tfprof_options['dump_to_file']

  # Optional protos are passed to the wrapped call as empty byte strings
  # when absent.
  run_meta_str = run_meta.SerializeToString() if run_meta else b''
  op_log_str = op_log.SerializeToString() if op_log else b''

  tfprof_node = tfprof_output_pb2.TFProfNode()
  tfprof_node.ParseFromString(
      print_mdl.PrintModelAnalysis(
          graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
          tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
  # Return the parsed result as the docstring promises; previously the
  # function fell off the end and returned None.
  return tfprof_node
|
@ -0,0 +1,84 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class PrintModelAnalysisTest(tf.test.TestCase):
  """Tests tf.contrib.tfprof.model_analyzer.print_model_analysis file dumps."""

  def _BuildSmallModel(self):
    """Adds two stacked conv2d layers to the default graph.

    Creates the variables 'DW' (3x3x3x6) and 'DW2' (2x2x6x12) and applies
    them to an all-zero [2, 6, 6, 3] input.

    Returns:
      The output tensor of the second convolution.
    """
    image = tf.zeros([2, 6, 6, 3])
    kernel = tf.get_variable(
        'DW', [3, 3, 3, 6],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
    kernel = tf.get_variable(
        'DW2', [2, 2, 6, 12],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    x = tf.nn.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME')
    return x

  def _DumpOptions(self):
    """Returns a private options dict that dumps the result to a temp file.

    TRAINABLE_VARS_PARAMS_STAT_OPTIONS is a module-level object; assigning
    keys on it directly would change the defaults seen by every other test
    (and by any later caller in the same process), so a copy is edited
    instead.
    """
    opts = dict(
        tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
    return opts

  def testDumpToFile(self):
    """Checks the dumped parameter summary produced with default options."""
    opts = self._DumpOptions()

    with tf.Session() as sess:
      _ = self._BuildSmallModel()
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          sess.graph, tfprof_options=opts)

      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
        self.assertEqual('_TFProfRoot (--/450 params)\n'
                         ' DW (3x3x3x6, 162/162 params)\n'
                         ' DW2 (2x2x6x12, 288/288 params)\n',
                         f.read().decode('utf-8'))

  def testSelectEverything(self):
    """Checks the dump when all op types and most select columns are shown."""
    opts = self._DumpOptions()
    opts['account_type_regexes'] = ['.*']
    opts['select'] = [
        'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
    ]

    with tf.Session() as sess:
      x = self._BuildSmallModel()

      sess.run(tf.initialize_all_variables())
      # A fully traced run supplies the RunMetadata passed to the analyzer.
      run_meta = tf.RunMetadata()
      _ = sess.run(x,
                   options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          sess.graph, run_meta, tfprof_options=opts)

      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
        # One literal per expected output line; adjacent literals are
        # concatenated by the parser into a single string.
        # pylint: disable=line-too-long
        self.assertEqual(
            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n'
            ' Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n'
            ' Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n'
            ' DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n'
            ' DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n'
            ' DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n'
            ' DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n'
            ' DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n'
            ' DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            ' DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n'
            ' DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            ' DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            ' DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n'
            ' DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n'
            ' DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n'
            ' DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n'
            ' DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n'
            ' DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n'
            ' DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            ' DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n'
            ' DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            ' DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            ' DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n'
            ' init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n'
            ' zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
            f.read().decode('utf-8'))
        # pylint: enable=line-too-long
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
@ -0,0 +1,227 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""print_model_analysis test."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from google.protobuf import text_format
|
||||
from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
|
||||
from tensorflow.tools.tfprof import tfprof_options_pb2
|
||||
from tensorflow.tools.tfprof import tfprof_output_pb2
|
||||
|
||||
# pylint: disable=bad-whitespace
|
||||
# pylint: disable=bad-continuation
|
||||
# Option values copied field by field into an OptionsProto by
# testPrintModelAnalysis below. Keys match the OptionsProto field names
# assigned there.
TEST_OPTIONS = {
    'max_depth': 10000,
    'min_bytes': 0,
    'min_micros': 0,
    'min_params': 0,
    'min_float_ops': 0,
    'device_regexes': ['.*'],
    'order_by': 'name',
    'account_type_regexes': ['.*'],
    'start_name_regexes': ['.*'],
    'trim_name_regexes': [],
    'show_name_regexes': ['.*'],
    'hide_name_regexes': [],
    'account_displayed_op_only': True,
    'select': ['params'],
    'viz': False
}
|
||||
|
||||
# pylint: enable=bad-whitespace
|
||||
# pylint: enable=bad-continuation
|
||||
|
||||
|
||||
class PrintModelAnalysisTest(tf.test.TestCase):
  """Calls the wrapped PrintModelAnalysis function and checks its proto."""

  def _BuildSmallModel(self):
    """Adds a single conv2d layer with a 6x6x3x6 'DW' kernel to the graph.

    Returns:
      The convolution output tensor.
    """
    image = tf.zeros([2, 6, 6, 3])
    kernel = tf.get_variable(
        'DW', [6, 6, 3, 6],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    return tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')

  def testPrintModelAnalysis(self):
    """Compares the returned TFProfNode against a hand-written text proto."""
    scalar_fields = ('max_depth', 'min_bytes', 'min_micros', 'min_params',
                     'min_float_ops', 'order_by', 'account_displayed_op_only',
                     'viz')
    repeated_fields = ('device_regexes', 'account_type_regexes',
                       'start_name_regexes', 'trim_name_regexes',
                       'show_name_regexes', 'hide_name_regexes', 'select')

    # Populate the options proto from TEST_OPTIONS, one field at a time.
    opts = tfprof_options_pb2.OptionsProto()
    for field in scalar_fields:
      setattr(opts, field, TEST_OPTIONS[field])
    for field in repeated_fields:
      getattr(opts, field).extend(TEST_OPTIONS[field])

    with tf.Session() as sess:
      _ = self._BuildSmallModel()

      # No run metadata or op log is supplied: both are empty byte strings.
      graph_str = sess.graph.as_graph_def().SerializeToString()
      result_str = print_mdl.PrintModelAnalysis(
          graph_str, b'', b'', b'scope', opts.SerializeToString())
      tfprof_pb = tfprof_output_pb2.TFProfNode()
      tfprof_pb.ParseFromString(result_str)

      expected_pb = tfprof_output_pb2.TFProfNode()
      text_format.Merge(r"""name: "_TFProfRoot"
          exec_micros: 0
          requested_bytes: 0
          total_exec_micros: 0
          total_requested_bytes: 0
          total_parameters: 648
          children {
            name: "Conv2D"
            exec_micros: 0
            requested_bytes: 0
            total_exec_micros: 0
            total_requested_bytes: 0
            total_parameters: 0
            float_ops: 0
            total_float_ops: 0
          }
          children {
            name: "DW"
            exec_micros: 0
            requested_bytes: 0
            parameters: 648
            total_exec_micros: 0
            total_requested_bytes: 0
            total_parameters: 648
            children {
              name: "DW/Assign"
              exec_micros: 0
              requested_bytes: 0
              total_exec_micros: 0
              total_requested_bytes: 0
              total_parameters: 0
              float_ops: 0
              total_float_ops: 0
            }
            children {
              name: "DW/Initializer"
              exec_micros: 0
              requested_bytes: 0
              total_exec_micros: 0
              total_requested_bytes: 0
              total_parameters: 0
              children {
                name: "DW/Initializer/random_normal"
                exec_micros: 0
                requested_bytes: 0
                total_exec_micros: 0
                total_requested_bytes: 0
                total_parameters: 0
                children {
                  name: "DW/Initializer/random_normal/RandomStandardNormal"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                children {
                  name: "DW/Initializer/random_normal/mean"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                children {
                  name: "DW/Initializer/random_normal/mul"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                children {
                  name: "DW/Initializer/random_normal/shape"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                children {
                  name: "DW/Initializer/random_normal/stddev"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                float_ops: 0
                total_float_ops: 0
              }
              float_ops: 0
              total_float_ops: 0
            }
            children {
              name: "DW/read"
              exec_micros: 0
              requested_bytes: 0
              total_exec_micros: 0
              total_requested_bytes: 0
              total_parameters: 0
              float_ops: 0
              total_float_ops: 0
            }
            float_ops: 0
            total_float_ops: 0
          }
          children {
            name: "zeros"
            exec_micros: 0
            requested_bytes: 0
            total_exec_micros: 0
            total_requested_bytes: 0
            total_parameters: 0
            float_ops: 0
            total_float_ops: 0
          }
          float_ops: 0
          total_float_ops: 0""", expected_pb)
      self.assertEqual(expected_pb, tfprof_pb)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
@ -0,0 +1,43 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
%include "tensorflow/python/lib/core/strings.i"
|
||||
%include "tensorflow/python/platform/base.i"
|
||||
|
||||
%{
|
||||
#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
%}
|
||||
|
||||
// Typemaps for passing C++ strings across the Python boundary.

// Type-check `const string&` arguments using the existing `char *` rule.
%typemap(typecheck) const string & = char *;

// Input: convert the Python object into a local temporary string and pass
// its address; the wrapper returns NULL when the conversion fails.
%typemap(in) const string& (string temp) {
  if (!_PyObjAs<string>($input, &temp)) return NULL;
  $1 = &temp;
}

// Output: build the Python result from the string's data pointer and size.
%typemap(out) const string& {
  $result = PyString_FromStringAndSize($1->data(), $1->size());
}

// Apply the `const string&` typemaps to `string &` and `string *` as well.
%apply const string & {string &};
%apply const string & {string *};
|
||||
|
||||
// Export only tensorflow::tfprof::PrintModelAnalysis from the included
// header. NOTE(review): %ignoreall / %unignore / %unignoreall are presumably
// helper macros supplied by the base.i include -- confirm.
%ignoreall

%unignore tensorflow;
%unignore tensorflow::tfprof;
%unignore tensorflow::tfprof::PrintModelAnalysis;

%include "tensorflow/tools/tfprof/internal/print_model_analysis.h"

%unignoreall
|
@ -24,8 +24,8 @@ import os
|
||||
import sys
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.tools.tfprof import tfprof_log_pb2
|
||||
|
||||
TRAINABLE_VARIABLES = '_trainable_variables'
|
||||
REGISTERED_FLOP_STATS = 'flops'
|
||||
@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None):
|
||||
if node.name not in logged_ops:
|
||||
entry = tfprof_log_pb2.OpLogEntry()
|
||||
entry.name = node.name
|
||||
entry.float_ops = stats.value
|
||||
entry.float_ops = int(stats.value)
|
||||
logged_ops[entry.name] = entry
|
||||
|
||||
for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
|
||||
|
@ -1317,7 +1317,7 @@ cc_library(
|
||||
"platform/regexp.h",
|
||||
],
|
||||
visibility = [
|
||||
"//tensorflow/contrib/tfprof:__subpackages__",
|
||||
"//tensorflow/tools/tfprof:__subpackages__",
|
||||
],
|
||||
deps = [":lib_internal"],
|
||||
)
|
||||
|
@ -1861,6 +1861,7 @@ tf_py_wrap_cc(
|
||||
"//tensorflow/c:tf_status_helper",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core/distributed_runtime:server_lib",
|
||||
"//tensorflow/tools/tfprof/internal:print_model_analysis",
|
||||
"//util/python:python_headers",
|
||||
] + tf_additional_lib_deps(),
|
||||
)
|
||||
|
@ -26,13 +26,13 @@ cc_binary(
|
||||
":protos_all_cc",
|
||||
"//tensorflow/c:c_api",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
"//tensorflow/core:framework_internal",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/tools/tfprof/internal:tfprof_options",
|
||||
"//tensorflow/tools/tfprof/internal:tfprof_stats",
|
||||
"//tensorflow/tools/tfprof/internal:tfprof_utils",
|
||||
"@linenoise//:linenoise",
|
||||
],
|
||||
)
|
455
tensorflow/tools/tfprof/README.md
Normal file
455
tensorflow/tools/tfprof/README.md
Normal file
@ -0,0 +1,455 @@
|
||||
# tfprof: A Profiling Tool for TensorFlow Models
|
||||
|
||||
Internal User Please Use: go/tfprof
|
||||
|
||||
Author: Xin Pan (xpan@google.com, github: panyx0718)
|
||||
|
||||
Consultants: Jon Shlens, Pete Warden
|
||||
|
||||
|
||||
## Introduction
|
||||
|
||||
tfprof is a profiling tool for TensorFlow that analyzes model architectures
|
||||
and measures system performance.
|
||||
|
||||
### Major Features
|
||||
|
||||
1. Measure model parameters, float operations, tensor shapes.
|
||||
2. Measure op execution times, requested memory size and device placement.
|
||||
3. Inspect checkpoint tensors' shapes and their values.
|
||||
4. Explore model based on name scope or graph structure.
|
||||
5. Selectively grouping/filtering/accounting/ordering ops.
|
||||
|
||||
### Interfaces
|
||||
|
||||
[CLI Tutorials](#cli-tutorials):
|
||||
It supports interactive mode for exploration and single-shot mode for
|
||||
scripts. Outputs can be dumped to files or printed in terminal.
|
||||
|
||||
Python API Tutorials: Python API is not released yet.
|
||||
|
||||
## CLI Tutorials
|
||||
|
||||
Tutorials are based on a 32-layer ResNet.
|
||||
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
|
||||
|
||||
### Examples
|
||||
|
||||
1) Start `tfprof` command line tool
|
||||
|
||||
```shell
|
||||
# Build the tool.
|
||||
bazel build -c opt tensorflow/tools/tfprof/...
|
||||
|
||||
# Help information, including detailed 'option' instructions.
|
||||
bazel-bin/tensorflow/tools/tfprof/tfprof help
|
||||
#
|
||||
# The following commands will start tfprof interactive mode.
|
||||
#
|
||||
# Profile model shapes and parameters only.
|
||||
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt
|
||||
#
|
||||
# Additionally profile checkpoint statistics and values.
|
||||
# Use '-account_type_regexes _checkpoint_variables' to select
|
||||
# checkpoint tensors.
|
||||
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--checkpoint_path=model.ckpt
|
||||
#
|
||||
# Additionally profile ops requested memory and timing.
|
||||
# See CLI Input Files section on generating run_meta file.
|
||||
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--run_meta_path=run_meta \
|
||||
--checkpoint_path=model.ckpt
|
||||
#
|
||||
# tfprof_log is used to define customized op types and float ops.
|
||||
# Use tfprof_logger.write_op_log() to create tfprof_log.
|
||||
# See 11) in Examples section on generating tfprof_log file.
|
||||
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--run_meta_path=run_meta \
|
||||
--op_log_path=tfprof_log \
|
||||
--checkpoint_path=model.ckpt
|
||||
```
|
||||
Note that `graph.pbtxt` is an ASCII text format.
|
||||
|
||||
2) Press enter to show the default options
|
||||
|
||||
```shell
|
||||
tfprof>
|
||||
tfprof>
|
||||
-max_depth 4
|
||||
-min_bytes 0
|
||||
-min_micros 0
|
||||
-min_params 0
|
||||
-min_float_ops 0
|
||||
-device_regexes .*
|
||||
-order_by name
|
||||
-account_type_regexes Variable
|
||||
-start_name_regexes .*
|
||||
-trim_name_regexes
|
||||
-show_name_regexes .*
|
||||
-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
|
||||
-account_displayed_op_only false
|
||||
# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
|
||||
# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
|
||||
-select params
|
||||
-viz false
|
||||
-dump_to_file
|
||||
```
|
||||
|
||||
3) I want to see the `BatchNorm`'s gamma value in checkpoint.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --checkpoint_path.
|
||||
tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
|
||||
_TFProfRoot ()
|
||||
unit_1_0/shared_activation/init_bn/gamma ()
|
||||
[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
|
||||
unit_1_0/sub2/bn2/gamma ()
|
||||
[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
|
||||
```
|
||||
|
||||
4) I want to see my checkpoint tensors shape and number of parameters.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --checkpoint_path.
|
||||
# Increase -max_depth to see all tensors.
|
||||
tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (0/0 params)
|
||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
pool_logit/biases/Momentum (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/128 params)
|
||||
unit_last/final_bn/gamma (64, 64/128 params)
|
||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||
```
|
||||
|
||||
5) I defined an op named ‘cost’ to calculate the loss. I want to know which of the ops
|
||||
it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
|
||||
graph dependencies.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --run_meta_path.
|
||||
tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
|
||||
_TFProfRoot (0us/3.61sec)
|
||||
init/init_conv/Conv2D (11.75ms/3.10sec)
|
||||
random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
|
||||
unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
|
||||
unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
|
||||
unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
|
||||
unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
|
||||
```
|
||||
|
||||
6) I want to know the expensive operations during the back propagation.
|
||||
Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the ‘scope’
|
||||
command to explore based on name scope hierarchies.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --run_meta_path.
|
||||
tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
|
||||
_TFProfRoot (0us/2.29sec)
|
||||
gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
|
||||
gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
|
||||
gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
|
||||
gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
|
||||
gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
|
||||
gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
|
||||
...
|
||||
```
|
||||
|
||||
7) Show the number of float operations in the model.
|
||||
Note: float operations calculation depends on
|
||||
1) op.RegisterStatistics. If an op doesn’t
|
||||
have RegisterStatistics defined, its float operations cannot be counted.
|
||||
2) fully defined shape is also necessary in order to calculate flops.
|
||||
float operations number is provided by tensorflow::tfprof::OpLog logged from
|
||||
Python API.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --op_log_path.
|
||||
tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
|
||||
_TFProfRoot (0/17.63b flops)
|
||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
|
||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
|
||||
init/init_conv/Conv2D (113.25m/113.25m flops)
|
||||
pool_logit/xw_plus_b (1.28k/165.12k flops)
|
||||
pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
|
||||
unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||
...
|
||||
```
|
||||
|
||||
8) Show the number of parameters of all `tf.trainable_variables()` in the model.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path --op_log_path.
|
||||
# store option for future commands.
|
||||
tfprof> set -account_type_regexes _trainable_variables
|
||||
tfprof> scope -max_depth 4 -select params
|
||||
_TFProfRoot (--/464.15k params)
|
||||
init/init_conv/DW (3x3x3x16, 432/432 params)
|
||||
pool_logit/DW (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/64 params)
|
||||
unit_last/final_bn/gamma (64, 64/64 params)
|
||||
```
|
||||
|
||||
Where does “_trainable_variables” come from? It is from the OpLog file
|
||||
generated by the write_op_log() Python API. write_op_log() helps users create some
|
||||
common op types implicitly. Users can define their own op types and log them
|
||||
through the write_op_log() API.
|
||||
|
||||
9) What if I’m lazy and don’t want to define op type? I have given my ops
|
||||
well-defined names in my model’s code, and I want to use names to select a group
|
||||
of ops. Let’s try it!
|
||||
|
||||
```shell
|
||||
tfprof> set -account_type_regexes .*
|
||||
tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
|
||||
_TFProfRoot (0/18.43k params)
|
||||
unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
|
||||
unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
|
||||
```
|
||||
|
||||
The above command allows you to filter ops that match specific names.
|
||||
`-account_displayed_op_only` asks tfprof to only account ops displayed
|
||||
in terminal. Otherwise, tfprof accounts all ops matched by
|
||||
`-account_type_regexes` recursively even if they are hidden due to some
|
||||
options such as -max_depth.
|
||||
|
||||
10) TensorFlow has built-in op types. For example, built-in op type `Variable`
|
||||
seems to include `Variable's` created by your model. However, be careful when
|
||||
depending on it because TensorFlow creates extra `Variable` ops implicitly and
|
||||
the implicitly created ops can have the same prefix as the `Variable's` you
|
||||
defined.
|
||||
|
||||
In the following example, extra `Variables` are created and “/Momentum” is
|
||||
appended to their names. This might cause your “model capacity” calculation
|
||||
to be wrong.
|
||||
|
||||
```shell
|
||||
tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (1/1 params)
|
||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
pool_logit/biases/Momentum (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/128 params)
|
||||
unit_last/final_bn/gamma (64, 64/128 params)
|
||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||
```
|
||||
|
||||
|
||||
11) An example of defining extra op types for ops using `OpLog`
|
||||
|
||||
First, in Python code, create an `OpLog` proto and add op type
|
||||
information to it:
|
||||
|
||||
```python
|
||||
|
||||
op_log = tfprof_log_pb2.OpLog()
|
||||
entry = op_log.log_entries.add()
|
||||
entry.name = 'pool_logit/DW'
|
||||
entry.types.append('pool_logit')
|
||||
entry = op_log.log_entries.add()
|
||||
entry.name = 'pool_logit/biases'
|
||||
# Alternatively:
|
||||
# var = tf.get_variable(xxx)
|
||||
# entry.name = var.op.name
|
||||
entry.types.append('pool_logit')
|
||||
```
|
||||
|
||||
Second, call write_op_log to write the OpLog proto.
|
||||
|
||||
```python
|
||||
tf.contrib.tfprof.tfprof_logger.write_op_log(
|
||||
sess.graph, '/tmp/my_op_log_dir', op_log)
|
||||
```
|
||||
|
||||
Third, when starting the tfprof tool, specify
|
||||
"--op_log_path /tmp/my_op_log_dir/tfprof_log"
|
||||
|
||||
```shell
|
||||
tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
|
||||
_TFProfRoot (--/650 params)
|
||||
pool_logit/DW (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/10 params)
|
||||
```
|
||||
|
||||
Note that when you call
|
||||
`tf.contrib.tfprof.tfprof_logger.write_op_log(...)`,
|
||||
the tool adds all `Variables` inside `tf.trainable_variables()` to
|
||||
`_trainable_variables`.
|
||||
|
||||
12) Run tfprof in one-shot mode and dump result to file.
|
||||
|
||||
```shell
|
||||
# Printed to stdout if --dump_to_file is not set.
|
||||
tfprof scope --graph_path=graph.pbtxt \
|
||||
--max_depth=3 \
|
||||
--dump_to_file="/tmp/dump"
|
||||
Reading Files...
|
||||
Parsing GraphDef...
|
||||
Preparing Views...
|
||||
|
||||
cat /tmp/dump
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (0/0 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
```
|
||||
|
||||
13) Analyze how balanced Variables are on parameter servers.
|
||||
|
||||
In this tutorial, I'm going to use a seq2seq model, which is split
|
||||
on several gpus at workers and several parameter servers.
|
||||
|
||||
In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
|
||||
gpu0, they share an op_type called 'gpu0'.
|
||||
|
||||
```shell
|
||||
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||
--graph_path ~/tfprof/textsum/graph.pbtxt \
|
||||
--run_meta_path ~/tfprof/textsum/run_meta
|
||||
|
||||
# Looks like ps task 1 is holding more than twice as many parameters as task 0.
|
||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
|
||||
_TFProfRoot (--/25.81m params)
|
||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
|
||||
_TFProfRoot (--/58.84m params)
|
||||
```
|
||||
|
||||
### CLI Input Files
|
||||
|
||||
tfprof command line interface (CLI) loads dumped files from a tensorflow model.
|
||||
It converts them into in-memory data structures. To use it, users need to specify
|
||||
the locations of the dumped files. The following are the dumped files loaded
|
||||
by tfprof:
|
||||
|
||||
<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
|
||||
representation of the model. For example, graph.pbtxt written by tf.Supervisor
|
||||
is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
|
||||
using tf.Graph.as_graph_def() or other API.
|
||||
|
||||
<b>--run_meta_path:</b> tensorflow::RunMetadata.
|
||||
Used to get the memory and time consumption of
|
||||
each op of the model. Users need to enable it. For example, the following code
|
||||
snippet writes a RunMetadata file:
|
||||
|
||||
```python
|
||||
run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
|
||||
run_metadata = config_pb2.RunMetadata()
|
||||
# Once in a while, call it to get the RunMeta.
|
||||
_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
|
||||
with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
|
||||
f.write(run_metadata.SerializeToString())
|
||||
```
|
||||
|
||||
<b>--op_log_path:</b>
|
||||
tensorflow::tfprof::OpLog. A proto used to provide extra op information
|
||||
for ops. By giving a group of ops a type name, users can easily aggregate the
|
||||
statistics for those ops without accidentally missing or including extra ops.
|
||||
tfprof exposes the following Python API to add op information and logging.
|
||||
|
||||
```python
|
||||
tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
|
||||
```
|
||||
|
||||
<b>--checkpoint_path:</b>
|
||||
TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
|
||||
provides checkpointed tensors' values.
|
||||
|
||||
|
||||
## Design
|
||||
|
||||
|
||||
### In-memory representation
|
||||
|
||||
<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
|
||||
similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
|
||||
For example op1 with name “name1/name2” is a child of op2 with name “name1”.
|
||||
|
||||
<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
|
||||
a graph structure. The graph is a “directed acyclic graph” (hopefully), with
|
||||
direction from “output to input”. The direction is designed this way so that users
|
||||
can trace from “result” to its “sources”.
|
||||
|
||||
### Command line options
|
||||
|
||||
tfprof’s major goals are to measure system performance and quickly analyze
|
||||
model architectures. Hence, its commands and options should allow users to achieve
|
||||
these 2 goals easily.
|
||||
|
||||
<b>graph:</b> It is expected that users will mostly use graph representation to
|
||||
debug system performance. Hence, tfprof supports graph command, which pulls the
|
||||
graph in-memory representation described above.
|
||||
|
||||
<b>scope:</b> It is expected that some users might want to explore their model
|
||||
statistics using the name scope information they defined in the Python code.
|
||||
Hence, tfprof supports “scope” command, which pulls the tree in-memory
|
||||
representation.
|
||||
|
||||
<b>set:</b> It is used to store the options so that user doesn’t need to
|
||||
re-type the same option again and again in the follow up command line. Note that
|
||||
tfprof has traditional terminal’s history and auto-complete support.
|
||||
|
||||
<b>help:</b> print help information.
|
||||
|
||||
<b>Options:</b> Run “tfprof help” to get detailed explanations.
|
||||
|
||||
```python
|
||||
"-max_depth",
|
||||
"-min_bytes",
|
||||
"-min_micros",
|
||||
"-min_params",
|
||||
"-min_float_ops",
|
||||
"-order_by",
|
||||
"-account_type_regexes",
|
||||
"-start_name_regexes",
|
||||
"-trim_name_regexes",
|
||||
"-show_name_regexes",
|
||||
"-hide_name_regexes",
|
||||
"-account_displayed_op_only",
|
||||
"-select",
|
||||
"-viz", # Only supported for graph command.
|
||||
"-dump_to_file",
|
||||
```
|
||||
|
||||
A key design is that stats are aggregated from descendants up to ancestors.
|
||||
`-account_type_regexes` is used to decide which ops' stats are accounted. It makes the
|
||||
decision based on op type. Usually set it to `.*` if no extra type information
|
||||
is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
|
||||
`-min/max` and `-show/hide/trim/start` options are only used to optionally
|
||||
display or hide ops based on ops’ name and stats. However, they don’t prevent
|
||||
tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
|
||||
aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
|
||||
an option to break this rule. When it is set, only displayed ops are accounted.
|
||||
|
||||
Regexes are all comma-separated, for example `-show_name_regexes`
|
||||
`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
|
||||
is not expected to show up in op names.
|
||||
|
||||
`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
|
||||
(notice the indent printed) are sorted according to order_by.
|
||||
|
||||
## Future Work
|
||||
|
||||
* Load SummaryWriter event logs so that it can show the latest summary value.
|
||||
|
||||
* Better sorting and aggregation of outputs. Easier comprehension.
|
||||
|
||||
* Currently, shape information is based on `graph.pbtxt`. When the shape
|
||||
information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
|
||||
and `Checkpoint` to complete shape information.
|
@ -1,5 +1,9 @@
|
||||
package(
|
||||
default_visibility = ["//tensorflow:__subpackages__"],
|
||||
features = [
|
||||
"-layering_check",
|
||||
"-parse_headers",
|
||||
],
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
@ -18,10 +22,10 @@ cc_library(
|
||||
":tfprof_show",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:regexp_internal",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -49,11 +53,11 @@ cc_library(
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:c_api",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:regexp_internal",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -69,10 +73,10 @@ cc_library(
|
||||
":tfprof_tensor",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:regexp_internal",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -87,10 +91,10 @@ cc_library(
|
||||
":tfprof_tensor",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:regexp_internal",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -109,12 +113,12 @@ tf_cc_test(
|
||||
":tfprof_stats",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -138,6 +142,18 @@ cc_library(
|
||||
deps = [
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
# Header-only target: it lists print_model_analysis.h under hdrs and has no
# srcs, so dependents get the declaration without the implementation sources.
# NOTE(review): presumably intended for the swig/Python wrapper build — confirm
# against the targets that depend on it.
cc_library(
|
||||
name = "print_model_analysis_hdr",
|
||||
hdrs = [
|
||||
"print_model_analysis.h",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -149,10 +165,11 @@ cc_library(
|
||||
":tfprof_options",
|
||||
":tfprof_stats",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
@ -170,12 +187,12 @@ tf_cc_test(
|
||||
":tfprof_stats",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -185,9 +202,9 @@ cc_library(
|
||||
hdrs = ["tfprof_tensor.h"],
|
||||
copts = ["-Wno-sign-compare"],
|
||||
deps = [
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
@ -203,12 +220,12 @@ tf_cc_test(
|
||||
":tfprof_stats",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
@ -13,20 +13,26 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
|
||||
#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
||||
const string* op_log, const string* command,
|
||||
const Options* options) {
|
||||
const string* options) {
|
||||
CHECK(graph) << "graph mustn't be null";
|
||||
CHECK(command) << "command mustn't be null";
|
||||
CHECK(options) << "options mustn't be null";
|
||||
@ -50,16 +56,18 @@ string PrintModelAnalysis(const string* graph, const string* run_meta,
|
||||
TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
|
||||
std::move(op_log_ptr), std::move(ckpt_reader));
|
||||
|
||||
if (options->dump_to_file.empty()) {
|
||||
Options opts = Options::FromProtoStr(*options);
|
||||
|
||||
if (opts.dump_to_file.empty()) {
|
||||
printf("\n=========================Options=============================\n");
|
||||
printf("%s", options->ToString().c_str());
|
||||
printf("%s", opts.ToString().c_str());
|
||||
printf("\n==================Model Analysis Report======================\n");
|
||||
TFProfNode root(tf_stats.PrintGraph(*command, *options));
|
||||
TFProfNode root(tf_stats.PrintGraph(*command, opts));
|
||||
printf("\n======================End of Report==========================\n");
|
||||
fflush(stdout);
|
||||
return root.SerializeAsString();
|
||||
}
|
||||
return tf_stats.PrintGraph(*command, *options).SerializeAsString();
|
||||
return tf_stats.PrintGraph(*command, opts).SerializeAsString();
|
||||
}
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
// ***This API is only for swig.***
|
||||
class Options;
|
||||
// ***This API is only for swig. Don't use it directly!***
|
||||
//
|
||||
// Interface defined for Python API swig. Calls the tfprof core API.
|
||||
// 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
|
||||
@ -37,9 +32,9 @@ namespace tfprof {
|
||||
// if not available.
|
||||
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
||||
const string* op_log, const string* command,
|
||||
const Options* options);
|
||||
const string* options);
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -34,4 +34,4 @@ static const char* const kCkptVarType = "_checkpoint_variables";
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
@ -16,8 +16,8 @@ limitations under the License.
|
||||
// Build a graph structure based on op inputs/outputs. The graph is a directed
|
||||
// acyclic graph pointing *from outputs to inputs*.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
@ -27,13 +27,13 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -113,4 +113,4 @@ class TFGraph : public TFShow {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||
|
||||
#include "tensorflow/core/framework/allocation_description.pb.h"
|
||||
#include "tensorflow/core/framework/tensor_description.pb.h"
|
@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/core/framework/allocation_description.pb.h"
|
||||
#include "tensorflow/core/framework/attr_value.pb.h"
|
||||
#include "tensorflow/core/framework/node_def.pb.h"
|
||||
@ -29,6 +28,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/tensor_description.pb.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -103,4 +103,4 @@ class TFNode {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
@ -13,13 +13,41 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_options.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
// Builds an Options value from a serialized OptionsProto.
//
// 'opts_proto_str' is parsed into an OptionsProto; the parse result is wrapped
// in CHECK, so a string that fails to parse trips that CHECK rather than
// returning an error to the caller.
//
// Each proto field is then passed positionally to the Options constructor.
// List-valued fields (device_regexes, account_type_regexes, the *_name_regexes
// fields and select) are copied into std::vector<string> from their
// begin()/end() ranges.
// NOTE(review): the argument order presumably mirrors the Options constructor
// declared in tfprof_options.h — confirm when adding or reordering fields.
Options Options::FromProtoStr(const string& opts_proto_str) {
|
||||
OptionsProto opts_pb;
|
||||
CHECK(opts_pb.ParseFromString(opts_proto_str));
|
||||
Options opts(
|
||||
opts_pb.max_depth(), opts_pb.min_bytes(), opts_pb.min_micros(),
|
||||
opts_pb.min_params(), opts_pb.min_float_ops(),
|
||||
std::vector<string>(opts_pb.device_regexes().begin(),
|
||||
opts_pb.device_regexes().end()),
|
||||
opts_pb.order_by(),
|
||||
std::vector<string>(opts_pb.account_type_regexes().begin(),
|
||||
opts_pb.account_type_regexes().end()),
|
||||
std::vector<string>(opts_pb.start_name_regexes().begin(),
|
||||
opts_pb.start_name_regexes().end()),
|
||||
std::vector<string>(opts_pb.trim_name_regexes().begin(),
|
||||
opts_pb.trim_name_regexes().end()),
|
||||
std::vector<string>(opts_pb.show_name_regexes().begin(),
|
||||
opts_pb.show_name_regexes().end()),
|
||||
std::vector<string>(opts_pb.hide_name_regexes().begin(),
|
||||
opts_pb.hide_name_regexes().end()),
|
||||
opts_pb.account_displayed_op_only(),
|
||||
std::vector<string>(opts_pb.select().begin(), opts_pb.select().end()),
|
||||
opts_pb.viz(), opts_pb.dump_to_file());
|
||||
return opts;
|
||||
}
|
||||
|
||||
string Options::ToString() const {
|
||||
const string s = strings::Printf(
|
||||
"%-28s%d\n"
|
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
@ -22,8 +22,6 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -62,6 +60,8 @@ static const char* const kCmds[] = {
|
||||
|
||||
struct Options {
|
||||
public:
|
||||
static Options FromProtoStr(const string& opts_proto_str);
|
||||
|
||||
virtual ~Options() {}
|
||||
Options(int max_depth, tensorflow::int64 min_bytes,
|
||||
tensorflow::int64 min_micros, tensorflow::int64 min_params,
|
||||
@ -116,4 +116,4 @@ struct Options {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
@ -13,17 +13,17 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/c_api.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
@ -17,8 +17,8 @@ limitations under the License.
|
||||
// For example, 'name1/name2' is a child of 'name1'.
|
||||
// Stats are aggregated from descendants up to ancestors.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
@ -26,13 +26,13 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -85,4 +85,4 @@ class TFScope : public TFShow {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
@ -15,23 +15,23 @@ limitations under the License.
|
||||
|
||||
// Parent class and utilities for tfprof_graph and tfprof_scope.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -124,4 +124,4 @@ class TFShow {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
@ -13,30 +13,30 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class TFProfShowTest : public ::testing::Test {
|
||||
protected:
|
||||
TFProfShowTest() {
|
||||
string graph_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
string graph_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||
|
||||
@ -44,19 +44,18 @@ class TFProfShowTest : public ::testing::Test {
|
||||
new tensorflow::RunMetadata());
|
||||
string run_meta_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
|
||||
"tools/tfprof/internal/testdata/run_meta");
|
||||
TF_CHECK_OK(
|
||||
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
||||
|
||||
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
||||
string op_log_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
|
||||
string op_log_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/tfprof_log");
|
||||
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
||||
|
||||
string ckpt_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
||||
string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/ckpt");
|
||||
TF_Status* status = TF_NewStatus();
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
@ -20,8 +20,8 @@ limitations under the License.
|
||||
// 3. Accept command and options to selectively aggregate stats for analysis
|
||||
// and print out the results.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
@ -29,20 +29,20 @@ limitations under the License.
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/attr_value.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/framework/step_stats.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -79,4 +79,4 @@ class TFStats {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
@ -13,31 +13,31 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class TFProfStatsTest : public ::testing::Test {
|
||||
protected:
|
||||
TFProfStatsTest() {
|
||||
string graph_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
string graph_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||
|
||||
@ -45,19 +45,18 @@ class TFProfStatsTest : public ::testing::Test {
|
||||
new tensorflow::RunMetadata());
|
||||
string run_meta_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
|
||||
"tools/tfprof/internal/testdata/run_meta");
|
||||
TF_CHECK_OK(
|
||||
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
||||
|
||||
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
||||
string op_log_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
|
||||
string op_log_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/tfprof_log");
|
||||
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
||||
|
||||
string ckpt_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
||||
string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/ckpt");
|
||||
TF_Status* status = TF_NewStatus();
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
@ -19,16 +19,16 @@ limitations under the License.
|
||||
// is not supported by TensorFlow CheckPointReader library, though it is
|
||||
// supported in current code.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
|
||||
#include <typeinfo>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/lib/strings/numbers.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -117,4 +117,4 @@ class TFProfTensor {
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
@ -14,34 +14,33 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class TFProfTensorTest : public ::testing::Test {
|
||||
protected:
|
||||
TFProfTensorTest() {
|
||||
string graph_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
string graph_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||
|
||||
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
|
||||
std::unique_ptr<OpLog> op_log_pb;
|
||||
|
||||
string ckpt_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
||||
string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"tools/tfprof/internal/testdata/ckpt");
|
||||
TF_Status* status = TF_NewStatus();
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
@ -47,4 +47,4 @@ void PrintHelp();
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
@ -24,10 +24,6 @@ limitations under the License.
|
||||
#include "linenoise.h"
|
||||
#include "tensorflow/c/c_api.h"
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
@ -36,6 +32,10 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/init_main.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/core/util/command_line_flags.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||
|
||||
using tensorflow::str_util::Split;
|
||||
|
24
tensorflow/tools/tfprof/tfprof_options.proto
Normal file
24
tensorflow/tools/tfprof/tfprof_options.proto
Normal file
@ -0,0 +1,24 @@
|
||||
syntax = "proto2";
|
||||
|
||||
package tensorflow.tfprof;
|
||||
|
||||
// Refer to tfprof_options.h/cc for documentation.
|
||||
// Only used to pass tfprof options from Python to C++.
|
||||
// Message form of the tfprof options. This file is proto2, so each `optional`
// field tracks whether it was explicitly set.
// Layout: tags 1-5 are int64 values (max_depth and the min_* fields); tags 6
// and 8-12 are repeated strings named *_regexes; order_by (7) and
// dump_to_file (16) are single strings; account_displayed_op_only (13) and
// viz (15) are booleans; select (14) is a repeated string.
// NOTE(review): field meanings presumably match the same-named options in
// tfprof_options.h/cc -- confirm there before relying on them.
// Tag numbers identify fields on the wire; do not renumber or reuse them.
message OptionsProto {
|
||||
optional int64 max_depth = 1;
|
||||
optional int64 min_bytes = 2;
|
||||
optional int64 min_micros = 3;
|
||||
optional int64 min_params = 4;
|
||||
optional int64 min_float_ops = 5;
|
||||
repeated string device_regexes = 6;
|
||||
optional string order_by = 7;
|
||||
repeated string account_type_regexes = 8;
|
||||
repeated string start_name_regexes = 9;
|
||||
repeated string trim_name_regexes = 10;
|
||||
repeated string show_name_regexes = 11;
|
||||
repeated string hide_name_regexes = 12;
|
||||
optional bool account_displayed_op_only = 13;
|
||||
repeated string select = 14;
|
||||
optional bool viz = 15;
|
||||
optional string dump_to_file = 16;
|
||||
}
|
Loading…
Reference in New Issue
Block a user