Add tfprof python API to tf.contrib and move tfprof CLI to tensorflow/tools.
Change: 137207286
This commit is contained in:
parent
d97c2ad2b6
commit
289ddb1cb6
@ -121,8 +121,6 @@ filegroup(
|
|||||||
"//tensorflow/contrib/tensorboard:all_files",
|
"//tensorflow/contrib/tensorboard:all_files",
|
||||||
"//tensorflow/contrib/testing:all_files",
|
"//tensorflow/contrib/testing:all_files",
|
||||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
|
"//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:all_files",
|
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
|
|
||||||
"//tensorflow/contrib/training:all_files",
|
"//tensorflow/contrib/training:all_files",
|
||||||
"//tensorflow/contrib/util:all_files",
|
"//tensorflow/contrib/util:all_files",
|
||||||
"//tensorflow/core:all_files",
|
"//tensorflow/core:all_files",
|
||||||
@ -180,6 +178,8 @@ filegroup(
|
|||||||
"//tensorflow/tools/proto_text:all_files",
|
"//tensorflow/tools/proto_text:all_files",
|
||||||
"//tensorflow/tools/quantization:all_files",
|
"//tensorflow/tools/quantization:all_files",
|
||||||
"//tensorflow/tools/test:all_files",
|
"//tensorflow/tools/test:all_files",
|
||||||
|
"//tensorflow/tools/tfprof:all_files",
|
||||||
|
"//tensorflow/tools/tfprof/internal:all_files",
|
||||||
"//tensorflow/user_ops:all_files",
|
"//tensorflow/user_ops:all_files",
|
||||||
"//third_party/hadoop:all_files",
|
"//third_party/hadoop:all_files",
|
||||||
],
|
],
|
||||||
|
@ -12,6 +12,7 @@ py_library(
|
|||||||
srcs_version = "PY2AND3",
|
srcs_version = "PY2AND3",
|
||||||
visibility = ["//tensorflow:__subpackages__"],
|
visibility = ["//tensorflow:__subpackages__"],
|
||||||
deps = [
|
deps = [
|
||||||
|
"//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer",
|
||||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
|
"//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -20,434 +20,9 @@ and measures system performance.
|
|||||||
4. Explore model based on name scope or graph structure.
|
4. Explore model based on name scope or graph structure.
|
||||||
5. Selectively grouping/filtering/accounting/ordering ops.
|
5. Selectively grouping/filtering/accounting/ordering ops.
|
||||||
|
|
||||||
### Interfaces
|
tfprof can be used as a command-line interface (CLI) and as a Python API.
|
||||||
|
The CLI is located in tensorflow/tools/tfprof.
|
||||||
|
The Python API is located in tensorflow/contrib/tfprof.
|
||||||
|
The tutorial is located in tensorflow/tools/tfprof/README.md
|
||||||
|
|
||||||
[CLI Tutorials](#cli-tutorials):
|
Enjoy!
|
||||||
It supports interactive mode for exploration and single-shot mode for
|
|
||||||
scripts. Outputs can be dumped to files or printed in terminal.
|
|
||||||
|
|
||||||
Python API Tutorials: Python API is not released yet.
|
|
||||||
|
|
||||||
## CLI Tutorials
|
|
||||||
|
|
||||||
Tutorials are based on a 32 layers ResNet.
|
|
||||||
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
|
|
||||||
|
|
||||||
### Examples
|
|
||||||
|
|
||||||
1) Start `tfprof` command line tool
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Build the tool.
|
|
||||||
bazel build -c opt tensorflow/contrib/tfprof/...
|
|
||||||
|
|
||||||
# Help information, including detail 'option' instructions.
|
|
||||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
|
|
||||||
#
|
|
||||||
# The following commands will start tfprof interactive mode.
|
|
||||||
#
|
|
||||||
# Profile model shapes and parameters only.
|
|
||||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
|
||||||
--graph_path=/graph.pbtxt
|
|
||||||
#
|
|
||||||
# Additionally profile checkpoint statistics and values.
|
|
||||||
# Use '-account_type_regexes _checkpoint_variables' to select
|
|
||||||
# checkpoint tensors.
|
|
||||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
|
||||||
--graph_path=graph.pbtxt \
|
|
||||||
--checkpoint_path=model.ckpt
|
|
||||||
#
|
|
||||||
# Additionally profile ops requested memory and timing.
|
|
||||||
# See CLI Input Files section on generating run_meta file.
|
|
||||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
|
||||||
--graph_path=graph.pbtxt \
|
|
||||||
--run_meta_path=run_meta \
|
|
||||||
--checkpoint_path=model.ckpt
|
|
||||||
#
|
|
||||||
# tfprof_log is used to define customized op types and float ops.
|
|
||||||
# Use tfprof_logger.write_op_log() to create tfprof_log.
|
|
||||||
# See 11) in Examples section on generating tfprof_log file.
|
|
||||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
|
||||||
--graph_path=graph.pbtxt \
|
|
||||||
--run_meta_path=run_meta \
|
|
||||||
--op_log_path=tfprof_log \
|
|
||||||
--checkpoint_path=model.ckpt
|
|
||||||
```
|
|
||||||
Note that `graph.pbtxt` is an ASCII text format.
|
|
||||||
|
|
||||||
2) Press enter to show the default options
|
|
||||||
|
|
||||||
```shell
|
|
||||||
tfprof>
|
|
||||||
tfprof>
|
|
||||||
-max_depth 4
|
|
||||||
-min_bytes 0
|
|
||||||
-min_micros 0
|
|
||||||
-min_params 0
|
|
||||||
-min_float_ops 0
|
|
||||||
-device_regexes .*
|
|
||||||
-order_by name
|
|
||||||
-account_type_regexes Variable
|
|
||||||
-start_name_regexes .*
|
|
||||||
-trim_name_regexes
|
|
||||||
-show_name_regexes .*
|
|
||||||
-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
|
|
||||||
-account_displayed_op_only false
|
|
||||||
# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
|
|
||||||
# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
|
|
||||||
-select params
|
|
||||||
-viz false
|
|
||||||
-dump_to_file
|
|
||||||
```
|
|
||||||
|
|
||||||
3) I want to see the `BatchNorm`'s gamma value in checkpoint.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Requires --graph_path, --checkpoint_path.
|
|
||||||
tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
|
|
||||||
_TFProfRoot ()
|
|
||||||
unit_1_0/shared_activation/init_bn/gamma ()
|
|
||||||
[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
|
|
||||||
unit_1_0/sub2/bn2/gamma ()
|
|
||||||
[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
|
|
||||||
```
|
|
||||||
|
|
||||||
4) I want to see my checkpoint tensors shape and number of parameters.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Requires --graph_path, --checkpoint_path.
|
|
||||||
# Increase -max_depth to see all tensors.
|
|
||||||
tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
|
|
||||||
_TFProfRoot (--/930.58k params)
|
|
||||||
global_step (0/0 params)
|
|
||||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
|
||||||
pool_logit/DW (64x10, 640/1.28k params)
|
|
||||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
|
||||||
pool_logit/biases (10, 10/20 params)
|
|
||||||
pool_logit/biases/Momentum (10, 10/10 params)
|
|
||||||
unit_last/final_bn/beta (64, 64/128 params)
|
|
||||||
unit_last/final_bn/gamma (64, 64/128 params)
|
|
||||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
|
||||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
|
||||||
```
|
|
||||||
|
|
||||||
5) I defined an op named ‘cost’ to calculate the loss. I want to know what ops
|
|
||||||
it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
|
|
||||||
graph dependencies.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Requires --graph_path, --run_meta_path.
|
|
||||||
tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
|
|
||||||
_TFProfRoot (0us/3.61sec)
|
|
||||||
init/init_conv/Conv2D (11.75ms/3.10sec)
|
|
||||||
random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
|
|
||||||
unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
|
|
||||||
unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
|
|
||||||
unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
|
|
||||||
unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
|
|
||||||
```
|
|
||||||
|
|
||||||
6) I want to know the expensive operations during the back propagation.
|
|
||||||
Hint: tensorflow prepends ‘gradient’ to your defined name scopes. Use the ‘scope’
|
|
||||||
command to explore based on name scope hierarchies.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Requires --graph_path, --run_meta_path.
|
|
||||||
tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
|
|
||||||
_TFProfRoot (0us/2.29sec)
|
|
||||||
gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
|
|
||||||
gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
|
|
||||||
gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
|
|
||||||
gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
|
|
||||||
gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
|
|
||||||
gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
7) Show the number of float operations in the model.
|
|
||||||
Note: float operations calculation depends on
|
|
||||||
1) op.RegisterStatistics. If an op doesn’t
|
|
||||||
have RegisterStatistics defined, its float operations cannot be counted.
|
|
||||||
2) fully defined shape is also necessary in order to calculate flops.
|
|
||||||
float operations number is provided by tensorflow::tfprof::OpLog logged from
|
|
||||||
Python API.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Requires --graph_path, --op_log_path.
|
|
||||||
tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
|
|
||||||
_TFProfRoot (0/17.63b flops)
|
|
||||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
|
|
||||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
|
|
||||||
init/init_conv/Conv2D (113.25m/113.25m flops)
|
|
||||||
pool_logit/xw_plus_b (1.28k/165.12k flops)
|
|
||||||
pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
|
|
||||||
unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
|
||||||
unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
|
||||||
unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
|
||||||
unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
8) Show the number of parameters of all `tf.trainable_variables()` in the model.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Requires --graph_path --op_log_path.
|
|
||||||
# store option for future commands.
|
|
||||||
tfprof> set -account_type_regexes _trainable_variables
|
|
||||||
tfprof> scope -max_depth 4 -select params
|
|
||||||
_TFProfRoot (--/464.15k params)
|
|
||||||
init/init_conv/DW (3x3x3x16, 432/432 params)
|
|
||||||
pool_logit/DW (64x10, 640/640 params)
|
|
||||||
pool_logit/biases (10, 10/10 params)
|
|
||||||
unit_last/final_bn/beta (64, 64/64 params)
|
|
||||||
unit_last/final_bn/gamma (64, 64/64 params)
|
|
||||||
```
|
|
||||||
|
|
||||||
Where does “_trainable_variables” come from? It is from the OpLog file
|
|
||||||
generated by the write_op_log() Python API. write_op_log() helps users create some
|
|
||||||
common op types implicitly. Users can define their own op types and log it
|
|
||||||
through the write_op_log() API.
|
|
||||||
|
|
||||||
9) What if I’m lazy and don’t want to define op type? I have given my ops
|
|
||||||
well-defined names in my model’s code. And want to use names to select a group
|
|
||||||
of ops. Let’s try it!
|
|
||||||
|
|
||||||
```shell
|
|
||||||
tfprof> set -account_type_regexes .*
|
|
||||||
tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
|
|
||||||
_TFProfRoot (0/18.43k params)
|
|
||||||
unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
|
|
||||||
unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
|
|
||||||
```
|
|
||||||
|
|
||||||
The above command allows you to filter ops that match specific names.
|
|
||||||
`-account_displayed_op_only` asks tfprof to only account ops displayed
|
|
||||||
in terminal. Otherwise, tfprof accounts all ops matched by
|
|
||||||
`-account_type_regexes` recursively even if they are hidden due to some
|
|
||||||
options such as -max_depth.
|
|
||||||
|
|
||||||
10) TensorFlow has built-in op types. For example, built-in op type `Variable`
|
|
||||||
seems to include `Variable's` created by your model. However, be careful when
|
|
||||||
depending on it because TensorFlow creates extra `Variable` ops implicitly and
|
|
||||||
the implicitly created ops can have the same prefix as the `Variable's` you
|
|
||||||
defined.
|
|
||||||
|
|
||||||
In the following example, extra `Variables` are created and “/Momentum” is
|
|
||||||
appended to their names. This might cause your “model capacity” calculation
|
|
||||||
to be wrong.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
|
|
||||||
_TFProfRoot (--/930.58k params)
|
|
||||||
global_step (1/1 params)
|
|
||||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
|
||||||
pool_logit/DW (64x10, 640/1.28k params)
|
|
||||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
|
||||||
pool_logit/biases (10, 10/20 params)
|
|
||||||
pool_logit/biases/Momentum (10, 10/10 params)
|
|
||||||
unit_last/final_bn/beta (64, 64/128 params)
|
|
||||||
unit_last/final_bn/gamma (64, 64/128 params)
|
|
||||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
|
||||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
11) An example of defining an extra op type for ops using `OpLog`
|
|
||||||
|
|
||||||
First, in Python code, create an `OpLog` proto and add op type
|
|
||||||
information to it:
|
|
||||||
|
|
||||||
```python
|
|
||||||
|
|
||||||
op_log = tfprof_log_pb2.OpLog()
|
|
||||||
entry = op_log.log_entries.add()
|
|
||||||
entry.name = 'pool_logit/DW'
|
|
||||||
entry.types.append('pool_logit')
|
|
||||||
entry = op_log.log_entries.add()
|
|
||||||
entry.name = 'pool_logit/biases'
|
|
||||||
# Alternatively:
|
|
||||||
# var = tf.get_variable(xxx)
|
|
||||||
# entry.name = var.op.name
|
|
||||||
entry.types.append('pool_logit')
|
|
||||||
```
|
|
||||||
|
|
||||||
Second, call write_op_log to write the OpLog proto.
|
|
||||||
|
|
||||||
```python
|
|
||||||
tf.tfprof.tfprof_logger.write_op_log(sess.graph, /tmp/my_op_log_dir, op_log)
|
|
||||||
```
|
|
||||||
|
|
||||||
Third, when starting the tfprof tool, specify
|
|
||||||
"--op_log_path /tmp/my_op_log_dir/op_log"
|
|
||||||
|
|
||||||
```shell
|
|
||||||
tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
|
|
||||||
_TFProfRoot (--/650 params)
|
|
||||||
pool_logit/DW (64x10, 640/640 params)
|
|
||||||
pool_logit/biases (10, 10/10 params)
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that when you call
|
|
||||||
`tf.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all `Variables`
|
|
||||||
inside `tf.trainable_variables()` to `_trainable_variables`.
|
|
||||||
|
|
||||||
12) Run tfprof in one-shot mode and dump result to file.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# Printed to stdout if --dump_to_file is not set.
|
|
||||||
tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt \
|
|
||||||
--max_depth 3 \
|
|
||||||
--dump_to_file "/tmp/dump"
|
|
||||||
Reading Files...
|
|
||||||
Parsing GraphDef...
|
|
||||||
Preparing Views...
|
|
||||||
|
|
||||||
cat /tmp/dump
|
|
||||||
_TFProfRoot (--/930.58k params)
|
|
||||||
global_step (0/0 params)
|
|
||||||
pool_logit/DW (64x10, 640/1.28k params)
|
|
||||||
pool_logit/biases (10, 10/20 params)
|
|
||||||
```
|
|
||||||
|
|
||||||
13) Analyze how balanced Variables are on parameter servers.
|
|
||||||
|
|
||||||
In this tutorial, I'm going to use a seq2seq model, which is split
|
|
||||||
on several gpus at workers and several parameter servers.
|
|
||||||
|
|
||||||
In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
|
|
||||||
gpu0. They share an op_type called 'gpu0'.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
|
||||||
--graph_path ~/tfprof/textsum/graph.pbtxt \
|
|
||||||
--run_meta_path ~/tfprof/textsum/run_meta
|
|
||||||
|
|
||||||
# Looks like ps task 1 is holding more than twice as many parameters as task 0.
|
|
||||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
|
|
||||||
_TFProfRoot (--/25.81m params)
|
|
||||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
|
|
||||||
_TFProfRoot (--/58.84m params)
|
|
||||||
```
|
|
||||||
|
|
||||||
### CLI Input Files
|
|
||||||
|
|
||||||
The tfprof command line interface (CLI) loads dumped files from a tensorflow model
|
|
||||||
and converts them into in-memory data structures. To use it, users need to specify
|
|
||||||
the locations of the dumped files. The following are the dumped files loaded
|
|
||||||
by tfprof:
|
|
||||||
|
|
||||||
<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
|
|
||||||
representation of the model. For example, graph.pbtxt written by tf.Supervisor
|
|
||||||
is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
|
|
||||||
using tf.Graph.as_graph_def() or other API.
|
|
||||||
|
|
||||||
<b>--run_meta_path:</b> tensorflow::RunMetadata.
|
|
||||||
Used to get the memory and time consumption of
|
|
||||||
each op of the model. Users need to enable it. For example, the following code
|
|
||||||
snippet writes a RunMetadata file:
|
|
||||||
|
|
||||||
```python
|
|
||||||
run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
|
|
||||||
run_metadata = config_pb2.RunMetadata()
|
|
||||||
# Once in a while, call it to get the RunMeta.
|
|
||||||
_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
|
|
||||||
with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
|
|
||||||
f.write(run_metadata.SerializeToString())
|
|
||||||
```
|
|
||||||
|
|
||||||
<b>--op_log_path:</b>
|
|
||||||
tensorflow::tfprof::OpLog. A proto used to provide extra op information
|
|
||||||
for ops. By giving a group of ops a type name, users can easily aggregate the
|
|
||||||
statistics for those ops without accidentally missing or including extra ops.
|
|
||||||
tfprof exposes the following Python API to add op information and logging.
|
|
||||||
|
|
||||||
```python
|
|
||||||
tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
|
|
||||||
```
|
|
||||||
|
|
||||||
<b>--checkpoint_path:</b>
|
|
||||||
TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
|
|
||||||
provides checkpointed tensors' values.
|
|
||||||
|
|
||||||
|
|
||||||
## Design
|
|
||||||
|
|
||||||
|
|
||||||
### In-memory representation
|
|
||||||
|
|
||||||
<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
|
|
||||||
similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
|
|
||||||
For example op1 with name “name1/name2” is a child of op2 with name “name1”.
|
|
||||||
|
|
||||||
<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
|
|
||||||
a graph structure. The graph is a “directed acyclic graph” (hopefully), with
|
|
||||||
direction from “output to input”. The direction is designed this way so that users
|
|
||||||
can trace from “result” to its “sources”.
|
|
||||||
|
|
||||||
### Command line options
|
|
||||||
|
|
||||||
tfprof’s major goals are to measure system performance and quickly analyze
|
|
||||||
model architectures. Hence, its commands and options should allow users to achieve
|
|
||||||
these 2 goals easily.
|
|
||||||
|
|
||||||
<b>graph:</b> It is expected that users will mostly use graph representation to
|
|
||||||
debug system performance. Hence, tfprof supports graph command, which pulls the
|
|
||||||
graph in-memory representation described above.
|
|
||||||
|
|
||||||
<b>scope:</b> It is expected that some users might want to explore their model
|
|
||||||
statistics using the name scope information they defined in the Python codes.
|
|
||||||
Hence, tfprof supports “scope” command, which pulls the tree in-memory
|
|
||||||
representation.
|
|
||||||
|
|
||||||
<b>set:</b> It is used to store the options so that user doesn’t need to
|
|
||||||
re-type the same option again and again in the follow up command line. Note that
|
|
||||||
tfprof has traditional terminal’s history and auto-complete support.
|
|
||||||
|
|
||||||
<b>help:</b> print help information.
|
|
||||||
|
|
||||||
<b>Options:</b> Run “tfprof help” to get detailed explanations.
|
|
||||||
|
|
||||||
```python
|
|
||||||
"-max_depth",
|
|
||||||
"-min_bytes",
|
|
||||||
"-min_micros",
|
|
||||||
"-min_params",
|
|
||||||
"-min_float_ops",
|
|
||||||
"-order_by",
|
|
||||||
"-account_type_regexes",
|
|
||||||
"-start_name_regexes",
|
|
||||||
"-trim_name_regexes",
|
|
||||||
"-show_name_regexes",
|
|
||||||
"-hide_name_regexes",
|
|
||||||
"-account_displayed_op_only",
|
|
||||||
"-select",
|
|
||||||
"-viz", # Only supported for graph command.
|
|
||||||
"-dump_to_file",
|
|
||||||
```
|
|
||||||
|
|
||||||
A key design is that stats are aggregated from descendants up to ancestors.
|
|
||||||
`-account_type_regexes` is used to decide which ops stat is accounted. It makes
|
|
||||||
decision based on op type. Usually set it to `.*` if no extra type information
|
|
||||||
is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
|
|
||||||
`-min/max` and `-show/hide/trim/start` options are only used to optionally
|
|
||||||
display or hide ops based on ops’ name and stats. However, they don’t prevent
|
|
||||||
tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
|
|
||||||
aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
|
|
||||||
an option to break this rule. When it is set, only displayed ops are accounted.
|
|
||||||
|
|
||||||
Regexes are all comma-separated, for example `-show_name_regexes`
|
|
||||||
`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
|
|
||||||
is not expected to show up in op names.
|
|
||||||
|
|
||||||
`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
|
|
||||||
(notice the indent printed) are sorted according to order_by.
|
|
||||||
|
|
||||||
## Future Work
|
|
||||||
|
|
||||||
* Load SummaryWriter event logs so that it can show the latest summary value.
|
|
||||||
|
|
||||||
* Better sorting and aggregation of outputs. Easier comprehension.
|
|
||||||
|
|
||||||
* Currently, shape information is based on `graph.pbtxt`. When the shape
|
|
||||||
information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
|
|
||||||
and `Checkpoint` to complete shape information.
|
|
@ -17,5 +17,6 @@ from __future__ import absolute_import
|
|||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
|
from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer
|
||||||
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
|
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
|
||||||
from tensorflow.python.util.all_util import make_all
|
from tensorflow.python.util.all_util import make_all
|
||||||
|
@ -3,14 +3,36 @@ licenses(["notice"]) # Apache 2.0
|
|||||||
package(default_visibility = ["//visibility:public"])
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
load("//tensorflow:tensorflow.bzl", "tf_py_test")
|
load("//tensorflow:tensorflow.bzl", "tf_py_test")
|
||||||
|
load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
|
||||||
|
|
||||||
|
py_library(
|
||||||
|
name = "model_analyzer",
|
||||||
|
srcs = ["model_analyzer.py"],
|
||||||
|
srcs_version = "PY2AND3",
|
||||||
|
deps = [
|
||||||
|
"//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib",
|
||||||
|
"//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_py",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
py_test(
|
||||||
|
name = "model_analyzer_test",
|
||||||
|
srcs = ["model_analyzer_test.py"],
|
||||||
|
srcs_version = "PY2AND3",
|
||||||
|
deps = [
|
||||||
|
":model_analyzer",
|
||||||
|
"//tensorflow:tensorflow_py",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
py_library(
|
py_library(
|
||||||
name = "tfprof_logger",
|
name = "tfprof_logger",
|
||||||
srcs = ["tfprof_logger.py"],
|
srcs = ["tfprof_logger.py"],
|
||||||
srcs_version = "PY2AND3",
|
srcs_version = "PY2AND3",
|
||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
|
|
||||||
"//tensorflow/python:framework_for_generated_wrappers",
|
"//tensorflow/python:framework_for_generated_wrappers",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_py",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -20,7 +42,34 @@ tf_py_test(
|
|||||||
additional_deps = [
|
additional_deps = [
|
||||||
":tfprof_logger",
|
":tfprof_logger",
|
||||||
"//tensorflow:tensorflow_py",
|
"//tensorflow:tensorflow_py",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
|
"//tensorflow/tools/tfprof:protos_all_py",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
tf_py_wrap_cc(
|
||||||
|
name = "pywrap_tensorflow_print_model_analysis_lib",
|
||||||
|
srcs = ["pywrap_tensorflow_print_model_analysis.i"],
|
||||||
|
swig_includes = [
|
||||||
|
"//tensorflow/python:lib/core/strings.i",
|
||||||
|
"//tensorflow/python:platform/base.i",
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
"//tensorflow/core:framework_headers_lib",
|
||||||
|
"//tensorflow/tools/tfprof/internal:print_model_analysis_hdr",
|
||||||
|
"//util/python:python_headers",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
py_test(
|
||||||
|
name = "print_model_analysis_test",
|
||||||
|
srcs = ["print_model_analysis_test.py"],
|
||||||
|
srcs_version = "PY2AND3",
|
||||||
|
deps = [
|
||||||
|
":pywrap_tensorflow_print_model_analysis_lib",
|
||||||
|
"//tensorflow:tensorflow_py",
|
||||||
|
"//tensorflow/python:framework_test_lib",
|
||||||
|
"//tensorflow/python:platform_test",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_py",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
187
tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
Normal file
187
tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
"""Model Analyzer.
|
||||||
|
|
||||||
|
Analyze model, including shape, params, time, memory, structure, etc.
|
||||||
|
"""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
|
||||||
|
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
|
||||||
|
from tensorflow.tools.tfprof import tfprof_options_pb2
|
||||||
|
from tensorflow.tools.tfprof import tfprof_output_pb2
|
||||||
|
|
||||||
|
# pylint: disable=bad-whitespace
|
||||||
|
# pylint: disable=bad-continuation
|
||||||
|
# Example tfprof_options for the print_model_analysis API.
|
||||||
|
#
|
||||||
|
# Show the parameter statistics of trainable variables.
|
||||||
|
TRAINABLE_VARS_PARAMS_STAT_OPTIONS = {
|
||||||
|
'max_depth': 10000,
|
||||||
|
'min_bytes': 0,
|
||||||
|
'min_micros': 0,
|
||||||
|
'min_params': 0,
|
||||||
|
'min_float_ops': 0,
|
||||||
|
'device_regexes': ['.*'],
|
||||||
|
'order_by': 'name',
|
||||||
|
'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
|
||||||
|
'start_name_regexes': ['.*'],
|
||||||
|
'trim_name_regexes': [],
|
||||||
|
'show_name_regexes': ['.*'],
|
||||||
|
'hide_name_regexes': [],
|
||||||
|
'account_displayed_op_only': True,
|
||||||
|
'select': ['params'],
|
||||||
|
'viz': False,
|
||||||
|
'dump_to_file': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
# Show the number of float operations.
|
||||||
|
FLOAT_OPS_OPTIONS = {
|
||||||
|
'max_depth': 10000,
|
||||||
|
'min_bytes': 0,
|
||||||
|
'min_micros': 0,
|
||||||
|
'min_params': 0,
|
||||||
|
'min_float_ops': 1,
|
||||||
|
'device_regexes': ['.*'],
|
||||||
|
'order_by': 'float_ops',
|
||||||
|
'account_type_regexes': ['.*'],
|
||||||
|
'start_name_regexes': ['.*'],
|
||||||
|
'trim_name_regexes': [],
|
||||||
|
'show_name_regexes': ['.*'],
|
||||||
|
'hide_name_regexes': [],
|
||||||
|
'account_displayed_op_only': True,
|
||||||
|
'select': ['float_ops'],
|
||||||
|
'viz': False,
|
||||||
|
'dump_to_file': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
# Show number of parameters on parameter server 0.
|
||||||
|
# It is recommended to provide the `run_meta` argument
|
||||||
|
# to have complete device placement info.
|
||||||
|
PRINT_PARAMS_ON_DEVICE = {
|
||||||
|
'max_depth': 1,
|
||||||
|
'min_bytes': 0,
|
||||||
|
'min_micros': 0,
|
||||||
|
'min_params': 0,
|
||||||
|
'min_float_ops': 0,
|
||||||
|
'device_regexes': ['.*'],
|
||||||
|
'order_by': 'name',
|
||||||
|
'account_type_regexes': ['.*ps.*task:0.*'],
|
||||||
|
'start_name_regexes': ['.*'],
|
||||||
|
'trim_name_regexes': [],
|
||||||
|
'show_name_regexes': ['.*'],
|
||||||
|
'hide_name_regexes': [],
|
||||||
|
'account_displayed_op_only': False,
|
||||||
|
'select': ['device', 'params'],
|
||||||
|
'viz': False,
|
||||||
|
'dump_to_file': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
# Show the timing stats and memory demands.
|
||||||
|
PRINT_ALL_TIMING_MEMORY = {
|
||||||
|
'max_depth': 10000,
|
||||||
|
'min_bytes': 1, # Only >=1
|
||||||
|
'min_micros': 1, # Only >=1
|
||||||
|
'min_params': 0,
|
||||||
|
'min_float_ops': 0,
|
||||||
|
'device_regexes': ['.*'],
|
||||||
|
'order_by': 'name',
|
||||||
|
'account_type_regexes': ['.*'],
|
||||||
|
'start_name_regexes': ['.*'],
|
||||||
|
'trim_name_regexes': [],
|
||||||
|
'show_name_regexes': ['.*'],
|
||||||
|
'hide_name_regexes': [],
|
||||||
|
'account_displayed_op_only': True,
|
||||||
|
'select': ['micros', 'bytes'],
|
||||||
|
'viz': False,
|
||||||
|
'dump_to_file': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
# pylint: enable=bad-whitespace
|
||||||
|
# pylint: enable=bad-continuation
|
||||||
|
|
||||||
|
|
||||||
|
def print_model_analysis(graph,
                         run_meta=None,
                         op_log=None,
                         tfprof_cmd='scope',
                         tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS):
  """Print model statistics.

  Prints the model statistics to stdout. Also returns the results
  in a TFProfNode proto. See tensorflow/tools/tfprof/README.md or run the
  tfprof tool:
    'bazel-bin/tensorflow/tools/tfprof/tfprof help'

  Examples:
    Show the parameter/shape statistics of tf.trainable_variables().
      print_model_analysis(sess.graph).

    Show number of float ops. Only ops with RegisterStatistics defined
    are counted.
      show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS
      print_model_analysis(sess.graph, tfprof_options=show_float_op_opts)

  Args:
    graph: tf.Graph.
    run_meta: tensorflow::RunMetadata proto. When provided, also shows valid
              timing and memory information when 'select' option contains
              'micros' and 'bytes'.
    op_log: tensorflow::tfprof::OpLog proto. Users can use this proto to
            group together ops and use an op_type to select the group.
    tfprof_cmd: string. Either 'scope' or 'graph'. The 'scope' view organizes
                ops using their name scopes. The 'graph' view organizes ops
                using their graph inputs.
    tfprof_options: dict of options; it is only read, never modified, by this
                    function. See 'tfprof help' for details.

  Returns:
    TFProfNode proto. Side effect: a formatted output to stdout.
  """
  # pylint: disable=protected-access
  op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta)
  # pylint: enable=protected-access

  # Copy the Python option dict into the OptionsProto that is serialized and
  # handed to PrintModelAnalysis below.
  opts = tfprof_options_pb2.OptionsProto()
  opts.max_depth = tfprof_options['max_depth']
  opts.min_bytes = tfprof_options['min_bytes']
  opts.min_micros = tfprof_options['min_micros']
  opts.min_params = tfprof_options['min_params']
  opts.min_float_ops = tfprof_options['min_float_ops']
  opts.device_regexes.extend(tfprof_options['device_regexes'])
  opts.order_by = tfprof_options['order_by']
  opts.account_type_regexes.extend(tfprof_options['account_type_regexes'])
  opts.start_name_regexes.extend(tfprof_options['start_name_regexes'])
  opts.trim_name_regexes.extend(tfprof_options['trim_name_regexes'])
  opts.show_name_regexes.extend(tfprof_options['show_name_regexes'])
  opts.hide_name_regexes.extend(tfprof_options['hide_name_regexes'])
  opts.account_displayed_op_only = tfprof_options['account_displayed_op_only']
  opts.select.extend(tfprof_options['select'])
  opts.viz = tfprof_options['viz']
  opts.dump_to_file = tfprof_options['dump_to_file']

  # Absent protos are passed as empty byte strings.
  run_meta_str = run_meta.SerializeToString() if run_meta else b''
  op_log_str = op_log.SerializeToString() if op_log else b''

  tfprof_node = tfprof_output_pb2.TFProfNode()
  tfprof_node.ParseFromString(
      print_mdl.PrintModelAnalysis(
          graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
          tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
  # The docstring promises the parsed result; without this explicit return
  # every caller would receive None.
  return tfprof_node
|
@ -0,0 +1,84 @@
|
|||||||
|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
|
class PrintModelAnalysisTest(tf.test.TestCase):
  """Tests for tf.contrib.tfprof.model_analyzer.print_model_analysis."""

  def _BuildSmallModel(self):
    """Builds two stacked stride-2 conv2d ops with variables 'DW' and 'DW2'."""
    image = tf.zeros([2, 6, 6, 3])
    kernel = tf.get_variable(
        'DW', [3, 3, 3, 6],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
    kernel = tf.get_variable(
        'DW2', [2, 2, 6, 12],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    x = tf.nn.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME')
    return x

  def testDumpToFile(self):
    # Work on a copy: the options dict is a module-level constant that is also
    # the default argument of print_model_analysis, so mutating it in place
    # would leak this test's settings into other tests and callers.
    opts = dict(
        tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')

    with tf.Session() as sess:
      _ = self._BuildSmallModel()
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          sess.graph, tfprof_options=opts)

      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
        self.assertEqual('_TFProfRoot (--/450 params)\n'
                         ' DW (3x3x3x6, 162/162 params)\n'
                         ' DW2 (2x2x6x12, 288/288 params)\n',
                         f.read().decode('utf-8'))

  def testSelectEverything(self):
    # Copy before overriding keys so the shared defaults stay untouched (the
    # overrides below would otherwise change what testDumpToFile and any other
    # user of the default options observe).
    opts = dict(
        tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
    opts['account_type_regexes'] = ['.*']
    opts['select'] = [
        'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
    ]

    with tf.Session() as sess:
      x = self._BuildSmallModel()

      sess.run(tf.initialize_all_variables())
      # Run once with full tracing so run_meta carries the per-op statistics
      # that the 'bytes' and 'device' columns are reported from.
      run_meta = tf.RunMetadata()
      _ = sess.run(x,
                   options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          sess.graph, run_meta, tfprof_options=opts)

      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
        # pylint: disable=line-too-long
        self.assertEqual(
            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
            f.read().decode('utf-8'))
        # pylint: enable=line-too-long
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
tf.test.main()
|
@ -0,0 +1,227 @@
|
|||||||
|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
"""print_model_analysis test."""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
from google.protobuf import text_format
|
||||||
|
from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
|
||||||
|
from tensorflow.tools.tfprof import tfprof_options_pb2
|
||||||
|
from tensorflow.tools.tfprof import tfprof_output_pb2
|
||||||
|
|
||||||
|
# pylint: disable=bad-whitespace
|
||||||
|
# pylint: disable=bad-continuation
|
||||||
|
TEST_OPTIONS = dict(
    # No thresholds and catch-all regexes: every node is shown and accounted.
    max_depth=10000,
    min_bytes=0,
    min_micros=0,
    min_params=0,
    min_float_ops=0,
    device_regexes=['.*'],
    order_by='name',
    account_type_regexes=['.*'],
    start_name_regexes=['.*'],
    trim_name_regexes=[],
    show_name_regexes=['.*'],
    hide_name_regexes=[],
    account_displayed_op_only=True,
    select=['params'],
    viz=False,
)
|
||||||
|
|
||||||
|
# pylint: enable=bad-whitespace
|
||||||
|
# pylint: enable=bad-continuation
|
||||||
|
|
||||||
|
|
||||||
|
class PrintModelAnalysisTest(tf.test.TestCase):
  """Calls the wrapped PrintModelAnalysis directly and checks its proto."""

  def _BuildSmallModel(self):
    """Adds a single stride-2 conv2d over a zero image, with variable 'DW'."""
    inputs = tf.zeros([2, 6, 6, 3])
    weights = tf.get_variable(
        'DW', [6, 6, 3, 6],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    return tf.nn.conv2d(inputs, weights, [1, 2, 2, 1], padding='SAME')

  def testPrintModelAnalysis(self):
    # Mirror TEST_OPTIONS into an OptionsProto: plain values are assigned,
    # list values are appended to the matching repeated field.
    options_pb = tfprof_options_pb2.OptionsProto()
    for scalar_key in ('max_depth', 'min_bytes', 'min_micros', 'min_params',
                       'min_float_ops', 'order_by',
                       'account_displayed_op_only', 'viz'):
      setattr(options_pb, scalar_key, TEST_OPTIONS[scalar_key])
    for list_key in ('device_regexes', 'account_type_regexes',
                     'start_name_regexes', 'trim_name_regexes',
                     'show_name_regexes', 'hide_name_regexes', 'select'):
      getattr(options_pb, list_key).extend(TEST_OPTIONS[list_key])

    with tf.Session() as sess:
      _ = self._BuildSmallModel()
      serialized_graph = sess.graph.as_graph_def().SerializeToString()
      # No run metadata and no op log are supplied, hence the empty strings.
      serialized_result = print_mdl.PrintModelAnalysis(
          serialized_graph, b'', b'', b'scope', options_pb.SerializeToString())
      tfprof_pb = tfprof_output_pb2.TFProfNode()
      tfprof_pb.ParseFromString(serialized_result)

      expected_pb = tfprof_output_pb2.TFProfNode()
      text_format.Merge(r"""name: "_TFProfRoot"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 648
children {
name: "Conv2D"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
children {
name: "DW"
exec_micros: 0
requested_bytes: 0
parameters: 648
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 648
children {
name: "DW/Assign"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
children {
name: "DW/Initializer"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
children {
name: "DW/Initializer/random_normal"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
children {
name: "DW/Initializer/random_normal/RandomStandardNormal"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
children {
name: "DW/Initializer/random_normal/mean"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
children {
name: "DW/Initializer/random_normal/mul"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
children {
name: "DW/Initializer/random_normal/shape"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
children {
name: "DW/Initializer/random_normal/stddev"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
float_ops: 0
total_float_ops: 0
}
float_ops: 0
total_float_ops: 0
}
children {
name: "DW/read"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
float_ops: 0
total_float_ops: 0
}
children {
name: "zeros"
exec_micros: 0
requested_bytes: 0
total_exec_micros: 0
total_requested_bytes: 0
total_parameters: 0
float_ops: 0
total_float_ops: 0
}
float_ops: 0
total_float_ops: 0""", expected_pb)
      self.assertEqual(expected_pb, tfprof_pb)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
tf.test.main()
|
@ -0,0 +1,43 @@
|
|||||||
|
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
// SWIG interface exposing tensorflow::tfprof::PrintModelAnalysis to Python.
%include "tensorflow/python/lib/core/strings.i"
|
||||||
|
%include "tensorflow/python/platform/base.i"
|
||||||
|
|
||||||
|
// Headers needed by the generated wrapper code itself.
%{
|
||||||
|
#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
|
||||||
|
#include "tensorflow/core/framework/types.h"
|
||||||
|
%}
|
||||||
|
|
||||||
|
// Typemaps for `const string&`: type-check it like `char *`, convert the
// incoming Python object into a local `string` with _PyObjAs<string>
// (returning NULL when the conversion fails), and build the Python result from
// the returned string's data() and size().
%typemap(typecheck) const string & = char *;
|
||||||
|
%typemap(in) const string& (string temp) {
|
||||||
|
if (!_PyObjAs<string>($input, &temp)) return NULL;
|
||||||
|
$1 = &temp;
|
||||||
|
}
|
||||||
|
%typemap(out) const string& {
|
||||||
|
$result = PyString_FromStringAndSize($1->data(), $1->size());
|
||||||
|
}
|
||||||
|
// Reuse the `const string&` typemaps for `string&` and `string*`.
%apply const string & {string &};
|
||||||
|
%apply const string & {string *};
|
||||||
|
|
||||||
|
// Hide everything, then re-expose only tensorflow::tfprof::PrintModelAnalysis
// before pulling in the header that declares it.
%ignoreall
|
||||||
|
|
||||||
|
%unignore tensorflow;
|
||||||
|
%unignore tensorflow::tfprof;
|
||||||
|
%unignore tensorflow::tfprof::PrintModelAnalysis;
|
||||||
|
|
||||||
|
%include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
|
||||||
|
|
||||||
|
// Undo the blanket %ignoreall issued above.
%unignoreall
|
@ -24,8 +24,8 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2
|
|
||||||
from tensorflow.python.framework import ops
|
from tensorflow.python.framework import ops
|
||||||
|
from tensorflow.tools.tfprof import tfprof_log_pb2
|
||||||
|
|
||||||
TRAINABLE_VARIABLES = '_trainable_variables'
|
TRAINABLE_VARIABLES = '_trainable_variables'
|
||||||
REGISTERED_FLOP_STATS = 'flops'
|
REGISTERED_FLOP_STATS = 'flops'
|
||||||
@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None):
|
|||||||
if node.name not in logged_ops:
|
if node.name not in logged_ops:
|
||||||
entry = tfprof_log_pb2.OpLogEntry()
|
entry = tfprof_log_pb2.OpLogEntry()
|
||||||
entry.name = node.name
|
entry.name = node.name
|
||||||
entry.float_ops = stats.value
|
entry.float_ops = int(stats.value)
|
||||||
logged_ops[entry.name] = entry
|
logged_ops[entry.name] = entry
|
||||||
|
|
||||||
for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
|
for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
|
||||||
|
@ -1317,7 +1317,7 @@ cc_library(
|
|||||||
"platform/regexp.h",
|
"platform/regexp.h",
|
||||||
],
|
],
|
||||||
visibility = [
|
visibility = [
|
||||||
"//tensorflow/contrib/tfprof:__subpackages__",
|
"//tensorflow/tools/tfprof:__subpackages__",
|
||||||
],
|
],
|
||||||
deps = [":lib_internal"],
|
deps = [":lib_internal"],
|
||||||
)
|
)
|
||||||
|
@ -1861,6 +1861,7 @@ tf_py_wrap_cc(
|
|||||||
"//tensorflow/c:tf_status_helper",
|
"//tensorflow/c:tf_status_helper",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core/distributed_runtime:server_lib",
|
"//tensorflow/core/distributed_runtime:server_lib",
|
||||||
|
"//tensorflow/tools/tfprof/internal:print_model_analysis",
|
||||||
"//util/python:python_headers",
|
"//util/python:python_headers",
|
||||||
] + tf_additional_lib_deps(),
|
] + tf_additional_lib_deps(),
|
||||||
)
|
)
|
||||||
|
@ -26,13 +26,13 @@ cc_binary(
|
|||||||
":protos_all_cc",
|
":protos_all_cc",
|
||||||
"//tensorflow/c:c_api",
|
"//tensorflow/c:c_api",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
|
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
|
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
|
|
||||||
"//tensorflow/core:framework_headers_lib",
|
"//tensorflow/core:framework_headers_lib",
|
||||||
"//tensorflow/core:framework_internal",
|
"//tensorflow/core:framework_internal",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
|
"//tensorflow/tools/tfprof/internal:tfprof_options",
|
||||||
|
"//tensorflow/tools/tfprof/internal:tfprof_stats",
|
||||||
|
"//tensorflow/tools/tfprof/internal:tfprof_utils",
|
||||||
"@linenoise//:linenoise",
|
"@linenoise//:linenoise",
|
||||||
],
|
],
|
||||||
)
|
)
|
455
tensorflow/tools/tfprof/README.md
Normal file
455
tensorflow/tools/tfprof/README.md
Normal file
@ -0,0 +1,455 @@
|
|||||||
|
# tfprof: A Profiling Tool for TensorFlow Models
|
||||||
|
|
||||||
|
Internal users, please use: go/tfprof
|
||||||
|
|
||||||
|
Author: Xin Pan (xpan@google.com, github: panyx0718)
|
||||||
|
|
||||||
|
Consultants: Jon Shlens, Pete Warden
|
||||||
|
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
tfprof is a profiling tool for TensorFlow that analyzes model architectures
|
||||||
|
and measures system performance.
|
||||||
|
|
||||||
|
### Major Features
|
||||||
|
|
||||||
|
1. Measure model parameters, float operations, tensor shapes.
|
||||||
|
2. Measure op execution times, requested memory size and device placement.
|
||||||
|
3. Inspect checkpoint tensors' shapes and their values.
|
||||||
|
4. Explore model based on name scope or graph structure.
|
||||||
|
5. Selectively group/filter/account/order ops.
|
||||||
|
|
||||||
|
### Interfaces
|
||||||
|
|
||||||
|
[CLI Tutorials](#cli-tutorials):
|
||||||
|
It supports interactive mode for exploration and single-shot mode for
|
||||||
|
scripts. Outputs can be dumped to files or printed in terminal.
|
||||||
|
|
||||||
|
Python API Tutorials: Python API is not released yet.
|
||||||
|
|
||||||
|
## CLI Tutorials
|
||||||
|
|
||||||
|
Tutorials are based on a 32-layer ResNet.
|
||||||
|
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
1) Start `tfprof` command line tool
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Build the tool.
|
||||||
|
bazel build -c opt tensorflow/tools/tfprof/...
|
||||||
|
|
||||||
|
# Help information, including detailed 'option' instructions.
|
||||||
|
bazel-bin/tensorflow/tools/tfprof/tfprof help
|
||||||
|
#
|
||||||
|
# The following commands will start tfprof interactive mode.
|
||||||
|
#
|
||||||
|
# Profile model shapes and parameters only.
|
||||||
|
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||||
|
--graph_path=graph.pbtxt
|
||||||
|
#
|
||||||
|
# Additionally profile checkpoint statistics and values.
|
||||||
|
# Use '-account_type_regexes _checkpoint_variables' to select
|
||||||
|
# checkpoint tensors.
|
||||||
|
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||||
|
--graph_path=graph.pbtxt \
|
||||||
|
--checkpoint_path=model.ckpt
|
||||||
|
#
|
||||||
|
# Additionally profile ops requested memory and timing.
|
||||||
|
# See CLI Input Files section on generating run_meta file.
|
||||||
|
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||||
|
--graph_path=graph.pbtxt \
|
||||||
|
--run_meta_path=run_meta \
|
||||||
|
--checkpoint_path=model.ckpt
|
||||||
|
#
|
||||||
|
# tfprof_log is used to define customized op types and float ops.
|
||||||
|
# Use tfprof_logger.write_op_log() to create tfprof_log.
|
||||||
|
# See 11) in Examples section on generating tfprof_log file.
|
||||||
|
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||||
|
--graph_path=graph.pbtxt \
|
||||||
|
--run_meta_path=run_meta \
|
||||||
|
--op_log_path=tfprof_log \
|
||||||
|
--checkpoint_path=model.ckpt
|
||||||
|
```
|
||||||
|
Note that `graph.pbtxt` is an ASCII text format.
|
||||||
|
|
||||||
|
2) Press enter to show the default options
|
||||||
|
|
||||||
|
```shell
|
||||||
|
tfprof>
|
||||||
|
tfprof>
|
||||||
|
-max_depth 4
|
||||||
|
-min_bytes 0
|
||||||
|
-min_micros 0
|
||||||
|
-min_params 0
|
||||||
|
-min_float_ops 0
|
||||||
|
-device_regexes .*
|
||||||
|
-order_by name
|
||||||
|
-account_type_regexes Variable
|
||||||
|
-start_name_regexes .*
|
||||||
|
-trim_name_regexes
|
||||||
|
-show_name_regexes .*
|
||||||
|
-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
|
||||||
|
-account_displayed_op_only false
|
||||||
|
# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
|
||||||
|
# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
|
||||||
|
-select params
|
||||||
|
-viz false
|
||||||
|
-dump_to_file
|
||||||
|
```
|
||||||
|
|
||||||
|
3) I want to see the `BatchNorm`'s gamma value in checkpoint.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Requires --graph_path, --checkpoint_path.
|
||||||
|
tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
|
||||||
|
_TFProfRoot ()
|
||||||
|
unit_1_0/shared_activation/init_bn/gamma ()
|
||||||
|
[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
|
||||||
|
unit_1_0/sub2/bn2/gamma ()
|
||||||
|
[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
|
||||||
|
```
|
||||||
|
|
||||||
|
4) I want to see my checkpoint tensors' shapes and number of parameters.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Requires --graph_path, --checkpoint_path.
|
||||||
|
# Increase -max_depth to see all tensors.
|
||||||
|
tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
|
||||||
|
_TFProfRoot (--/930.58k params)
|
||||||
|
global_step (0/0 params)
|
||||||
|
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||||
|
pool_logit/DW (64x10, 640/1.28k params)
|
||||||
|
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||||
|
pool_logit/biases (10, 10/20 params)
|
||||||
|
pool_logit/biases/Momentum (10, 10/10 params)
|
||||||
|
unit_last/final_bn/beta (64, 64/128 params)
|
||||||
|
unit_last/final_bn/gamma (64, 64/128 params)
|
||||||
|
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||||
|
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||||
|
```
|
||||||
|
|
||||||
|
5) I defined an op named ‘cost’ to calculate the loss. I want to know which of the ops
|
||||||
|
it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
|
||||||
|
graph dependencies.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Requires --graph_path, --run_meta_path.
|
||||||
|
tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
|
||||||
|
_TFProfRoot (0us/3.61sec)
|
||||||
|
init/init_conv/Conv2D (11.75ms/3.10sec)
|
||||||
|
random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
|
||||||
|
unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
|
||||||
|
unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
|
||||||
|
unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
|
||||||
|
unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
|
||||||
|
```
|
||||||
|
|
||||||
|
6) I want to know the expensive operations during the back propagation.
|
||||||
|
Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the ‘scope’
|
||||||
|
command to explore based on name scope hierarchies.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Requires --graph_path, --run_meta_path.
|
||||||
|
tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
|
||||||
|
_TFProfRoot (0us/2.29sec)
|
||||||
|
gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
|
||||||
|
gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
|
||||||
|
gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
|
||||||
|
gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
|
||||||
|
gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
|
||||||
|
gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
7) Show the number of float operations in the model.
|
||||||
|
Note: float operations calculation depends on
|
||||||
|
1) op.RegisterStatistics. If an op doesn’t
|
||||||
|
have RegisterStatistics defined, its float operations cannot be counted.
|
||||||
|
2) fully defined shape is also necessary in order to calculate flops.
|
||||||
|
float operations number is provided by tensorflow::tfprof::OpLog logged from
|
||||||
|
Python API.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Requires --graph_path, --op_log_path.
|
||||||
|
tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
|
||||||
|
_TFProfRoot (0/17.63b flops)
|
||||||
|
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
|
||||||
|
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
|
||||||
|
init/init_conv/Conv2D (113.25m/113.25m flops)
|
||||||
|
pool_logit/xw_plus_b (1.28k/165.12k flops)
|
||||||
|
pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
|
||||||
|
unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||||
|
unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||||
|
unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||||
|
unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
8) Show the number of parameters of all `tf.trainable_variables()` in the model.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Requires --graph_path --op_log_path.
|
||||||
|
# store option for future commands.
|
||||||
|
tfprof> set -account_type_regexes _trainable_variables
|
||||||
|
tfprof> scope -max_depth 4 -select params
|
||||||
|
_TFProfRoot (--/464.15k params)
|
||||||
|
init/init_conv/DW (3x3x3x16, 432/432 params)
|
||||||
|
pool_logit/DW (64x10, 640/640 params)
|
||||||
|
pool_logit/biases (10, 10/10 params)
|
||||||
|
unit_last/final_bn/beta (64, 64/64 params)
|
||||||
|
unit_last/final_bn/gamma (64, 64/64 params)
|
||||||
|
```
|
||||||
|
|
||||||
|
Where does “_trainable_variables” come from? It is from the OpLog file
|
||||||
|
generated by the write_op_log() Python API. write_op_log() helps users create some
|
||||||
|
common op types implicitly. Users can define their own op types and log them
|
||||||
|
through the write_op_log() API.
|
||||||
|
|
||||||
|
9) What if I’m lazy and don’t want to define op type? I have given my ops
|
||||||
|
well-defined names in my model’s code, and I want to use names to select a group
|
||||||
|
of ops. Let’s try it!
|
||||||
|
|
||||||
|
```shell
|
||||||
|
tfprof> set -account_type_regexes .*
|
||||||
|
tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
|
||||||
|
_TFProfRoot (0/18.43k params)
|
||||||
|
unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
|
||||||
|
unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
|
||||||
|
```
|
||||||
|
|
||||||
|
The above command allows you to filter ops that match specific names.
|
||||||
|
`-account_displayed_op_only` asks tfprof to only account ops displayed
|
||||||
|
in terminal. Otherwise, tfprof accounts all ops matched by
|
||||||
|
`-account_type_regexes` recursively even if they are hidden due to some
|
||||||
|
options such as -max_depth.
|
||||||
|
|
||||||
|
10) TensorFlow has built-in op types. For example, built-in op type `Variable`
|
||||||
|
seems to include `Variable's` created by your model. However, be careful when
|
||||||
|
depending on it because TensorFlow creates extra `Variable` ops implicitly and
|
||||||
|
the implicitly created ops can have the same prefix as the `Variable's` you
|
||||||
|
defined.
|
||||||
|
|
||||||
|
In the following example, extra `Variables` are created and “/Momentum” is
|
||||||
|
appended to their names. This might cause your “model capacity” calculation
|
||||||
|
to be wrong.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
|
||||||
|
_TFProfRoot (--/930.58k params)
|
||||||
|
global_step (1/1 params)
|
||||||
|
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||||
|
pool_logit/DW (64x10, 640/1.28k params)
|
||||||
|
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||||
|
pool_logit/biases (10, 10/20 params)
|
||||||
|
pool_logit/biases/Momentum (10, 10/10 params)
|
||||||
|
unit_last/final_bn/beta (64, 64/128 params)
|
||||||
|
unit_last/final_bn/gamma (64, 64/128 params)
|
||||||
|
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||||
|
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
11) An example of defining an extra op type for ops using `OpLog`
|
||||||
|
|
||||||
|
First, in Python code, create an `OpLog` proto and add op type
|
||||||
|
information to it:
|
||||||
|
|
||||||
|
```python
|
||||||
|
|
||||||
|
op_log = tfprof_log_pb2.OpLog()
|
||||||
|
entry = op_log.log_entries.add()
|
||||||
|
entry.name = 'pool_logit/DW'
|
||||||
|
entry.types.append('pool_logit')
|
||||||
|
entry = op_log.log_entries.add()
|
||||||
|
entry.name = 'pool_logit/biases'
|
||||||
|
# Alternatively:
|
||||||
|
# var = tf.get_variable(xxx)
|
||||||
|
# entry.name = var.op.name
|
||||||
|
entry.types.append('pool_logit')
|
||||||
|
```
|
||||||
|
|
||||||
|
Second, call write_op_log to write the OpLog proto.
|
||||||
|
|
||||||
|
```python
|
||||||
|
tf.contrib.tfprof.tfprof_logger.write_op_log(
|
||||||
|
sess.graph, "/tmp/my_op_log_dir", op_log)
|
||||||
|
```
|
||||||
|
|
||||||
|
Third, when starting the tfprof tool, specify
|
||||||
|
"--op_log_path /tmp/my_op_log_dir/op_log"
|
||||||
|
|
||||||
|
```shell
|
||||||
|
tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
|
||||||
|
_TFProfRoot (--/650 params)
|
||||||
|
pool_logit/DW (64x10, 640/640 params)
|
||||||
|
pool_logit/biases (10, 10/10 params)
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that when you call
|
||||||
|
`tf.contrib.tfprof.tfprof_logger.write_op_log(...)`,
|
||||||
|
the tool adds all `Variables` inside `tf.trainable_variables()` to
|
||||||
|
`_trainable_variables`.
|
||||||
|
|
||||||
|
12) Run tfprof in one-shot mode and dump result to file.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Printed to stdout if --dump_to_file is not set.
|
||||||
|
tfprof scope --graph_path=graph.pbtxt \
|
||||||
|
--max_depth=3 \
|
||||||
|
--dump_to_file="/tmp/dump"
|
||||||
|
Reading Files...
|
||||||
|
Parsing GraphDef...
|
||||||
|
Preparing Views...
|
||||||
|
|
||||||
|
cat /tmp/dump
|
||||||
|
_TFProfRoot (--/930.58k params)
|
||||||
|
global_step (0/0 params)
|
||||||
|
pool_logit/DW (64x10, 640/1.28k params)
|
||||||
|
pool_logit/biases (10, 10/20 params)
|
||||||
|
```
|
||||||
|
|
||||||
|
13) Analyze how balanced Variables are on parameter servers.
|
||||||
|
|
||||||
|
In this tutorial, I'm going to use a seq2seq model, which is split
|
||||||
|
on several gpus at workers and several parameter servers.
|
||||||
|
|
||||||
|
In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
|
||||||
|
gpu0, they share an op_type called 'gpu0'.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
bazel-bin/tensorflow/tools/tfprof/tfprof \
|
||||||
|
--graph_path ~/tfprof/textsum/graph.pbtxt \
|
||||||
|
--run_meta_path ~/tfprof/textsum/run_meta
|
||||||
|
|
||||||
|
# Looks like ps task 1 is holding more than twice as many parameters as task 0.
|
||||||
|
tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
|
||||||
|
_TFProfRoot (--/25.81m params)
|
||||||
|
tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
|
||||||
|
_TFProfRoot (--/58.84m params)
|
||||||
|
```
|
||||||
|
|
||||||
|
### CLI Input Files
|
||||||
|
|
||||||
|
tfprof command line interface (CLI) loads dumped files from a tensorflow model.
|
||||||
|
It converts them into in-memory data structures. To use it, users need to specify
|
||||||
|
the locations of the dumped files. The following are the dumped files loaded
|
||||||
|
by tfprof:
|
||||||
|
|
||||||
|
<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
|
||||||
|
representation of the model. For example, graph.pbtxt written by tf.Supervisor
|
||||||
|
is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
|
||||||
|
using tf.Graph.as_graph_def() or other API.
|
||||||
|
|
||||||
|
<b>--run_meta_path:</b> tensorflow::RunMetadata.
|
||||||
|
Used to get the memory and time consumption of
|
||||||
|
each op of the model. Users need to enable it. For example, the following code
|
||||||
|
snippet writes a RunMetadata file:
|
||||||
|
|
||||||
|
```python
|
||||||
|
run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
|
||||||
|
run_metadata = config_pb2.RunMetadata()
|
||||||
|
# Once in a while, call it to get the RunMeta.
|
||||||
|
_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
|
||||||
|
with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
|
||||||
|
f.write(run_metadata.SerializeToString())
|
||||||
|
```
|
||||||
|
|
||||||
|
<b>--op_log_path:</b>
|
||||||
|
tensorflow::tfprof::OpLog. A proto used to provide extra op information
|
||||||
|
for ops. By giving a group of ops a type name, users can easily aggregate the
|
||||||
|
statistics for those ops without accidentally missing or including extra ops.
|
||||||
|
tfprof exposes the following Python API to add op information and logging.
|
||||||
|
|
||||||
|
```python
|
||||||
|
tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
|
||||||
|
```
|
||||||
|
|
||||||
|
<b>--checkpoint_path:</b>
|
||||||
|
TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
|
||||||
|
provides checkpointed tensors' values.
|
||||||
|
|
||||||
|
|
||||||
|
## Design
|
||||||
|
|
||||||
|
|
||||||
|
### In-memory representation
|
||||||
|
|
||||||
|
<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
|
||||||
|
similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
|
||||||
|
For example op1 with name “name1/name2” is a child of op2 with name “name1”.
|
||||||
|
|
||||||
|
<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
|
||||||
|
a graph structure. The graph is a “directed acyclic graph” (hopefully), with
|
||||||
|
direction from “output to input”. The direction is designed this way so that users
|
||||||
|
can trace from “result” to its “sources”.
|
||||||
|
|
||||||
|
### Command line options
|
||||||
|
|
||||||
|
tfprof’s major goals are to measure system performance and quickly analyze
|
||||||
|
model architectures. Hence, its commands and options should allow users to achieve
|
||||||
|
these 2 goals easily.
|
||||||
|
|
||||||
|
<b>graph:</b> It is expected that users will mostly use graph representation to
|
||||||
|
debug system performance. Hence, tfprof supports graph command, which pulls the
|
||||||
|
graph in-memory representation described above.
|
||||||
|
|
||||||
|
<b>scope:</b> It is expected that some users might want to explore their model
|
||||||
|
statistics using the name scope information they defined in the Python codes.
|
||||||
|
Hence, tfprof supports “scope” command, which pulls the tree in-memory
|
||||||
|
representation.
|
||||||
|
|
||||||
|
<b>set:</b> It is used to store the options so that user doesn’t need to
|
||||||
|
re-type the same option again and again in the follow up command line. Note that
|
||||||
|
tfprof has traditional terminal’s history and auto-complete support.
|
||||||
|
|
||||||
|
<b>help:</b> print help information.
|
||||||
|
|
||||||
|
<b>Options:</b> Run “tfprof help” to get detailed explanations.
|
||||||
|
|
||||||
|
```python
|
||||||
|
"-max_depth",
|
||||||
|
"-min_bytes",
|
||||||
|
"-min_micros",
|
||||||
|
"-min_params",
|
||||||
|
"-min_float_ops",
|
||||||
|
"-order_by",
|
||||||
|
"-account_type_regexes",
|
||||||
|
"-start_name_regexes",
|
||||||
|
"-trim_name_regexes",
|
||||||
|
"-show_name_regexes",
|
||||||
|
"-hide_name_regexes",
|
||||||
|
"-account_displayed_op_only",
|
||||||
|
"-select",
|
||||||
|
"-viz", # Only supported for graph command.
|
||||||
|
"-dump_to_file",
|
||||||
|
```
|
||||||
|
|
||||||
|
A key design is that stats are aggregated from descendants up to ancestors.
|
||||||
|
`-account_type_regexes` is used to decide which ops' stats are accounted. It makes the
|
||||||
|
decision based on op type. Usually set it to `.*` if no extra type information
|
||||||
|
is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
|
||||||
|
`-min/max` and `-show/hide/trim/start` options are only used to optionally
|
||||||
|
display or hide ops based on ops’ name and stats. However, they don’t prevent
|
||||||
|
tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
|
||||||
|
aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
|
||||||
|
an option to break this rule. When it is set, only displayed ops are accounted.
|
||||||
|
|
||||||
|
Regexes are all comma-separated, for example `-show_name_regexes`
|
||||||
|
`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
|
||||||
|
is not expected to show up in op names.
|
||||||
|
|
||||||
|
`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
|
||||||
|
(notice the indent printed) are sorted according to order_by.
|
||||||
|
|
||||||
|
## Future Work
|
||||||
|
|
||||||
|
* Load SummaryWriter event logs so that it can show the latest summary value.
|
||||||
|
|
||||||
|
* Better sorting and aggregation of outputs. Easier comprehension.
|
||||||
|
|
||||||
|
* Currently, shape information is based on `graph.pbtxt`. When the shape
|
||||||
|
information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
|
||||||
|
and `Checkpoint` to complete shape information.
|
@ -1,5 +1,9 @@
|
|||||||
package(
|
package(
|
||||||
default_visibility = ["//tensorflow:__subpackages__"],
|
default_visibility = ["//tensorflow:__subpackages__"],
|
||||||
|
features = [
|
||||||
|
"-layering_check",
|
||||||
|
"-parse_headers",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
licenses(["notice"]) # Apache 2.0
|
licenses(["notice"]) # Apache 2.0
|
||||||
@ -18,10 +22,10 @@ cc_library(
|
|||||||
":tfprof_show",
|
":tfprof_show",
|
||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:regexp_internal",
|
"//tensorflow/core:regexp_internal",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -49,11 +53,11 @@ cc_library(
|
|||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:c_api",
|
"//tensorflow/c:c_api",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:framework",
|
"//tensorflow/core:framework",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:regexp_internal",
|
"//tensorflow/core:regexp_internal",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -69,10 +73,10 @@ cc_library(
|
|||||||
":tfprof_tensor",
|
":tfprof_tensor",
|
||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:regexp_internal",
|
"//tensorflow/core:regexp_internal",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -87,10 +91,10 @@ cc_library(
|
|||||||
":tfprof_tensor",
|
":tfprof_tensor",
|
||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:regexp_internal",
|
"//tensorflow/core:regexp_internal",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -109,12 +113,12 @@ tf_cc_test(
|
|||||||
":tfprof_stats",
|
":tfprof_stats",
|
||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:test",
|
"//tensorflow/core:test",
|
||||||
"//tensorflow/core:test_main",
|
"//tensorflow/core:test_main",
|
||||||
"//tensorflow/core:testlib",
|
"//tensorflow/core:testlib",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -138,6 +142,18 @@ cc_library(
|
|||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/core:framework_headers_lib",
|
"//tensorflow/core:framework_headers_lib",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "print_model_analysis_hdr",
|
||||||
|
hdrs = [
|
||||||
|
"print_model_analysis.h",
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
"//tensorflow/core:framework_lite",
|
||||||
|
"//tensorflow/core:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -149,10 +165,11 @@ cc_library(
|
|||||||
":tfprof_options",
|
":tfprof_options",
|
||||||
":tfprof_stats",
|
":tfprof_stats",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
|
alwayslink = 1,
|
||||||
)
|
)
|
||||||
|
|
||||||
tf_cc_test(
|
tf_cc_test(
|
||||||
@ -170,12 +187,12 @@ tf_cc_test(
|
|||||||
":tfprof_stats",
|
":tfprof_stats",
|
||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:test",
|
"//tensorflow/core:test",
|
||||||
"//tensorflow/core:test_main",
|
"//tensorflow/core:test_main",
|
||||||
"//tensorflow/core:testlib",
|
"//tensorflow/core:testlib",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -185,9 +202,9 @@ cc_library(
|
|||||||
hdrs = ["tfprof_tensor.h"],
|
hdrs = ["tfprof_tensor.h"],
|
||||||
copts = ["-Wno-sign-compare"],
|
copts = ["-Wno-sign-compare"],
|
||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:framework",
|
"//tensorflow/core:framework",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -203,12 +220,12 @@ tf_cc_test(
|
|||||||
":tfprof_stats",
|
":tfprof_stats",
|
||||||
":tfprof_utils",
|
":tfprof_utils",
|
||||||
"//tensorflow/c:checkpoint_reader",
|
"//tensorflow/c:checkpoint_reader",
|
||||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:protos_all_cc",
|
"//tensorflow/core:protos_all_cc",
|
||||||
"//tensorflow/core:test",
|
"//tensorflow/core:test",
|
||||||
"//tensorflow/core:test_main",
|
"//tensorflow/core:test_main",
|
||||||
"//tensorflow/core:testlib",
|
"//tensorflow/core:testlib",
|
||||||
|
"//tensorflow/tools/tfprof:protos_all_cc",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
@ -13,20 +13,26 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
|
#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
||||||
const string* op_log, const string* command,
|
const string* op_log, const string* command,
|
||||||
const Options* options) {
|
const string* options) {
|
||||||
CHECK(graph) << "graph mustn't be null";
|
CHECK(graph) << "graph mustn't be null";
|
||||||
CHECK(command) << "command mustn't be null";
|
CHECK(command) << "command mustn't be null";
|
||||||
CHECK(options) << "options mustn't be null";
|
CHECK(options) << "options mustn't be null";
|
||||||
@ -50,16 +56,18 @@ string PrintModelAnalysis(const string* graph, const string* run_meta,
|
|||||||
TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
|
TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
|
||||||
std::move(op_log_ptr), std::move(ckpt_reader));
|
std::move(op_log_ptr), std::move(ckpt_reader));
|
||||||
|
|
||||||
if (options->dump_to_file.empty()) {
|
Options opts = Options::FromProtoStr(*options);
|
||||||
|
|
||||||
|
if (opts.dump_to_file.empty()) {
|
||||||
printf("\n=========================Options=============================\n");
|
printf("\n=========================Options=============================\n");
|
||||||
printf("%s", options->ToString().c_str());
|
printf("%s", opts.ToString().c_str());
|
||||||
printf("\n==================Model Analysis Report======================\n");
|
printf("\n==================Model Analysis Report======================\n");
|
||||||
TFProfNode root(tf_stats.PrintGraph(*command, *options));
|
TFProfNode root(tf_stats.PrintGraph(*command, opts));
|
||||||
printf("\n======================End of Report==========================\n");
|
printf("\n======================End of Report==========================\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
return root.SerializeAsString();
|
return root.SerializeAsString();
|
||||||
}
|
}
|
||||||
return tf_stats.PrintGraph(*command, *options).SerializeAsString();
|
return tf_stats.PrintGraph(*command, opts).SerializeAsString();
|
||||||
}
|
}
|
||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
#include "tensorflow/core/framework/types.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
|
class Options;
|
||||||
// ***This API is only for swig.***
|
// ***This API is only for swig. Don't use it directly!***
|
||||||
//
|
//
|
||||||
// Interface defined for Python API swig. Calls the tfprof core API.
|
// Interface defined for Python API swig. Calls the tfprof core API.
|
||||||
// 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
|
// 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
|
||||||
@ -37,9 +32,9 @@ namespace tfprof {
|
|||||||
// if not available.
|
// if not available.
|
||||||
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
||||||
const string* op_log, const string* command,
|
const string* op_log, const string* command,
|
||||||
const Options* options);
|
const string* options);
|
||||||
|
|
||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -34,4 +34,4 @@ static const char* const kCkptVarType = "_checkpoint_variables";
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
|
||||||
#include "tensorflow/core/lib/strings/strcat.h"
|
#include "tensorflow/core/lib/strings/strcat.h"
|
||||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||||
#include "tensorflow/core/platform/regexp.h"
|
#include "tensorflow/core/platform/regexp.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
@ -16,8 +16,8 @@ limitations under the License.
|
|||||||
// Build a graph structure based on op inputs/outputs. The graph is a directed
|
// Build a graph structure based on op inputs/outputs. The graph is a directed
|
||||||
// acyclic graph pointing *from outputs to inputs*.
|
// acyclic graph pointing *from outputs to inputs*.
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||||
|
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -27,13 +27,13 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -113,4 +113,4 @@ class TFGraph : public TFShow {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||||
|
|
||||||
#include "tensorflow/core/framework/allocation_description.pb.h"
|
#include "tensorflow/core/framework/allocation_description.pb.h"
|
||||||
#include "tensorflow/core/framework/tensor_description.pb.h"
|
#include "tensorflow/core/framework/tensor_description.pb.h"
|
@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/core/framework/allocation_description.pb.h"
|
#include "tensorflow/core/framework/allocation_description.pb.h"
|
||||||
#include "tensorflow/core/framework/attr_value.pb.h"
|
#include "tensorflow/core/framework/attr_value.pb.h"
|
||||||
#include "tensorflow/core/framework/node_def.pb.h"
|
#include "tensorflow/core/framework/node_def.pb.h"
|
||||||
@ -29,6 +28,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/framework/tensor_description.pb.h"
|
#include "tensorflow/core/framework/tensor_description.pb.h"
|
||||||
#include "tensorflow/core/framework/tensor_shape.pb.h"
|
#include "tensorflow/core/framework/tensor_shape.pb.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -103,4 +103,4 @@ class TFNode {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
@ -13,13 +13,41 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
|
||||||
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
|
#include "tensorflow/core/lib/strings/str_util.h"
|
||||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_options.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
|
|
||||||
|
// Builds an Options object from `opts_proto_str`, which is parsed as a
// serialized OptionsProto. Each Options constructor argument is read from the
// corresponding accessor of the parsed proto, in the order listed below.
Options Options::FromProtoStr(const string& opts_proto_str) {
|
||||||
|
OptionsProto opts_pb;
|
||||||
|
// Parsing failure is not reported to the caller; it is handed to CHECK.
// NOTE(review): CHECK presumably aborts the process on failure - confirm.
CHECK(opts_pb.ParseFromString(opts_proto_str));
|
||||||
|
Options opts(
|
||||||
|
opts_pb.max_depth(), opts_pb.min_bytes(), opts_pb.min_micros(),
|
||||||
|
opts_pb.min_params(), opts_pb.min_float_ops(),
|
||||||
|
// Each multi-valued proto field is copied into a std::vector<string> via its
// begin()/end() iterators.
std::vector<string>(opts_pb.device_regexes().begin(),
|
||||||
|
opts_pb.device_regexes().end()),
|
||||||
|
opts_pb.order_by(),
|
||||||
|
std::vector<string>(opts_pb.account_type_regexes().begin(),
|
||||||
|
opts_pb.account_type_regexes().end()),
|
||||||
|
std::vector<string>(opts_pb.start_name_regexes().begin(),
|
||||||
|
opts_pb.start_name_regexes().end()),
|
||||||
|
std::vector<string>(opts_pb.trim_name_regexes().begin(),
|
||||||
|
opts_pb.trim_name_regexes().end()),
|
||||||
|
std::vector<string>(opts_pb.show_name_regexes().begin(),
|
||||||
|
opts_pb.show_name_regexes().end()),
|
||||||
|
std::vector<string>(opts_pb.hide_name_regexes().begin(),
|
||||||
|
opts_pb.hide_name_regexes().end()),
|
||||||
|
opts_pb.account_displayed_op_only(),
|
||||||
|
std::vector<string>(opts_pb.select().begin(), opts_pb.select().end()),
|
||||||
|
opts_pb.viz(), opts_pb.dump_to_file());
|
||||||
|
return opts;
|
||||||
|
}
|
||||||
|
|
||||||
string Options::ToString() const {
|
string Options::ToString() const {
|
||||||
const string s = strings::Printf(
|
const string s = strings::Printf(
|
||||||
"%-28s%d\n"
|
"%-28s%d\n"
|
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -22,8 +22,6 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/core/framework/types.h"
|
#include "tensorflow/core/framework/types.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
|
||||||
#include "tensorflow/core/lib/strings/str_util.h"
|
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -62,6 +60,8 @@ static const char* const kCmds[] = {
|
|||||||
|
|
||||||
struct Options {
|
struct Options {
|
||||||
public:
|
public:
|
||||||
|
static Options FromProtoStr(const string& opts_proto_str);
|
||||||
|
|
||||||
virtual ~Options() {}
|
virtual ~Options() {}
|
||||||
Options(int max_depth, tensorflow::int64 min_bytes,
|
Options(int max_depth, tensorflow::int64 min_bytes,
|
||||||
tensorflow::int64 min_micros, tensorflow::int64 min_params,
|
tensorflow::int64 min_micros, tensorflow::int64 min_params,
|
||||||
@ -116,4 +116,4 @@ struct Options {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
@ -13,17 +13,17 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "tensorflow/c/c_api.h"
|
#include "tensorflow/c/c_api.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
|
||||||
#include "tensorflow/core/framework/tensor.h"
|
#include "tensorflow/core/framework/tensor.h"
|
||||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||||
#include "tensorflow/core/platform/regexp.h"
|
#include "tensorflow/core/platform/regexp.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
@ -17,8 +17,8 @@ limitations under the License.
|
|||||||
// For example, 'name1/name2' is a child of 'name1'.
|
// For example, 'name1/name2' is a child of 'name1'.
|
||||||
// Stats are aggregated from descendants to ancestors.
|
// Stats are aggregated from descendants to ancestors.
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -26,13 +26,13 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -85,4 +85,4 @@ class TFScope : public TFShow {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <set>
|
#include <set>
|
@ -15,23 +15,23 @@ limitations under the License.
|
|||||||
|
|
||||||
// Parent class and utilities for tfprof_graph and tfprof_scope.
|
// Parent class and utilities for tfprof_graph and tfprof_scope.
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -124,4 +124,4 @@ class TFShow {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
@ -13,30 +13,30 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||||
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/io/path.h"
|
#include "tensorflow/core/lib/io/path.h"
|
||||||
#include "tensorflow/core/platform/env.h"
|
#include "tensorflow/core/platform/env.h"
|
||||||
#include "tensorflow/core/platform/test.h"
|
#include "tensorflow/core/platform/test.h"
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
class TFProfShowTest : public ::testing::Test {
|
class TFProfShowTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
TFProfShowTest() {
|
TFProfShowTest() {
|
||||||
string graph_path = io::JoinPath(
|
string graph_path =
|
||||||
testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
"tools/tfprof/internal/testdata/graph.pbtxt");
|
||||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||||
|
|
||||||
@ -44,19 +44,18 @@ class TFProfShowTest : public ::testing::Test {
|
|||||||
new tensorflow::RunMetadata());
|
new tensorflow::RunMetadata());
|
||||||
string run_meta_path =
|
string run_meta_path =
|
||||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
|
"tools/tfprof/internal/testdata/run_meta");
|
||||||
TF_CHECK_OK(
|
TF_CHECK_OK(
|
||||||
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
||||||
|
|
||||||
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
||||||
string op_log_path = io::JoinPath(
|
string op_log_path =
|
||||||
testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
|
"tools/tfprof/internal/testdata/tfprof_log");
|
||||||
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
||||||
|
|
||||||
string ckpt_path =
|
string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
"tools/tfprof/internal/testdata/ckpt");
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
|
||||||
TF_Status* status = TF_NewStatus();
|
TF_Status* status = TF_NewStatus();
|
||||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
new checkpoint::CheckpointReader(ckpt_path, status));
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <utility>
|
#include <utility>
|
@ -20,8 +20,8 @@ limitations under the License.
|
|||||||
// 3. Accept command and options to selectively aggregate stats for analysis
|
// 3. Accept command and options to selectively aggregate stats for analysis
|
||||||
// and print out the results.
|
// and print out the results.
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -29,20 +29,20 @@ limitations under the License.
|
|||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/attr_value.pb.h"
|
#include "tensorflow/core/framework/attr_value.pb.h"
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/framework/step_stats.pb.h"
|
#include "tensorflow/core/framework/step_stats.pb.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -79,4 +79,4 @@ class TFStats {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
@ -13,31 +13,31 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||||
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/io/path.h"
|
#include "tensorflow/core/lib/io/path.h"
|
||||||
#include "tensorflow/core/platform/env.h"
|
#include "tensorflow/core/platform/env.h"
|
||||||
#include "tensorflow/core/platform/protobuf.h"
|
#include "tensorflow/core/platform/protobuf.h"
|
||||||
#include "tensorflow/core/platform/test.h"
|
#include "tensorflow/core/platform/test.h"
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
class TFProfStatsTest : public ::testing::Test {
|
class TFProfStatsTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
TFProfStatsTest() {
|
TFProfStatsTest() {
|
||||||
string graph_path = io::JoinPath(
|
string graph_path =
|
||||||
testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
"tools/tfprof/internal/testdata/graph.pbtxt");
|
||||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||||
|
|
||||||
@ -45,19 +45,18 @@ class TFProfStatsTest : public ::testing::Test {
|
|||||||
new tensorflow::RunMetadata());
|
new tensorflow::RunMetadata());
|
||||||
string run_meta_path =
|
string run_meta_path =
|
||||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
|
"tools/tfprof/internal/testdata/run_meta");
|
||||||
TF_CHECK_OK(
|
TF_CHECK_OK(
|
||||||
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
||||||
|
|
||||||
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
||||||
string op_log_path = io::JoinPath(
|
string op_log_path =
|
||||||
testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
|
"tools/tfprof/internal/testdata/tfprof_log");
|
||||||
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
||||||
|
|
||||||
string ckpt_path =
|
string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
"tools/tfprof/internal/testdata/ckpt");
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
|
||||||
TF_Status* status = TF_NewStatus();
|
TF_Status* status = TF_NewStatus();
|
||||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
new checkpoint::CheckpointReader(ckpt_path, status));
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
@ -19,16 +19,16 @@ limitations under the License.
|
|||||||
// is not supported by TensorFlow CheckPointReader library, though it is
|
// is not supported by TensorFlow CheckPointReader library, though it is
|
||||||
// supported in current code.
|
// supported in current code.
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||||
|
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/tensor.h"
|
#include "tensorflow/core/framework/tensor.h"
|
||||||
#include "tensorflow/core/lib/strings/numbers.h"
|
#include "tensorflow/core/lib/strings/numbers.h"
|
||||||
#include "tensorflow/core/lib/strings/strcat.h"
|
#include "tensorflow/core/lib/strings/strcat.h"
|
||||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -117,4 +117,4 @@ class TFProfTensor {
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
@ -14,34 +14,33 @@ limitations under the License.
|
|||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/io/path.h"
|
#include "tensorflow/core/lib/io/path.h"
|
||||||
#include "tensorflow/core/platform/protobuf.h"
|
#include "tensorflow/core/platform/protobuf.h"
|
||||||
#include "tensorflow/core/platform/test.h"
|
#include "tensorflow/core/platform/test.h"
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
class TFProfTensorTest : public ::testing::Test {
|
class TFProfTensorTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
TFProfTensorTest() {
|
TFProfTensorTest() {
|
||||||
string graph_path = io::JoinPath(
|
string graph_path =
|
||||||
testing::TensorFlowSrcRoot(),
|
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
"tools/tfprof/internal/testdata/graph.pbtxt");
|
||||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||||
|
|
||||||
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
|
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
|
||||||
std::unique_ptr<OpLog> op_log_pb;
|
std::unique_ptr<OpLog> op_log_pb;
|
||||||
|
|
||||||
string ckpt_path =
|
string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
"tools/tfprof/internal/testdata/ckpt");
|
||||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
|
||||||
TF_Status* status = TF_NewStatus();
|
TF_Status* status = TF_NewStatus();
|
||||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
new checkpoint::CheckpointReader(ckpt_path, status));
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
#include "tensorflow/core/platform/env.h"
|
#include "tensorflow/core/platform/env.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tfprof {
|
namespace tfprof {
|
||||||
@ -47,4 +47,4 @@ void PrintHelp();
|
|||||||
} // namespace tfprof
|
} // namespace tfprof
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
@ -24,10 +24,6 @@ limitations under the License.
|
|||||||
#include "linenoise.h"
|
#include "linenoise.h"
|
||||||
#include "tensorflow/c/c_api.h"
|
#include "tensorflow/c/c_api.h"
|
||||||
#include "tensorflow/c/checkpoint_reader.h"
|
#include "tensorflow/c/checkpoint_reader.h"
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
|
||||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/framework/types.h"
|
#include "tensorflow/core/framework/types.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
@ -36,6 +32,10 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/platform/init_main.h"
|
#include "tensorflow/core/platform/init_main.h"
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
#include "tensorflow/core/util/command_line_flags.h"
|
#include "tensorflow/core/util/command_line_flags.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
|
||||||
|
#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
|
||||||
|
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
|
||||||
|
|
||||||
using tensorflow::str_util::Split;
|
using tensorflow::str_util::Split;
|
||||||
|
|
24
tensorflow/tools/tfprof/tfprof_options.proto
Normal file
24
tensorflow/tools/tfprof/tfprof_options.proto
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package tensorflow.tfprof;
|
||||||
|
|
||||||
|
// Refers to tfprof_options.h/cc for documentation.
|
||||||
|
// Only used to pass tfprof options from Python to C++.
|
||||||
|
message OptionsProto {
|
||||||
|
optional int64 max_depth = 1;
|
||||||
|
optional int64 min_bytes = 2;
|
||||||
|
optional int64 min_micros = 3;
|
||||||
|
optional int64 min_params = 4;
|
||||||
|
optional int64 min_float_ops = 5;
|
||||||
|
repeated string device_regexes = 6;
|
||||||
|
optional string order_by = 7;
|
||||||
|
repeated string account_type_regexes = 8;
|
||||||
|
repeated string start_name_regexes = 9;
|
||||||
|
repeated string trim_name_regexes = 10;
|
||||||
|
repeated string show_name_regexes = 11;
|
||||||
|
repeated string hide_name_regexes = 12;
|
||||||
|
optional bool account_displayed_op_only = 13;
|
||||||
|
repeated string select = 14;
|
||||||
|
optional bool viz = 15;
|
||||||
|
optional string dump_to_file = 16;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user