tfprof: "Swiss Army Knife Tool" To Explore Your Model.
Change: 133968335
This commit is contained in:
parent
64a170499d
commit
e4a63b578f
13
linenoise.BUILD
Normal file
13
linenoise.BUILD
Normal file
@ -0,0 +1,13 @@
|
||||
licenses(["notice"]) # 2-clause BSD
|
||||
|
||||
exports_files(["LICENSE"])
|
||||
|
||||
package(
|
||||
default_visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "linenoise",
|
||||
srcs = ["linenoise.c"],
|
||||
hdrs = ["linenoise.h"],
|
||||
)
|
@ -124,6 +124,9 @@ filegroup(
|
||||
"//tensorflow/contrib/tensor_forest/hybrid:all_files",
|
||||
"//tensorflow/contrib/tensorboard:all_files",
|
||||
"//tensorflow/contrib/testing:all_files",
|
||||
"//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:all_files",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
|
||||
"//tensorflow/contrib/training:all_files",
|
||||
"//tensorflow/contrib/util:all_files",
|
||||
"//tensorflow/core:all_files",
|
||||
|
453
tensorflow/contrib/tfprof/README.md
Normal file
453
tensorflow/contrib/tfprof/README.md
Normal file
@ -0,0 +1,453 @@
|
||||
# tfprof: A Profiling Tool for TensorFlow Models
|
||||
|
||||
go/tfprof
|
||||
|
||||
Author: Xin Pan (xpan@google.com, github: panyx0718)
|
||||
|
||||
Consultants: Jon Shlens (shlens@google.com), Pete Warden (petewarden@google.com)
|
||||
|
||||
[TOC]
|
||||
|
||||
## Introduction
|
||||
|
||||
tfprof is a profiling tool for TensorFlow that analyzes model architectures
|
||||
and measures system performance.
|
||||
|
||||
### Major Features
|
||||
|
||||
1. Measure model parameters, float operations, tensor shapes.
|
||||
2. Measure op execution times, requested memory size and device placement.
|
||||
3. Inspect checkpoint tensors' shapes and their values.
|
||||
4. Explore model based on name scope or graph structure.
|
||||
5. Selectively grouping/filtering/accounting/ordering ops.
|
||||
|
||||
### Interfaces
|
||||
|
||||
[CLI Tutorials](#cli-tutorials):
|
||||
It supports interactive mode for exploration and single-shot mode for
|
||||
scripts. Outputs can be dumped to files or printed in terminal.
|
||||
|
||||
Python API Tutorials: Python API is not released yet.
|
||||
|
||||
## CLI Tutorials
|
||||
|
||||
Tutorials are based on a 32-layer ResNet.
|
||||
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
|
||||
|
||||
### Examples
|
||||
|
||||
1) Start `tfprof` command line tool
|
||||
|
||||
```shell
|
||||
# Build the tool.
|
||||
bazel build -c opt tensorflow/contrib/tfprof/...
|
||||
|
||||
# Help information, including detailed 'option' instructions.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
|
||||
#
|
||||
# The following commands will start tfprof interactive mode.
|
||||
#
|
||||
# Profile model shapes and parameters only.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt
|
||||
#
|
||||
# Additionally profile checkpoint statistics and values.
|
||||
# Use '-account_type_regexes _checkpoint_variables' to select
|
||||
# checkpoint tensors.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--checkpoint_path=model.ckpt
|
||||
#
|
||||
# Additionally profile ops requested memory and timing.
|
||||
# See CLI Input Files section on generating run_meta file.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--run_meta_path=run_meta \
|
||||
--checkpoint_path=model.ckpt
|
||||
#
|
||||
# tfprof_log is used to define customized op types and float ops.
|
||||
# Use tfprof_logger.write_op_log() to create tfprof_log.
|
||||
# See 11) in Examples section on generating tfprof_log file.
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path=graph.pbtxt \
|
||||
--run_meta_path=run_meta \
|
||||
--op_log_path=tfprof_log \
|
||||
--checkpoint_path=model.ckpt
|
||||
```
|
||||
Note that `graph.pbtxt` is an ASCII text format.
|
||||
|
||||
2) Press enter to show the default options
|
||||
|
||||
```shell
|
||||
tfprof>
|
||||
tfprof>
|
||||
-max_depth 4
|
||||
-min_bytes 0
|
||||
-min_micros 0
|
||||
-min_params 0
|
||||
-min_float_ops 0
|
||||
-device_regexes .*
|
||||
-order_by name
|
||||
-account_type_regexes Variable
|
||||
-start_name_regexes .*
|
||||
-trim_name_regexes
|
||||
-show_name_regexes .*
|
||||
-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
|
||||
-account_displayed_op_only false
|
||||
# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
|
||||
# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
|
||||
-select params
|
||||
-viz false
|
||||
-dump_to_file
|
||||
```
|
||||
|
||||
3) I want to see the `BatchNorm`'s gamma value in checkpoint.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --checkpoint_path.
|
||||
tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
|
||||
_TFProfRoot ()
|
||||
unit_1_0/shared_activation/init_bn/gamma ()
|
||||
[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
|
||||
unit_1_0/sub2/bn2/gamma ()
|
||||
[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
|
||||
```
|
||||
|
||||
4) I want to see my checkpoint tensors shape and number of parameters.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --checkpoint_path.
|
||||
# Increase -max_depth to see all tensors.
|
||||
tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (0/0 params)
|
||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
pool_logit/biases/Momentum (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/128 params)
|
||||
unit_last/final_bn/gamma (64, 64/128 params)
|
||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||
```
|
||||
|
||||
5) I defined an op named ‘cost’ to calculate the loss. I want to know which ops
|
||||
it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
|
||||
graph dependencies.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --run_meta_path.
|
||||
tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
|
||||
_TFProfRoot (0us/3.61sec)
|
||||
init/init_conv/Conv2D (11.75ms/3.10sec)
|
||||
random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
|
||||
unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
|
||||
unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
|
||||
unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
|
||||
unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
|
||||
```
|
||||
|
||||
6) I want to know the expensive operations during the back propagation.
|
||||
Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the ‘scope’
|
||||
command to explore based on name scope hierarchies.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --run_meta_path.
|
||||
tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
|
||||
_TFProfRoot (0us/2.29sec)
|
||||
gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
|
||||
gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
|
||||
gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
|
||||
gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
|
||||
gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
|
||||
gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
|
||||
...
|
||||
```
|
||||
|
||||
7) Show the number of float operations in the model.
|
||||
Note: float operations calculation depends on
|
||||
1) op.RegisterStatistics. If an op doesn’t
|
||||
have RegisterStatistics defined, its float operations cannot be counted.
|
||||
2) fully defined shape is also necessary in order to calculate flops.
|
||||
float operations number is provided by tensorflow::tfprof::OpLog logged from
|
||||
Python API.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path, --op_log_path.
|
||||
tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
|
||||
_TFProfRoot (0/17.63b flops)
|
||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
|
||||
gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
|
||||
init/init_conv/Conv2D (113.25m/113.25m flops)
|
||||
pool_logit/xw_plus_b (1.28k/165.12k flops)
|
||||
pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
|
||||
unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
|
||||
unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
|
||||
...
|
||||
```
|
||||
|
||||
8) Show the number of parameters of all `tf.trainable_variables()` in the model.
|
||||
|
||||
```shell
|
||||
# Requires --graph_path --op_log_path.
|
||||
# store option for future commands.
|
||||
tfprof> set -account_type_regexes _trainable_variables
|
||||
tfprof> scope -max_depth 4 -select params
|
||||
_TFProfRoot (--/464.15k params)
|
||||
init/init_conv/DW (3x3x3x16, 432/432 params)
|
||||
pool_logit/DW (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/64 params)
|
||||
unit_last/final_bn/gamma (64, 64/64 params)
|
||||
```
|
||||
|
||||
Where does “_trainable_variables” come from? It is from the OpLog file
|
||||
generated by write_op_log() Python API. write_op_log() helps users create some
|
||||
common op types implicitly. Users can define their own op types and log them
|
||||
through the write_op_log() API.
|
||||
|
||||
9) What if I’m lazy and don’t want to define op type? I have given my ops
|
||||
well-defined names in my model’s code, and I want to use names to select a group
|
||||
of ops. Let’s try it!
|
||||
|
||||
```shell
|
||||
tfprof> set -account_type_regexes .*
|
||||
tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
|
||||
_TFProfRoot (0/18.43k params)
|
||||
unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
|
||||
unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
|
||||
```
|
||||
|
||||
The above command allows you to filter ops that match specific names.
|
||||
`-account_displayed_op_only` asks tfprof to only account ops displayed
|
||||
in terminal. Otherwise, tfprof accounts all ops matched by
|
||||
`-account_type_regexes` recursively even if they are hidden due to some
|
||||
options such as -max_depth.
|
||||
|
||||
10) TensorFlow has built-in op types. For example, built-in op type `Variable`
|
||||
seems to include `Variable's` created by your model. However, be careful when
|
||||
depending on it because TensorFlow creates extra `Variable` ops implicitly and
|
||||
the implicitly created ops can have the same prefix as the `Variable's` you
|
||||
defined.
|
||||
|
||||
In the following example, extra `Variables` are created and “/Momentum” is
|
||||
appended to their names. This might cause your “model capacity” calculation
|
||||
to be wrong.
|
||||
|
||||
```shell
|
||||
tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (1/1 params)
|
||||
init/init_conv/DW (3x3x3x16, 432/864 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/DW/Momentum (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
pool_logit/biases/Momentum (10, 10/10 params)
|
||||
unit_last/final_bn/beta (64, 64/128 params)
|
||||
unit_last/final_bn/gamma (64, 64/128 params)
|
||||
unit_last/final_bn/moving_mean (64, 64/64 params)
|
||||
unit_last/final_bn/moving_variance (64, 64/64 params)
|
||||
```
|
||||
|
||||
|
||||
11) An example of defining extra op types for ops using `OpLog`
|
||||
|
||||
First, in Python code, create an `OpLog` proto and add op type
|
||||
information to it:
|
||||
|
||||
```python
|
||||
op_log = tfprof_log_pb2.OpLog()
|
||||
entry = op_log.log_entries.add()
|
||||
entry.name = 'pool_logit/DW'
|
||||
entry.types.append('pool_logit')
|
||||
entry = op_log.log_entries.add()
|
||||
entry.name = 'pool_logit/biases'
|
||||
# Alternatively:
|
||||
# var = tf.get_variable(xxx)
|
||||
# entry.name = var.op.name
|
||||
entry.types.append('pool_logit')
|
||||
```
|
||||
|
||||
Second, call write_op_log to write the OpLog proto.
|
||||
|
||||
```python
|
||||
tfprof_logger.write_op_log(sess.graph, '/tmp/my_op_log_dir', op_log)
|
||||
```
|
||||
|
||||
Third, when starting the tfprof tool, specify
|
||||
"--op_log_path /tmp/my_op_log_dir/tfprof_log"
|
||||
|
||||
```shell
|
||||
tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
|
||||
_TFProfRoot (--/650 params)
|
||||
pool_logit/DW (64x10, 640/640 params)
|
||||
pool_logit/biases (10, 10/10 params)
|
||||
```
|
||||
|
||||
Note that when you call
|
||||
`tfprof_logger.write_op_log(...)`, the tool adds all `Variables` inside
|
||||
`tf.trainable_variables()` to `_trainable_variables`.
|
||||
|
||||
12) Run tfprof in one-shot mode and dump result to file.
|
||||
|
||||
```shell
|
||||
# Printed to stdout if --dump_to_file is not set.
|
||||
tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt \
|
||||
--max_depth 3 \
|
||||
--dump_to_file "/tmp/dump"
|
||||
Reading Files...
|
||||
Parsing GraphDef...
|
||||
Preparing Views...
|
||||
|
||||
cat /tmp/dump
|
||||
_TFProfRoot (--/930.58k params)
|
||||
global_step (0/0 params)
|
||||
pool_logit/DW (64x10, 640/1.28k params)
|
||||
pool_logit/biases (10, 10/20 params)
|
||||
```
|
||||
|
||||
13) Analyze how balanced Variables are on parameter servers.
|
||||
|
||||
In this tutorial, I'm going to use a seq2seq model, which is split
|
||||
on several gpus at workers and several parameter servers.
|
||||
|
||||
In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
|
||||
gpu0, they share an op_type called 'gpu0'.
|
||||
|
||||
```shell
|
||||
bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
|
||||
--graph_path ~/tfprof/textsum/graph.pbtxt \
|
||||
--run_meta_path ~/tfprof/textsum/run_meta
|
||||
|
||||
# Looks like ps task 1 is holding more than twice as many parameters as task 0.
|
||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
|
||||
_TFProfRoot (--/25.81m params)
|
||||
tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
|
||||
_TFProfRoot (--/58.84m params)
|
||||
```
|
||||
|
||||
### CLI Input Files
|
||||
|
||||
tfprof command line interface (CLI) loads dumped files from a tensorflow model.
|
||||
It converts them into in-memory data structures. To use it, users need to specify
|
||||
the locations of the dumped files. The following are the dumped files loaded
|
||||
by tfprof:
|
||||
|
||||
<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
|
||||
representation of the model. For example, graph.pbtxt written by tf.Supervisor
|
||||
is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
|
||||
using tf.Graph.as_graph_def() or other API.
|
||||
|
||||
<b>--run_meta_path:</b> tensorflow::RunMetadata.
|
||||
Used to get the memory and time consumption of
|
||||
each op of the model. Users need to enable it. For example, the following code
|
||||
snippet writes a RunMetadata file:
|
||||
|
||||
```python
|
||||
run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
|
||||
run_metadata = config_pb2.RunMetadata()
|
||||
# Once in a while, call it to get the RunMeta.
|
||||
_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
|
||||
with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
|
||||
f.write(run_metadata.SerializeToString())
|
||||
```
|
||||
|
||||
<b>--op_log_path:</b>
|
||||
tensorflow::tfprof::OpLog. A proto used to provide extra op information
|
||||
for ops. By giving a group of ops a type name, users can easily aggregate the
|
||||
statistics for those ops without accidentally missing or including extra ops.
|
||||
tfprof exposes the following Python API to add op information and logging.
|
||||
|
||||
```python
|
||||
def write_op_log(graph, log_dir, op_log=None)
|
||||
```
|
||||
|
||||
<b>--checkpoint_path:</b>
|
||||
TensorFlow checkpoint. It defines the _checkpoint_variables op type. It also
|
||||
provides checkpointed tensors' values.
|
||||
|
||||
|
||||
## Design
|
||||
|
||||
|
||||
### In-memory representation
|
||||
|
||||
<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
|
||||
similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
|
||||
For example op1 with name “name1/name2” is a child of op2 with name “name1”.
|
||||
|
||||
<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
|
||||
a graph structure. The graph is a “directed acyclic graph” (hopefully), with
|
||||
direction from “output to input”. The direction is designed this way so that users
|
||||
can trace from “result” to its “sources”.
|
||||
|
||||
### Command line options
|
||||
|
||||
tfprof’s major goals are to measure system performance and quickly analyze
|
||||
model architectures. Hence, its commands and options should allow users to achieve
|
||||
these 2 goals easily.
|
||||
|
||||
<b>graph:</b> It is expected that users will mostly use graph representation to
|
||||
debug system performance. Hence, tfprof supports graph command, which pulls the
|
||||
graph in-memory representation described above.
|
||||
|
||||
<b>scope:</b> It is expected that some users might want to explore their model
|
||||
statistics using the name scope information they defined in the Python code.
|
||||
Hence, tfprof supports “scope” command, which pulls the tree in-memory
|
||||
representation.
|
||||
|
||||
<b>set:</b> It is used to store the options so that users don’t need to
|
||||
re-type the same option again and again in the follow up command line. Note that
|
||||
tfprof has traditional terminal’s history and auto-complete support.
|
||||
|
||||
<b>help:</b> print help information.
|
||||
|
||||
<b>Options:</b> Run “tfprof help” to get detailed explanations.
|
||||
|
||||
```python
|
||||
"-max_depth",
|
||||
"-min_bytes",
|
||||
"-min_micros",
|
||||
"-min_params",
|
||||
"-min_float_ops",
|
||||
"-order_by",
|
||||
"-account_type_regexes",
|
||||
"-start_name_regexes",
|
||||
"-trim_name_regexes",
|
||||
"-show_name_regexes",
|
||||
"-hide_name_regexes",
|
||||
"-account_displayed_op_only",
|
||||
"-select",
|
||||
"-viz", # Only supported for graph command.
|
||||
"-dump_to_file",
|
||||
```
|
||||
|
||||
A key design is that stats are aggregated from descendants up to ancestors.
|
||||
`-account_type_regexes` is used to decide which ops' stats are accounted. It makes
|
||||
the decision based on op type. Usually set it to `.*` if no extra type information
|
||||
is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
|
||||
`-min/max` and `-show/hide/trim/start` options are only used to optionally
|
||||
display or hide ops based on ops’ name and stats. However, they don’t prevent
|
||||
tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
|
||||
aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
|
||||
an option to break this rule. When it is set, only displayed ops are accounted.
|
||||
|
||||
Regexes are all comma-separated, for example `-show_name_regexes`
|
||||
`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
|
||||
is not expected to show up in op names.
|
||||
|
||||
`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
|
||||
(notice the indent printed) are sorted according to order_by.
|
||||
|
||||
## Future Work
|
||||
|
||||
* Load SummaryWriter event logs so that it can show the latest summary value.
|
||||
|
||||
* Better sorting and aggregation of outputs. Easier comprehension.
|
||||
|
||||
* Currently, shape information is based on `graph.pbtxt`. When the shape
|
||||
information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
|
||||
and `Checkpoint` to complete shape information.
|
31
tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
Normal file
31
tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
Normal file
@ -0,0 +1,31 @@
|
||||
package(
|
||||
default_visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
py_library(
|
||||
name = "tfprof_logger",
|
||||
srcs = ["tfprof_logger.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
|
||||
"//tensorflow/python:framework_for_generated_wrappers",
|
||||
],
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Google-internal targets. These must be at the end for syncrepo.
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
["**/*"],
|
||||
exclude = [
|
||||
"**/METADATA",
|
||||
"**/OWNERS",
|
||||
],
|
||||
),
|
||||
visibility = ["//tensorflow:__subpackages__"],
|
||||
)
|
114
tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
Normal file
114
tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
Normal file
@ -0,0 +1,114 @@
|
||||
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Logging tensorflow::tfprof::OpLog.
|
||||
|
||||
OpLog is used to add extra model information for offline analysis by tfprof.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_log_pb2
|
||||
from tensorflow.python.framework import ops
|
||||
|
||||
TRAINABLE_VARIABLES = '_trainable_variables'
|
||||
REGISTERED_FLOP_STATS = 'flops'
|
||||
|
||||
|
||||
def _get_logged_ops(graph):
  """Build the default OpLogEntry records tfprof derives from a Graph.

  Two kinds of information are collected:
    * a 'flops' count for every node whose registered statistics yield a
      non-zero value, and
    * the '_trainable_variables' op type for the op behind every variable in
      the graph's TRAINABLE_VARIABLES collection.

  Args:
    graph: tf.Graph.
  Returns:
    entries_by_name: dict mapping from op_name to OpLogEntry.
  """
  entries_by_name = {}

  # Pass 1: record float operation counts for nodes that expose them.
  for node_def in graph.as_graph_def().node:
    try:
      flop_stats = ops.get_stats_for_node_def(
          graph, node_def, REGISTERED_FLOP_STATS)
    except ValueError:
      # Raised when the shape is incomplete; there is nothing to log.
      continue

    if not flop_stats or not flop_stats.value:
      continue
    if node_def.name in entries_by_name:
      continue

    flop_entry = tfprof_log_pb2.OpLogEntry()
    flop_entry.name = node_def.name
    flop_entry.float_ops = flop_stats.value
    entries_by_name[flop_entry.name] = flop_entry

  # Pass 2: tag trainable variables, creating an entry when pass 1 made none.
  for variable in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
    var_entry = entries_by_name.get(variable.op.name)
    if var_entry is None:
      var_entry = tfprof_log_pb2.OpLogEntry()
      var_entry.name = variable.op.name
      entries_by_name[var_entry.name] = var_entry
    var_entry.types.append(TRAINABLE_VARIABLES)

  return entries_by_name
|
||||
|
||||
|
||||
def _merge_default_with_oplog(graph, op_log=None):
  """Merge the tfprof default extra info with caller's op_log.

  The defaults come from _get_logged_ops(graph). When the caller supplies an
  op_log, its entries take precedence: default op types are appended to the
  caller's entry of the same name, and the default float_ops is only used when
  the caller's entry has float_ops == 0. Note that the caller's entries are
  updated in place before being copied into the returned proto.

  Args:
    graph: tf.Graph.
    op_log: OpLog proto.
  Returns:
    tmp_op_log: Merged OpLog proto.
  """
  tmp_op_log = tfprof_log_pb2.OpLog()
  logged_ops = _get_logged_ops(graph)
  if not op_log:
    tmp_op_log.log_entries.extend(logged_ops.values())
  else:
    all_ops = dict()
    for entry in op_log.log_entries:
      all_ops[entry.name] = entry
    # dict.items() exists on both Python 2 and Python 3; dict.iteritems() is
    # Python 2 only and would raise AttributeError under Python 3.
    for op_name, entry in logged_ops.items():
      if op_name in all_ops:
        all_ops[op_name].types.extend(entry.types)
        # Keep a caller-provided float_ops; only fill it in when unset.
        if entry.float_ops > 0 and all_ops[op_name].float_ops == 0:
          all_ops[op_name].float_ops = entry.float_ops
      else:
        all_ops[op_name] = entry
    tmp_op_log.log_entries.extend(all_ops.values())
  return tmp_op_log
|
||||
|
||||
|
||||
def write_op_log(graph, log_dir, op_log=None):
  """Write 'op_log', merged with tfprof's default model info, to log_dir.

  On top of the entries provided by the caller, the written log:
    * assigns ops in tf.trainable_variables() an op type called
      '_trainable_variables', and
    * carries 'flops' statistics for ops with op.RegisterStatistics()
      defined.

  The serialized proto is written to a file named 'tfprof_log' in log_dir.

  Args:
    graph: tf.Graph.
    log_dir: directory to write the log file.
    op_log: OpLog proto.
  """
  merged_log = _merge_default_with_oplog(graph, op_log)
  log_path = os.path.join(log_dir, 'tfprof_log')

  with tf.gfile.Open(log_path, 'w') as log_file:
    log_file.write(merged_log.SerializeToString())
|
52
tensorflow/contrib/tfprof/tools/tfprof/BUILD
Normal file
52
tensorflow/contrib/tfprof/tools/tfprof/BUILD
Normal file
@ -0,0 +1,52 @@
|
||||
package(
|
||||
default_visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Google-internal targets. These must be at the end for syncrepo.
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
["**/*"],
|
||||
exclude = [
|
||||
"**/METADATA",
|
||||
"**/OWNERS",
|
||||
],
|
||||
),
|
||||
visibility = ["//tensorflow:__subpackages__"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "tfprof",
|
||||
srcs = ["tfprof_main.cc"],
|
||||
deps = [
|
||||
":protos_all_cc",
|
||||
"//tensorflow/c:c_api",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
"//tensorflow/core:framework_internal",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"@linenoise//:linenoise",
|
||||
],
|
||||
)
|
||||
|
||||
load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
|
||||
|
||||
tf_proto_library(
|
||||
name = "protos_all",
|
||||
srcs = glob(
|
||||
["**/*.proto"],
|
||||
),
|
||||
cc_api_version = 2,
|
||||
cc_libs = ["//tensorflow/core:protos_all_cc"],
|
||||
go_api_version = 2,
|
||||
java_api_version = 2,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
227
tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
Normal file
227
tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
Normal file
@ -0,0 +1,227 @@
|
||||
package(
|
||||
default_visibility = ["//tensorflow:__subpackages__"],
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_stats",
|
||||
srcs = ["tfprof_stats.cc"],
|
||||
hdrs = ["tfprof_stats.h"],
|
||||
deps = [
|
||||
":tfprof_graph",
|
||||
":tfprof_node",
|
||||
":tfprof_options",
|
||||
":tfprof_scope",
|
||||
":tfprof_show",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_node",
|
||||
srcs = ["tfprof_node.cc"],
|
||||
hdrs = ["tfprof_node.h"],
|
||||
deps = [
|
||||
":tfprof_options",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_scope",
|
||||
srcs = ["tfprof_scope.cc"],
|
||||
hdrs = ["tfprof_scope.h"],
|
||||
deps = [
|
||||
":tfprof_constants",
|
||||
":tfprof_node",
|
||||
":tfprof_options",
|
||||
":tfprof_show",
|
||||
":tfprof_tensor",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:c_api",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_graph",
|
||||
srcs = ["tfprof_graph.cc"],
|
||||
hdrs = ["tfprof_graph.h"],
|
||||
deps = [
|
||||
":tfprof_constants",
|
||||
":tfprof_node",
|
||||
":tfprof_options",
|
||||
":tfprof_show",
|
||||
":tfprof_tensor",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_show",
|
||||
srcs = ["tfprof_show.cc"],
|
||||
hdrs = ["tfprof_show.h"],
|
||||
deps = [
|
||||
":tfprof_constants",
|
||||
":tfprof_node",
|
||||
":tfprof_options",
|
||||
":tfprof_tensor",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
name = "tfprof_show_test",
|
||||
srcs = ["tfprof_show_test.cc"],
|
||||
data = [
|
||||
"testdata/ckpt",
|
||||
"testdata/graph.pbtxt",
|
||||
"testdata/run_meta",
|
||||
"testdata/tfprof_log",
|
||||
],
|
||||
deps = [
|
||||
":tfprof_constants",
|
||||
":tfprof_options",
|
||||
":tfprof_stats",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_utils",
|
||||
srcs = ["tfprof_utils.cc"],
|
||||
hdrs = ["tfprof_utils.h"],
|
||||
deps = [
|
||||
":tfprof_options",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_options",
|
||||
srcs = ["tfprof_options.cc"],
|
||||
hdrs = ["tfprof_options.h"],
|
||||
deps = [
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
"//tensorflow/core:lib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "print_model_analysis",
|
||||
srcs = ["print_model_analysis.cc"],
|
||||
hdrs = ["print_model_analysis.h"],
|
||||
deps = [
|
||||
":tfprof_options",
|
||||
":tfprof_stats",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
name = "tfprof_stats_test",
|
||||
srcs = ["tfprof_stats_test.cc"],
|
||||
data = [
|
||||
"testdata/ckpt",
|
||||
"testdata/graph.pbtxt",
|
||||
"testdata/run_meta",
|
||||
"testdata/tfprof_log",
|
||||
],
|
||||
deps = [
|
||||
":tfprof_constants",
|
||||
":tfprof_options",
|
||||
":tfprof_stats",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_tensor",
|
||||
srcs = ["tfprof_tensor.cc"],
|
||||
hdrs = ["tfprof_tensor.h"],
|
||||
deps = [
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
name = "tfprof_tensor_test",
|
||||
srcs = ["tfprof_tensor_test.cc"],
|
||||
data = [
|
||||
"testdata/ckpt",
|
||||
"testdata/graph.pbtxt",
|
||||
],
|
||||
deps = [
|
||||
":tfprof_options",
|
||||
":tfprof_stats",
|
||||
":tfprof_utils",
|
||||
"//tensorflow/c:checkpoint_reader",
|
||||
"//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tfprof_constants",
|
||||
hdrs = ["tfprof_constants.h"],
|
||||
deps = [
|
||||
],
|
||||
)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Google-internal targets. These must be at the end for syncrepo.
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
["**/*"],
|
||||
exclude = [
|
||||
"**/METADATA",
|
||||
"**/OWNERS",
|
||||
],
|
||||
),
|
||||
visibility = ["//tensorflow:__subpackages__"],
|
||||
)
|
@ -0,0 +1,65 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Entry point used by the Python swig wrapper.
//
// Deserializes the inputs, builds a TFStats from them and runs 'command' with
// 'options' over it.
//
// Args:
//   graph:    serialized GraphDef. Required (CHECK-fails when null).
//   run_meta: serialized RunMetadata, or nullptr when not available.
//   op_log:   serialized OpLog, or nullptr when not available.
//   command:  tfprof command handed to TFStats::PrintGraph. Required.
//   options:  display options. Required.
//
// Returns the serialized TFProfNode produced by TFStats::PrintGraph. When
// 'options->dump_to_file' is empty the call is additionally wrapped in
// option/report banners written to stdout.
//
// A string that does not parse as the expected proto is reported on stderr
// and the (possibly partially filled) message is still used, so a malformed
// input no longer fails silently but also does not abort the caller.
string PrintModelAnalysis(const string* graph, const string* run_meta,
                          const string* op_log, const string* command,
                          const Options* options) {
  CHECK(graph) << "graph mustn't be null";
  CHECK(command) << "command mustn't be null";
  CHECK(options) << "options mustn't be null";

  std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
  if (!graph_ptr->ParseFromString(*graph)) {
    fprintf(stderr,
            "tfprof: failed to parse 'graph' as a serialized GraphDef.\n");
  }

  std::unique_ptr<RunMetadata> run_meta_ptr;
  if (run_meta) {
    run_meta_ptr.reset(new RunMetadata());
    if (!run_meta_ptr->ParseFromString(*run_meta)) {
      fprintf(stderr,
              "tfprof: failed to parse 'run_meta' as a serialized "
              "RunMetadata.\n");
    }
  }

  std::unique_ptr<OpLog> op_log_ptr;
  if (op_log) {
    op_log_ptr.reset(new OpLog());
    if (!op_log_ptr->ParseFromString(*op_log)) {
      fprintf(stderr,
              "tfprof: failed to parse 'op_log' as a serialized OpLog.\n");
    }
  }

  // This API takes no checkpoint, so TFStats is given a null reader.
  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader;

  TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
                   std::move(op_log_ptr), std::move(ckpt_reader));

  // The banners are only printed when no dump file was requested.
  const bool banner_to_stdout = options->dump_to_file.empty();
  if (banner_to_stdout) {
    printf("\n=========================Options=============================\n");
    printf("%s", options->ToString().c_str());
    printf("\n==================Model Analysis Report======================\n");
  }

  const TFProfNode& root = tf_stats.PrintGraph(*command, *options);

  if (banner_to_stdout) {
    printf("\n======================End of Report==========================\n");
    fflush(stdout);
  }
  return root.SerializeAsString();
}
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
@ -0,0 +1,45 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
// ***This API is only for swig.***
|
||||
//
|
||||
// Interface defined for Python API swig. Calls the tfprof core API.
|
||||
// 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
|
||||
// OpLog strings, respectively.
|
||||
// 'graph', 'command' and 'options' are required. Others can be nullptr
|
||||
// if not available.
|
||||
string PrintModelAnalysis(const string* graph, const string* run_meta,
|
||||
const string* op_log, const string* command,
|
||||
const Options* options);
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
|
BIN
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
vendored
Normal file
BIN
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
vendored
Normal file
Binary file not shown.
636
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
vendored
Normal file
636
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
vendored
Normal file
@ -0,0 +1,636 @@
|
||||
node {
|
||||
name: "zeros"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_FLOAT
|
||||
tensor_shape {
|
||||
dim {
|
||||
size: 2
|
||||
}
|
||||
dim {
|
||||
size: 6
|
||||
}
|
||||
dim {
|
||||
size: 6
|
||||
}
|
||||
dim {
|
||||
size: 3
|
||||
}
|
||||
}
|
||||
float_val: 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW"
|
||||
op: "Variable"
|
||||
attr {
|
||||
key: "container"
|
||||
value {
|
||||
s: ""
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "shape"
|
||||
value {
|
||||
shape {
|
||||
dim {
|
||||
size: 3
|
||||
}
|
||||
dim {
|
||||
size: 3
|
||||
}
|
||||
dim {
|
||||
size: 3
|
||||
}
|
||||
dim {
|
||||
size: 6
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "shared_name"
|
||||
value {
|
||||
s: ""
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Initializer/random_normal/shape"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_INT32
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_INT32
|
||||
tensor_shape {
|
||||
dim {
|
||||
size: 4
|
||||
}
|
||||
}
|
||||
tensor_content: "\003\000\000\000\003\000\000\000\003\000\000\000\006\000\000\000"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Initializer/random_normal/mean"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_FLOAT
|
||||
tensor_shape {
|
||||
}
|
||||
float_val: 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Initializer/random_normal/stddev"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_FLOAT
|
||||
tensor_shape {
|
||||
}
|
||||
float_val: 0.0010000000475
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Initializer/random_normal/RandomStandardNormal"
|
||||
op: "RandomStandardNormal"
|
||||
input: "DW/Initializer/random_normal/shape"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_INT32
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "seed"
|
||||
value {
|
||||
i: 87654321
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "seed2"
|
||||
value {
|
||||
i: 5
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Initializer/random_normal/mul"
|
||||
op: "Mul"
|
||||
input: "DW/Initializer/random_normal/RandomStandardNormal"
|
||||
input: "DW/Initializer/random_normal/stddev"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Initializer/random_normal"
|
||||
op: "Add"
|
||||
input: "DW/Initializer/random_normal/mul"
|
||||
input: "DW/Initializer/random_normal/mean"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/Assign"
|
||||
op: "Assign"
|
||||
input: "DW"
|
||||
input: "DW/Initializer/random_normal"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "use_locking"
|
||||
value {
|
||||
b: true
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "validate_shape"
|
||||
value {
|
||||
b: true
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW/read"
|
||||
op: "Identity"
|
||||
input: "DW"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "Conv2D"
|
||||
op: "Conv2D"
|
||||
input: "zeros"
|
||||
input: "DW/read"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "data_format"
|
||||
value {
|
||||
s: "NHWC"
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "padding"
|
||||
value {
|
||||
s: "SAME"
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "strides"
|
||||
value {
|
||||
list {
|
||||
i: 1
|
||||
i: 2
|
||||
i: 2
|
||||
i: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "use_cudnn_on_gpu"
|
||||
value {
|
||||
b: true
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2"
|
||||
op: "Variable"
|
||||
attr {
|
||||
key: "container"
|
||||
value {
|
||||
s: ""
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "shape"
|
||||
value {
|
||||
shape {
|
||||
dim {
|
||||
size: 2
|
||||
}
|
||||
dim {
|
||||
size: 2
|
||||
}
|
||||
dim {
|
||||
size: 6
|
||||
}
|
||||
dim {
|
||||
size: 12
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "shared_name"
|
||||
value {
|
||||
s: ""
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Initializer/random_normal/shape"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_INT32
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_INT32
|
||||
tensor_shape {
|
||||
dim {
|
||||
size: 4
|
||||
}
|
||||
}
|
||||
tensor_content: "\002\000\000\000\002\000\000\000\006\000\000\000\014\000\000\000"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Initializer/random_normal/mean"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_FLOAT
|
||||
tensor_shape {
|
||||
}
|
||||
float_val: 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Initializer/random_normal/stddev"
|
||||
op: "Const"
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "value"
|
||||
value {
|
||||
tensor {
|
||||
dtype: DT_FLOAT
|
||||
tensor_shape {
|
||||
}
|
||||
float_val: 0.0010000000475
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Initializer/random_normal/RandomStandardNormal"
|
||||
op: "RandomStandardNormal"
|
||||
input: "DW2/Initializer/random_normal/shape"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_INT32
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "dtype"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "seed"
|
||||
value {
|
||||
i: 87654321
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "seed2"
|
||||
value {
|
||||
i: 15
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Initializer/random_normal/mul"
|
||||
op: "Mul"
|
||||
input: "DW2/Initializer/random_normal/RandomStandardNormal"
|
||||
input: "DW2/Initializer/random_normal/stddev"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Initializer/random_normal"
|
||||
op: "Add"
|
||||
input: "DW2/Initializer/random_normal/mul"
|
||||
input: "DW2/Initializer/random_normal/mean"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/Assign"
|
||||
op: "Assign"
|
||||
input: "DW2"
|
||||
input: "DW2/Initializer/random_normal"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "use_locking"
|
||||
value {
|
||||
b: true
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "validate_shape"
|
||||
value {
|
||||
b: true
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "DW2/read"
|
||||
op: "Identity"
|
||||
input: "DW2"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "_class"
|
||||
value {
|
||||
list {
|
||||
s: "loc:@DW2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
name: "Conv2D_1"
|
||||
op: "Conv2D"
|
||||
input: "Conv2D"
|
||||
input: "DW2/read"
|
||||
attr {
|
||||
key: "T"
|
||||
value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "data_format"
|
||||
value {
|
||||
s: "NHWC"
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "padding"
|
||||
value {
|
||||
s: "SAME"
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "strides"
|
||||
value {
|
||||
list {
|
||||
i: 1
|
||||
i: 2
|
||||
i: 2
|
||||
i: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
key: "use_cudnn_on_gpu"
|
||||
value {
|
||||
b: true
|
||||
}
|
||||
}
|
||||
}
|
||||
versions {
|
||||
producer: 13
|
||||
}
|
22
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
vendored
Normal file
22
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
|
||||
<EFBFBD>
|
||||
<EFBFBD>
|
||||
%/job:localhost/replica:0/task:0/cpu:0:
|
||||
_SOURCEû¡ˆ§·†Ï (2
|
||||
cpuB_SOURCE = NoOp()H塈§·†Ïa
|
||||
zeros”¢ˆ§·†Ï (2
|
||||
cpu:(&"àcpu0€ýèÉöûBzeros = Const()H<>¢ˆ§·†Ï^
|
||||
DW<10>¢ˆ§·†Ï (2
|
||||
cpu:(&"ˆcpu0à©€ ìûBDW = Variable()H‹¢ˆ§·†Ï`
|
||||
DW2Ÿ¢ˆ§·†Ï (2
|
||||
cpu:(&"€ cpu0 Ÿ€àëûBDW2 = Variable()H‹¢ˆ§·†Ïj
|
||||
DW/read±¢ˆ§·†Ï (2
|
||||
cpu:(&"ˆcpu0à©€ ìûBDW/read = Identity(DW)H¥¢ˆ§·†Ïm
|
||||
DW2/read¸¢ˆ§·†Ï (2
|
||||
cpu:(&"€ cpu0 Ÿ€àëûBDW2/read = Identity(DW2)H§¢ˆ§·†Ïs
|
||||
Conv2D¹¢ˆ§·†Ï P(U2
|
||||
cpu°:(&"°cpu0à«€àìûBConv2D = Conv2D(zeros, DW/read)H¶¢ˆ§·†Ï{
|
||||
Conv2D_1’£ˆ§·†Ï (2
|
||||
cpu€:(&"€cpu0฀àìûB#Conv2D_1 = Conv2D(Conv2D, DW2/read)HŽ£ˆ§·†Ï6
|
||||
_SINK³£ˆ§·†Ï (2
|
||||
cpuB_SINK = NoOp()H£ˆ§·†Ï
|
9
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
vendored
Normal file
9
tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
|
||||
|
||||
Conv2D_1€$
|
||||
|
||||
DW2_trainable_variables
|
||||
|
||||
DW_trainable_variables
|
||||
|
||||
Conv2DÈ-
|
@ -0,0 +1,37 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
// Op name of root of everything. Aggregates all stats.
|
||||
static const char* const kTFProfRoot = "_TFProfRoot";
|
||||
// Op type for nodes that don't represent a physical node in the
|
||||
// TensorFlow model. They only exist as placeholders to aggregate children.
|
||||
// For example, kTFProfRoot belongs to this type.
|
||||
static const char* const kTFGraphParent = "_TFGraphParent";
|
||||
static const char* const kTFScopeParent = "_kTFScopeParent";
|
||||
// Op type for tf.trainable_variables().
|
||||
static const char* const kTrainableVarType = "_trainable_variables";
|
||||
// Op type for tensors in the checkpoint file.
|
||||
static const char* const kCkptVarType = "_checkpoint_variables";
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
|
222
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
Normal file
222
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
Normal file
@ -0,0 +1,222 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Creates a synthetic node called 'name' with op type kTFGraphParent and
// registers it in parent_nodes_ and nodes_map_, replacing any entry already
// stored under that name. The backing NodeDef is appended to node_defs_,
// which keeps it alive. Returns the GraphNode, still owned by nodes_map_.
GraphNode* TFGraph::CreateParentNode(const string& name) {
  std::unique_ptr<NodeDef> def(new NodeDef());
  def->set_name(name);
  def->set_op(kTFGraphParent);
  NodeDef* const raw_def = def.get();
  node_defs_.push_back(std::move(def));

  std::unique_ptr<TFNode>& parent_slot = parent_nodes_[name];
  parent_slot.reset(new TFNode(raw_def));

  std::unique_ptr<GraphNode>& graph_slot = nodes_map_[name];
  graph_slot.reset(new GraphNode(parent_slot.get()));
  return graph_slot.get();
}
|
||||
|
||||
// Wraps 'node' in a GraphNode and stores it in nodes_map_ under the name taken
// from the node's NodeDef, replacing any previous entry with that name.
void TFGraph::AddNode(TFNode* node) {
  const string key = node->node_def()->name();
  std::unique_ptr<GraphNode> wrapped(new GraphNode(node));
  nodes_map_[key] = std::move(wrapped);
}
|
||||
|
||||
void TFGraph::Build() {
|
||||
if (!roots_.empty()) return;
|
||||
|
||||
std::set<string> nonroots;
|
||||
// Filter out the root nodes (node not input of any other node).
|
||||
for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
|
||||
GraphNode* node = it->second.get();
|
||||
const std::map<string, TFNode*>& inputs = node->node->inputs();
|
||||
for (auto inputs_it = inputs.cbegin(); inputs_it != inputs.cend();
|
||||
inputs_it++) {
|
||||
nonroots.insert(inputs_it->first);
|
||||
auto child_it = nodes_map_.find(inputs_it->first);
|
||||
if (child_it != nodes_map_.end()) {
|
||||
node->children.push_back(child_it->second.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
|
||||
if (nonroots.find(it->first) == nonroots.end()) {
|
||||
roots_.push_back(it->second.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the report for 'opts': picks the start nodes, hangs them under a
// fresh kTFProfRoot parent, accumulates stats with Account() and formats the
// tree with PrintGraph(). Returns the first node PrintGraph() reports for the
// synthetic root.
const ShowNode* TFGraph::ShowInternal(const Options& opts) {
  // A lone ".*" start regex selects the graph roots as they are; anything
  // else requires searching for the matching start nodes.
  const bool start_from_roots = opts.start_name_regexes.size() == 1 &&
                                opts.start_name_regexes[0] == ".*";
  std::vector<GraphNode*> start_nodes;
  if (start_from_roots) {
    start_nodes = roots_;
  } else {
    std::set<string> searched;
    start_nodes = SearchRoot(roots_, opts.start_name_regexes, &searched);
  }

  GraphNode* root = CreateParentNode(kTFProfRoot);
  root->children = start_nodes;

  std::map<string, int64> account_visits;
  Account({root}, opts, &account_visits);

  if (opts.viz) {
    printf("Visualizing feature disabled...\n");
  }

  std::set<string> print_visits;
  const std::vector<GraphNode*> shown =
      PrintGraph({root}, opts, 1, 0, 0, &print_visits);
  return shown[0];
}
|
||||
|
||||
std::vector<GraphNode*> TFGraph::SearchRoot(
|
||||
const std::vector<GraphNode*>& roots, const std::vector<string>& regexes,
|
||||
std::set<string>* visited) {
|
||||
std::vector<GraphNode*> res;
|
||||
if (roots.empty()) {
|
||||
return res;
|
||||
}
|
||||
for (GraphNode* root : roots) {
|
||||
if (visited->find(root->name()) != visited->end()) continue;
|
||||
visited->insert(root->name());
|
||||
// If the parent is a start point, don't search its children.
|
||||
// Note that its children can still be added as start node through
|
||||
// another route.
|
||||
bool match_start_node = false;
|
||||
for (const string& regex : regexes) {
|
||||
if (RE2::FullMatch(root->name(), regex)) {
|
||||
res.push_back(root);
|
||||
match_start_node = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (match_start_node) {
|
||||
continue;
|
||||
}
|
||||
std::vector<GraphNode*> nroot =
|
||||
SearchRoot(root->children, regexes, visited);
|
||||
res.insert(res.end(), nroot.begin(), nroot.end());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// Recursively formats the nodes reachable from 'roots' and returns the
// top-most nodes that are displayed.
//
//   depth:      depth passed to ShouldShow(); grows by one per recursion.
//   hidden:     number of consecutive non-displayed ancestors above 'roots'.
//   last_ident: number of spaces in front of a node printed at this level.
//   visits:     names already handled; each node is processed only once.
//
// A displayed node gets 'formatted_str' (its own line, an optional tensor
// value, then the text of its displayed descendants) and has those
// descendants merged into its proto as children. A node that is not displayed
// contributes whatever its descendants display.
std::vector<GraphNode*> TFGraph::PrintGraph(const std::vector<GraphNode*> roots,
                                            const Options& opts, int depth,
                                            int hidden, int last_ident,
                                            std::set<string>* visits) {
  // Both values are the same for every node handled by this call.
  // NOTE(review): kShown[4] looks like the "hidden ops" column -- confirm
  // against the kShown table in tfprof_options.
  const bool report_hidden =
      hidden && opts.select.find(kShown[4]) != opts.select.end();
  const string indent(last_ident, ' ');

  std::vector<GraphNode*> displayed;
  for (GraphNode* node : roots) {
    if (!visits->insert(node->name()).second) continue;

    const bool show = ShouldShow(node, opts, depth);
    int child_hidden = hidden + 1;
    int child_ident = last_ident;
    if (show) {
      node->formatted_str.clear();
      if (opts.account_displayed_op_only) {
        node->ResetTotalStats();
        node->AddSelfToTotalStats();
      }
      child_hidden = 0;
      // The "...hidden N..." line pushes the children two columns further.
      child_ident = last_ident + (report_hidden ? 4 : 2);
    }

    std::vector<GraphNode*> shown_children;
    if (!ShouldTrim(node, opts.trim_name_regexes)) {
      shown_children = PrintGraph(node->children, opts, depth + 1,
                                  child_hidden, child_ident, visits);
    }

    if (!show) {
      // Not displayed itself: pass the displayed descendants up.
      displayed.insert(displayed.end(), shown_children.begin(),
                       shown_children.end());
      continue;
    }

    shown_children = SortNodes(shown_children, opts);
    string children_text;
    for (GraphNode* child : shown_children) {
      children_text += child->formatted_str;
      node->mutable_proto()->add_children()->MergeFrom(child->proto());
      if (opts.account_displayed_op_only) {
        node->AggregateTotalStats(child);
      }
    }

    // The node's line is formatted only after its totals are final.
    if (report_hidden) {
      node->formatted_str =
          strings::Printf("%s...hidden %d...\n", indent.c_str(), hidden);
      node->formatted_str += strings::Printf(" %s%s\n", indent.c_str(),
                                             node->Format(opts).c_str());
    } else {
      node->formatted_str = strings::Printf("%s%s\n", indent.c_str(),
                                            node->Format(opts).c_str());
    }

    // Selecting kShown[5] appends the checkpoint tensor when one is found
    // under the node's name.
    if (opts.select.find(kShown[5]) != opts.select.end()) {
      std::unique_ptr<TFProfTensor> tfprof_tensor;
      if (LookUpCheckPoint(node->name(), &tfprof_tensor)) {
        string value_text;
        tfprof_tensor->Display(&value_text,
                               node->mutable_proto()->mutable_tensor_value());
        node->formatted_str += value_text;
      }
    }

    node->formatted_str += children_text;
    displayed.push_back(node);
  }
  return displayed;
}
|
||||
|
||||
// Recomputes the total stats of every node reachable from 'roots'.
//
// 'visits' counts, per node name, how often the node has been seen: it is set
// to 1 when the depth-first walk reaches the node and incremented each time a
// parent looks at it as a child. Only the first parent to do so (count 2)
// folds the child's totals into its own, so a node shared by several parents
// is accounted once.
void TFGraph::Account(const std::vector<GraphNode*>& roots, const Options& opts,
                      std::map<string, int64>* visits) {
  for (GraphNode* current : roots) {
    // emplace() fails for a name that is already in the map.
    if (!visits->emplace(current->name(), 1).second) continue;

    current->ResetTotalStats();
    // Depth-first: children get their totals before the parent aggregates.
    Account(current->children, opts, visits);

    current->account = ShouldAccount(current, opts);
    if (current->account) {
      current->AddSelfToTotalStats();
    }

    for (GraphNode* child : current->children) {
      int64& times_seen = (*visits)[child->name()];
      times_seen += 1;
      if (times_seen <= 2) {
        current->AggregateTotalStats(child);
      }
    }
  }
}
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
116
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
Normal file
116
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
Normal file
@ -0,0 +1,116 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Build a graph structure based on op inputs/outputs. The graph is a directed
|
||||
// acyclic graph pointing *from outputs to inputs*.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class GraphNode : public ShowNode {
|
||||
public:
|
||||
explicit GraphNode(TFNode* node) : ShowNode(node) {
|
||||
mutable_proto()->set_inputs(node->inputs().size());
|
||||
mutable_proto()->set_total_inputs(0);
|
||||
}
|
||||
|
||||
void AggregateTotalStats(GraphNode* node) {
|
||||
ShowNode::AggregateTotalStats(node);
|
||||
mutable_proto()->set_total_inputs(proto().total_inputs() +
|
||||
node->proto().total_inputs() + 1);
|
||||
}
|
||||
|
||||
void AddSelfToTotalStats() {
|
||||
ShowNode::AddSelfToTotalStats();
|
||||
mutable_proto()->set_total_inputs(proto().total_inputs() +
|
||||
proto().inputs());
|
||||
}
|
||||
|
||||
void ResetTotalStats() {
|
||||
ShowNode::ResetTotalStats();
|
||||
mutable_proto()->set_total_inputs(0);
|
||||
}
|
||||
|
||||
std::vector<GraphNode*> children;
|
||||
};
|
||||
|
||||
// Organize tensorflow ops in a graph structure, pointing from output ops
|
||||
// to input ops.
|
||||
// Graph view of the profiled model: nodes are linked from consumer to the
// nodes they take as inputs, and reports are produced by walking that
// structure from the root nodes.
class TFGraph : public TFShow {
|
||||
public:
|
||||
// 'ckpt_reader' is forwarded to TFShow unchanged.
explicit TFGraph(checkpoint::CheckpointReader* ckpt_reader)
|
||||
: TFShow(ckpt_reader) {}
|
||||
~TFGraph() override {}
|
||||
|
||||
// Registers 'node' under the name found in its NodeDef.
void AddNode(TFNode* node) override;
|
||||
|
||||
// Links the registered nodes to their inputs and collects the root nodes.
// Does nothing once roots have been found.
void Build() override;
|
||||
|
||||
private:
|
||||
// Produces the report for 'opts' and returns its top node.
const ShowNode* ShowInternal(const Options& opts) override;
|
||||
|
||||
// The graph view applies no additional display filter.
bool ShouldShowIfExtra(ShowNode* node, const Options& opts,
|
||||
int depth) override {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates (or replaces) a synthetic kTFGraphParent node called 'name' and
// returns it; the node stays owned by this object.
GraphNode* CreateParentNode(const string& name);
|
||||
|
||||
// Returns the nodes reachable from 'roots' whose name fully matches one of
// 'regexes'; the search does not continue below a matching node.
std::vector<GraphNode*> SearchRoot(const std::vector<GraphNode*>& roots,
|
||||
const std::vector<string>& regexes,
|
||||
std::set<string>* visited);
|
||||
|
||||
// Formats the nodes reachable from 'roots' and returns the top-most
// displayed ones. 'hidden' counts the non-displayed ancestors, 'last_ident'
// is the indentation in spaces.
std::vector<GraphNode*> PrintGraph(const std::vector<GraphNode*> roots,
|
||||
const Options& opts, int depth, int hidden,
|
||||
int last_ident, std::set<string>* visits);
|
||||
|
||||
// NOTE(review): no definition of VisualizeGraph or GenerateGraphDot is
// visible in tfprof_graph.cc, where ShowInternal only prints that the
// visualizing feature is disabled -- confirm whether these two declarations
// are still needed.
void VisualizeGraph(GraphNode* root, const Options& opts);
|
||||
|
||||
std::vector<GraphNode*> GenerateGraphDot(
|
||||
GraphNode* root, GraphNode* last_shown, const Options& opts, int depth,
|
||||
int hidden, std::set<string>* declared_nodes,
|
||||
std::set<string>* declared_edges, TFProfNode* parent);
|
||||
|
||||
// Recomputes total stats below 'roots'; 'visits' counts how often each node
// name has been seen so that shared nodes are aggregated once.
void Account(const std::vector<GraphNode*>& roots, const Options& opts,
|
||||
std::map<string, int64>* visits);
|
||||
|
||||
// Nodes that are not an input of any other node (non-owning).
std::vector<GraphNode*> roots_;
|
||||
// Owns the NodeDefs backing the synthetic parent nodes.
std::vector<std::unique_ptr<NodeDef>> node_defs_;
|
||||
// Owns the TFNodes of the synthetic parent nodes, keyed by name.
std::map<string, std::unique_ptr<TFNode>> parent_nodes_;
|
||||
// Owns every GraphNode, keyed by node name.
std::map<string, std::unique_ptr<GraphNode>> nodes_map_;
|
||||
};
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
|
@ -0,0 +1,47 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
|
||||
#include "tensorflow/core/framework/allocation_description.pb.h"
|
||||
#include "tensorflow/core/framework/tensor_description.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
void TFNode::AddStepStat(const string& device, const NodeExecStats* step_stat) {
|
||||
if (!device.empty()) {
|
||||
// This might override device from GraphDef.
|
||||
device_ = device;
|
||||
}
|
||||
step_stat_ = step_stat;
|
||||
|
||||
op_start_micros_ = step_stat_->all_start_micros();
|
||||
if (step_stat_->op_end_rel_micros() && step_stat_->op_start_rel_micros()) {
|
||||
op_exec_micros_ =
|
||||
step_stat_->op_end_rel_micros() - step_stat_->op_start_rel_micros();
|
||||
}
|
||||
all_spent_micros_ = step_stat_->all_end_rel_micros();
|
||||
|
||||
for (const auto& output : step_stat_->output()) {
|
||||
if (output.has_tensor_description() &&
|
||||
output.tensor_description().has_allocation_description()) {
|
||||
requested_bytes_ += output.tensor_description()
|
||||
.allocation_description()
|
||||
.requested_bytes();
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
106
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
Normal file
106
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
Normal file
@ -0,0 +1,106 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/core/framework/allocation_description.pb.h"
|
||||
#include "tensorflow/core/framework/attr_value.pb.h"
|
||||
#include "tensorflow/core/framework/node_def.pb.h"
|
||||
#include "tensorflow/core/framework/step_stats.pb.h"
|
||||
#include "tensorflow/core/framework/tensor_description.pb.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
class TFNode {
|
||||
public:
|
||||
TFNode(const NodeDef* node)
|
||||
: node_(node),
|
||||
step_stat_(nullptr),
|
||||
op_start_micros_(0),
|
||||
op_exec_micros_(0),
|
||||
all_spent_micros_(0),
|
||||
requested_bytes_(0),
|
||||
float_ops_(0) {
|
||||
if (!node) return;
|
||||
|
||||
for (const auto& attr : node->attr()) {
|
||||
// TODO(xpan): Also consider _output_shapes.
|
||||
if (attr.first != "shape" || !attr.second.has_shape()) continue;
|
||||
if (!shape_.empty()) {
|
||||
fprintf(stderr, "Found duplicated shapes!\n");
|
||||
continue;
|
||||
}
|
||||
std::vector<int64> shape_vec;
|
||||
for (const auto& d : attr.second.shape().dim()) {
|
||||
shape_vec.push_back(d.size());
|
||||
}
|
||||
update_shape(shape_vec);
|
||||
}
|
||||
op_types_.insert(node->op());
|
||||
device_ = node->device();
|
||||
}
|
||||
|
||||
TFNode() : TFNode(nullptr) {}
|
||||
|
||||
void AddInput(TFNode* input) { inputs_[input->node_def()->name()] = input; }
|
||||
|
||||
void AddOpType(const string& op_type) { op_types_.insert(op_type); }
|
||||
|
||||
void AddStepStat(const string& device, const NodeExecStats* step_stat);
|
||||
|
||||
void AddFloatOps(int64 float_ops) { float_ops_ = float_ops; }
|
||||
|
||||
const NodeDef* node_def() { return node_; }
|
||||
const std::map<string, TFNode*>& inputs() { return inputs_; }
|
||||
int64 op_start_micros() { return op_start_micros_; }
|
||||
int64 op_exec_micros() { return op_exec_micros_; }
|
||||
int64 all_spent_micros() { return all_spent_micros_; }
|
||||
int64 requested_byptes() { return requested_bytes_; }
|
||||
int64 float_ops() { return float_ops_; }
|
||||
string device() { return device_; }
|
||||
const std::set<string>& op_types() { return op_types_; }
|
||||
|
||||
const std::vector<int64>& shape() { return shape_; }
|
||||
void update_shape(const std::vector<int64>& shape) { shape_ = shape; }
|
||||
|
||||
private:
|
||||
std::map<string, TFNode*> inputs_;
|
||||
const NodeDef* node_;
|
||||
const NodeExecStats* step_stat_;
|
||||
|
||||
std::vector<int64> shape_;
|
||||
std::set<string> op_types_;
|
||||
string device_;
|
||||
int64 op_start_micros_;
|
||||
int64 op_exec_micros_;
|
||||
int64 all_spent_micros_;
|
||||
int64 requested_bytes_;
|
||||
int64 float_ops_;
|
||||
};
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
|
@ -0,0 +1,57 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
// Renders every option as one "<flag padded to 28 columns><value>" line, in
// the order of the kOptions table. List-valued options are comma-joined.
string Options::ToString() const {
  const string devices = str_util::Join(device_regexes, ",");
  const string account_types = str_util::Join(account_type_regexes, ",");
  const string start_names = str_util::Join(start_name_regexes, ",");
  const string trim_names = str_util::Join(trim_name_regexes, ",");
  const string show_names = str_util::Join(show_name_regexes, ",");
  const string hide_names = str_util::Join(hide_name_regexes, ",");
  const string selected = str_util::Join(select, ",");
  const char* const displayed_only =
      account_displayed_op_only ? "true" : "false";
  const char* const visualize = viz ? "true" : "false";

  return strings::Printf(
      "%-28s%d\n"
      "%-28s%lld\n"
      "%-28s%lld\n"
      "%-28s%lld\n"
      "%-28s%lld\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n"
      "%-28s%s\n",
      kOptions[0], max_depth,
      kOptions[1], min_bytes,
      kOptions[2], min_micros,
      kOptions[3], min_params,
      kOptions[4], min_float_ops,
      kOptions[5], devices.c_str(),
      kOptions[6], order_by.c_str(),
      kOptions[7], account_types.c_str(),
      kOptions[8], start_names.c_str(),
      kOptions[9], trim_names.c_str(),
      kOptions[10], show_names.c_str(),
      kOptions[11], hide_names.c_str(),
      kOptions[12], displayed_only,
      kOptions[13], selected.c_str(),
      kOptions[14], visualize,
      kOptions[15], dump_to_file.c_str());
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
119
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
Normal file
119
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
Normal file
@ -0,0 +1,119 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Command-line option names. Entries are addressed by position elsewhere
// (e.g. Options::ToString), so the order must not change.
static const char* const kOptions[] = {
    "-max_depth",                  // [0]
    "-min_bytes",                  // [1]
    "-min_micros",                 // [2]
    "-min_params",                 // [3]
    "-min_float_ops",              // [4]
    "-device_regexes",             // [5]
    "-order_by",                   // [6]
    "-account_type_regexes",       // [7]
    "-start_name_regexes",         // [8]
    "-trim_name_regexes",          // [9]
    "-show_name_regexes",          // [10]
    "-hide_name_regexes",          // [11]
    "-account_displayed_op_only",  // [12]
    "-select",                     // [13]
    "-viz",                        // [14]
    "-dump_to_file",               // [15]
};
|
||||
|
||||
// Names of the sort keys.
static const char* const kOrderBy[] = {
    "name",       // [0]
    "bytes",      // [1]
    "micros",     // [2]
    "params",     // [3]
    "float_ops",  // [4]
};
|
||||
|
||||
// Names of the columns that can be selected for display.
// Append only: entries are looked up by index, so existing positions must
// stay fixed.
static const char* const kShown[] = {
    "bytes",           // [0]
    "micros",          // [1]
    "params",          // [2]
    "float_ops",       // [3]
    "num_hidden_ops",  // [4]
    "tensor_value",    // [5]
    "device",          // [6]
    "op_types",        // [7]
};
|
||||
|
||||
// Names of the supported commands.
static const char* const kCmds[] = {
    "scope",  // [0]
    "graph",  // [1]
    "set",    // [2]
    "help",   // [3]
};
|
||||
|
||||
struct Options {
|
||||
public:
|
||||
virtual ~Options() {}
|
||||
Options(int max_depth, tensorflow::int64 min_bytes,
|
||||
tensorflow::int64 min_micros, tensorflow::int64 min_params,
|
||||
tensorflow::int64 min_float_ops,
|
||||
const std::vector<string>& device_regexes, const string& order_by,
|
||||
const std::vector<string>& account_type_regexes,
|
||||
const std::vector<string>& start_name_regexes,
|
||||
const std::vector<string>& trim_name_regexes,
|
||||
const std::vector<string>& show_name_regexes,
|
||||
const std::vector<string>& hide_name_regexes,
|
||||
bool account_displayed_op_only, const std::vector<string>& select,
|
||||
bool viz, const string& dump_to_file = "")
|
||||
: max_depth(max_depth),
|
||||
min_bytes(min_bytes),
|
||||
min_micros(min_micros),
|
||||
min_params(min_params),
|
||||
min_float_ops(min_float_ops),
|
||||
device_regexes(device_regexes),
|
||||
order_by(order_by),
|
||||
account_type_regexes(account_type_regexes),
|
||||
start_name_regexes(start_name_regexes),
|
||||
trim_name_regexes(trim_name_regexes),
|
||||
show_name_regexes(show_name_regexes),
|
||||
hide_name_regexes(hide_name_regexes),
|
||||
account_displayed_op_only(account_displayed_op_only),
|
||||
select(select.begin(), select.end()),
|
||||
viz(viz),
|
||||
dump_to_file(dump_to_file) {}
|
||||
|
||||
string ToString() const;
|
||||
|
||||
int max_depth;
|
||||
tensorflow::int64 min_bytes;
|
||||
tensorflow::int64 min_micros;
|
||||
tensorflow::int64 min_params;
|
||||
tensorflow::int64 min_float_ops;
|
||||
std::vector<string> device_regexes;
|
||||
string order_by;
|
||||
|
||||
std::vector<string> account_type_regexes;
|
||||
std::vector<string> start_name_regexes;
|
||||
std::vector<string> trim_name_regexes;
|
||||
std::vector<string> show_name_regexes;
|
||||
std::vector<string> hide_name_regexes;
|
||||
bool account_displayed_op_only;
|
||||
|
||||
std::set<string> select;
|
||||
bool viz;
|
||||
string dump_to_file;
|
||||
};
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
|
191
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
Normal file
191
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
Normal file
@ -0,0 +1,191 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/c_api.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Returns the scope node registered under `name`, creating it first if it is
// not there yet. A created node is backed by a synthetic NodeDef (op set to
// kTFScopeParent) and a TFNode, both owned by this TFScope.
ScopeNode* TFScope::CreateParentNode(const string& name) {
  const auto existing = nodes_map_.find(name);
  if (existing != nodes_map_.end()) {
    return existing->second.get();
  }

  // Synthetic NodeDef standing in for the scope.
  node_defs_.push_back(std::unique_ptr<NodeDef>(new NodeDef()));
  NodeDef* def = node_defs_.back().get();
  def->set_name(name);
  def->set_op(kTFScopeParent);

  std::unique_ptr<TFNode>& backing = parent_nodes_[name];
  backing.reset(new TFNode(def));

  std::unique_ptr<ScopeNode>& created = nodes_map_[name];
  created.reset(new ScopeNode(backing.get()));
  return created.get();
}
|
||||
|
||||
// Registers `node` under its NodeDef name (unless that name is already
// present) and makes sure a scope node exists for every "/"-separated prefix
// of the name.
void TFScope::AddNode(TFNode* node) {
  const string& full_name = node->node_def()->name();
  if (nodes_map_.count(full_name) == 0) {
    nodes_map_[full_name] = std::unique_ptr<ScopeNode>(new ScopeNode(node));
  }

  // Strip one trailing path component at a time: "a/b/c" -> "a/b" -> "a".
  string scope = full_name;
  for (auto slash = scope.find_last_of("/"); slash != string::npos;
       slash = scope.find_last_of("/")) {
    scope.erase(slash);
    if (nodes_map_.count(scope) == 0) {
      CHECK(CreateParentNode(scope));
    }
  }
}
|
||||
|
||||
void TFScope::Build() {
|
||||
if (!roots_.empty()) return;
|
||||
// Found roots, which are nodes without "/".
|
||||
for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
|
||||
ScopeNode* node = it->second.get();
|
||||
auto last_slash = node->name().find_last_of("/");
|
||||
if (last_slash == string::npos) {
|
||||
roots_.push_back(node);
|
||||
} else {
|
||||
const string prefix = node->name().substr(0, last_slash);
|
||||
nodes_map_[prefix]->children.push_back(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the display tree for `opts` and returns its top node.
const ShowNode* TFScope::ShowInternal(const Options& opts) {
  // Unless start_name_regexes is exactly {".*"}, search recursively from the
  // roots for the nodes to start from.
  const std::vector<string>& start_regexes = opts.start_name_regexes;
  const bool start_at_roots =
      start_regexes.size() == 1 && start_regexes[0] == ".*";
  const std::vector<ScopeNode*> start_nodes =
      start_at_roots ? roots_ : SearchRoot(roots_, start_regexes);

  // Hang the start nodes under the kTFProfRoot node, then account and render.
  ScopeNode* top = CreateParentNode(kTFProfRoot);
  top->children.assign(start_nodes.begin(), start_nodes.end());
  Account({top}, opts);

  return PrintScope({top}, opts, 1, 0)[0];
}
|
||||
|
||||
std::vector<ScopeNode*> TFScope::SearchRoot(
|
||||
std::vector<ScopeNode*> roots, const std::vector<string>& regexes) {
|
||||
std::vector<ScopeNode*> res;
|
||||
if (roots.empty()) {
|
||||
return res;
|
||||
}
|
||||
for (ScopeNode* root : roots) {
|
||||
bool match_start_node = false;
|
||||
for (const string& regex : regexes) {
|
||||
if (RE2::FullMatch(root->name(), regex)) {
|
||||
res.push_back(root);
|
||||
match_start_node = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (match_start_node) {
|
||||
// Found a start node at this branch, no need to continue.
|
||||
continue;
|
||||
}
|
||||
std::vector<ScopeNode*> nroots = SearchRoot(root->children, regexes);
|
||||
res.insert(res.end(), nroots.begin(), nroots.end());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// Recursively renders `roots` and returns the nodes that are displayed at
// this level.
//
// For a displayed node, formatted_str receives the node's own line (indented
// by `last_ident` spaces), optionally its tensor value, and then the text of
// its displayed descendants; its proto receives a copy of each displayed
// child's proto. For a hidden node, its displayed descendants are returned in
// its place. Subtrees under a node matching trim_name_regexes are not visited.
std::vector<ScopeNode*> TFScope::PrintScope(const std::vector<ScopeNode*> roots,
                                            const Options& opts, int depth,
                                            int last_ident) {
  std::vector<ScopeNode*> show_nodes;

  for (ScopeNode* node : roots) {
    int nlast_ident = last_ident;
    const bool show = ShouldShow(node, opts, depth);
    if (show) {
      // Discard the output of any previous pass. The proto children must be
      // cleared together with the text, otherwise every repeated call appends
      // the displayed children again.
      node->formatted_str.clear();
      node->mutable_proto()->clear_children();
      if (opts.account_displayed_op_only) {
        node->ResetTotalStats();
        node->AddSelfToTotalStats();
      }
      // Children of a displayed node are indented two more columns.
      nlast_ident += 2;
    }

    std::vector<ScopeNode*> show_cnodes;
    if (!ShouldTrim(node, opts.trim_name_regexes)) {
      show_cnodes = PrintScope(node->children, opts, depth + 1, nlast_ident);
    }

    if (!show) {
      // Hidden node: pass its displayed descendants up to the caller.
      show_nodes.insert(show_nodes.end(), show_cnodes.begin(),
                        show_cnodes.end());
      continue;
    }

    show_cnodes = SortNodes(show_cnodes, opts);
    string children_str;
    for (ScopeNode* sc : show_cnodes) {
      children_str += sc->formatted_str;
      node->mutable_proto()->add_children()->MergeFrom(sc->proto());
      if (opts.account_displayed_op_only) {
        node->AggregateTotalStats(sc);
      }
    }

    node->formatted_str =
        strings::Printf("%s%s\n", string(last_ident, ' ').c_str(),
                        node->Format(opts).c_str());

    // kShown[5] is "tensor_value".
    if (opts.select.find(kShown[5]) != opts.select.end()) {
      std::unique_ptr<TFProfTensor> tfprof_tensor;
      if (LookUpCheckPoint(node->name(), &tfprof_tensor)) {
        string value_str;
        tfprof_tensor->Display(&value_str,
                               node->mutable_proto()->mutable_tensor_value());
        node->formatted_str += value_str;
      }
    }

    node->formatted_str += children_str;
    show_nodes.push_back(node);
  }
  return show_nodes;
}
|
||||
|
||||
// Recomputes the total stats of every node under `roots`, bottom-up: a node's
// totals are its own stats (only if ShouldAccount accepts it) plus the totals
// of all its children.
void TFScope::Account(const std::vector<ScopeNode*>& roots,
                      const Options& opts) {
  for (ScopeNode* current : roots) {
    current->ResetTotalStats();

    // Children first, so their totals are final before being added here.
    Account(current->children, opts);

    current->account = ShouldAccount(current, opts);
    if (current->account) {
      current->AddSelfToTotalStats();
    }
    for (ScopeNode* child : current->children) {
      current->AggregateTotalStats(child);
    }
  }
}
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
@ -0,0 +1,88 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Build a tree structure based on the TensorFlow op names.
|
||||
// For example, 'name1/name2' is a child of 'name1'.
|
||||
// Stats are aggregated from descendants up to their ancestors.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
class ScopeNode : public ShowNode {
|
||||
public:
|
||||
explicit ScopeNode(TFNode* node) : ShowNode(node) {}
|
||||
~ScopeNode() override {}
|
||||
|
||||
void AggregateTotalStats(ScopeNode* node) {
|
||||
ShowNode::AggregateTotalStats(node);
|
||||
}
|
||||
|
||||
void AddSelfToTotalStats() { ShowNode::AddSelfToTotalStats(); }
|
||||
|
||||
void ResetTotalStats() { ShowNode::ResetTotalStats(); }
|
||||
|
||||
std::vector<ScopeNode*> children;
|
||||
};
|
||||
|
||||
class TFScope : public TFShow {
|
||||
public:
|
||||
explicit TFScope(checkpoint::CheckpointReader* ckpt_reader)
|
||||
: TFShow(ckpt_reader) {}
|
||||
~TFScope() override {}
|
||||
|
||||
void AddNode(TFNode* node) override;
|
||||
|
||||
void Build() override;
|
||||
|
||||
private:
|
||||
const ShowNode* ShowInternal(const Options& opts) override;
|
||||
|
||||
ScopeNode* CreateParentNode(const string& name);
|
||||
|
||||
std::vector<ScopeNode*> SearchRoot(std::vector<ScopeNode*> roots,
|
||||
const std::vector<string>& regexes);
|
||||
|
||||
std::vector<ScopeNode*> PrintScope(const std::vector<ScopeNode*> roots,
|
||||
const Options& opts, int depth,
|
||||
int last_ident);
|
||||
|
||||
void Account(const std::vector<ScopeNode*>& roots, const Options& opts);
|
||||
|
||||
std::vector<ScopeNode*> roots_;
|
||||
std::vector<std::unique_ptr<NodeDef>> node_defs_;
|
||||
std::map<string, std::unique_ptr<TFNode>> parent_nodes_;
|
||||
std::map<string, std::unique_ptr<ScopeNode>> nodes_map_;
|
||||
};
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
|
266
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
Normal file
266
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
Normal file
@ -0,0 +1,266 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Wraps `node` and seeds the output proto with the node's name, device (when
// non-empty), execution time, requested bytes, float ops and parameter count.
// `node` must be non-null; it is dereferenced here.
ShowNode::ShowNode(TFNode* node) : node(node), account(true) {
  mutable_proto()->set_name(name());
  if (!node->device().empty()) {
    mutable_proto()->set_device(node->device());
  }
  mutable_proto()->set_exec_micros(node->op_exec_micros());
  mutable_proto()->set_requested_bytes(node->requested_byptes());
  mutable_proto()->set_float_ops(node->float_ops());

  if (!node->shape().empty()) {
    // The parameter count is the product of all dimensions.
    int64 params = 1;
    bool complete_shape = true;
    for (int64 d : node->shape()) {
      // Sometimes parameters could be <0 when a dim is unknown.
      if (d < 0) {
        complete_shape = false;
        break;
      }
      params *= d;
    }
    if (complete_shape) {
      mutable_proto()->set_parameters(proto_.parameters() + params);
    } else {
      // Newline-terminated like the other diagnostics, so that consecutive
      // messages do not run together on stderr.
      fprintf(stderr, "Incomplete shape.\n");
    }
  }
}
|
||||
|
||||
// Returns the display text for this node: "<name> (<meta>)" when any column
// is selected, otherwise just the name.
string ShowNode::Format(const Options& opts) {
  if (!opts.select.empty()) {
    return strings::Printf("%s (%s)", name().c_str(),
                           FormatMeta(opts).c_str());
  }
  return name();
}
|
||||
|
||||
// Builds the comma-separated metadata shown next to a node name. Only the
// columns listed in opts.select are emitted, in this fixed order: shape and
// params, float ops, bytes, time, device, op types. Numeric columns are
// written as "self/total"; "self" becomes "--" when the node is not
// accounted.
string ShowNode::FormatMeta(const Options& opts) {
  const auto selected = [&opts](const char* column) {
    return opts.select.find(column) != opts.select.end();
  };

  std::vector<string> info;

  if (selected(kShown[2])) {  // "params"
    const string shape = FormatShapes(node->shape());
    if (!shape.empty()) {
      info.push_back(shape);
    }
    const string total = FormatNumber(proto().total_parameters()) + " params";
    info.push_back(
        (account ? FormatNumber(proto().parameters()) : string("--")) + "/" +
        total);
  }

  if (selected(kShown[3])) {  // "float_ops"
    const string total = FormatNumber(proto().total_float_ops()) + " flops";
    info.push_back(
        (account ? FormatNumber(proto().float_ops()) : string("--")) + "/" +
        total);
  }

  if (selected(kShown[0])) {  // "bytes"
    const string total = FormatMemory(proto().total_requested_bytes());
    info.push_back(
        (account ? FormatMemory(proto().requested_bytes()) : string("--")) +
        "/" + total);
  }

  if (selected(kShown[1])) {  // "micros"
    const string total = FormatTime(proto().total_exec_micros());
    info.push_back(
        (account ? FormatTime(proto().exec_micros()) : string("--")) + "/" +
        total);
  }

  if (selected(kShown[6]) && !proto().device().empty()) {  // "device"
    info.push_back(proto().device());
  }

  if (selected(kShown[7])) {  // "op_types"
    std::set<string> op_types = node->op_types();
    // Device is considered a type.
    if (!proto().device().empty()) {
      op_types.insert(proto().device());
    }
    info.push_back(str_util::Join(op_types, "|"));
  }

  return str_util::Join(info, ", ");
}
|
||||
|
||||
// Mutable access to the output proto of this node.
TFProfNode* ShowNode::mutable_proto() {
  return &proto_;
}
|
||||
|
||||
// Read-only access to the output proto of this node.
const TFProfNode& ShowNode::proto() const {
  return proto_;
}
|
||||
|
||||
// Adds the total_* stats of `node` onto the total_* stats of this node.
void ShowNode::AggregateTotalStats(ShowNode* node) {
  const TFProfNode& other = node->proto();
  TFProfNode* mine = mutable_proto();

  mine->set_total_exec_micros(mine->total_exec_micros() +
                              other.total_exec_micros());
  mine->set_total_requested_bytes(mine->total_requested_bytes() +
                                  other.total_requested_bytes());
  mine->set_total_parameters(mine->total_parameters() +
                             other.total_parameters());
  mine->set_total_float_ops(mine->total_float_ops() +
                            other.total_float_ops());
}
|
||||
|
||||
void ShowNode::AddSelfToTotalStats() {
|
||||
mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
|
||||
proto().exec_micros());
|
||||
mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
|
||||
proto().requested_bytes());
|
||||
mutable_proto()->set_total_parameters(proto().total_parameters() +
|
||||
proto().parameters());
|
||||
mutable_proto()->set_total_float_ops(proto().total_float_ops() +
|
||||
proto().float_ops());
|
||||
}
|
||||
|
||||
void ShowNode::ResetTotalStats() {
|
||||
mutable_proto()->set_total_exec_micros(0);
|
||||
mutable_proto()->set_total_requested_bytes(0);
|
||||
mutable_proto()->set_total_parameters(0);
|
||||
mutable_proto()->set_total_float_ops(0);
|
||||
}
|
||||
|
||||
// Runs ShowInternal() and emits the formatted text: to the file named by
// opts.dump_to_file when that is set, otherwise to stdout. A failed write is
// reported on stderr. Returns the proto of the top node in either case.
const TFProfNode& TFShow::Show(const Options& opts) {
  const ShowNode* root = ShowInternal(opts);
  const string& rendered = root->formatted_str;

  if (!opts.dump_to_file.empty()) {
    const Status written =
        WriteStringToFile(Env::Default(), opts.dump_to_file, rendered);
    if (!written.ok()) {
      fprintf(stderr, "%s\n", written.ToString().c_str());
    }
  } else {
    printf("%s", rendered.c_str());
    fflush(stdout);
  }
  return root->proto();
}
|
||||
|
||||
bool TFShow::LookUpCheckPoint(const string& name,
|
||||
std::unique_ptr<TFProfTensor>* tensor) {
|
||||
if (name == kTFProfRoot || !ckpt_reader_ || !tensor) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<Tensor> out_tensor;
|
||||
TF_Status* status = TF_NewStatus();
|
||||
ckpt_reader_->GetTensor(name, &out_tensor, status);
|
||||
if (TF_GetCode(status) != TF_OK) {
|
||||
fprintf(stderr, "%s\n", TF_Message(status));
|
||||
TF_DeleteStatus(status);
|
||||
return false;
|
||||
}
|
||||
tensor->reset(new TFProfTensor(std::move(out_tensor)));
|
||||
TF_DeleteStatus(status);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Decides whether `node` is displayed at `depth`.
//
// The kTFProfRoot node is always displayed. Any other node must be accounted,
// meet every min_* threshold, be within max_depth, pass ShouldShowIfExtra(),
// have a device covered by device_regexes and a name covered by
// show_name_regexes, and must not match any of hide_name_regexes. A regex
// list equal to {".*"} is treated as "match everything" without running the
// regex engine.
bool TFShow::ShouldShow(ShowNode* node, const Options& opts, int depth) {
  const auto is_catch_all = [](const std::vector<string>& patterns) {
    return patterns.size() == 1 && patterns[0] == ".*";
  };
  const auto matches_any = [](const string& text,
                              const std::vector<string>& patterns) {
    for (const string& pattern : patterns) {
      if (RE2::FullMatch(text, pattern)) return true;
    }
    return false;
  };

  // Always show kTFProfRoot.
  if (node->name() == kTFProfRoot) return true;

  if (!node->account) return false;

  // Thresholds, checked in the same order as the option list.
  if (node->proto().requested_bytes() < opts.min_bytes) return false;
  if (node->proto().exec_micros() < opts.min_micros) return false;
  if (node->proto().parameters() < opts.min_params) return false;
  if (node->proto().float_ops() < opts.min_float_ops) return false;
  if (depth > opts.max_depth) return false;
  if (!ShouldShowIfExtra(node, opts, depth)) return false;

  // Don't show if device_regexes don't cover it.
  if (!is_catch_all(opts.device_regexes) &&
      !matches_any(node->proto().device(), opts.device_regexes)) {
    return false;
  }

  // Don't show if show_name_regexes don't cover it.
  if (!is_catch_all(opts.show_name_regexes) &&
      !matches_any(node->name(), opts.show_name_regexes)) {
    return false;
  }

  // Don't show if hide_name_regexes cover it.
  return !matches_any(node->name(), opts.hide_name_regexes);
}
|
||||
|
||||
// Returns true when the name of `node` is fully matched by at least one of
// `regexes`; returns false for an empty list.
bool TFShow::ShouldTrim(ShowNode* node, const std::vector<string>& regexes) {
  for (const string& pattern : regexes) {
    if (!RE2::FullMatch(node->name(), pattern)) continue;
    return true;
  }
  return false;
}
|
||||
|
||||
// Returns true when `node` is selected by opts.account_type_regexes.
//
// A single ".*" entry selects every node. Otherwise the node is selected when
// some regex fully matches one of its op types or its device string.
bool TFShow::ShouldAccount(ShowNode* node, const Options& opts) {
  const auto& type_regexes = opts.account_type_regexes;
  if (type_regexes.size() == 1 && type_regexes[0] == ".*") return true;

  for (const string& pattern : type_regexes) {
    for (const string& op_type : node->node->op_types()) {
      if (RE2::FullMatch(op_type, pattern)) return true;
    }
    // The same list is also tried against the device of the node.
    if (RE2::FullMatch(node->proto().device(), pattern)) return true;
  }
  return false;
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
127
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
Normal file
127
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
Normal file
@ -0,0 +1,127 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Parent class and utilities for tfprof_graph and tfprof_scope.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class ShowNode {
|
||||
public:
|
||||
explicit ShowNode(TFNode* node);
|
||||
virtual ~ShowNode() {}
|
||||
|
||||
const string& name() const { return node->node_def()->name(); }
|
||||
TFProfNode* mutable_proto();
|
||||
const TFProfNode& proto() const;
|
||||
|
||||
string Format(const Options& opts);
|
||||
|
||||
string FormatMeta(const Options& opts);
|
||||
|
||||
TFNode* node;
|
||||
bool account;
|
||||
string formatted_str;
|
||||
|
||||
protected:
|
||||
void AggregateTotalStats(ShowNode* node);
|
||||
|
||||
void AddSelfToTotalStats();
|
||||
|
||||
void ResetTotalStats();
|
||||
|
||||
TFProfNode proto_;
|
||||
};
|
||||
|
||||
class TFShow {
|
||||
public:
|
||||
explicit TFShow(checkpoint::CheckpointReader* ckpt_reader)
|
||||
: ckpt_reader_(ckpt_reader) {}
|
||||
virtual ~TFShow() {}
|
||||
virtual void AddNode(TFNode* node) = 0;
|
||||
virtual void Build() = 0;
|
||||
const TFProfNode& Show(const Options& opts);
|
||||
|
||||
protected:
|
||||
virtual const ShowNode* ShowInternal(const Options& opts) = 0;
|
||||
|
||||
bool LookUpCheckPoint(const string& name,
|
||||
std::unique_ptr<TFProfTensor>* tensor);
|
||||
|
||||
// Overridden by subclass if extra requirements need to be met.
|
||||
virtual bool ShouldShowIfExtra(ShowNode* node, const Options& opts,
|
||||
int depth) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShouldShow(ShowNode* node, const Options& opts, int depth);
|
||||
|
||||
bool ShouldTrim(ShowNode* node, const std::vector<string>& regexes);
|
||||
|
||||
bool ShouldAccount(ShowNode* node, const Options& opts);
|
||||
|
||||
template <typename T>
|
||||
std::vector<T*> SortNodes(const std::vector<T*>& nodes, const Options& opts) {
|
||||
if (opts.order_by.empty() || nodes.empty()) {
|
||||
return nodes;
|
||||
}
|
||||
std::vector<T*> sorted_nodes = nodes;
|
||||
std::sort(sorted_nodes.begin(), sorted_nodes.end(), [&opts](const T* n1,
|
||||
const T* n2) {
|
||||
if (n1->name() == kTFProfRoot) return true;
|
||||
if (n2->name() == kTFProfRoot) return false;
|
||||
bool name_cmp = n1->name() < n2->name();
|
||||
if (opts.order_by == kOrderBy[0]) {
|
||||
return name_cmp;
|
||||
} else if (opts.order_by == kOrderBy[1]) {
|
||||
return n1->proto().total_requested_bytes() >
|
||||
n2->proto().total_requested_bytes();
|
||||
} else if (opts.order_by == kOrderBy[2]) {
|
||||
return n1->proto().total_exec_micros() >
|
||||
n2->proto().total_exec_micros();
|
||||
} else if (opts.order_by == kOrderBy[3]) {
|
||||
return n1->proto().total_parameters() > n2->proto().total_parameters();
|
||||
} else if (opts.order_by == kOrderBy[4]) {
|
||||
return n1->proto().total_float_ops() > n2->proto().total_float_ops();
|
||||
}
|
||||
return name_cmp;
|
||||
});
|
||||
return sorted_nodes;
|
||||
}
|
||||
|
||||
checkpoint::CheckpointReader* ckpt_reader_;
|
||||
};
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
|
@ -0,0 +1,92 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class TFProfShowTest : public ::testing::Test {
|
||||
protected:
|
||||
TFProfShowTest() {
|
||||
string graph_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||
|
||||
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
|
||||
new tensorflow::RunMetadata());
|
||||
string run_meta_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
|
||||
TF_CHECK_OK(
|
||||
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
||||
|
||||
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
||||
string op_log_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
|
||||
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
||||
|
||||
string ckpt_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
||||
TF_Status* status = TF_NewStatus();
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
||||
CHECK(TF_GetCode(status) == TF_OK);
|
||||
TF_DeleteStatus(status);
|
||||
|
||||
tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
|
||||
std::move(op_log_pb), std::move(ckpt_reader)));
|
||||
}
|
||||
|
||||
std::unique_ptr<TFStats> tf_stats_;
|
||||
};
|
||||
|
||||
// Runs the "scope" command with output redirected to a dump file and checks
// the text written to that file.
TEST_F(TFProfShowTest, DumpScopeMode) {
  const string dump_path = io::JoinPath(testing::TmpDir(), "dump");
  Options options(5, 0, 0, 0, 0, {".*"}, "name",
                  {"Variable"},  // account_type_regexes
                  {".*"}, {""}, {".*"}, {""}, false,
                  {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
                  false, dump_path);
  tf_stats_->PrintGraph("scope", options);

  string dumped;
  TF_CHECK_OK(ReadFileToString(Env::Default(), dump_path, &dumped));
  EXPECT_EQ(
      "_TFProfRoot (--/450 params, --/0 flops, --/1.80KB, --/0us)\n DW "
      "(3x3x3x6, 162/162 params, 0/0 flops, 648B/648B, 0us/0us)\n DW2 "
      "(2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/1.15KB, 0us/0us)\n",
      dumped);
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
130
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
Normal file
130
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
Normal file
@ -0,0 +1,130 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/core/framework/step_stats.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Takes ownership of the input protos and the checkpoint reader, parses them
// into nodes_map_, and builds the scope and graph views.
//
// `graph` is mandatory (CHECK-fails when null); `run_meta`, `op_log` and
// `ckpt_reader` are optional and skipped when null. Progress is printed to
// stdout.
TFStats::TFStats(std::unique_ptr<GraphDef> graph,
                 std::unique_ptr<RunMetadata> run_meta,
                 std::unique_ptr<OpLog> op_log,
                 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
    : graph_(std::move(graph)),
      run_meta_(std::move(run_meta)),
      op_log_(std::move(op_log)),
      ckpt_reader_(std::move(ckpt_reader)) {
  CHECK(graph_) << "Must at least have GraphDef";

  printf("Parsing GraphDef...\n");
  ParseGraph();

  if (run_meta_) {
    printf("Parsing RunMetadata...\n");
    ParseRunMeta();
  }

  if (op_log_) {
    printf("Parsing OpLog...\n");
    ParseOpLog();
  }

  if (ckpt_reader_) {
    printf("Parsing Checkpoint...\n");
    // Tag every graph node that also appears as a checkpoint variable.
    for (const auto& var_and_shape : ckpt_reader_->GetVariableToShapeMap()) {
      auto found = nodes_map_.find(var_and_shape.first);
      if (found == nodes_map_.end()) continue;
      found->second.AddOpType("_checkpoint_variables");
    }
  }

  printf("Preparing Views...\n");
  scope_view_.reset(new TFScope(ckpt_reader_.get()));
  graph_view_.reset(new TFGraph(ckpt_reader_.get()));
  // Both views receive pointers to the nodes stored in nodes_map_.
  for (auto& name_and_node : nodes_map_) {
    TFNode* tf_node = &name_and_node.second;
    scope_view_->AddNode(tf_node);
    graph_view_->AddNode(tf_node);
  }
  scope_view_->Build();
  graph_view_->Build();
}
|
||||
|
||||
// Dispatches `cmd` to the matching view and returns the proto it produced.
// kCmds[0] selects the scope view and kCmds[1] the graph view; any other
// command is reported on stderr and yields an empty node.
const TFProfNode& TFStats::PrintGraph(const string& cmd, const Options& opts) {
  if (cmd == kCmds[0]) return scope_view_->Show(opts);
  if (cmd == kCmds[1]) return graph_view_->Show(opts);

  fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
  return empty_node_;
}
|
||||
|
||||
void TFStats::ParseGraph() {
|
||||
for (const NodeDef& node : graph_->node()) {
|
||||
CHECK(nodes_map_.find(node.name()) == nodes_map_.end());
|
||||
nodes_map_[node.name()] = TFNode(&node);
|
||||
}
|
||||
for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
|
||||
const NodeDef* node_def = it->second.node_def();
|
||||
for (string node_input : node_def->input()) {
|
||||
// input name format can be: "^node:src_output"
|
||||
auto prefix_pos = node_input.find(":");
|
||||
if (prefix_pos != node_input.npos) {
|
||||
node_input.substr(0, prefix_pos);
|
||||
}
|
||||
if (node_input.substr(0, 1) == "^") {
|
||||
node_input = node_input.substr(1);
|
||||
}
|
||||
auto input_node = nodes_map_.find(node_input);
|
||||
if (input_node == nodes_map_.end()) {
|
||||
continue;
|
||||
}
|
||||
it->second.AddInput(&input_node->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TFStats::ParseOpLog() {
|
||||
for (const OpLogEntry& entry : op_log_->log_entries()) {
|
||||
auto node = nodes_map_.find(entry.name());
|
||||
if (node == nodes_map_.end()) continue;
|
||||
for (const string& type : entry.types()) {
|
||||
node->second.AddOpType(type);
|
||||
}
|
||||
if (entry.float_ops()) {
|
||||
node->second.AddFloatOps(entry.float_ops());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TFStats::ParseRunMeta() {
|
||||
if (!run_meta_->has_step_stats()) return;
|
||||
|
||||
for (const auto& dev_stat : run_meta_->step_stats().dev_stats()) {
|
||||
for (const auto& node_stat : dev_stat.node_stats()) {
|
||||
auto node = nodes_map_.find(node_stat.node_name());
|
||||
if (node == nodes_map_.end()) {
|
||||
continue;
|
||||
}
|
||||
node->second.AddStepStat(dev_stat.device(), &node_stat);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
@ -0,0 +1,82 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Core API of tfprof.
|
||||
// 1. Load protos generated from a tensorflow model.
|
||||
// 2. Build in-memory representations of the tensorflow model, annotate the
|
||||
// representation with various stats, such as params,times,memory,etc.
|
||||
// 3. Accept command and options to selectively aggregate stats for analysis
|
||||
// and print out the results.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/attr_value.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/framework/step_stats.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
class TFStats {
|
||||
public:
|
||||
TFStats(std::unique_ptr<GraphDef> graph,
|
||||
std::unique_ptr<RunMetadata> run_meta, std::unique_ptr<OpLog> op_log,
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader);
|
||||
~TFStats() {}
|
||||
|
||||
// Prints the results to stdout. Also returns the printed output in
|
||||
// a proto.
|
||||
const TFProfNode& PrintGraph(const string& cmd, const Options& opts);
|
||||
|
||||
private:
|
||||
void ParseGraph();
|
||||
|
||||
void ParseOpLog();
|
||||
|
||||
void ParseRunMeta();
|
||||
|
||||
std::unique_ptr<TFScope> scope_view_;
|
||||
std::unique_ptr<TFGraph> graph_view_;
|
||||
std::unique_ptr<GraphDef> graph_;
|
||||
std::unique_ptr<RunMetadata> run_meta_;
|
||||
std::unique_ptr<OpLog> op_log_;
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader_;
|
||||
// Store TFNode instead of TFNode* to avoid large number of dynamic alloc.
|
||||
std::map<string, TFNode> nodes_map_;
|
||||
TFProfNode empty_node_;
|
||||
};
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
|
@ -0,0 +1,194 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class TFProfStatsTest : public ::testing::Test {
|
||||
protected:
|
||||
TFProfStatsTest() {
|
||||
string graph_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||
|
||||
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
|
||||
new tensorflow::RunMetadata());
|
||||
string run_meta_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
|
||||
TF_CHECK_OK(
|
||||
ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
|
||||
|
||||
std::unique_ptr<OpLog> op_log_pb(new OpLog());
|
||||
string op_log_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
|
||||
TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
|
||||
|
||||
string ckpt_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
||||
TF_Status* status = TF_NewStatus();
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
||||
CHECK(TF_GetCode(status) == TF_OK);
|
||||
TF_DeleteStatus(status);
|
||||
|
||||
tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
|
||||
std::move(op_log_pb), std::move(ckpt_reader)));
|
||||
}
|
||||
|
||||
std::unique_ptr<TFStats> tf_stats_;
|
||||
};
|
||||
|
||||
// Accounts only nodes of type kTrainableVarType in the scope view and
// compares the returned proto with the expected text proto.
TEST_F(TFProfStatsTest, CustomOpType) {
  Options options(3, 0, 0, 0, 0, {".*"}, "name",
                  {kTrainableVarType},  // account_type_regexes
                  {".*"}, {""}, {".*"}, {""}, false,
                  {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
                  false);
  const TFProfNode& actual = tf_stats_->PrintGraph("scope", options);

  TFProfNode want;
  CHECK(protobuf::TextFormat::ParseFromString(
      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 1800\ntotal_parameters: "
      "450\nchildren {\n name: \"DW\"\n exec_micros: 0\n requested_bytes: "
      "648\n parameters: 162\n total_exec_micros: 0\n "
      "total_requested_bytes: 648\n total_parameters: 162\n device: "
      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
      "total_float_ops: 0\n}\nchildren {\n name: \"DW2\"\n exec_micros: 0\n "
      "requested_bytes: 1152\n parameters: 288\n total_exec_micros: 0\n "
      "total_requested_bytes: 1152\n total_parameters: 288\n device: "
      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
      "total_float_ops: 0\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
      &want));
  EXPECT_EQ(want.DebugString(), actual.DebugString());
}
|
||||
|
||||
// Accounts only nodes of type kCkptVarType in the scope view and compares
// the returned proto with the expected text proto.
TEST_F(TFProfStatsTest, CheckPointOpType) {
  Options options(
      3, 0, 0, 0, 0, {".*"}, "name", {kCkptVarType},  // account_type_regexes
      {".*"}, {""}, {".*"}, {""}, false,
      {"params", "bytes", "micros", "float_ops", "num_hidden_ops"}, false);
  const TFProfNode& actual = tf_stats_->PrintGraph("scope", options);

  TFProfNode want;
  CHECK(protobuf::TextFormat::ParseFromString(
      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 1800\ntotal_parameters: "
      "450\nchildren {\n name: \"DW\"\n exec_micros: 0\n requested_bytes: "
      "648\n parameters: 162\n total_exec_micros: 0\n "
      "total_requested_bytes: 648\n total_parameters: 162\n device: "
      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
      "total_float_ops: 0\n}\nchildren {\n name: \"DW2\"\n exec_micros: 0\n "
      "requested_bytes: 1152\n parameters: 288\n total_exec_micros: 0\n "
      "total_requested_bytes: 1152\n total_parameters: 288\n device: "
      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
      "total_float_ops: 0\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
      &want));
  EXPECT_EQ(want.DebugString(), actual.DebugString());
}
|
||||
|
||||
// Runs the "graph" command starting from nodes matching "cost.*" and expects
// only the root node in the result.
TEST_F(TFProfStatsTest, TestGraph) {
  Options options(100, 0, 10000, 0, 0, {".*"}, "name", {".*"},
                  {"cost.*"},  // start_name_regexes
                  {""}, {".*"}, {""}, false,
                  {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
                  false);
  const TFProfNode& actual = tf_stats_->PrintGraph("graph", options);

  TFProfNode want;
  CHECK(protobuf::TextFormat::ParseFromString(
      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: 0\ninputs: "
      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
      "0\ntotal_inputs: 0\nfloat_ops: 0\ntotal_float_ops: 0\n",
      &want));
  EXPECT_EQ(want.DebugString(), actual.DebugString());
}
|
||||
|
||||
// Selects "float_ops" in the scope view and checks the float op counts
// reported for the two convolution nodes and for the root.
TEST_F(TFProfStatsTest, TestFloatOps) {
  Options options(10, 0, 0, 0, 1, {".*"}, "name", {".*"}, {".*"}, {""}, {".*"},
                  {""}, false, {"float_ops"}, false);
  const TFProfNode& actual = tf_stats_->PrintGraph("scope", options);

  TFProfNode want;
  CHECK(protobuf::TextFormat::ParseFromString(
      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
      "0\ntotal_exec_micros: 11\ntotal_requested_bytes: "
      "5280\ntotal_parameters: 450\nchildren {\n name: \"Conv2D\"\n "
      "exec_micros: 0\n requested_bytes: 432\n total_exec_micros: 0\n "
      "total_requested_bytes: 432\n total_parameters: 0\n device: "
      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 5832\n "
      "total_float_ops: 5832\n}\nchildren {\n name: \"Conv2D_1\"\n "
      "exec_micros: 10\n requested_bytes: 384\n total_exec_micros: 10\n "
      "total_requested_bytes: 384\n total_parameters: 0\n device: "
      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 4608\n "
      "total_float_ops: 4608\n}\nfloat_ops: 0\ntotal_float_ops: 10440\n",
      &want));
  EXPECT_EQ(want.DebugString(), actual.DebugString());
}
|
||||
|
||||
// With account_displayed_op_only set and a show-name filter of
// "unit_2_1.*DW", the expected result is a root with all-zero totals.
TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
  Options options(100, 0, 0, 0, 0, {".*"}, "name", {".*"}, {".*"}, {""},
                  {"unit_2_1.*DW"},  // show_name_regexes.
                  {""}, true,        // account_displayed_op_only.
                  {"params"}, false);
  const TFProfNode& actual = tf_stats_->PrintGraph("scope", options);

  TFProfNode want;
  CHECK(protobuf::TextFormat::ParseFromString(
      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
      "0\nfloat_ops: 0\ntotal_float_ops: 0\n",
      &want));
  EXPECT_EQ(want.DebugString(), actual.DebugString());
}
|
||||
|
||||
// Selects "tensor_value" with a show-name filter of "unit_1_0.*gamma" and
// checks the totals reported on the root node.
TEST_F(TFProfStatsTest, TestShowTensorValue) {
  Options options(10, 0, 0, 0, 0, {".*"}, "name", {".*"}, {".*"}, {""},
                  {"unit_1_0.*gamma"}, {""}, false,
                  {"tensor_value"},  // Show tensor value from checkpoint.
                  false);
  const TFProfNode& actual = tf_stats_->PrintGraph("scope", options);

  TFProfNode want;
  CHECK(protobuf::TextFormat::ParseFromString(
      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
      "0\ntotal_exec_micros: 11\ntotal_requested_bytes: "
      "5280\ntotal_parameters: 450\nfloat_ops: 0\ntotal_float_ops: 10440\n",
      &want));
  EXPECT_EQ(want.DebugString(), actual.DebugString());
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
@ -0,0 +1,78 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Copies the prepared representations of the tensor into whichever output
// pointers are non-null.
//
// `formatted_str` receives the text form; once that text reaches
// kTFProfTenosrMaxDisplayLen characters an "...omitted from display" marker
// is appended. `tfprof_tensor_pb` has the stored proto merged into it.
void TFProfTensor::Display(string* formatted_str,
                           TFProfTensorProto* tfprof_tensor_pb) {
  if (formatted_str != nullptr) {
    if (formatted_str_.length() < kTFProfTenosrMaxDisplayLen) {
      *formatted_str = formatted_str_;
    } else {
      *formatted_str =
          strings::StrCat(formatted_str_, "...omitted from display\n\n");
    }
  }
  if (tfprof_tensor_pb != nullptr) {
    tfprof_tensor_pb->MergeFrom(tfprof_tensor_pb_);
  }
}
|
||||
|
||||
void TFProfTensor::Build() {
|
||||
tfprof_tensor_pb_.set_dtype(tensor_->dtype());
|
||||
|
||||
switch (tensor_->dtype()) {
|
||||
// Double for all floats.
|
||||
case DataType::DT_FLOAT:
|
||||
case DataType::DT_DOUBLE: {
|
||||
std::vector<double> values_vec;
|
||||
if (tensor_->dtype() == DataType::DT_FLOAT) {
|
||||
GetValueVec<float, double>(&values_vec);
|
||||
} else if (tensor_->dtype() == DataType::DT_DOUBLE) {
|
||||
GetValueVec<double, double>(&values_vec);
|
||||
}
|
||||
BuildOutput<double>(0, 0, values_vec, &tfprof_tensor_pb_);
|
||||
break;
|
||||
}
|
||||
// Int64 for all integers.
|
||||
case DataType::DT_INT32:
|
||||
case DataType::DT_INT64: {
|
||||
std::vector<int64> values_vec;
|
||||
if (tensor_->dtype() == DataType::DT_INT32) {
|
||||
GetValueVec<int32, int64>(&values_vec);
|
||||
} else if (tensor_->dtype() == DataType::DT_INT64) {
|
||||
GetValueVec<int64, int64>(&values_vec);
|
||||
}
|
||||
BuildOutput<int64>(0, 0, values_vec, &tfprof_tensor_pb_);
|
||||
break;
|
||||
}
|
||||
case DataType::DT_STRING: {
|
||||
// Not supported by TensorFlow.
|
||||
std::vector<string> values_vec;
|
||||
GetValueVec<string, string>(&values_vec);
|
||||
BuildOutput<string>(0, 0, values_vec, &tfprof_tensor_pb_);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
fprintf(stderr, "Not Supported type %d\n", tensor_->dtype());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
120
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
Normal file
120
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
Normal file
@ -0,0 +1,120 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// TFProf representation of a Tensor's value.
// 1. A multi-dimensional tensor is flattened in row-major order and stored in
//    the proto.
// 2. Integers are up-cast to int64 and floats are up-cast to double. Strings
//    are handled by this code, although the TensorFlow CheckpointReader
//    library does not support them.
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
||||
|
||||
#include <typeinfo>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/lib/strings/numbers.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
|
||||
class TFProfTensor {
|
||||
public:
|
||||
explicit TFProfTensor(std::unique_ptr<Tensor> tensor)
|
||||
: tensor_(std::move(tensor)) {
|
||||
Build();
|
||||
}
|
||||
|
||||
// If pointers are provided, they are filled by the method.
|
||||
void Display(string* formatted_str, TFProfTensorProto* tfprof_tensor_pb);
|
||||
|
||||
private:
|
||||
// Max length of tensor value displayed to CLI.
|
||||
const int64 kTFProfTenosrMaxDisplayLen = 10000;
|
||||
// Max length after which a latency warning will be printed.
|
||||
const int64 kTFProfTensorMaxWarnLen = 100000;
|
||||
|
||||
void Build();
|
||||
|
||||
// It assumes the flatten values are stored in row-major, which is mentioned
|
||||
// indirectly at various places:
|
||||
// TODO(xpan): Further verifying it.
|
||||
template <typename T>
|
||||
int64 BuildOutput(int64 start, int depth, const std::vector<T>& values,
|
||||
TFProfTensorProto* dim) {
|
||||
formatted_str_ += "[";
|
||||
int64 nstart = start;
|
||||
for (int i = 0; i < tensor_->dim_size(depth); i++) {
|
||||
// Last dimension, pull the values.
|
||||
if (depth == tensor_->dims() - 1) {
|
||||
std::ostringstream sstream;
|
||||
sstream << values[nstart];
|
||||
|
||||
if (typeid(values[nstart]) == typeid(double)) {
|
||||
double double_val;
|
||||
CHECK(strings::safe_strtod(sstream.str().c_str(), &double_val));
|
||||
dim->add_value_double(double_val);
|
||||
formatted_str_ += strings::Printf(
|
||||
"%.2f ", dim->value_double(dim->value_double_size() - 1));
|
||||
} else if (typeid(values[nstart]) == typeid(int64)) {
|
||||
int64 int64_val;
|
||||
CHECK(strings::safe_strto64(sstream.str().c_str(), &int64_val));
|
||||
dim->add_value_int64(int64_val);
|
||||
formatted_str_ += strings::Printf(
|
||||
"%lld ", dim->value_int64(dim->value_int64_size() - 1));
|
||||
} else if (typeid(values[nstart]) == typeid(string)) {
|
||||
dim->add_value_str(sstream.str());
|
||||
formatted_str_ =
|
||||
strings::StrCat(formatted_str_, "'",
|
||||
dim->value_str(dim->value_str_size() - 1) + "' ");
|
||||
} else {
|
||||
CHECK(false) << "Unsupported type: " << typeid(values[nstart]).name();
|
||||
}
|
||||
++nstart;
|
||||
} else {
|
||||
// Not-last dimension. Drill deeper.
|
||||
nstart = BuildOutput<T>(nstart, depth + 1, values, dim);
|
||||
}
|
||||
}
|
||||
if (formatted_str_.length() > kTFProfTenosrMaxDisplayLen) {
|
||||
formatted_str_ = formatted_str_.substr(0, kTFProfTenosrMaxDisplayLen);
|
||||
}
|
||||
formatted_str_ += "],\n";
|
||||
return nstart;
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
void GetValueVec(std::vector<U>* value_vec) {
|
||||
// TODO(xpan): Address the huge tensor problem.
|
||||
if (tensor_->NumElements() > kTFProfTensorMaxWarnLen) {
|
||||
fprintf(stderr, "Showing huge tensor, the tool might halt...\n");
|
||||
}
|
||||
auto values = tensor_->flat<T>();
|
||||
for (int64 i = 0; i < tensor_->NumElements(); i++) {
|
||||
value_vec->push_back(static_cast<U>(values(i)));
|
||||
}
|
||||
}
|
||||
|
||||
TFProfTensorProto tfprof_tensor_pb_;
|
||||
std::unique_ptr<Tensor> tensor_;
|
||||
string formatted_str_;
|
||||
};
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
|
@ -0,0 +1,306 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
class TFProfTensorTest : public ::testing::Test {
|
||||
protected:
|
||||
TFProfTensorTest() {
|
||||
string graph_path = io::JoinPath(
|
||||
testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
|
||||
|
||||
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
|
||||
std::unique_ptr<OpLog> op_log_pb;
|
||||
|
||||
string ckpt_path =
|
||||
io::JoinPath(testing::TensorFlowSrcRoot(),
|
||||
"contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
|
||||
TF_Status* status = TF_NewStatus();
|
||||
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
|
||||
new checkpoint::CheckpointReader(ckpt_path, status));
|
||||
CHECK(TF_GetCode(status) == TF_OK);
|
||||
TF_DeleteStatus(status);
|
||||
|
||||
tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
|
||||
std::move(op_log_pb), std::move(ckpt_reader)));
|
||||
}
|
||||
|
||||
std::unique_ptr<TFStats> tf_stats_;
|
||||
};
|
||||
|
||||
TEST_F(TFProfTensorTest, Basics) {
|
||||
Options opts(3, 0, 0, 0, 0, {".*"}, "name", {"Variable"}, {".*"}, {""},
|
||||
{".*"}, {""}, false, {"tensor_value"}, // show the tensor value.
|
||||
false);
|
||||
const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
|
||||
|
||||
TFProfNode expected;
|
||||
CHECK(protobuf::TextFormat::ParseFromString(
|
||||
"name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
|
||||
"0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
|
||||
"450\nchildren {\n name: \"DW\"\n exec_micros: 0\n requested_bytes: "
|
||||
"0\n parameters: 162\n total_exec_micros: 0\n total_requested_bytes: "
|
||||
"0\n total_parameters: 162\n float_ops: 0\n total_float_ops: 0\n "
|
||||
"tensor_value {\n dtype: DT_FLOAT\n value_double: -0.00117808\n "
|
||||
"value_double: -0.000709941\n value_double: -0.00174816\n "
|
||||
"value_double: -0.000495372\n value_double: 0.000243039\n "
|
||||
"value_double: -0.000126313\n value_double: -0.000663929\n "
|
||||
"value_double: -0.000495198\n value_double: -0.000893934\n "
|
||||
"value_double: -0.00179659\n value_double: 0.000408874\n "
|
||||
"value_double: -0.00120166\n value_double: -0.00109484\n "
|
||||
"value_double: -0.000200362\n value_double: 0.000726721\n "
|
||||
"value_double: -0.000277568\n value_double: 0.00180584\n "
|
||||
"value_double: 0.000997271\n value_double: -0.00185987\n "
|
||||
"value_double: -0.00113401\n value_double: -0.000528852\n "
|
||||
"value_double: -0.000197412\n value_double: 1.32871e-05\n "
|
||||
"value_double: -0.000285896\n value_double: -0.000428898\n "
|
||||
"value_double: -0.000424633\n value_double: 2.15488e-05\n "
|
||||
"value_double: 0.00149753\n value_double: -0.000884576\n "
|
||||
"value_double: -0.0013795\n value_double: -0.000650125\n "
|
||||
"value_double: 0.00191612\n value_double: 4.71838e-05\n "
|
||||
"value_double: 0.000400201\n value_double: 0.00239555\n "
|
||||
"value_double: -0.00177706\n value_double: -0.000781899\n "
|
||||
"value_double: -0.00145247\n value_double: 0.0020025\n "
|
||||
"value_double: 0.000597419\n value_double: 0.00135456\n "
|
||||
"value_double: 0.0015876\n value_double: -0.000993568\n "
|
||||
"value_double: 0.0006509\n value_double: -0.000894533\n "
|
||||
"value_double: -0.00129322\n value_double: 0.0003859\n "
|
||||
"value_double: 0.000415186\n value_double: -0.000439212\n "
|
||||
"value_double: 0.000442138\n value_double: 0.00212353\n "
|
||||
"value_double: 0.000702953\n value_double: 0.000713424\n "
|
||||
"value_double: -0.000304877\n value_double: -9.17046e-05\n "
|
||||
"value_double: -0.000801103\n value_double: 0.000304854\n "
|
||||
"value_double: -0.00070527\n value_double: -0.00106408\n "
|
||||
"value_double: -0.000909906\n value_double: -4.49183e-05\n "
|
||||
"value_double: 0.000104172\n value_double: -0.000438067\n "
|
||||
"value_double: -0.000317689\n value_double: -0.000769914\n "
|
||||
"value_double: -0.00157729\n value_double: 0.000220733\n "
|
||||
"value_double: 0.00107268\n value_double: -0.000186449\n "
|
||||
"value_double: -0.000807328\n value_double: 0.000456308\n "
|
||||
"value_double: -0.000593729\n value_double: -0.000954873\n "
|
||||
"value_double: -0.000268676\n value_double: 9.06328e-05\n "
|
||||
"value_double: -0.000323473\n value_double: -0.000628768\n "
|
||||
"value_double: 0.000664985\n value_double: 0.0020999\n "
|
||||
"value_double: -0.000932228\n value_double: -0.00203203\n "
|
||||
"value_double: 0.000565405\n value_double: 0.000167899\n "
|
||||
"value_double: 0.00054897\n value_double: 0.000612407\n "
|
||||
"value_double: -0.000619301\n value_double: 0.00169361\n "
|
||||
"value_double: -0.000188057\n value_double: 0.000267652\n "
|
||||
"value_double: -0.00127341\n value_double: -0.000218836\n "
|
||||
"value_double: -0.000431722\n value_double: 5.41867e-05\n "
|
||||
"value_double: 0.000296628\n value_double: 0.000819415\n "
|
||||
"value_double: -0.000758993\n value_double: -0.000114477\n "
|
||||
"value_double: 6.29219e-05\n value_double: 0.000726988\n "
|
||||
"value_double: -0.00135974\n value_double: 2.28447e-05\n "
|
||||
"value_double: 0.00120547\n value_double: -0.00136907\n "
|
||||
"value_double: -0.00140188\n value_double: 0.000201145\n "
|
||||
"value_double: -0.000774109\n value_double: 0.000798465\n "
|
||||
"value_double: -0.00131861\n value_double: 3.08996e-05\n "
|
||||
"value_double: -0.000637026\n value_double: 0.00228975\n "
|
||||
"value_double: -0.000633757\n value_double: -0.00116047\n "
|
||||
"value_double: 7.66039e-05\n value_double: 2.09167e-06\n "
|
||||
"value_double: -0.000296448\n value_double: 0.000206795\n "
|
||||
"value_double: 0.000674405\n value_double: -0.000722742\n "
|
||||
"value_double: -9.32443e-05\n value_double: -0.00170917\n "
|
||||
"value_double: -0.000505279\n value_double: 0.000628132\n "
|
||||
"value_double: -0.00145929\n value_double: 0.00106077\n "
|
||||
"value_double: -0.000796743\n value_double: 0.000498275\n "
|
||||
"value_double: -0.0002914\n value_double: -0.00230622\n "
|
||||
"value_double: -9.42872e-05\n value_double: 0.000200359\n "
|
||||
"value_double: -0.00305027\n value_double: -0.0016218\n "
|
||||
"value_double: 0.00137126\n value_double: -0.00215436\n "
|
||||
"value_double: -0.000743827\n value_double: -0.00090007\n "
|
||||
"value_double: -0.000762207\n value_double: -0.000149951\n "
|
||||
"value_double: -0.0013102\n value_double: 0.00165781\n "
|
||||
"value_double: 0.000343809\n value_double: -0.000826069\n "
|
||||
"value_double: -4.67404e-05\n value_double: 0.0023931\n "
|
||||
"value_double: 0.00165338\n value_double: -0.00050529\n "
|
||||
"value_double: 0.000178771\n value_double: -0.000858287\n "
|
||||
"value_double: -0.00157031\n value_double: -0.00165846\n "
|
||||
"value_double: -0.000713672\n value_double: 0.00014357\n "
|
||||
"value_double: 0.00203632\n value_double: -0.0010973\n "
|
||||
"value_double: -9.89852e-05\n value_double: 0.000558808\n "
|
||||
"value_double: 0.00087211\n value_double: 0.000661239\n "
|
||||
"value_double: 0.000389605\n value_double: 0.00060653\n "
|
||||
"value_double: -0.000330104\n }\n}\nchildren {\n name: \"DW2\"\n "
|
||||
"exec_micros: 0\n requested_bytes: 0\n parameters: 288\n "
|
||||
"total_exec_micros: 0\n total_requested_bytes: 0\n total_parameters: "
|
||||
"288\n float_ops: 0\n total_float_ops: 0\n tensor_value {\n dtype: "
|
||||
"DT_FLOAT\n value_double: 0.000704577\n value_double: "
|
||||
"0.000127421\n value_double: 0.00105952\n value_double: "
|
||||
"0.000423765\n value_double: -0.00025461\n value_double: "
|
||||
"-0.000857203\n value_double: 0.000693494\n value_double: "
|
||||
"0.000282214\n value_double: 0.00106185\n value_double: "
|
||||
"-0.000836552\n value_double: -0.00116766\n value_double: "
|
||||
"0.000733674\n value_double: -0.000669601\n value_double: "
|
||||
"-0.000275175\n value_double: -0.000428215\n value_double: "
|
||||
"-0.000495715\n value_double: -0.000125887\n value_double: "
|
||||
"-0.000715204\n value_double: -0.00108936\n value_double: "
|
||||
"0.000738267\n value_double: 0.000376081\n value_double: "
|
||||
"0.00191442\n value_double: 0.001423\n value_double: -0.00093811\n "
|
||||
" value_double: -5.91421e-05\n value_double: -0.000221507\n "
|
||||
"value_double: -0.000104555\n value_double: -0.00069682\n "
|
||||
"value_double: -0.000278325\n value_double: -0.00122748\n "
|
||||
"value_double: -0.00112411\n value_double: -0.000440511\n "
|
||||
"value_double: -0.000392247\n value_double: -0.000419606\n "
|
||||
"value_double: -0.00167063\n value_double: -0.000988578\n "
|
||||
"value_double: -0.00040159\n value_double: 0.00238918\n "
|
||||
"value_double: -0.000892898\n value_double: -0.000875976\n "
|
||||
"value_double: 0.00154401\n value_double: -0.000719911\n "
|
||||
"value_double: 0.000753941\n value_double: -0.000119961\n "
|
||||
"value_double: -0.000305115\n value_double: 9.97947e-05\n "
|
||||
"value_double: -0.00128908\n value_double: -0.000584184\n "
|
||||
"value_double: -0.000734685\n value_double: -0.00146612\n "
|
||||
"value_double: 0.000670802\n value_double: 0.000924219\n "
|
||||
"value_double: -0.000154409\n value_double: 0.000198231\n "
|
||||
"value_double: -0.000340742\n value_double: -0.00159646\n "
|
||||
"value_double: -1.19382e-05\n value_double: 0.00165203\n "
|
||||
"value_double: 0.0017085\n value_double: -0.000199614\n "
|
||||
"value_double: 0.000529526\n value_double: 0.000769364\n "
|
||||
"value_double: 0.00135369\n value_double: 0.00132873\n "
|
||||
"value_double: 0.000451174\n value_double: 0.000255218\n "
|
||||
"value_double: 0.00102891\n value_double: -0.00160068\n "
|
||||
"value_double: 0.000324269\n value_double: -0.000492347\n "
|
||||
"value_double: 0.000925301\n value_double: 0.00281998\n "
|
||||
"value_double: -0.000826404\n value_double: -0.000602903\n "
|
||||
"value_double: 0.00126559\n value_double: 0.000924364\n "
|
||||
"value_double: -9.19827e-05\n value_double: -5.59275e-05\n "
|
||||
"value_double: 0.00107971\n value_double: -9.91756e-05\n "
|
||||
"value_double: 0.000864708\n value_double: 0.00121747\n "
|
||||
"value_double: 0.00146338\n value_double: 0.000186883\n "
|
||||
"value_double: -0.00168195\n value_double: -0.00062029\n "
|
||||
"value_double: 0.000658127\n value_double: 0.00115682\n "
|
||||
"value_double: -0.00178359\n value_double: 0.000685606\n "
|
||||
"value_double: -0.000503373\n value_double: -0.000312999\n "
|
||||
"value_double: 0.000335383\n value_double: -1.08597e-05\n "
|
||||
"value_double: -8.2499e-05\n value_double: -0.000469726\n "
|
||||
"value_double: -0.00170868\n value_double: 0.000118957\n "
|
||||
"value_double: -0.000460736\n value_double: -5.56372e-05\n "
|
||||
"value_double: -0.00110148\n value_double: 0.00059123\n "
|
||||
"value_double: 0.000386339\n value_double: -0.00139967\n "
|
||||
"value_double: -0.000835664\n value_double: 0.00103421\n "
|
||||
"value_double: -0.00104296\n value_double: -0.000687497\n "
|
||||
"value_double: 1.1338e-05\n value_double: 0.00176484\n "
|
||||
"value_double: 0.000531523\n value_double: -0.000986387\n "
|
||||
"value_double: -0.00114152\n value_double: 0.000256744\n "
|
||||
"value_double: 0.000228425\n value_double: 0.00116583\n "
|
||||
"value_double: 0.0002726\n value_double: -0.00100828\n "
|
||||
"value_double: -0.000950376\n value_double: -0.00229074\n "
|
||||
"value_double: -0.000348272\n value_double: -0.000526032\n "
|
||||
"value_double: -0.000133703\n value_double: 0.000310979\n "
|
||||
"value_double: -0.00199278\n value_double: -0.000874469\n "
|
||||
"value_double: -0.000631466\n value_double: 0.0010534\n "
|
||||
"value_double: 0.00134646\n value_double: -0.00172743\n "
|
||||
"value_double: 0.00131031\n value_double: -0.000697506\n "
|
||||
"value_double: 0.000286747\n value_double: 0.000140759\n "
|
||||
"value_double: 0.000568707\n value_double: 0.000108177\n "
|
||||
"value_double: -0.00207337\n value_double: -0.00138146\n "
|
||||
"value_double: 0.000483162\n value_double: -0.00167096\n "
|
||||
"value_double: -0.000465813\n value_double: 0.00067724\n "
|
||||
"value_double: 2.08388e-05\n value_double: -0.00203279\n "
|
||||
"value_double: 7.8429e-05\n value_double: 0.00161337\n "
|
||||
"value_double: -0.000269005\n value_double: 0.000217822\n "
|
||||
"value_double: 0.000599886\n value_double: 0.000317549\n "
|
||||
"value_double: 0.00146597\n value_double: -0.00210947\n "
|
||||
"value_double: -0.000823917\n value_double: -6.83766e-05\n "
|
||||
"value_double: 0.000656085\n value_double: 0.000117134\n "
|
||||
"value_double: -0.000390405\n value_double: 2.39565e-05\n "
|
||||
"value_double: 0.00104837\n value_double: -0.000563671\n "
|
||||
"value_double: 0.000634073\n value_double: -0.000554531\n "
|
||||
"value_double: 0.000677971\n value_double: -0.000596207\n "
|
||||
"value_double: -0.00103335\n value_double: 0.000645199\n "
|
||||
"value_double: 0.00162195\n value_double: 0.000239246\n "
|
||||
"value_double: 0.00113519\n value_double: 0.000787431\n "
|
||||
"value_double: -0.000471688\n value_double: -0.000216625\n "
|
||||
"value_double: -0.000537156\n value_double: 0.000551816\n "
|
||||
"value_double: 0.00094337\n value_double: -0.000708127\n "
|
||||
"value_double: 0.000956955\n value_double: -0.000904936\n "
|
||||
"value_double: -0.000424413\n value_double: 0.000106455\n "
|
||||
"value_double: -0.000443952\n value_double: 0.000185436\n "
|
||||
"value_double: 0.000944397\n value_double: -0.000760572\n "
|
||||
"value_double: 0.000560002\n value_double: 4.09886e-05\n "
|
||||
"value_double: -0.00075076\n value_double: -0.000701856\n "
|
||||
"value_double: -0.000234851\n value_double: -0.000131515\n "
|
||||
"value_double: -0.000761718\n value_double: -0.000267808\n "
|
||||
"value_double: -0.00039682\n value_double: 0.000542953\n "
|
||||
"value_double: -0.000817685\n value_double: 0.00103851\n "
|
||||
"value_double: -0.000427176\n value_double: 0.000517784\n "
|
||||
"value_double: -0.000823552\n value_double: -0.000742637\n "
|
||||
"value_double: 0.000529213\n value_double: -0.000372805\n "
|
||||
"value_double: 1.85745e-05\n value_double: 0.00139891\n "
|
||||
"value_double: -0.000128417\n value_double: -0.000404316\n "
|
||||
"value_double: -0.000671571\n value_double: 0.000490311\n "
|
||||
"value_double: -0.00118493\n value_double: -0.000897118\n "
|
||||
"value_double: 0.000939601\n value_double: 0.000376399\n "
|
||||
"value_double: 0.0014709\n value_double: 0.000134806\n "
|
||||
"value_double: -0.000294469\n value_double: -0.000569142\n "
|
||||
"value_double: 0.00127266\n value_double: -0.00140936\n "
|
||||
"value_double: 0.000870083\n value_double: 0.000287246\n "
|
||||
"value_double: 0.000537685\n value_double: 0.000125569\n "
|
||||
"value_double: 0.000360276\n value_double: -0.000186268\n "
|
||||
"value_double: 0.0011141\n value_double: -0.000605185\n "
|
||||
"value_double: -0.0016281\n value_double: -0.000552758\n "
|
||||
"value_double: -0.000196755\n value_double: -0.00265188\n "
|
||||
"value_double: 0.000480997\n value_double: 0.00018776\n "
|
||||
"value_double: -0.00199234\n value_double: 0.000959982\n "
|
||||
"value_double: 0.00040334\n value_double: -0.000693596\n "
|
||||
"value_double: 0.00157678\n value_double: -0.00134499\n "
|
||||
"value_double: 0.00121909\n value_double: -0.000328734\n "
|
||||
"value_double: 0.000148554\n value_double: -0.000209509\n "
|
||||
"value_double: -0.000266303\n value_double: -0.00134084\n "
|
||||
"value_double: 5.21371e-05\n value_double: 0.0005329\n "
|
||||
"value_double: -0.000168858\n value_double: -0.00074875\n "
|
||||
"value_double: 0.000959397\n value_double: -0.00159476\n "
|
||||
"value_double: -0.000368838\n value_double: 0.0006077\n "
|
||||
"value_double: -0.00117243\n value_double: -0.00146013\n "
|
||||
"value_double: 0.00031519\n value_double: -0.000167911\n "
|
||||
"value_double: 0.000482571\n value_double: -0.000752268\n "
|
||||
"value_double: -0.00042363\n value_double: 0.00121219\n "
|
||||
"value_double: -0.000208159\n value_double: 0.000128531\n "
|
||||
"value_double: -0.000406308\n value_double: -0.000242663\n "
|
||||
"value_double: -3.96673e-05\n value_double: 0.00144854\n "
|
||||
"value_double: -0.000787328\n value_double: -0.000401958\n "
|
||||
"value_double: 0.00114091\n value_double: -0.000739546\n "
|
||||
"value_double: 0.000483236\n value_double: -0.000916945\n "
|
||||
"value_double: -0.00129577\n value_double: -0.00186504\n "
|
||||
"value_double: 0.000806804\n value_double: -0.000152251\n "
|
||||
"value_double: 0.000662576\n value_double: -0.000533236\n "
|
||||
"value_double: 0.00151019\n value_double: 0.00127805\n "
|
||||
"value_double: 0.00115399\n value_double: -0.00130876\n "
|
||||
"value_double: 2.99457e-06\n value_double: 0.000820777\n "
|
||||
"value_double: 0.000878393\n value_double: -0.000562642\n "
|
||||
"value_double: -0.00070442\n value_double: -0.00066277\n "
|
||||
"}\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
|
||||
&expected));
|
||||
EXPECT_EQ(expected.DebugString(), root.DebugString());
|
||||
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
350
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
Normal file
350
tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
Normal file
@ -0,0 +1,350 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
|
||||
#include "tensorflow/core/lib/strings/numbers.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
// Renders a count compactly: values below one thousand are printed verbatim;
// larger values are scaled and suffixed with 'k', 'm' or 'b', using two
// decimal places.
string FormatNumber(int64 n) {
  if (n >= 1000000000) {
    return strings::Printf("%.2fb", n / 1000000000.0);
  }
  if (n >= 1000000) {
    return strings::Printf("%.2fm", n / 1000000.0);
  }
  if (n >= 1000) {
    return strings::Printf("%.2fk", n / 1000.0);
  }
  return strings::Printf("%lld", n);
}
|
||||
|
||||
// Renders a duration given in microseconds, choosing "us", "ms" or "sec" as
// the unit depending on magnitude.
string FormatTime(int64 micros) {
  if (micros >= 1000000) {
    return strings::Printf("%.2fsec", micros / 1000000.0);
  }
  if (micros >= 1000) {
    return strings::Printf("%.2fms", micros / 1000.0);
  }
  return strings::Printf("%lldus", micros);
}
|
||||
|
||||
// Renders a byte count using decimal (powers of 1000) units: "B", "KB" or
// "MB". Anything of one million bytes or more is reported in MB.
string FormatMemory(int64 bytes) {
  if (bytes >= 1000000) {
    return strings::Printf("%.2fMB", bytes / 1000000.0);
  }
  if (bytes >= 1000) {
    return strings::Printf("%.2fKB", bytes / 1000.0);
  }
  return strings::Printf("%lldB", bytes);
}
|
||||
|
||||
// Renders a shape as its dimension sizes joined by "x".
string FormatShapes(const std::vector<int64>& shape) {
  const char* const dim_separator = "x";
  return str_util::Join(shape, dim_separator);
}
|
||||
|
||||
// Returns a copy of `str` in which RE2::GlobalReplace has substituted
// `newsub` for every match of `oldsub`. The input string is not modified.
// NOTE(review): `oldsub` is handed to RE2 as the pattern, so regex
// metacharacters in it are presumably interpreted - confirm callers expect
// that.
string StringReplace(const string& str, const string& oldsub,
                     const string& newsub) {
  string replaced(str);
  RE2::GlobalReplace(&replaced, oldsub, newsub);
  return replaced;
}
|
||||
|
||||
// Reads the file `fname` through `env` and parses its contents as a
// text-format GraphDef into `graph_def`.
//
// Returns the read error if the file cannot be read, InvalidArgument if the
// contents do not parse, and an OK status otherwise.
Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def) {
  string text;
  const Status read_status = ReadFileToString(env, fname, &text);
  if (!read_status.ok()) {
    return read_status;
  }
  if (!protobuf::TextFormat::ParseFromString(text, graph_def)) {
    return errors::InvalidArgument("Cannot parse proto string.");
  }
  return Status();
}
|
||||
|
||||
namespace {
|
||||
// Removes every leading and trailing single or double quote character from
// `s`. Quote characters in the interior are kept. Returns an empty string if
// `s` is empty or consists solely of quote characters.
string StripQuote(const string& s) {
  // Keep the positions as size_type: storing them in an int would narrow the
  // value and make the npos comparison depend on sign conversion.
  const string::size_type first = s.find_first_not_of("\"\'");
  if (first == string::npos) return "";

  // A non-quote character exists, so this search cannot fail.
  const string::size_type last = s.find_last_not_of("\"\'");
  return s.substr(first, last - first + 1);
}
|
||||
|
||||
// Builds the INVALID_ARGUMENT status reported for a bad option.
//
// `pieces` is the tokenized command line and `idx` the index of the offending
// option. The token following it, if there is one, is reported as the
// option's value; otherwise the value is reported as empty.
tensorflow::Status ReturnError(const std::vector<string>& pieces, int idx) {
  const int num_pieces = static_cast<int>(pieces.size());
  string option;
  string val;
  // Guard both lookups so a bad index can never read outside the vector.
  if (idx >= 0 && idx < num_pieces) {
    option = pieces[idx];
    if (idx + 1 < num_pieces) {
      val = pieces[idx + 1];
    }
  }
  return tensorflow::Status(
      tensorflow::error::INVALID_ARGUMENT,
      strings::StrCat("Invalid option '", option, "' value: '", val, "'"));
}
|
||||
|
||||
// Returns true when `s1` and `s2` are equal after both are lowercased with
// str_util::Lowercase.
bool CaseEqual(StringPiece s1, StringPiece s2) {
  // Strings of different length can never match; skip the lowercase copies.
  const bool same_length = (s1.size() == s2.size());
  return same_length && str_util::Lowercase(s1) == str_util::Lowercase(s2);
}
|
||||
|
||||
// Parses a boolean word, compared via CaseEqual.
//
// Accepts true/t/yes/y/1 and false/f/no/n/0. On a match the result is written
// to `*value` and true is returned; otherwise `*value` is left untouched and
// false is returned. `value` must not be null.
bool StringToBool(StringPiece str, bool* value) {
  CHECK(value != NULL) << "NULL output boolean given.";

  const char* const truthy_words[] = {"true", "t", "yes", "y", "1"};
  const char* const falsy_words[] = {"false", "f", "no", "n", "0"};

  for (const char* word : truthy_words) {
    if (CaseEqual(str, word)) {
      *value = true;
      return true;
    }
  }
  for (const char* word : falsy_words) {
    if (CaseEqual(str, word)) {
      *value = false;
      return true;
    }
  }
  return false;
}
|
||||
} // namespace
|
||||
|
||||
// Parses one command line of the form "<cmd> [-option [value]] ...".
//
// Tokens are separated by spaces; empty tokens are dropped. On success the
// command is stored in `*cmd`, every option mentioned on the line overwrites
// the corresponding field of `*opts`, and OK is returned. Fields of `*opts`
// that are not mentioned keep their current values.
//
// Returns INVALID_ARGUMENT if the line contains no tokens, the first token is
// not one of kCmds, an option is unknown, or an option's value is missing or
// malformed. `*opts` may already be partially updated when an error is
// returned.
tensorflow::Status ParseCmdLine(const string& line, string* cmd,
                                tensorflow::tfprof::Options* opts) {
  const std::vector<string> pieces =
      str_util::Split(line, ' ', str_util::SkipEmpty());

  // An empty or all-blank line yields no tokens; reject it here instead of
  // indexing into an empty vector below.
  const std::vector<string> cmds_str(kCmds,
                                     kCmds + sizeof(kCmds) / sizeof(*kCmds));
  if (pieces.empty() ||
      std::find(cmds_str.begin(), cmds_str.end(), pieces[0]) ==
          cmds_str.end()) {
    return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT,
                              "First string must be a valid command.");
  }
  *cmd = pieces[0];

  const int num_pieces = static_cast<int>(pieces.size());

  // True when the option at index `i` is followed by another token.
  auto has_value = [num_pieces](int i) { return i + 1 < num_pieces; };

  // Splits the (optionally quoted) token after index `i` on commas.
  auto split_list = [&pieces](int i) {
    return str_util::Split(StripQuote(pieces[i + 1]), ',',
                           str_util::SkipEmpty());
  };

  // Handles a boolean flag at index `*i`. A flag that is last on the line, or
  // is followed by another option (a token starting with '-'), means "true"
  // and consumes nothing. Otherwise the next token must be a boolean word and
  // is consumed. Returns false if that token is not a valid boolean.
  auto parse_flag = [&](int* i, bool* flag) -> bool {
    if (!has_value(*i) || pieces[*i + 1].find("-") == 0) {
      *flag = true;
      return true;
    }
    if (!StringToBool(pieces[*i + 1], flag)) {
      return false;
    }
    ++*i;
    return true;
  };

  for (int i = 1; i < num_pieces; ++i) {
    const string& opt = pieces[i];
    if (opt == kOptions[0]) {
      if (!has_value(i) ||
          !strings::safe_strto32(pieces[i + 1], &opts->max_depth)) {
        return ReturnError(pieces, i);
      }
      ++i;
    } else if (opt == kOptions[1]) {
      if (!has_value(i) ||
          !strings::safe_strto64(pieces[i + 1], &opts->min_bytes)) {
        return ReturnError(pieces, i);
      }
      ++i;
    } else if (opt == kOptions[2]) {
      if (!has_value(i) ||
          !strings::safe_strto64(pieces[i + 1], &opts->min_micros)) {
        return ReturnError(pieces, i);
      }
      ++i;
    } else if (opt == kOptions[3]) {
      if (!has_value(i) ||
          !strings::safe_strto64(pieces[i + 1], &opts->min_params)) {
        return ReturnError(pieces, i);
      }
      ++i;
    } else if (opt == kOptions[4]) {
      if (!has_value(i) ||
          !strings::safe_strto64(pieces[i + 1], &opts->min_float_ops)) {
        return ReturnError(pieces, i);
      }
      ++i;
    } else if (opt == kOptions[5]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->device_regexes = split_list(i);
      ++i;
    } else if (opt == kOptions[6]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      // The value must be one of the known ordering keys.
      std::set<string> order_by_set(
          kOrderBy, kOrderBy + sizeof(kOrderBy) / sizeof(*kOrderBy));
      auto order_by = order_by_set.find(pieces[i + 1]);
      if (order_by == order_by_set.end()) {
        return ReturnError(pieces, i);
      }
      opts->order_by = *order_by;
      ++i;
    } else if (opt == kOptions[7]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->account_type_regexes = split_list(i);
      ++i;
    } else if (opt == kOptions[8]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->start_name_regexes = split_list(i);
      ++i;
    } else if (opt == kOptions[9]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->trim_name_regexes = split_list(i);
      ++i;
    } else if (opt == kOptions[10]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->show_name_regexes = split_list(i);
      ++i;
    } else if (opt == kOptions[11]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->hide_name_regexes = split_list(i);
      ++i;
    } else if (opt == kOptions[12]) {
      if (!parse_flag(&i, &opts->account_displayed_op_only)) {
        return ReturnError(pieces, i);
      }
    } else if (opt == kOptions[13]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      // Every requested attribute must be one of the known kShown names.
      std::set<string> shown_set(kShown,
                                 kShown + sizeof(kShown) / sizeof(*kShown));
      std::vector<string> requested_vector = split_list(i);
      std::set<string> requested_set(requested_vector.begin(),
                                     requested_vector.end());
      for (const string& requested : requested_set) {
        if (shown_set.find(requested) == shown_set.end()) {
          return ReturnError(pieces, i);
        }
      }
      opts->select = requested_set;
      ++i;
    } else if (opt == kOptions[14]) {
      if (!parse_flag(&i, &opts->viz)) {
        return ReturnError(pieces, i);
      }
    } else if (opt == kOptions[15]) {
      if (!has_value(i)) {
        return ReturnError(pieces, i);
      }
      opts->dump_to_file = StripQuote(pieces[i + 1]);
      ++i;
    } else {
      // Unknown option.
      return ReturnError(pieces, i);
    }
  }
  return tensorflow::Status::OK();
}
|
||||
|
||||
// Prints the tfprof help text (commands, options, notes and one worked
// example) to stdout and flushes the stream.
//
// The text is a single concatenated literal that contains no printf
// conversion specifiers, so it is emitted verbatim.
void PrintHelp() {
  printf(
      "\nSee go/tfprof for detail tutorial.\n"
      "\nCommands\n\n"
      " scope: Each op has its op name in TensorFlow, such as 'n1', 'n1/n2', "
      "'n1/n2/n3'. 'n1/n2' is a child of 'n1'. 'scope' command builds "
      "a name scope tree and aggregates statistics based on it.\n\n"
      " graph: ops in TensorFlow are organized as a graph based on their "
      "source (inputs) and sink (outputs). 'graph' command builds "
      "a graph pointing *from output to input*, and aggregates "
      "statistics based on it.\n\n"
      " set: Set options that will be default for follow up commands.\n\n"
      " help: Show helps.\n"
      "\nOptions\n\n"
      "Press Enter in CLI to see default option values.\n\n"
      " -max_depth: Show ops that are at most this number of hops from "
      "starting op in the tree/graph structure.\n\n"
      " -min_bytes: Show ops that request at least this number of bytes.\n\n"
      " -min_micros: Show ops that spend at least this number of micros to "
      "run.\n\n"
      " -min_params: Show ops that contains at least this number of "
      "parameters.\n\n"
      " -min_float_ops: Show ops that contain at least this number of "
      "float operations. Only available if an op has "
      "op.RegisterStatistics() defined and OpLog is "
      "provided\n\n"
      " -device_regexes: Show ops that are placed on the specified devices. "
      "regexes are comma-separated.\n\n"
      " -order_by: Order the results by [name|depth|bytes|micros|params|"
      "float_ops]\n\n"
      " -account_type_regexes: Account and display the ops whose types match "
      "one of the type regexes specified. tfprof "
      "allow user to define extra op types for ops "
      "through tensorflow.tfprof.OpLog proto. regexes "
      "are comma-separated.\n\n"
      " -start_name_regexes: Show ops starting from the ops that matches the "
      "regexes, recursively. regexes are "
      "comma-separated.\n\n"
      " -trim_name_regexes: Hide ops starting from the ops that matches the "
      "regexes, recursively, regexes are comma-separated. "
      "\n\n"
      " -show_name_regexes: Show ops that match the regexes. regexes are "
      "comma-separated.\n\n"
      " -hide_name_regexes: Hide ops that match the regexes. regexes are "
      "comma-separated.\n\n"
      " Notes: For each op, -account_type_regexes is first evaluated, "
      "only ops with types matching the specified regexes are accounted and "
      "selected for display. -start/trim/show/hide_name_regexes are used "
      "to further filter ops for display. -start_name_regexes is evaluated "
      "first to search the starting ops to display. Descendants of starting "
      "ops are then evaluated against show/hide_name_regexes to make display "
      "decision. If an op matches trim_name_regexes, all its descendants are "
      "hidden.\n"
      "Ops statistics are *accounted even if they are hidden* as long as "
      "they match the -account_xxx options.\n\n"
      " -account_displayed_op_only: If True, only account the statistics of "
      "ops eventually displayed. If False, account all "
      "op statistics matching -account_type_regexes recursively.\n\n"
      " -select: Comma-separated list of metrics to show: [bytes|micros|"
      "params|float_ops|num_hidden_ops|tensor_value|device|op_types]."
      "\n\n"
      " -dump_to_file: Dump the output to a file, instead of terminal.\n\n"
      "Examples\n"
      " Assuming a toy model:\n"
      " input(typeB)->conv2d_1(typeA)->conv2d_2(typeA)->"
      "fc(typeA)->cost(typeA)->summarize(typeC)\n"
      " Command:\n"
      " tfprof> graph -account_type_regexes typeA -start_name_regexes "
      "cost.* -show_name_regexes conv2d.* -max_depth 10\n\n"
      " The above command only aggregate statistics of all ops of typeA ("
      "hence ignoring input(typeB)). It will start looking for candidate to "
      "display from cost.* and finally displays conv2d_1 and conv2d_2.\n\n");
  fflush(stdout);
}
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
@ -0,0 +1,50 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tfprof {
|
||||
string FormatNumber(int64 n);
|
||||
|
||||
string FormatTime(int64 micros);
|
||||
|
||||
string FormatMemory(int64 bytes);
|
||||
|
||||
string FormatShapes(const std::vector<int64>& shapes);
|
||||
|
||||
tensorflow::Status ParseCmdLine(const string& line, string* cmd,
|
||||
tensorflow::tfprof::Options* opts);
|
||||
|
||||
string StringReplace(const string& str, const string& oldsub,
|
||||
const string& newsub);
|
||||
|
||||
Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def);
|
||||
|
||||
void PrintHelp();
|
||||
|
||||
} // namespace tfprof
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
|
19
tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
Normal file
19
tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
Normal file
@ -0,0 +1,19 @@
|
||||
syntax = "proto2";
|
||||
|
||||
package tensorflow.tfprof;
|
||||
|
||||
message OpLogEntry {
|
||||
// op name.
|
||||
optional string name = 1;
|
||||
// float_ops is filled by tfprof Python API when called. It requires the
|
||||
// op has RegisterStatistics defined. Currently, Conv2D, MatMul, etc, are
|
||||
// implemented.
|
||||
optional int64 float_ops = 2;
|
||||
// User can define extra op type information for an op. This allows the user
|
||||
// to select a group of ops precisely using op_type as a key.
|
||||
repeated string types = 3;
|
||||
}
|
||||
|
||||
message OpLog {
|
||||
repeated OpLogEntry log_entries = 1;
|
||||
}
|
236
tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
Normal file
236
tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
Normal file
@ -0,0 +1,236 @@
|
||||
/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "linenoise.h"
|
||||
#include "tensorflow/c/c_api.h"
|
||||
#include "tensorflow/c/checkpoint_reader.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
|
||||
#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/init_main.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/core/util/command_line_flags.h"
|
||||
|
||||
using tensorflow::str_util::Split;
|
||||
|
||||
void completion(const char* buf, linenoiseCompletions* lc) {
|
||||
tensorflow::string buf_str = tensorflow::string(buf);
|
||||
if (buf_str.find(" ") == buf_str.npos) {
|
||||
for (const char* opt : tensorflow::tfprof::kCmds) {
|
||||
if (tensorflow::string(opt).find(buf_str) == 0) {
|
||||
linenoiseAddCompletion(lc, opt);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
tensorflow::string prefix;
|
||||
int last_dash = buf_str.find_last_of(' ');
|
||||
if (last_dash != tensorflow::string::npos) {
|
||||
prefix = buf_str.substr(0, last_dash + 1);
|
||||
buf_str = buf_str.substr(last_dash + 1, tensorflow::kint32max);
|
||||
}
|
||||
for (const char* opt : tensorflow::tfprof::kOptions) {
|
||||
if (tensorflow::string(opt).find(buf_str) == 0) {
|
||||
linenoiseAddCompletion(lc, (prefix + opt).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
tensorflow::string FLAGS_graph_path = "";
|
||||
tensorflow::string FLAGS_run_meta_path = "";
|
||||
tensorflow::string FLAGS_op_log_path = "";
|
||||
tensorflow::string FLAGS_checkpoint_path = "";
|
||||
tensorflow::int32 FLAGS_max_depth = 4;
|
||||
tensorflow::int64 FLAGS_min_bytes = 0;
|
||||
tensorflow::int64 FLAGS_min_micros = 0;
|
||||
tensorflow::int64 FLAGS_min_params = 0;
|
||||
tensorflow::int64 FLAGS_min_float_ops = 0;
|
||||
tensorflow::string FLAGS_device_regexes = ".*";
|
||||
tensorflow::string FLAGS_order_by = "name";
|
||||
tensorflow::string FLAGS_account_type_regexes = "Variable";
|
||||
tensorflow::string FLAGS_start_name_regexes = ".*";
|
||||
tensorflow::string FLAGS_trim_name_regexes = "";
|
||||
tensorflow::string FLAGS_show_name_regexes = ".*";
|
||||
tensorflow::string FLAGS_hide_name_regexes;
|
||||
bool FLAGS_account_displayed_op_only = false;
|
||||
tensorflow::string FLAGS_select = "params";
|
||||
bool FLAGS_viz = false;
|
||||
tensorflow::string FLAGS_dump_to_file = "";
|
||||
for (int i = 0; i < argc; i++) {
|
||||
fprintf(stderr, "%s\n", argv[i]);
|
||||
}
|
||||
|
||||
CHECK(tensorflow::ParseFlags(
|
||||
&argc, argv,
|
||||
{tensorflow::Flag("graph_path", &FLAGS_graph_path),
|
||||
tensorflow::Flag("run_meta_path", &FLAGS_run_meta_path),
|
||||
tensorflow::Flag("op_log_path", &FLAGS_op_log_path),
|
||||
tensorflow::Flag("checkpoint_path", &FLAGS_checkpoint_path),
|
||||
tensorflow::Flag("max_depth", &FLAGS_max_depth),
|
||||
tensorflow::Flag("min_bytes", &FLAGS_min_bytes),
|
||||
tensorflow::Flag("min_micros", &FLAGS_min_micros),
|
||||
tensorflow::Flag("min_params", &FLAGS_min_params),
|
||||
tensorflow::Flag("min_float_ops", &FLAGS_min_float_ops),
|
||||
tensorflow::Flag("device_regexes", &FLAGS_device_regexes),
|
||||
tensorflow::Flag("order_by", &FLAGS_order_by),
|
||||
tensorflow::Flag("account_type_regexes", &FLAGS_start_name_regexes),
|
||||
tensorflow::Flag("trim_name_regexes", &FLAGS_trim_name_regexes),
|
||||
tensorflow::Flag("show_name_regexes", &FLAGS_show_name_regexes),
|
||||
tensorflow::Flag("hide_name_regexes", &FLAGS_hide_name_regexes),
|
||||
tensorflow::Flag("account_displayed_op_only",
|
||||
&FLAGS_account_displayed_op_only),
|
||||
tensorflow::Flag("select", &FLAGS_select),
|
||||
tensorflow::Flag("dump_to_file", &FLAGS_dump_to_file)}));
|
||||
tensorflow::port::InitMain(argv[0], &argc, &argv);
|
||||
|
||||
fprintf(stderr, "%s\n", FLAGS_graph_path.c_str());
|
||||
|
||||
std::vector<tensorflow::string> device_regexes =
|
||||
Split(FLAGS_device_regexes, ',', tensorflow::str_util::SkipEmpty());
|
||||
std::vector<tensorflow::string> account_type_regexes =
|
||||
Split(FLAGS_account_type_regexes, ',', tensorflow::str_util::SkipEmpty());
|
||||
std::vector<tensorflow::string> start_name_regexes =
|
||||
Split(FLAGS_start_name_regexes, ',', tensorflow::str_util::SkipEmpty());
|
||||
std::vector<tensorflow::string> trim_name_regexes =
|
||||
Split(FLAGS_trim_name_regexes, ',', tensorflow::str_util::SkipEmpty());
|
||||
std::vector<tensorflow::string> show_name_regexes =
|
||||
Split(FLAGS_show_name_regexes, ',', tensorflow::str_util::SkipEmpty());
|
||||
std::vector<tensorflow::string> hide_name_regexes =
|
||||
Split(FLAGS_hide_name_regexes, ',', tensorflow::str_util::SkipEmpty());
|
||||
std::vector<tensorflow::string> select =
|
||||
Split(FLAGS_select, ',', tensorflow::str_util::SkipEmpty());
|
||||
|
||||
tensorflow::string cmd = "";
|
||||
if (argc == 1 && FLAGS_graph_path.empty()) {
|
||||
printf("1) go/tfprof: Tutorial.\n");
|
||||
printf("2) tfprof help: Detail help information.\n");
|
||||
printf(
|
||||
"3) tfprof --graph_path <GraphDef proto text file>: "
|
||||
"Profiling model structure, tensor shape and # parameters.\n");
|
||||
printf(
|
||||
"4) tfprof --graph_path <GraphDef proto text file> \\\n"
|
||||
" --run_meta_path <RunMetadata proto binary file> \\\n"
|
||||
" --op_log_path <tensorflow::tfprof::OpLog proto binary file> "
|
||||
"\\\n"
|
||||
" --checkpoint_path <TensorFlow Checkpoint file>: "
|
||||
"Profiling everything!\n");
|
||||
return 0;
|
||||
} else if (argc > 1) {
|
||||
if (tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[3]) {
|
||||
tensorflow::tfprof::PrintHelp();
|
||||
return 0;
|
||||
}
|
||||
if (tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[0] ||
|
||||
tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[1]) {
|
||||
cmd = argv[1];
|
||||
}
|
||||
}
|
||||
|
||||
printf("Reading Files...\n");
|
||||
std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef());
|
||||
TF_CHECK_OK(tensorflow::tfprof::ReadGraphDefText(
|
||||
tensorflow::Env::Default(), FLAGS_graph_path, graph.get()));
|
||||
|
||||
std::unique_ptr<tensorflow::RunMetadata> run_meta(
|
||||
new tensorflow::RunMetadata());
|
||||
if (!ReadBinaryProto(tensorflow::Env::Default(), FLAGS_run_meta_path,
|
||||
run_meta.get())
|
||||
.ok()) {
|
||||
run_meta.release();
|
||||
}
|
||||
|
||||
std::unique_ptr<tensorflow::tfprof::OpLog> op_log(
|
||||
new tensorflow::tfprof::OpLog());
|
||||
if (!ReadBinaryProto(tensorflow::Env::Default(), FLAGS_op_log_path,
|
||||
op_log.get())
|
||||
.ok()) {
|
||||
op_log.release();
|
||||
}
|
||||
|
||||
std::unique_ptr<tensorflow::checkpoint::CheckpointReader> ckpt_reader;
|
||||
TF_Status* status = TF_NewStatus();
|
||||
if (!FLAGS_checkpoint_path.empty()) {
|
||||
ckpt_reader.reset(new tensorflow::checkpoint::CheckpointReader(
|
||||
FLAGS_checkpoint_path, status));
|
||||
if (TF_GetCode(status) != TF_OK) {
|
||||
fprintf(stderr, "%s\n", TF_Message(status));
|
||||
TF_DeleteStatus(status);
|
||||
return 1;
|
||||
}
|
||||
TF_DeleteStatus(status);
|
||||
}
|
||||
|
||||
tensorflow::tfprof::TFStats tf_stat(std::move(graph), std::move(run_meta),
|
||||
std::move(op_log),
|
||||
std::move(ckpt_reader));
|
||||
tensorflow::tfprof::Options opts(
|
||||
FLAGS_max_depth, FLAGS_min_bytes, FLAGS_min_micros, FLAGS_min_params,
|
||||
FLAGS_min_float_ops, device_regexes, FLAGS_order_by, account_type_regexes,
|
||||
start_name_regexes, trim_name_regexes, show_name_regexes,
|
||||
hide_name_regexes, FLAGS_account_displayed_op_only, select, FLAGS_viz,
|
||||
FLAGS_dump_to_file);
|
||||
|
||||
if (!cmd.empty()) {
|
||||
tf_stat.PrintGraph(cmd, opts);
|
||||
return 0;
|
||||
}
|
||||
|
||||
linenoiseSetCompletionCallback(completion);
|
||||
linenoiseHistoryLoad(".tfprof_history.txt");
|
||||
|
||||
for (char* line = nullptr; (line = linenoise("tfprof> ")) != nullptr;) {
|
||||
tensorflow::string line_s = tensorflow::string(line);
|
||||
free(line);
|
||||
|
||||
if (line_s.empty()) {
|
||||
printf("%s", opts.ToString().c_str());
|
||||
continue;
|
||||
}
|
||||
linenoiseHistoryAdd(line_s.c_str());
|
||||
linenoiseHistorySave(".tfprof_history.txt");
|
||||
|
||||
tensorflow::tfprof::Options new_opts = opts;
|
||||
tensorflow::Status s =
|
||||
tensorflow::tfprof::ParseCmdLine(line_s, &cmd, &new_opts);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "E: %s\n", s.ToString().c_str());
|
||||
continue;
|
||||
}
|
||||
if (cmd == tensorflow::tfprof::kCmds[2]) {
|
||||
opts = new_opts;
|
||||
} else if (cmd == tensorflow::tfprof::kCmds[3]) {
|
||||
tensorflow::tfprof::PrintHelp();
|
||||
} else {
|
||||
tf_stat.PrintGraph(cmd, new_opts);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
49
tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
Normal file
49
tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
Normal file
@ -0,0 +1,49 @@
|
||||
syntax = "proto2";
|
||||
|
||||
import "tensorflow/core/framework/tensor_shape.proto";
|
||||
import "tensorflow/core/framework/types.proto";
|
||||
|
||||
package tensorflow.tfprof;
|
||||
|
||||
message TFProfTensorProto {
|
||||
optional DataType dtype = 1;
|
||||
// Flattened tensor values, in row-major order.
|
||||
// Only one of the following arrays is set.
|
||||
repeated double value_double = 2;
|
||||
repeated int64 value_int64 = 3;
|
||||
repeated string value_str = 4;
|
||||
}
|
||||
|
||||
message TFProfNode {
|
||||
// op name.
|
||||
optional string name = 1;
|
||||
// tensor value restored from checkpoint.
|
||||
optional TFProfTensorProto tensor_value = 15;
|
||||
// op execution time.
|
||||
optional int64 exec_micros = 2;
|
||||
// Total requested bytes by the op.
|
||||
optional int64 requested_bytes = 3;
|
||||
// Number of parameters if available.
|
||||
optional int64 parameters = 4;
|
||||
// Number of float operations.
|
||||
optional int64 float_ops = 13;
|
||||
// Number of inputs to the op.
|
||||
optional int64 inputs = 5;
|
||||
// Device the op is assigned to.
|
||||
optional string device = 10;
|
||||
|
||||
// The following are the aggregated stats from all accounted descendants and
|
||||
// the op itself. The actual descendants depend on the data structure used
|
||||
// (scope, graph).
|
||||
optional int64 total_exec_micros = 6;
|
||||
optional int64 total_requested_bytes = 7;
|
||||
optional int64 total_parameters = 8;
|
||||
optional int64 total_float_ops = 14;
|
||||
optional int64 total_inputs = 9;
|
||||
|
||||
// shape information, if available.
|
||||
repeated TensorShapeProto shapes = 11;
|
||||
// Descendants of the graph. The actual descendants depend on the data
|
||||
// structure used (scope, graph).
|
||||
repeated TFProfNode children = 12;
|
||||
}
|
@ -47,6 +47,22 @@ bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Tries to interpret `arg` as "--<flag>=<64-bit integer>".
//
// Returns true when `arg` starts with "--<flag>=", i.e. it addresses this
// flag, and false otherwise. *value_parsing_ok is always set: it becomes false
// only when the flag matched but the text after '=' did not scan as exactly
// one integer with nothing following it. On a successful scan the value is
// stored through `dst`.
bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                    tensorflow::int64* dst, bool* value_parsing_ok) {
  *value_parsing_ok = true;

  // Consume() strips the matched prefix from the local copy of `arg`; the
  // && chain stops at the first piece that does not match.
  const bool addresses_flag =
      arg.Consume("--") && arg.Consume(flag) && arg.Consume("=");
  if (!addresses_flag) {
    return false;
  }

  // A result other than 1 means either no integer was read or a trailing
  // character followed it.
  char extra;
  const int fields_read = sscanf(arg.data(), "%lld%c", dst, &extra);
  if (fields_read != 1) {
    LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
               << ".";
    *value_parsing_ok = false;
  }
  return true;
}
|
||||
|
||||
bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
|
||||
bool* dst, bool* value_parsing_ok) {
|
||||
*value_parsing_ok = true;
|
||||
@ -78,6 +94,9 @@ bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
|
||||
Flag::Flag(const char* name, tensorflow::int32* dst)
|
||||
: name_(name), type_(TYPE_INT), int_value_(dst) {}
|
||||
|
||||
Flag::Flag(const char* name, tensorflow::int64* dst)
|
||||
: name_(name), type_(TYPE_INT64), int64_value_(dst) {}
|
||||
|
||||
Flag::Flag(const char* name, bool* dst)
|
||||
: name_(name), type_(TYPE_BOOL), bool_value_(dst) {}
|
||||
|
||||
@ -88,6 +107,8 @@ bool Flag::Parse(string arg, bool* value_parsing_ok) const {
|
||||
bool result = false;
|
||||
if (type_ == TYPE_INT) {
|
||||
result = ParseInt32Flag(arg, name_, int_value_, value_parsing_ok);
|
||||
} else if (type_ == TYPE_INT64) {
|
||||
result = ParseInt64Flag(arg, name_, int64_value_, value_parsing_ok);
|
||||
} else if (type_ == TYPE_BOOL) {
|
||||
result = ParseBoolFlag(arg, name_, bool_value_, value_parsing_ok);
|
||||
} else if (type_ == TYPE_STRING) {
|
||||
|
@ -49,6 +49,7 @@ namespace tensorflow {
|
||||
class Flag {
|
||||
public:
|
||||
Flag(const char* name, int32* dst1);
|
||||
Flag(const char* name, int64* dst1);
|
||||
Flag(const char* name, bool* dst);
|
||||
Flag(const char* name, string* dst);
|
||||
|
||||
@ -56,8 +57,9 @@ class Flag {
|
||||
|
||||
private:
|
||||
string name_;
|
||||
enum { TYPE_INT, TYPE_BOOL, TYPE_STRING } type_;
|
||||
enum { TYPE_INT, TYPE_INT64, TYPE_BOOL, TYPE_STRING } type_;
|
||||
int* int_value_;
|
||||
int64* int64_value_;
|
||||
bool* bool_value_;
|
||||
string* string_value_;
|
||||
};
|
||||
|
@ -33,19 +33,21 @@ std::vector<char*> CharPointerVectorFromStrings(
|
||||
|
||||
TEST(CommandLineFlagsTest, BasicUsage) {
|
||||
int some_int = 10;
|
||||
int64 some_int64 = 21474836470; // max int32 is 2147483647
|
||||
bool some_switch = false;
|
||||
tensorflow::string some_name = "something";
|
||||
int argc = 4;
|
||||
int argc = 5;
|
||||
std::vector<tensorflow::string> argv_strings = {
|
||||
"program_name", "--some_int=20", "--some_switch",
|
||||
"--some_name=somethingelse"};
|
||||
"program_name", "--some_int=20", "--some_int64=214748364700",
|
||||
"--some_switch", "--some_name=somethingelse"};
|
||||
std::vector<char*> argv_array = CharPointerVectorFromStrings(argv_strings);
|
||||
bool parsed_ok =
|
||||
ParseFlags(&argc, argv_array.data(), {Flag("some_int", &some_int),
|
||||
Flag("some_switch", &some_switch),
|
||||
Flag("some_name", &some_name)});
|
||||
bool parsed_ok = ParseFlags(
|
||||
&argc, argv_array.data(),
|
||||
{Flag("some_int", &some_int), Flag("some_int64", &some_int64),
|
||||
Flag("some_switch", &some_switch), Flag("some_name", &some_name)});
|
||||
EXPECT_EQ(true, parsed_ok);
|
||||
EXPECT_EQ(20, some_int);
|
||||
EXPECT_EQ(214748364700, some_int64);
|
||||
EXPECT_EQ(true, some_switch);
|
||||
EXPECT_EQ("somethingelse", some_name);
|
||||
EXPECT_EQ(argc, 1);
|
||||
|
@ -152,6 +152,14 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||
actual = "@grpc//:grpc++_unsecure",
|
||||
)
|
||||
|
||||
native.new_git_repository(
|
||||
name = "linenoise",
|
||||
commit = "c894b9e59f02203dbe4e2be657572cf88c4230c3",
|
||||
init_submodules = True,
|
||||
remote = "https://github.com/antirez/linenoise.git",
|
||||
build_file = str(Label("//:linenoise.BUILD")),
|
||||
)
|
||||
|
||||
native.new_http_archive(
|
||||
name = "jsoncpp_git",
|
||||
url = "http://github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz",
|
||||
|
Loading…
Reference in New Issue
Block a user