diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 7a2105201f3..feb1d490f88 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -121,8 +121,6 @@ filegroup(
"//tensorflow/contrib/tensorboard:all_files",
"//tensorflow/contrib/testing:all_files",
"//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
- "//tensorflow/contrib/tfprof/tools/tfprof:all_files",
- "//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
"//tensorflow/contrib/training:all_files",
"//tensorflow/contrib/util:all_files",
"//tensorflow/core:all_files",
@@ -180,6 +178,8 @@ filegroup(
"//tensorflow/tools/proto_text:all_files",
"//tensorflow/tools/quantization:all_files",
"//tensorflow/tools/test:all_files",
+ "//tensorflow/tools/tfprof:all_files",
+ "//tensorflow/tools/tfprof/internal:all_files",
"//tensorflow/user_ops:all_files",
"//third_party/hadoop:all_files",
],
diff --git a/tensorflow/contrib/tfprof/BUILD b/tensorflow/contrib/tfprof/BUILD
index d55bda1bd05..e817cb86dfd 100644
--- a/tensorflow/contrib/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/BUILD
@@ -12,6 +12,7 @@ py_library(
srcs_version = "PY2AND3",
visibility = ["//tensorflow:__subpackages__"],
deps = [
+ "//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer",
"//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
],
)
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
index 013be486767..e103cb21216 100644
--- a/tensorflow/contrib/tfprof/README.md
+++ b/tensorflow/contrib/tfprof/README.md
@@ -20,434 +20,9 @@ and measures system performance.
4. Explore model based on name scope or graph structure.
5. Selectively grouping/filtering/accounting/ordering ops.
-### Interfaces
+tfprof can be used as a Command Line Interface (CLI) and as a Python API.
+The CLI is located in tensorflow/tools/tfprof.
+The Python API is located in tensorflow/contrib/tfprof.
+The tutorial is located in tensorflow/tools/tfprof/README.md.
-[CLI Tutorials](#cli-tutorials):
-It supports interactive mode for exploration and single-shot mode for
-scripts. Outputs can be dumped to files or printed in terminal.
-
-Python API Tutorials: Python API is not released yet.
-
-## CLI Tutorials
-
-Tutorials are based on a 32 layers ResNet.
-TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
-
-### Examples
-
-1) Start `tfprof` command line tool
-
-```shell
-# Build the tool.
-bazel build -c opt tensorflow/contrib/tfprof/...
-
-# Help information, including detail 'option' instructions.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
-#
-# The following commands will start tfprof interactive mode.
-#
-# Profile model shapes and parameters only.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
- --graph_path=/graph.pbtxt
-#
-# Additionally profile checkpoint statistics and values.
-# Use '-account_type_regexes _checkpoint_variables' to select
-# checkpoint tensors.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
- --graph_path=graph.pbtxt \
- --checkpoint_path=model.ckpt
-#
-# Additionally profile ops requested memory and timing.
-# See CLI Input Files section on generating run_meta file.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
- --graph_path=graph.pbtxt \
- --run_meta_path=run_meta \
- --checkpoint_path=model.ckpt
-#
-# tfprof_log is used to define customized op types and float ops.
-# Use tfprof_logger.write_op_log() to create tfprof_log.
-# See 11) in Examples section on generating tfprof_log file.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
- --graph_path=graph.pbtxt \
- --run_meta_path=run_meta \
- --op_log_path=tfprof_log \
- --checkpoint_path=model.ckpt
-```
-Note that `graph.pbtxt` is an ASCII text format.
-
-2) Press enter to show the default options
-
-```shell
-tfprof>
-tfprof>
--max_depth 4
--min_bytes 0
--min_micros 0
--min_params 0
--min_float_ops 0
--device_regexes .*
--order_by name
--account_type_regexes Variable
--start_name_regexes .*
--trim_name_regexes
--show_name_regexes .*
--hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
--account_displayed_op_only false
-# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
-# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
--select params
--viz false
--dump_to_file
-```
-
-3) I want to see the `BatchNorm`'s gamma value in checkpoint.
-
-```shell
-# Requires --graph_path, --checkpoint_path.
-tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
-_TFProfRoot ()
- unit_1_0/shared_activation/init_bn/gamma ()
-[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
- unit_1_0/sub2/bn2/gamma ()
-[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
-```
-
-4) I want to see my checkpoint tensors shape and number of parameters.
-
-```shell
-# Requires --graph_path, --checkpoint_path.
-# Increase -max_depth to see all tensors.
-tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
-_TFProfRoot (--/930.58k params)
- global_step (0/0 params)
- init/init_conv/DW (3x3x3x16, 432/864 params)
- pool_logit/DW (64x10, 640/1.28k params)
- pool_logit/DW/Momentum (64x10, 640/640 params)
- pool_logit/biases (10, 10/20 params)
- pool_logit/biases/Momentum (10, 10/10 params)
- unit_last/final_bn/beta (64, 64/128 params)
- unit_last/final_bn/gamma (64, 64/128 params)
- unit_last/final_bn/moving_mean (64, 64/64 params)
- unit_last/final_bn/moving_variance (64, 64/64 params)
-```
-
-5) I defined an op named ‘cost’ to calculate the loss. I want to know what ops
-it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
-graph dependencies.
-
-```shell
-# Requires --graph_path, --run_meta_path.
-tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
-_TFProfRoot (0us/3.61sec)
- init/init_conv/Conv2D (11.75ms/3.10sec)
- random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
- unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
- unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
- unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
- unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
-```
-
-6) I want to know the expensive operations during the back propagation.
-Hint: tensorflow prepend ‘gradient’ to your defined name scopes. Use the ‘scope’
-command to explore based on name scope hierarchies.
-
-```shell
-# Requires --graph_path, --run_meta_path.
-tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
-_TFProfRoot (0us/2.29sec)
- gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
- gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
- gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
- gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
- gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
- gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
- ...
-```
-
-7) Show the number of float operations in the model.
-Note: float operations calculation depends on
-1) op.RegisterStatistics. If an op doesn’t
-have RegisterStatistics defined, its float operations cannot be counted.
-2) fully defined shape is also necessary in order to calculate flops.
-float operations number is provided by tensorflow::tfprof::OpLog logged from
-Python API.
-
-```shell
-# Requires --graph_path, --op_log_path.
-tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
-_TFProfRoot (0/17.63b flops)
- gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
- gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
- init/init_conv/Conv2D (113.25m/113.25m flops)
- pool_logit/xw_plus_b (1.28k/165.12k flops)
- pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
- unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
- unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
- unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
- unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
- ...
-```
-
-8) Show the number of parameters of all `tf.trainable_variables()` in the model.
-
-```shell
-# Requires --graph_path --op_log_path.
-# store option for future commands.
-tfprof> set -account_type_regexes _trainable_variables
-tfprof> scope -max_depth 4 -select params
-_TFProfRoot (--/464.15k params)
- init/init_conv/DW (3x3x3x16, 432/432 params)
- pool_logit/DW (64x10, 640/640 params)
- pool_logit/biases (10, 10/10 params)
- unit_last/final_bn/beta (64, 64/64 params)
- unit_last/final_bn/gamma (64, 64/64 params)
-```
-
-Where does “_trainable_variables” come from? It is from the OpLog file
-generated by write_op_log() Python API. write_op_log() help users create some
-common op types implicitly. Users can define their own op types and log it
-through the write_op_log() API.
-
-9) What if I’m lazy and don’t want to define op type? I have given my ops
-well-defined names in my model’s code. And want to use names to select a group
-of ops. Let’s try it!
-
-```shell
-tfprof> set -account_type_regexes .*
-tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
-_TFProfRoot (0/18.43k params)
- unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
- unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
-```
-
-The above command allows you to filter ops that match specific names.
-`-account_displayed_op_only` asks tfprof to only account ops displayed
-in terminal. Otherwise, tfprof accounts all ops matched by
-`-account_type_regexes` recursively even if they are hidden due to some
-options such as -max_depth.
-
-10) TensorFlow has built-in op types. For example, built-in op type `Variable`
-seems to include `Variable's` created by your model. However, be careful when
-depending on it because TensorFlow creates extra `Variable` ops implicitly and
-the implicitly created ops can have the same prefix as the `Variable's` you
-defined.
-
-In the following example, extra `Variables` are created and “/Momentum” is
-appended to their names. This might cause you “model capacity” calculation
-to get wrong.
-
-```shell
-tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
-_TFProfRoot (--/930.58k params)
- global_step (1/1 params)
- init/init_conv/DW (3x3x3x16, 432/864 params)
- pool_logit/DW (64x10, 640/1.28k params)
- pool_logit/DW/Momentum (64x10, 640/640 params)
- pool_logit/biases (10, 10/20 params)
- pool_logit/biases/Momentum (10, 10/10 params)
- unit_last/final_bn/beta (64, 64/128 params)
- unit_last/final_bn/gamma (64, 64/128 params)
- unit_last/final_bn/moving_mean (64, 64/64 params)
- unit_last/final_bn/moving_variance (64, 64/64 params)
-```
-
-
-11) A example of defining extra op type for ops using `OpLog`
-
-First, in Python code, create an `OpLog` proto and add op type
-information to it:
-
-```python
-
-op_log = tfprof_log_pb2.OpLog()
-entry = op_log.log_entries.add()
-entry.name = 'pool_logit/DW'
-entry.types.append('pool_logit')
-entry = op_log.log_entries.add()
-entry.name = 'pool_logit/biases'
-# Alternatively:
-# var = tf.get_variable(xxx)
-# entry.name = var.op.name
-entry.types.append('pool_logit')
-```
-
-Second, call write_op_log to write the OpLog proto.
-
-```python
-tf.tfprof.tfprof_logger.write_op_log(sess.graph, /tmp/my_op_log_dir, op_log)
-```
-
-Third, when starting the tfprof tool, specify
-"--op_log_path /tmp/my_op_log_dir/op_log"
-
-```shell
-tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
-_TFProfRoot (--/650 params)
- pool_logit/DW (64x10, 640/640 params)
- pool_logit/biases (10, 10/10 params)
-```
-
-Note that when you call
-`tf.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all `Variables`
-inside `tf.trainable_variables()` to `_trainable_variables`.
-
-12) Run tfprof in one-shot mode and dump result to file.
-
-```shell
-# Printed to stdout if --dump_to_file is not set.
-tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt \
- --max_depth 3 \
- --dump_to_file "/tmp/dump"
-Reading Files...
-Parsing GraphDef...
-Preparing Views...
-
-cat /tmp/dump
-_TFProfRoot (--/930.58k params)
- global_step (0/0 params)
- pool_logit/DW (64x10, 640/1.28k params)
- pool_logit/biases (10, 10/20 params)
-```
-
-13) Analyze how balanced Variable are on parameter servers.
-
-In this tutorial, I'm going to use a seq2seq model, which are split
-on several gpus at workers and several parameter servers.
-
-In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
-gpu0. They share an op_type called 'gpu0'.
-
-```shell
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
- --graph_path ~/tfprof/textsum/graph.pbtxt \
- --run_meta_path ~/tfprof/textsum/run_meta
-
-# Looks like ps task 1 is holding twice more parameters than task 0.
-tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
-_TFProfRoot (--/25.81m params)
-tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
-_TFProfRoot (--/58.84m params)
-```
-
-### CLI Input Files
-
-tfprof command line inference (CLI) loads dumped files from a tensorflow model.
-Convert them into in-memory data structures. To use it, users need to specify
-the locations of the dumped files. The following are the dumped files loaded
-by tfprof:
-
---graph_path: GraphDef text file (required). Used to build in-memory
-representation of the model. For example, graph.pbtxt written by tf.Supervisor
-is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
-using tf.Graph.as_graph_def() or other API.
-
---run_meta_path: tensorflow::RunMetadata.
-Used to get the memory and time consumption of
-each op of the model. Users need to enable it. For example, the following code
-snippet writes a RunMetadata file:
-
-```python
-run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
-run_metadata = config_pb2.RunMetadata()
-# Once a while, call it the get the RunMeta.
-_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
-with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
- f.write(run_metadata.SerializeToString())
-```
-
---op_log_path:
-tensorflow::tfprof::OpLog. A proto used to provide extra op information
-for ops. By giving a group of ops a type name, users can easily aggregate the
-statistics for those ops without accidently missing or including extra ops.
-tfprof exposes the following Python API to add op information and logging.
-
-```python
-tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
-```
-
---checkpoint_path:
-TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
-provides checkpointed tensors' values.
-
-
-## Design
-
-
-### In-memory representation
-
-Scope: This representation organizes ops based on name scope hierarchy,
-similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
-For example op1 with name “name1/name2” is a child of op2 with name “name1”.
-
-Graph: The representation organizes ops based on op inputs. Hence it is
-a graph structure. The graph is a “directed acyclic graph” (hopefully), with
-direction from “output to input”. The direction is design this way so that users
-can trace from “result” to its “sources”.
-
-### Command line options
-
-tfprof’s major goals are to measure system performance and quicly analyze
-model architectures. Hence, its commands and options should allow users to achieve
-these 2 goals easily.
-
-graph: It is expected that users will mostly use graph representation to
-debug system performance. Hence, tfprof supports graph command, which pulls the
-graph in-memory representation described above.
-
-scope: It is expected that some users might want to explore their model
-statistics using the name scope information they defined in the Python codes.
-Hence, tfprof supports “scope” command, which pulls the tree in-memory
-representation.
-
-set: It is used to store the options so that user doesn’t need to
-re-type the same option again and again in the follow up command line. Note that
-tfprof has traditional terminal’s history and auto-complete support.
-
-help: print help information.
-
-Options: Run “tfprof help” to get detailed explanations.
-
-```python
-"-max_depth",
-"-min_bytes",
-"-min_micros",
-"-min_params",
-"-min_float_ops",
-"-order_by",
-"-account_type_regexes",
-"-start_name_regexes",
-"-trim_name_regexes",
-"-show_name_regexes",
-"-hide_name_regexes",
-"-account_displayed_op_only",
-"-select",
-"-viz", # Only supported for graph command.
-"-dump_to_file",
-```
-
-A key design is that stats are aggregated from descendants up to ancestors.
-`-account_type_regexes` is used to decide which ops stat is accounted. It makes
-decision based on op type. Usually set it to `.*` if no extra type information
-is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
-`-min/max` and `-show/hide/trim/start` options are only used the optionally
-displayed or hide ops based on ops’ name and stats. However, they don’t prevent
-tfprof from accounting stats of hidden ops. Hence, the stat of a op can be
-aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
-an option to break this rule. When it is set, only displayed ops are accounted.
-
-Regexes are all comma-separated, for example `-show_name_regexes`
-`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
-is not expected to show up in op names.
-
-`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
-(notice the indent printed) are sorted according to order_by.
-
-## Future Work
-
-* Load SummaryWriter event logs so that it can show the latest summary value.
-
-* Better sorting and aggregation of outputs. Easier comprehension.
-
-* Currently, shape information is based on `graph.pbtxt`. When the shape
-information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
-and `Checkpoint` to complete shape information.
+Enjoy!
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/__init__.py b/tensorflow/contrib/tfprof/__init__.py
index ce777979b96..129dad2726c 100644
--- a/tensorflow/contrib/tfprof/__init__.py
+++ b/tensorflow/contrib/tfprof/__init__.py
@@ -17,5 +17,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer
from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
from tensorflow.python.util.all_util import make_all
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
index 87a8311486f..07677c6ed73 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
@@ -3,14 +3,36 @@ licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
+
+py_library(
+ name = "model_analyzer",
+ srcs = ["model_analyzer.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ "//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib",
+ "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
+ "//tensorflow/tools/tfprof:protos_all_py",
+ ],
+)
+
+py_test(
+ name = "model_analyzer_test",
+ srcs = ["model_analyzer_test.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ ":model_analyzer",
+ "//tensorflow:tensorflow_py",
+ ],
+)
py_library(
name = "tfprof_logger",
srcs = ["tfprof_logger.py"],
srcs_version = "PY2AND3",
deps = [
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
"//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/tools/tfprof:protos_all_py",
],
)
@@ -20,7 +42,34 @@ tf_py_test(
additional_deps = [
":tfprof_logger",
"//tensorflow:tensorflow_py",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
+ "//tensorflow/tools/tfprof:protos_all_py",
+ ],
+)
+
+tf_py_wrap_cc(
+ name = "pywrap_tensorflow_print_model_analysis_lib",
+ srcs = ["pywrap_tensorflow_print_model_analysis.i"],
+ swig_includes = [
+ "//tensorflow/python:lib/core/strings.i",
+ "//tensorflow/python:platform/base.i",
+ ],
+ deps = [
+ "//tensorflow/core:framework_headers_lib",
+ "//tensorflow/tools/tfprof/internal:print_model_analysis_hdr",
+ "//util/python:python_headers",
+ ],
+)
+
+py_test(
+ name = "print_model_analysis_test",
+ srcs = ["print_model_analysis_test.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ ":pywrap_tensorflow_print_model_analysis_lib",
+ "//tensorflow:tensorflow_py",
+ "//tensorflow/python:framework_test_lib",
+ "//tensorflow/python:platform_test",
+ "//tensorflow/tools/tfprof:protos_all_py",
],
)
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
new file mode 100644
index 00000000000..92943b1adb4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
@@ -0,0 +1,188 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model Analyzer.
+
+Analyze model, including shape, params, time, memory, structure, etc.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
+from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
+from tensorflow.tools.tfprof import tfprof_options_pb2
+from tensorflow.tools.tfprof import tfprof_output_pb2
+
+# pylint: disable=bad-whitespace
+# pylint: disable=bad-continuation
+# 2 example tfprof_options for print_model_analysis API.
+#
+# Show the parameter statistics of trainable variables.
+TRAINABLE_VARS_PARAMS_STAT_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['params'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the number of float operations.
+FLOAT_OPS_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 1,
+    'device_regexes': ['.*'],
+    'order_by': 'float_ops',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['float_ops'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show number of parameters on parameter server 0.
+# It is recommended to provide the `run_meta` argument
+# to have complete device placement info.
+PRINT_PARAMS_ON_DEVICE = {
+    'max_depth': 1,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*ps.*task:0.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': False,
+    'select': ['device', 'params'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the timing stats and memory demands.
+PRINT_ALL_TIMING_MEMORY = {
+    'max_depth': 10000,
+    'min_bytes': 1,  # Only >=1
+    'min_micros': 1,  # Only >=1
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['micros', 'bytes'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# pylint: enable=bad-whitespace
+# pylint: enable=bad-continuation
+
+
+def print_model_analysis(graph,
+                         run_meta=None,
+                         op_log=None,
+                         tfprof_cmd='scope',
+                         tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS):
+  """Print model statistics.
+
+  Prints the model statistics to stdout. Also returns the results
+  in a TFProfNode proto. See the tfprof tool:
+  'bazel run tensorflow/tools/tfprof help'
+
+  Examples:
+    Show the parameter/shape statistics of tf.trainable_variables().
+      print_model_analysis(sess.graph).
+
+    Show number of float ops. Only ops with RegisterStatistics defined
+    are counted.
+      show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS
+      print_model_analysis(sess.graph, tfprof_options=show_float_op_opts)
+
+  Args:
+    graph: tf.Graph.
+    run_meta: tensorflow::RunMetadata proto. When provided, also shows valid
+              timing and memory information when 'select' option contains
+              'micros' and 'bytes'.
+    op_log: tensorflow::tfprof::OpLog proto. users can use this proto to
+            group together ops and use an op_type to select the group.
+    tfprof_cmd: string. Either 'scope' or 'graph'. 'scope' view organizes
+                ops using their name scopes. 'graph' view organizes ops using
+                their graph inputs.
+    tfprof_options: See 'tfprof help' for details.
+  Returns:
+    TFProfNode proto. Side effect: a formatted output to stdout.
+  """
+  # pylint: disable=protected-access
+  op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta)
+  # pylint: enable=protected-access
+  opts = tfprof_options_pb2.OptionsProto()
+  opts.max_depth = tfprof_options['max_depth']
+  opts.min_bytes = tfprof_options['min_bytes']
+  opts.min_micros = tfprof_options['min_micros']
+  opts.min_params = tfprof_options['min_params']
+  opts.min_float_ops = tfprof_options['min_float_ops']
+  for p in tfprof_options['device_regexes']:
+    opts.device_regexes.append(p)
+  opts.order_by = tfprof_options['order_by']
+  for p in tfprof_options['account_type_regexes']:
+    opts.account_type_regexes.append(p)
+  for p in tfprof_options['start_name_regexes']:
+    opts.start_name_regexes.append(p)
+  for p in tfprof_options['trim_name_regexes']:
+    opts.trim_name_regexes.append(p)
+  for p in tfprof_options['show_name_regexes']:
+    opts.show_name_regexes.append(p)
+  for p in tfprof_options['hide_name_regexes']:
+    opts.hide_name_regexes.append(p)
+  opts.account_displayed_op_only = tfprof_options['account_displayed_op_only']
+  for p in tfprof_options['select']:
+    opts.select.append(p)
+  opts.viz = tfprof_options['viz']
+  opts.dump_to_file = tfprof_options['dump_to_file']
+
+  run_meta_str = run_meta.SerializeToString() if run_meta else b''
+  op_log_str = op_log.SerializeToString() if op_log else b''
+
+  tfprof_node = tfprof_output_pb2.TFProfNode()
+  tfprof_node.ParseFromString(
+      print_mdl.PrintModelAnalysis(
+          graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
+          tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
+  # Bug fix: the docstring promises a TFProfNode return, but the original
+  # body built the proto and dropped it, so callers always got None.
+  return tfprof_node
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
new file mode 100644
index 00000000000..2673a64d333
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
@@ -0,0 +1,84 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import tensorflow as tf
+
+
+class PrintModelAnalysisTest(tf.test.TestCase):
+
+ def _BuildSmallModel(self):
+ image = tf.zeros([2, 6, 6, 3])
+ kernel = tf.get_variable(
+ 'DW', [3, 3, 3, 6],
+ tf.float32,
+ initializer=tf.random_normal_initializer(stddev=0.001))
+ x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
+ kernel = tf.get_variable(
+ 'DW2', [2, 2, 6, 12],
+ tf.float32,
+ initializer=tf.random_normal_initializer(stddev=0.001))
+ x = tf.nn.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME')
+ return x
+
+ def testDumpToFile(self):
+ opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
+ opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
+
+ with tf.Session() as sess:
+ _ = self._BuildSmallModel()
+ tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ sess.graph, tfprof_options=opts)
+
+ with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
+ self.assertEqual('_TFProfRoot (--/450 params)\n'
+ ' DW (3x3x3x6, 162/162 params)\n'
+ ' DW2 (2x2x6x12, 288/288 params)\n',
+ f.read().decode('utf-8'))
+
+ def testSelectEverything(self):
+ opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
+ opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
+ opts['account_type_regexes'] = ['.*']
+ opts['select'] = [
+ 'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
+ ]
+
+ with tf.Session() as sess:
+ x = self._BuildSmallModel()
+
+ sess.run(tf.initialize_all_variables())
+ run_meta = tf.RunMetadata()
+ _ = sess.run(x,
+ options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
+ run_metadata=run_meta)
+
+ tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ sess.graph, run_meta, tfprof_options=opts)
+
+ with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
+ # pylint: disable=line-too-long
+      self.assertEqual(
+          '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n  DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n  DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n  DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n  DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n  DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n  DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n  DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n  DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n  DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n  DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n  DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n  DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n  DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n  DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n  DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n  DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n  DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n  DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
+          f.read().decode('utf-8'))
+ # pylint: enable=line-too-long
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
new file mode 100644
index 00000000000..4000f0024e8
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
@@ -0,0 +1,227 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""print_model_analysis test."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from google.protobuf import text_format
+from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
+from tensorflow.tools.tfprof import tfprof_options_pb2
+from tensorflow.tools.tfprof import tfprof_output_pb2
+
+# pylint: disable=bad-whitespace
+# pylint: disable=bad-continuation
+TEST_OPTIONS = {
+ 'max_depth': 10000,
+ 'min_bytes': 0,
+ 'min_micros': 0,
+ 'min_params': 0,
+ 'min_float_ops': 0,
+ 'device_regexes': ['.*'],
+ 'order_by': 'name',
+ 'account_type_regexes': ['.*'],
+ 'start_name_regexes': ['.*'],
+ 'trim_name_regexes': [],
+ 'show_name_regexes': ['.*'],
+ 'hide_name_regexes': [],
+ 'account_displayed_op_only': True,
+ 'select': ['params'],
+ 'viz': False
+}
+
+# pylint: enable=bad-whitespace
+# pylint: enable=bad-continuation
+
+
+class PrintModelAnalysisTest(tf.test.TestCase):
+
+ def _BuildSmallModel(self):
+ image = tf.zeros([2, 6, 6, 3])
+ kernel = tf.get_variable(
+ 'DW', [6, 6, 3, 6],
+ tf.float32,
+ initializer=tf.random_normal_initializer(stddev=0.001))
+ x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
+ return x
+
+ def testPrintModelAnalysis(self):
+ opts = tfprof_options_pb2.OptionsProto()
+ opts.max_depth = TEST_OPTIONS['max_depth']
+ opts.min_bytes = TEST_OPTIONS['min_bytes']
+ opts.min_micros = TEST_OPTIONS['min_micros']
+ opts.min_params = TEST_OPTIONS['min_params']
+ opts.min_float_ops = TEST_OPTIONS['min_float_ops']
+ for p in TEST_OPTIONS['device_regexes']:
+ opts.device_regexes.append(p)
+ opts.order_by = TEST_OPTIONS['order_by']
+ for p in TEST_OPTIONS['account_type_regexes']:
+ opts.account_type_regexes.append(p)
+ for p in TEST_OPTIONS['start_name_regexes']:
+ opts.start_name_regexes.append(p)
+ for p in TEST_OPTIONS['trim_name_regexes']:
+ opts.trim_name_regexes.append(p)
+ for p in TEST_OPTIONS['show_name_regexes']:
+ opts.show_name_regexes.append(p)
+ for p in TEST_OPTIONS['hide_name_regexes']:
+ opts.hide_name_regexes.append(p)
+ opts.account_displayed_op_only = TEST_OPTIONS['account_displayed_op_only']
+ for p in TEST_OPTIONS['select']:
+ opts.select.append(p)
+ opts.viz = TEST_OPTIONS['viz']
+
+ with tf.Session() as sess:
+ _ = self._BuildSmallModel()
+ tfprof_pb = tfprof_output_pb2.TFProfNode()
+ tfprof_pb.ParseFromString(
+ print_mdl.PrintModelAnalysis(sess.graph.as_graph_def(
+ ).SerializeToString(), b'', b'', b'scope', opts.SerializeToString()))
+
+ expected_pb = tfprof_output_pb2.TFProfNode()
+ text_format.Merge(r"""name: "_TFProfRoot"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 648
+ children {
+ name: "Conv2D"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW"
+ exec_micros: 0
+ requested_bytes: 0
+ parameters: 648
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 648
+ children {
+ name: "DW/Assign"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW/Initializer"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ children {
+ name: "DW/Initializer/random_normal"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ children {
+ name: "DW/Initializer/random_normal/RandomStandardNormal"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW/Initializer/random_normal/mean"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW/Initializer/random_normal/mul"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW/Initializer/random_normal/shape"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW/Initializer/random_normal/stddev"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ float_ops: 0
+ total_float_ops: 0
+ }
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "DW/read"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ float_ops: 0
+ total_float_ops: 0
+ }
+ children {
+ name: "zeros"
+ exec_micros: 0
+ requested_bytes: 0
+ total_exec_micros: 0
+ total_requested_bytes: 0
+ total_parameters: 0
+ float_ops: 0
+ total_float_ops: 0
+ }
+ float_ops: 0
+ total_float_ops: 0""", expected_pb)
+ self.assertEqual(expected_pb, tfprof_pb)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
new file mode 100644
index 00000000000..05b734a699f
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
@@ -0,0 +1,43 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%include "tensorflow/python/lib/core/strings.i"
+%include "tensorflow/python/platform/base.i"
+
+%{
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/core/framework/types.h"
+%}
+
+%typemap(typecheck) const string & = char *;
+%typemap(in) const string& (string temp) {
+ if (!_PyObjAs($input, &temp)) return NULL;
+ $1 = &temp;
+}
+%typemap(out) const string& {
+ $result = PyString_FromStringAndSize($1->data(), $1->size());
+}
+%apply const string & {string &};
+%apply const string & {string *};
+
+%ignoreall
+
+%unignore tensorflow;
+%unignore tensorflow::tfprof;
+%unignore tensorflow::tfprof::PrintModelAnalysis;
+
+%include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
+
+%unignoreall
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
index 53dd2632b69..1f710bc970c 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@@ -24,8 +24,8 @@ import os
import sys
import tensorflow as tf
-from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2
from tensorflow.python.framework import ops
+from tensorflow.tools.tfprof import tfprof_log_pb2
TRAINABLE_VARIABLES = '_trainable_variables'
REGISTERED_FLOP_STATS = 'flops'
@@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None):
if node.name not in logged_ops:
entry = tfprof_log_pb2.OpLogEntry()
entry.name = node.name
- entry.float_ops = stats.value
+ entry.float_ops = int(stats.value)
logged_ops[entry.name] = entry
for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0845028b5b7..92f41457a15 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1317,7 +1317,7 @@ cc_library(
"platform/regexp.h",
],
visibility = [
- "//tensorflow/contrib/tfprof:__subpackages__",
+ "//tensorflow/tools/tfprof:__subpackages__",
],
deps = [":lib_internal"],
)
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9624f8507cc..5bcf94a735c 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1861,6 +1861,7 @@ tf_py_wrap_cc(
"//tensorflow/c:tf_status_helper",
"//tensorflow/core:lib",
"//tensorflow/core/distributed_runtime:server_lib",
+ "//tensorflow/tools/tfprof/internal:print_model_analysis",
"//util/python:python_headers",
] + tf_additional_lib_deps(),
)
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/BUILD b/tensorflow/tools/tfprof/BUILD
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/BUILD
rename to tensorflow/tools/tfprof/BUILD
index da161b1ffa1..56e1fb7ae4d 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/BUILD
+++ b/tensorflow/tools/tfprof/BUILD
@@ -26,13 +26,13 @@ cc_binary(
":protos_all_cc",
"//tensorflow/c:c_api",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
- "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
- "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
"//tensorflow/core:framework_headers_lib",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
+ "//tensorflow/tools/tfprof/internal:tfprof_options",
+ "//tensorflow/tools/tfprof/internal:tfprof_stats",
+ "//tensorflow/tools/tfprof/internal:tfprof_utils",
"@linenoise//:linenoise",
],
)
diff --git a/tensorflow/tools/tfprof/README.md b/tensorflow/tools/tfprof/README.md
new file mode 100644
index 00000000000..8618abe0d5e
--- /dev/null
+++ b/tensorflow/tools/tfprof/README.md
@@ -0,0 +1,455 @@
+# tfprof: A Profiling Tool for TensorFlow Models
+
+Internal User Please Use: go/tfprof
+
+Author: Xin Pan (xpan@google.com, github: panyx0718)
+
+Consultants: Jon Shlens, Pete Warden
+
+
+## Introduction
+
+tfprof is a profiling tool for TensorFlow that analyzes model architectures
+and measures system performance.
+
+### Major Features
+
+1. Measure model parameters, float operations, tensor shapes.
+2. Measure op execution times, requested memory size and device placement.
+3. Inspect checkpoint tensors' shapes and their values.
+4. Explore model based on name scope or graph structure.
+5. Selectively grouping/filtering/accounting/ordering ops.
+
+### Interfaces
+
+[CLI Tutorials](#cli-tutorials):
+It supports interactive mode for exploration and single-shot mode for
+scripts. Outputs can be dumped to files or printed in terminal.
+
+Python API Tutorials: Python API is not released yet.
+
+## CLI Tutorials
+
+Tutorials are based on a 32 layers ResNet.
+TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
+
+### Examples
+
+1) Start `tfprof` command line tool
+
+```shell
+# Build the tool.
+bazel build -c opt tensorflow/tools/tfprof/...
+
+# Help information, including detail 'option' instructions.
+bazel-bin/tensorflow/tools/tfprof/tfprof help
+#
+# The following commands will start tfprof interactive mode.
+#
+# Profile model shapes and parameters only.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt
+#
+# Additionally profile checkpoint statistics and values.
+# Use '-account_type_regexes _checkpoint_variables' to select
+# checkpoint tensors.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt \
+ --checkpoint_path=model.ckpt
+#
+# Additionally profile ops requested memory and timing.
+# See CLI Input Files section on generating run_meta file.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt \
+ --run_meta_path=run_meta \
+ --checkpoint_path=model.ckpt
+#
+# tfprof_log is used to define customized op types and float ops.
+# Use tfprof_logger.write_op_log() to create tfprof_log.
+# See 11) in Examples section on generating tfprof_log file.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt \
+ --run_meta_path=run_meta \
+ --op_log_path=tfprof_log \
+ --checkpoint_path=model.ckpt
+```
+Note that `graph.pbtxt` is an ASCII text format.
+
+2) Press enter to show the default options
+
+```shell
+tfprof>
+tfprof>
+-max_depth 4
+-min_bytes 0
+-min_micros 0
+-min_params 0
+-min_float_ops 0
+-device_regexes .*
+-order_by name
+-account_type_regexes Variable
+-start_name_regexes .*
+-trim_name_regexes
+-show_name_regexes .*
+-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
+-account_displayed_op_only false
+# supported select fields. Availability depends on --[run_meta|checkpoint|op_log]_path.
+# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
+-select params
+-viz false
+-dump_to_file
+```
+
+3) I want to see the `BatchNorm`'s gamma value in checkpoint.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
+_TFProfRoot ()
+ unit_1_0/shared_activation/init_bn/gamma ()
+[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
+ unit_1_0/sub2/bn2/gamma ()
+[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
+```
+
+4) I want to see my checkpoint tensors shape and number of parameters.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+# Increase -max_depth to see all tensors.
+tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
+_TFProfRoot (--/930.58k params)
+ global_step (0/0 params)
+ init/init_conv/DW (3x3x3x16, 432/864 params)
+ pool_logit/DW (64x10, 640/1.28k params)
+ pool_logit/DW/Momentum (64x10, 640/640 params)
+ pool_logit/biases (10, 10/20 params)
+ pool_logit/biases/Momentum (10, 10/10 params)
+ unit_last/final_bn/beta (64, 64/128 params)
+ unit_last/final_bn/gamma (64, 64/128 params)
+ unit_last/final_bn/moving_mean (64, 64/64 params)
+ unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+5) I defined an op named ‘cost’ to calculate the loss. I want to know what ops
+it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
+graph dependencies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/3.61sec)
+ init/init_conv/Conv2D (11.75ms/3.10sec)
+ random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
+ unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
+ unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
+ unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
+ unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
+```
+
+6) I want to know the expensive operations during the back propagation.
+Hint: tensorflow prepends ‘gradient’ to your defined name scopes. Use the ‘scope’
+command to explore based on name scope hierarchies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/2.29sec)
+ gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
+ gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
+ gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
+ gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
+ gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
+ gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
+ ...
+```
+
+7) Show the number of float operations in the model.
+Note: float operations calculation depends on
+1) op.RegisterStatistics. If an op doesn’t
+have RegisterStatistics defined, its float operations cannot be counted.
+2) fully defined shape is also necessary in order to calculate flops.
+float operations number is provided by tensorflow::tfprof::OpLog logged from
+Python API.
+
+```shell
+# Requires --graph_path, --op_log_path.
+tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
+_TFProfRoot (0/17.63b flops)
+ gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
+ gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
+ init/init_conv/Conv2D (113.25m/113.25m flops)
+ pool_logit/xw_plus_b (1.28k/165.12k flops)
+ pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
+ unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
+ unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
+ unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
+ unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
+ ...
+```
+
+8) Show the number of parameters of all `tf.trainable_variables()` in the model.
+
+```shell
+# Requires --graph_path --op_log_path.
+# store option for future commands.
+tfprof> set -account_type_regexes _trainable_variables
+tfprof> scope -max_depth 4 -select params
+_TFProfRoot (--/464.15k params)
+ init/init_conv/DW (3x3x3x16, 432/432 params)
+ pool_logit/DW (64x10, 640/640 params)
+ pool_logit/biases (10, 10/10 params)
+ unit_last/final_bn/beta (64, 64/64 params)
+ unit_last/final_bn/gamma (64, 64/64 params)
+```
+
+Where does “_trainable_variables” come from? It is from the OpLog file
+generated by write_op_log() Python API. write_op_log() help users create some
+common op types implicitly. Users can define their own op types and log it
+through the write_op_log() API.
+
+9) What if I’m lazy and don’t want to define op type? I have given my ops
+well-defined names in my model’s code. And want to use names to select a group
+of ops. Let’s try it!
+
+```shell
+tfprof> set -account_type_regexes .*
+tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
+_TFProfRoot (0/18.43k params)
+ unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
+ unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
+```
+
+The above command allows you to filter ops that match specific names.
+`-account_displayed_op_only` asks tfprof to only account ops displayed
+in terminal. Otherwise, tfprof accounts all ops matched by
+`-account_type_regexes` recursively even if they are hidden due to some
+options such as -max_depth.
+
+10) TensorFlow has built-in op types. For example, built-in op type `Variable`
+seems to include `Variable's` created by your model. However, be careful when
+depending on it because TensorFlow creates extra `Variable` ops implicitly and
+the implicitly created ops can have the same prefix as the `Variable's` you
+defined.
+
+In the following example, extra `Variables` are created and “/Momentum” is
+appended to their names. This might cause your “model capacity” calculation
+to get wrong.
+
+```shell
+tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
+_TFProfRoot (--/930.58k params)
+ global_step (1/1 params)
+ init/init_conv/DW (3x3x3x16, 432/864 params)
+ pool_logit/DW (64x10, 640/1.28k params)
+ pool_logit/DW/Momentum (64x10, 640/640 params)
+ pool_logit/biases (10, 10/20 params)
+ pool_logit/biases/Momentum (10, 10/10 params)
+ unit_last/final_bn/beta (64, 64/128 params)
+ unit_last/final_bn/gamma (64, 64/128 params)
+ unit_last/final_bn/moving_mean (64, 64/64 params)
+ unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+
+11) An example of defining an extra op type for ops using `OpLog`
+
+First, in Python code, create an `OpLog` proto and add op type
+information to it:
+
+```python
+
+op_log = tfprof_log_pb2.OpLog()
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/DW'
+entry.types.append('pool_logit')
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/biases'
+# Alternatively:
+# var = tf.get_variable(xxx)
+# entry.name = var.op.name
+entry.types.append('pool_logit')
+```
+
+Second, call write_op_log to write the OpLog proto.
+
+```python
+tf.contrib.tfprof.tfprof_logger.write_op_log(
+ sess.graph, /tmp/my_op_log_dir, op_log)
+```
+
+Third, when starting the tfprof tool, specify
+"--op_log_path /tmp/my_op_log_dir/op_log"
+
+```shell
+tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
+_TFProfRoot (--/650 params)
+ pool_logit/DW (64x10, 640/640 params)
+ pool_logit/biases (10, 10/10 params)
+```
+
+Note that when you call
+`tf.contrib.tfprof.tfprof_logger.write_op_log(...)`,
+the tool adds all `Variables` inside `tf.trainable_variables()` to
+`_trainable_variables`.
+
+12) Run tfprof in one-shot mode and dump result to file.
+
+```shell
+# Printed to stdout if --dump_to_file is not set.
+tfprof scope --graph_path=graph.pbtxt \
+ --max_depth=3 \
+ --dump_to_file="/tmp/dump"
+Reading Files...
+Parsing GraphDef...
+Preparing Views...
+
+cat /tmp/dump
+_TFProfRoot (--/930.58k params)
+ global_step (0/0 params)
+ pool_logit/DW (64x10, 640/1.28k params)
+ pool_logit/biases (10, 10/20 params)
+```
+
+13) Analyze how balanced Variable are on parameter servers.
+
+In this tutorial, I'm going to use a seq2seq model, which is split
+on several gpus at workers and several parameter servers.
+
+In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
+gpu0. They share an op_type called 'gpu0'.
+
+```shell
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+ --graph_path ~/tfprof/textsum/graph.pbtxt \
+ --run_meta_path ~/tfprof/textsum/run_meta
+
+# Looks like ps task 1 is holding twice more parameters than task 0.
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
+_TFProfRoot (--/25.81m params)
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
+_TFProfRoot (--/58.84m params)
+```
+
+### CLI Input Files
+
+tfprof command line interface (CLI) loads dumped files from a tensorflow model.
+Convert them into in-memory data structures. To use it, users need to specify
+the locations of the dumped files. The following are the dumped files loaded
+by tfprof:
+
+--graph_path: GraphDef text file (required). Used to build in-memory
+representation of the model. For example, graph.pbtxt written by tf.Supervisor
+is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
+using tf.Graph.as_graph_def() or other API.
+
+--run_meta_path: tensorflow::RunMetadata.
+Used to get the memory and time consumption of
+each op of the model. Users need to enable it. For example, the following code
+snippet writes a RunMetadata file:
+
+```python
+run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
+run_metadata = config_pb2.RunMetadata()
+# Once in a while, call it to get the RunMeta.
+_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
+with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
+ f.write(run_metadata.SerializeToString())
+```
+
+--op_log_path:
+tensorflow::tfprof::OpLog. A proto used to provide extra op information
+for ops. By giving a group of ops a type name, users can easily aggregate the
+statistics for those ops without accidentally missing or including extra ops.
+tfprof exposes the following Python API to add op information and logging.
+
+```python
+tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
+```
+
+--checkpoint_path:
+TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
+provides checkpointed tensors' values.
+
+
+## Design
+
+
+### In-memory representation
+
+Scope: This representation organizes ops based on name scope hierarchy,
+similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
+For example op1 with name “name1/name2” is a child of op2 with name “name1”.
+
+Graph: The representation organizes ops based on op inputs. Hence it is
+a graph structure. The graph is a “directed acyclic graph” (hopefully), with
+direction from “output to input”. The direction is designed this way so that users
+can trace from “result” to its “sources”.
+
+### Command line options
+
+tfprof’s major goals are to measure system performance and quickly analyze
+model architectures. Hence, its commands and options should allow users to achieve
+these 2 goals easily.
+
+graph: It is expected that users will mostly use graph representation to
+debug system performance. Hence, tfprof supports graph command, which pulls the
+graph in-memory representation described above.
+
+scope: It is expected that some users might want to explore their model
+statistics using the name scope information they defined in the Python codes.
+Hence, tfprof supports “scope” command, which pulls the tree in-memory
+representation.
+
+set: It is used to store the options so that user doesn’t need to
+re-type the same option again and again in the follow up command line. Note that
+tfprof has traditional terminal’s history and auto-complete support.
+
+help: print help information.
+
+Options: Run “tfprof help” to get detailed explanations.
+
+```python
+"-max_depth",
+"-min_bytes",
+"-min_micros",
+"-min_params",
+"-min_float_ops",
+"-order_by",
+"-account_type_regexes",
+"-start_name_regexes",
+"-trim_name_regexes",
+"-show_name_regexes",
+"-hide_name_regexes",
+"-account_displayed_op_only",
+"-select",
+"-viz", # Only supported for graph command.
+"-dump_to_file",
+```
+
+A key design is that stats are aggregated from descendants up to ancestors.
+`-account_type_regexes` is used to decide which ops stat is accounted. It makes
+decision based on op type. Usually set it to `.*` if no extra type information
+is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
+`-min/max` and `-show/hide/trim/start` options are only used to optionally
+display or hide ops based on ops’ names and stats. However, they don’t prevent
+tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
+aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
+an option to break this rule. When it is set, only displayed ops are accounted.
+
+Regexes are all comma-separated, for example `-show_name_regexes`
+`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
+is not expected to show up in op names.
+
+`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
+(notice the indent printed) are sorted according to order_by.
+
+## Future Work
+
+* Load SummaryWriter event logs so that it can show the latest summary value.
+
+* Better sorting and aggregation of outputs. Easier comprehension.
+
+* Currently, shape information is based on `graph.pbtxt`. When the shape
+information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
+and `Checkpoint` to complete shape information.
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD b/tensorflow/tools/tfprof/internal/BUILD
similarity index 86%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
rename to tensorflow/tools/tfprof/internal/BUILD
index 7a4b4c0c98f..7476a5ad412 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
+++ b/tensorflow/tools/tfprof/internal/BUILD
@@ -1,5 +1,9 @@
package(
default_visibility = ["//tensorflow:__subpackages__"],
+ features = [
+ "-layering_check",
+ "-parse_headers",
+ ],
)
licenses(["notice"]) # Apache 2.0
@@ -18,10 +22,10 @@ cc_library(
":tfprof_show",
":tfprof_utils",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:regexp_internal",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -49,11 +53,11 @@ cc_library(
":tfprof_utils",
"//tensorflow/c:c_api",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:regexp_internal",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -69,10 +73,10 @@ cc_library(
":tfprof_tensor",
":tfprof_utils",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:regexp_internal",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -87,10 +91,10 @@ cc_library(
":tfprof_tensor",
":tfprof_utils",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:regexp_internal",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -109,12 +113,12 @@ tf_cc_test(
":tfprof_stats",
":tfprof_utils",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -138,6 +142,18 @@ cc_library(
deps = [
"//tensorflow/core:framework_headers_lib",
"//tensorflow/core:lib",
+ "//tensorflow/tools/tfprof:protos_all_cc",
+ ],
+)
+
+cc_library(
+ name = "print_model_analysis_hdr",
+ hdrs = [
+ "print_model_analysis.h",
+ ],
+ deps = [
+ "//tensorflow/core:framework_lite",
+ "//tensorflow/core:protos_all_cc",
],
)
@@ -149,10 +165,11 @@ cc_library(
":tfprof_options",
":tfprof_stats",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
+ alwayslink = 1,
)
tf_cc_test(
@@ -170,12 +187,12 @@ tf_cc_test(
":tfprof_stats",
":tfprof_utils",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -185,9 +202,9 @@ cc_library(
hdrs = ["tfprof_tensor.h"],
copts = ["-Wno-sign-compare"],
deps = [
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
@@ -203,12 +220,12 @@ tf_cc_test(
":tfprof_stats",
":tfprof_utils",
"//tensorflow/c:checkpoint_reader",
- "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
+ "//tensorflow/tools/tfprof:protos_all_cc",
],
)
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc b/tensorflow/tools/tfprof/internal/print_model_analysis.cc
similarity index 73%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
rename to tensorflow/tools/tfprof/internal/print_model_analysis.cc
index ab1e47b32dd..dfe4019fbb4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
+++ b/tensorflow/tools/tfprof/internal/print_model_analysis.cc
@@ -13,20 +13,26 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
#include <stdio.h>
#include <memory>
#include <utility>
#include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
namespace tensorflow {
namespace tfprof {
string PrintModelAnalysis(const string* graph, const string* run_meta,
const string* op_log, const string* command,
- const Options* options) {
+ const string* options) {
CHECK(graph) << "graph mustn't be null";
CHECK(command) << "command mustn't be null";
CHECK(options) << "options mustn't be null";
@@ -50,16 +56,18 @@ string PrintModelAnalysis(const string* graph, const string* run_meta,
TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
std::move(op_log_ptr), std::move(ckpt_reader));
- if (options->dump_to_file.empty()) {
+ Options opts = Options::FromProtoStr(*options);
+
+ if (opts.dump_to_file.empty()) {
printf("\n=========================Options=============================\n");
- printf("%s", options->ToString().c_str());
+ printf("%s", opts.ToString().c_str());
printf("\n==================Model Analysis Report======================\n");
- TFProfNode root(tf_stats.PrintGraph(*command, *options));
+ TFProfNode root(tf_stats.PrintGraph(*command, opts));
printf("\n======================End of Report==========================\n");
fflush(stdout);
return root.SerializeAsString();
}
- return tf_stats.PrintGraph(*command, *options).SerializeAsString();
+ return tf_stats.PrintGraph(*command, opts).SerializeAsString();
}
} // namespace tfprof
} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h b/tensorflow/tools/tfprof/internal/print_model_analysis.h
similarity index 62%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
rename to tensorflow/tools/tfprof/internal/print_model_analysis.h
index 579147f1641..071ac7102ca 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
+++ b/tensorflow/tools/tfprof/internal/print_model_analysis.h
@@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
#include <string>
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/framework/types.h"
namespace tensorflow {
namespace tfprof {
-
-// ***This API is only for swig.***
+class Options;
+// ***This API is only for swig. Don't use it directly!***
//
// Interface defined for Python API swig. Calls the tfprof core API.
// 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
@@ -37,9 +32,9 @@ namespace tfprof {
// if not available.
string PrintModelAnalysis(const string* graph, const string* run_meta,
const string* op_log, const string* command,
- const Options* options);
+ const string* options);
} // namespace tfprof
} // namespace tensorflow
-#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt b/tensorflow/tools/tfprof/internal/testdata/ckpt
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
rename to tensorflow/tools/tfprof/internal/testdata/ckpt
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt b/tensorflow/tools/tfprof/internal/testdata/graph.pbtxt
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
rename to tensorflow/tools/tfprof/internal/testdata/graph.pbtxt
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta b/tensorflow/tools/tfprof/internal/testdata/run_meta
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
rename to tensorflow/tools/tfprof/internal/testdata/run_meta
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log b/tensorflow/tools/tfprof/internal/testdata/tfprof_log
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
rename to tensorflow/tools/tfprof/internal/testdata/tfprof_log
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h b/tensorflow/tools/tfprof/internal/tfprof_constants.h
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
rename to tensorflow/tools/tfprof/internal/tfprof_constants.h
index 169ebae4a75..e495128728b 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_constants.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
namespace tensorflow {
namespace tfprof {
@@ -34,4 +34,4 @@ static const char* const kCkptVarType = "_checkpoint_variables";
} // namespace tfprof
} // namespace tensorflow
-#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#endif // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc b/tensorflow/tools/tfprof/internal/tfprof_graph.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
rename to tensorflow/tools/tfprof/internal/tfprof_graph.cc
index 287fd78d46c..469b258f98b 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_graph.cc
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
#include <stdio.h>
#include <utility>
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/regexp.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
namespace tensorflow {
namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h b/tensorflow/tools/tfprof/internal/tfprof_graph.h
similarity index 85%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
rename to tensorflow/tools/tfprof/internal/tfprof_graph.h
index ee54534f56b..b16f80b33db 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_graph.h
@@ -16,8 +16,8 @@ limitations under the License.
// Build a graph structure based on op inputs/outputs. The graph is a directed
// acyclic graph pointing *from outputs to inputs*.
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
#include
#include