Add tfprof python API to tf.contrib and move tfprof CLI to tensorflow/tools.

Change: 137207286
2016-10-25 14:02:41 -08:00 · 2016-10-25 14:02:41 -08:00 · 289ddb1cb6
commit 289ddb1cb6
parent d97c2ad2b6
45 changed files with 1275 additions and 583 deletions
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@ -121,8 +121,6 @@ filegroup(
        "//tensorflow/contrib/tensorboard:all_files",
        "//tensorflow/contrib/testing:all_files",
        "//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
        "//tensorflow/contrib/tfprof/tools/tfprof:all_files",
        "//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
        "//tensorflow/contrib/training:all_files",
        "//tensorflow/contrib/util:all_files",
        "//tensorflow/core:all_files",
@ -180,6 +178,8 @@ filegroup(
        "//tensorflow/tools/proto_text:all_files",
        "//tensorflow/tools/quantization:all_files",
        "//tensorflow/tools/test:all_files",
        "//tensorflow/tools/tfprof:all_files",
        "//tensorflow/tools/tfprof/internal:all_files",
        "//tensorflow/user_ops:all_files",
        "//third_party/hadoop:all_files",
    ],
--- a/tensorflow/contrib/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/BUILD
@ -12,6 +12,7 @@ py_library(
    srcs_version = "PY2AND3",
    visibility = ["//tensorflow:__subpackages__"],
    deps = [
        "//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer",
        "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
    ],
 )
--- a/tensorflow/contrib/tfprof/README.md
+++ b/tensorflow/contrib/tfprof/README.md
@ -20,434 +20,9 @@ and measures system performance.
 4.  Explore model based on name scope or graph structure.
 5.  Selectively grouping/filtering/accounting/ordering ops.
-### Interfaces
+tfprof can be used as a Command Line Interface (CLI) and as a Python API.
 The CLI is located in tensorflow/tools/tfprof.
 The Python API is located in tensorflow/contrib/tfprof.
 The tutorial is located in tensorflow/tools/tfprof/README.md.
-[CLI Tutorials](#cli-tutorials):
+Enjoy!
 It supports interactive mode for exploration and single-shot mode for
 scripts. Outputs can be dumped to files or printed in terminal.
 Python API Tutorials: Python API is not released yet.
 ## CLI Tutorials
 Tutorials are based on a 32 layers ResNet.
 TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
 ### Examples
 1) Start `tfprof` command line tool
 ```shell
 # Build the tool.
 bazel build -c opt tensorflow/contrib/tfprof/...
 # Help information, including detail 'option' instructions.
 bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
 #
 # The following commands will start tfprof interactive mode.
 #
 # Profile model shapes and parameters only.
 bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
    --graph_path=/graph.pbtxt
 #
 # Additionally profile checkpoint statistics and values.
 # Use '-account_type_regexes _checkpoint_variables' to select
 # checkpoint tensors.
 bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt \
    --checkpoint_path=model.ckpt
 #
 # Additionally profile ops requested memory and timing.
 # See CLI Input Files section on generating run_meta file.
 bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt \
    --run_meta_path=run_meta \
    --checkpoint_path=model.ckpt
 #
 # tfprof_log is used to define customized op types and float ops.
 # Use tfprof_logger.write_op_log() to create tfprof_log.
 # See 11) in Examples section on generating tfprof_log file.
 bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt \
    --run_meta_path=run_meta \
    --op_log_path=tfprof_log \
    --checkpoint_path=model.ckpt
 ```
 Note that `graph.pbtxt` is an ASCII text format.
 2) Press enter to show the default options
 ```shell
 tfprof>
 tfprof>
 -max_depth                  4
 -min_bytes                  0
 -min_micros                 0
 -min_params                 0
 -min_float_ops              0
 -device_regexes             .*
 -order_by                   name
 -account_type_regexes       Variable
 -start_name_regexes         .*
 -trim_name_regexes
 -show_name_regexes          .*
 -hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
 -account_displayed_op_only  false
 # supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
 # [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
 -select                     params
 -viz                        false
 -dump_to_file
 ```
 3) I want to see the `BatchNorm`'s gamma value in checkpoint.
 ```shell
 # Requires --graph_path, --checkpoint_path.
 tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
 _TFProfRoot ()
  unit_1_0/shared_activation/init_bn/gamma ()
 [1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
  unit_1_0/sub2/bn2/gamma ()
 [1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
 ```
 4) I want to see my checkpoint tensors shape and number of parameters.
 ```shell
 # Requires --graph_path, --checkpoint_path.
 # Increase -max_depth to see all tensors.
 tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
 _TFProfRoot (--/930.58k params)
  global_step (0/0 params)
  init/init_conv/DW (3x3x3x16, 432/864 params)
  pool_logit/DW (64x10, 640/1.28k params)
    pool_logit/DW/Momentum (64x10, 640/640 params)
  pool_logit/biases (10, 10/20 params)
    pool_logit/biases/Momentum (10, 10/10 params)
  unit_last/final_bn/beta (64, 64/128 params)
  unit_last/final_bn/gamma (64, 64/128 params)
  unit_last/final_bn/moving_mean (64, 64/64 params)
  unit_last/final_bn/moving_variance (64, 64/64 params)
 ```
 5) I defined an op named ‘cost’ to calculate the loss. I want to know which of
 the ops it depends on take a long time to run. Hint: Use the ‘graph’ command to
 explore graph dependencies.
 ```shell
 # Requires --graph_path, --run_meta_path.
 tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
 _TFProfRoot (0us/3.61sec)
  init/init_conv/Conv2D (11.75ms/3.10sec)
    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
 ```
 6) I want to know the expensive operations during the back propagation.
 Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the
 ‘scope’ command to explore based on name scope hierarchies.
 ```shell
 # Requires --graph_path, --run_meta_path.
 tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
 _TFProfRoot (0us/2.29sec)
  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
  ...
 ```
 7) Show the number of float operations in the model.
 Note: float operations calculation depends on
 1) op.RegisterStatistics. If an op doesn’t
 have RegisterStatistics defined, its float operations cannot be counted.
 2) fully defined shape is also necessary in order to calculate flops.
 float operations number is provided by tensorflow::tfprof::OpLog logged from
 Python API.
 ```shell
 # Requires --graph_path, --op_log_path.
 tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
 _TFProfRoot (0/17.63b flops)
  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
  init/init_conv/Conv2D (113.25m/113.25m flops)
  pool_logit/xw_plus_b (1.28k/165.12k flops)
    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
  ...
 ```
 8) Show the number of parameters of all `tf.trainable_variables()` in the model.
 ```shell
 # Requires --graph_path --op_log_path.
 # store option for future commands.
 tfprof> set -account_type_regexes _trainable_variables
 tfprof> scope -max_depth 4 -select params
 _TFProfRoot (--/464.15k params)
  init/init_conv/DW (3x3x3x16, 432/432 params)
  pool_logit/DW (64x10, 640/640 params)
  pool_logit/biases (10, 10/10 params)
  unit_last/final_bn/beta (64, 64/64 params)
  unit_last/final_bn/gamma (64, 64/64 params)
 ```
 Where does “_trainable_variables” come from? It is from the OpLog file
 generated by the write_op_log() Python API. write_op_log() helps users create
 some common op types implicitly. Users can define their own op types and log
 them through the write_op_log() API.
 9) What if I’m lazy and don’t want to define op type? I have given my ops
 well-defined names in my model’s code. And want to use names to select a group
 of ops. Let’s try it!
 ```shell
 tfprof> set -account_type_regexes .*
 tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
 _TFProfRoot (0/18.43k params)
  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
 ```
 The above command allows you to filter ops that match specific names.
 `-account_displayed_op_only` asks tfprof to only account ops displayed
 in terminal. Otherwise, tfprof accounts all ops matched by
 `-account_type_regexes` recursively even if they are hidden due to some
 options such as -max_depth.
 10) TensorFlow has built-in op types. For example, built-in op type `Variable`
 seems to include `Variable's` created by your model. However, be careful when
 depending on it because TensorFlow creates extra `Variable` ops implicitly and
 the implicitly created ops can have the same prefix as the `Variable's` you
 defined.
 In the following example, extra `Variables` are created and “/Momentum” is
 appended to their names. This might cause your “model capacity” calculation
 to be wrong.
 ```shell
 tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
 _TFProfRoot (--/930.58k params)
  global_step (1/1 params)
  init/init_conv/DW (3x3x3x16, 432/864 params)
  pool_logit/DW (64x10, 640/1.28k params)
    pool_logit/DW/Momentum (64x10, 640/640 params)
  pool_logit/biases (10, 10/20 params)
    pool_logit/biases/Momentum (10, 10/10 params)
  unit_last/final_bn/beta (64, 64/128 params)
  unit_last/final_bn/gamma (64, 64/128 params)
  unit_last/final_bn/moving_mean (64, 64/64 params)
  unit_last/final_bn/moving_variance (64, 64/64 params)
 ```
 11) An example of defining extra op types for ops using `OpLog`.
 First, in Python code, create an `OpLog` proto and add op type
 information to it:
 ```python
 op_log = tfprof_log_pb2.OpLog()
 entry = op_log.log_entries.add()
 entry.name = 'pool_logit/DW'
 entry.types.append('pool_logit')
 entry = op_log.log_entries.add()
 entry.name = 'pool_logit/biases'
 # Alternatively:
 # var = tf.get_variable(xxx)
 # entry.name = var.op.name
 entry.types.append('pool_logit')
 ```
 Second, call write_op_log to write the OpLog proto.
 ```python
 tf.contrib.tfprof.tfprof_logger.write_op_log(sess.graph, "/tmp/my_op_log_dir", op_log)
 ```
 Third, when starting the tfprof tool, specify
 "--op_log_path /tmp/my_op_log_dir/op_log"
 ```shell
 tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
 _TFProfRoot (--/650 params)
  pool_logit/DW (64x10, 640/640 params)
  pool_logit/biases (10, 10/10 params)
 ```
 Note that when you call
 `tf.contrib.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all
 `Variables` inside `tf.trainable_variables()` to `_trainable_variables`.
 12) Run tfprof in one-shot mode and dump result to file.
 ```shell
 # Printed to stdout if --dump_to_file is not set.
 tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt  \
             --max_depth 3 \
             --dump_to_file "/tmp/dump"
 Reading Files...
 Parsing GraphDef...
 Preparing Views...
 cat /tmp/dump
 _TFProfRoot (--/930.58k params)
  global_step (0/0 params)
  pool_logit/DW (64x10, 640/1.28k params)
  pool_logit/biases (10, 10/20 params)
 ```
 13) Analyze how balanced `Variables` are across parameter servers.
 In this tutorial, I'm going to use a seq2seq model, which is split
 across several gpus at workers and several parameter servers.
 In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
 gpu0, they share an op_type called 'gpu0'.
 ```shell
 bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
  --graph_path ~/tfprof/textsum/graph.pbtxt  \
  --run_meta_path ~/tfprof/textsum/run_meta
 # Looks like ps task 1 is holding more than twice as many parameters as task 0.
 tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
 _TFProfRoot (--/25.81m params)
 tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
 _TFProfRoot (--/58.84m params)
 ```
 ### CLI Input Files
 The tfprof command line interface (CLI) loads dumped files from a TensorFlow
 model and converts them into in-memory data structures. To use it, users need
 to specify the locations of the dumped files. The following are the dumped
 files loaded by tfprof:
 <b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
 representation of the model. For example, graph.pbtxt written by tf.Supervisor
 is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
 using tf.Graph.as_graph_def() or other API.
 <b>--run_meta_path:</b> tensorflow::RunMetadata.
 Used to get the memory and time consumption of
 each op of the model. Users need to enable it. For example, the following code
 snippet writes a RunMetadata file:
 ```python
 run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
 run_metadata = config_pb2.RunMetadata()
 # Once in a while, call it to get the RunMeta.
 _ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
 with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
  f.write(run_metadata.SerializeToString())
 ```
 <b>--op_log_path:</b>
 tensorflow::tfprof::OpLog. A proto used to provide extra op information
 for ops. By giving a group of ops a type name, users can easily aggregate the
 statistics for those ops without accidentally missing or including extra ops.
 tfprof exposes the following Python API to add op information and logging.
 ```python
 tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
 ```
 <b>--checkpoint_path:</b>
 TensorFlow checkpoint. It defines the `_checkpoint_variables` op type. It also
 provides checkpointed tensors' values.
 ## Design
 ### In-memory representation
 <b>Scope:</b> This representation organizes ops based on name scope hierarchy,
 similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
 For example op1 with name “name1/name2” is a child of op2 with name “name1”.
 <b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
 a graph structure. The graph is a “directed acyclic graph” (hopefully), with
 direction from “output to input”. The direction is designed this way so that
 users can trace from “result” to its “sources”.
 ### Command line options
 tfprof’s major goals are to measure system performance and quickly analyze
 model architectures. Hence, its commands and options should allow users to achieve
 these 2 goals easily.
 <b>graph:</b> It is expected that users will mostly use graph representation to
 debug system performance. Hence, tfprof supports graph command, which pulls the
 graph in-memory representation described above.
 <b>scope:</b> It is expected that some users might want to explore their model
 statistics using the name scope information they defined in the Python codes.
 Hence, tfprof supports “scope” command, which pulls the tree in-memory
 representation.
 <b>set:</b> It is used to store the options so that user doesn’t need to
 re-type the same option again and again in the follow up command line. Note that
 tfprof has traditional terminal’s history and auto-complete support.
 <b>help:</b> print help information.
 <b>Options:</b> Run “tfprof help” to get detailed explanations.
 ```python
 "-max_depth",
 "-min_bytes",
 "-min_micros",
 "-min_params",
 "-min_float_ops",
 "-order_by",
 "-account_type_regexes",
 "-start_name_regexes",
 "-trim_name_regexes",
 "-show_name_regexes",
 "-hide_name_regexes",
 "-account_displayed_op_only",
 "-select",
 "-viz",  # Only supported for graph command.
 "-dump_to_file",
 ```
 A key design is that stats are aggregated from descendants up to ancestors.
 `-account_type_regexes` is used to decide which ops stat is accounted. It makes
 decision based on op type. Usually set it to `.*` if no extra type information
 is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
 `-min/max` and `-show/hide/trim/start` options are only used to optionally
 display or hide ops based on ops’ names and stats. However, they don’t prevent
 tfprof from accounting stats of hidden ops. Hence, the stats of an op can be
 aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
 an option to break this rule. When it is set, only displayed ops are accounted.
 Regexes are all comma-separated, for example `-show_name_regexes`
 `regex1.*,regex2.*`. It is designed this way because it is convenient and comma
 is not expected to show up in op names.
 `-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
 (notice the indent printed) are sorted according to order_by.
 ## Future Work
 * Load SummaryWriter event logs so that it can show the latest summary value.
 * Better sorting and aggregation of outputs. Easier comprehension.
 * Currently, shape information is based on `graph.pbtxt`. When the shape
 information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
 and `Checkpoint` to complete shape information.
--- a/tensorflow/contrib/tfprof/init.py
+++ b/tensorflow/contrib/tfprof/init.py
@ -17,5 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer
 from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
 from tensorflow.python.util.all_util import make_all
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
@ -3,14 +3,36 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//visibility:public"])
 load("//tensorflow:tensorflow.bzl", "tf_py_test")
 load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
 # Python library for model_analyzer.py. It depends on the Python wrapper
 # around print_model_analysis, the tfprof logger, and the tfprof protos that
 # live under //tensorflow/tools/tfprof.
 py_library(
    name = "model_analyzer",
    srcs = ["model_analyzer.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib",
        "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
        "//tensorflow/tools/tfprof:protos_all_py",
    ],
 )
 # Test target for model_analyzer_test.py; depends on the :model_analyzer
 # library and the full TensorFlow Python package.
 py_test(
    name = "model_analyzer_test",
    srcs = ["model_analyzer_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":model_analyzer",
        "//tensorflow:tensorflow_py",
    ],
 )
 # Python library for tfprof_logger.py. The tfprof protos are taken from
 # //tensorflow/tools/tfprof only; the former
 # //tensorflow/contrib/tfprof/tools/tfprof location is no longer referenced.
 py_library(
    name = "tfprof_logger",
    srcs = ["tfprof_logger.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python:framework_for_generated_wrappers",
        "//tensorflow/tools/tfprof:protos_all_py",
    ],
 )
@ -20,7 +42,34 @@ tf_py_test(
    additional_deps = [
        ":tfprof_logger",
        "//tensorflow:tensorflow_py",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
+        "//tensorflow/tools/tfprof:protos_all_py",
    ],
 )
 # Python wrapper generated from pywrap_tensorflow_print_model_analysis.i.
 # model_analyzer.py imports this target to call PrintModelAnalysis; the C++
 # side comes from the print_model_analysis_hdr target under
 # //tensorflow/tools/tfprof/internal.
 tf_py_wrap_cc(
    name = "pywrap_tensorflow_print_model_analysis_lib",
    srcs = ["pywrap_tensorflow_print_model_analysis.i"],
    swig_includes = [
        "//tensorflow/python:lib/core/strings.i",
        "//tensorflow/python:platform/base.i",
    ],
    deps = [
        "//tensorflow/core:framework_headers_lib",
        "//tensorflow/tools/tfprof/internal:print_model_analysis_hdr",
        "//util/python:python_headers",
    ],
 )
 # Test target for print_model_analysis_test.py. It depends on the generated
 # wrapper library directly rather than on :model_analyzer.
 py_test(
    name = "print_model_analysis_test",
    srcs = ["print_model_analysis_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":pywrap_tensorflow_print_model_analysis_lib",
        "//tensorflow:tensorflow_py",
        "//tensorflow/python:framework_test_lib",
        "//tensorflow/python:platform_test",
        "//tensorflow/tools/tfprof:protos_all_py",
    ],
 )
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
@ -0,0 +1,187 @@
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Model Analyzer.
 Analyze model, including shape, params, time, memory, structure, etc.
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
 from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
 from tensorflow.tools.tfprof import tfprof_options_pb2
 from tensorflow.tools.tfprof import tfprof_output_pb2
 # pylint: disable=bad-whitespace
 # pylint: disable=bad-continuation
 # Example tfprof_options presets for the print_model_analysis API.
 # Each preset is a plain dict; print_model_analysis looks up every key shown
 # here when it fills in the OptionsProto, so a custom options dict needs the
 # same set of keys.
 #
 # Show the parameter statistics of trainable variables.
 # This dict is also the default value of print_model_analysis's
 # `tfprof_options` argument, so modifying it in place changes that default.
 TRAINABLE_VARS_PARAMS_STAT_OPTIONS = {
    'max_depth': 10000,
    'min_bytes': 0,
    'min_micros': 0,
    'min_params': 0,
    'min_float_ops': 0,
    'device_regexes': ['.*'],
    'order_by': 'name',
    'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
    'start_name_regexes': ['.*'],
    'trim_name_regexes': [],
    'show_name_regexes': ['.*'],
    'hide_name_regexes': [],
    'account_displayed_op_only': True,
    'select': ['params'],
    'viz': False,
    'dump_to_file': ''
 }
 # Show the number of float operations.
 # Differs from the trainable-variables preset in three keys: 'min_float_ops'
 # is 1, 'order_by' is 'float_ops', and 'account_type_regexes' matches every
 # op type ('.*'); 'select' asks for 'float_ops' instead of 'params'.
 FLOAT_OPS_OPTIONS = {
    'max_depth': 10000,
    'min_bytes': 0,
    'min_micros': 0,
    'min_params': 0,
    'min_float_ops': 1,
    'device_regexes': ['.*'],
    'order_by': 'float_ops',
    'account_type_regexes': ['.*'],
    'start_name_regexes': ['.*'],
    'trim_name_regexes': [],
    'show_name_regexes': ['.*'],
    'hide_name_regexes': [],
    'account_displayed_op_only': True,
    'select': ['float_ops'],
    'viz': False,
    'dump_to_file': ''
 }
 # Show the number of parameters on parameter server 0.
 # It is recommended to provide the `run_meta` argument
 # to have complete device placement info.
 # 'account_type_regexes' selects ops by a device-name pattern
 # ('.*ps.*task:0.*'), and 'max_depth' is 1 so only the top of the tree is
 # printed.
 PRINT_PARAMS_ON_DEVICE = {
    'max_depth': 1,
    'min_bytes': 0,
    'min_micros': 0,
    'min_params': 0,
    'min_float_ops': 0,
    'device_regexes': ['.*'],
    'order_by': 'name',
    'account_type_regexes': ['.*ps.*task:0.*'],
    'start_name_regexes': ['.*'],
    'trim_name_regexes': [],
    'show_name_regexes': ['.*'],
    'hide_name_regexes': [],
    'account_displayed_op_only': False,
    'select': ['device', 'params'],
    'viz': False,
    'dump_to_file': ''
 }
 # Show the timing stats and memory demands.
 # Selects 'micros' and 'bytes'; per the print_model_analysis docstring these
 # are only valid when a `run_meta` proto is passed in.
 PRINT_ALL_TIMING_MEMORY = {
    'max_depth': 10000,
    'min_bytes': 1,  # Only >=1
    'min_micros': 1,  # Only >=1
    'min_params': 0,
    'min_float_ops': 0,
    'device_regexes': ['.*'],
    'order_by': 'name',
    'account_type_regexes': ['.*'],
    'start_name_regexes': ['.*'],
    'trim_name_regexes': [],
    'show_name_regexes': ['.*'],
    'hide_name_regexes': [],
    'account_displayed_op_only': True,
    'select': ['micros', 'bytes'],
    'viz': False,
    'dump_to_file': ''
 }
 # pylint: enable=bad-whitespace
 # pylint: enable=bad-continuation
 def print_model_analysis(graph,
                          run_meta=None,
                          op_log=None,
                          tfprof_cmd='scope',
                          tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS):
  """Print model statistics.

    Prints the model statistics to stdout. Also returns the results
    in a TFProfNode proto. See tensorflow/tools/tfprof/README.md for a
    tutorial, or run the tfprof command line tool with the 'help' command.

    Examples:
      Show the parameter/shape statistics of tf.trainable_variables().
        print_model_analysis(sess.graph).

      Show number of float ops. Only ops with RegisterStatistics defined
      are counted.
        show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS
        print_model_analysis(sess.graph, tfprof_options=show_float_op_opts)

  Args:
    graph: tf.Graph.
    run_meta: tensorflow::RunMetadata proto. When provided, also shows valid
              timing and memory information when 'select' option contains
              'micros' and 'bytes'.
    op_log: tensorflow::tfprof::OpLog proto. users can use this proto to
            group together ops and use a op_type to select the group.
    tfprof_cmd: string. Either 'scope' or 'graph'. 'scope' view organize
                ops using their name scopes. 'graph' view organize ops using
                their graph inputs.
    tfprof_options: dict mapping option name to value. Every option key is
                    looked up, so all of them must be present. The dict is
                    only read here, never modified. See 'tfprof help' for
                    details on each option.

  Returns:
    TFProfNode proto. Side effect: a formatted output to stdout.

  Raises:
    KeyError: if `tfprof_options` lacks one of the option keys.
  """
  # pylint: disable=protected-access
  op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta)
  # pylint: enable=protected-access

  opts = tfprof_options_pb2.OptionsProto()

  # Scalar options are assigned directly.
  opts.max_depth = tfprof_options['max_depth']
  opts.min_bytes = tfprof_options['min_bytes']
  opts.min_micros = tfprof_options['min_micros']
  opts.min_params = tfprof_options['min_params']
  opts.min_float_ops = tfprof_options['min_float_ops']
  opts.order_by = tfprof_options['order_by']
  opts.account_displayed_op_only = tfprof_options['account_displayed_op_only']
  opts.viz = tfprof_options['viz']
  opts.dump_to_file = tfprof_options['dump_to_file']

  # List-valued options are appended to the matching repeated proto fields.
  opts.device_regexes.extend(tfprof_options['device_regexes'])
  opts.account_type_regexes.extend(tfprof_options['account_type_regexes'])
  opts.start_name_regexes.extend(tfprof_options['start_name_regexes'])
  opts.trim_name_regexes.extend(tfprof_options['trim_name_regexes'])
  opts.show_name_regexes.extend(tfprof_options['show_name_regexes'])
  opts.hide_name_regexes.extend(tfprof_options['hide_name_regexes'])
  opts.select.extend(tfprof_options['select'])

  # Absent protos are passed to the wrapper as empty byte strings.
  run_meta_str = run_meta.SerializeToString() if run_meta else b''
  op_log_str = op_log.SerializeToString() if op_log else b''

  # The wrapper returns a serialized TFProfNode; parse it and hand it back so
  # the function actually returns what its contract promises.
  tfprof_node = tfprof_output_pb2.TFProfNode()
  tfprof_node.ParseFromString(
      print_mdl.PrintModelAnalysis(
          graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
          tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
  return tfprof_node
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
@ -0,0 +1,84 @@
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import tensorflow as tf
 class PrintModelAnalysisTest(tf.test.TestCase):
  """Tests tf.contrib.tfprof.model_analyzer.print_model_analysis via dump files."""

  def _BuildSmallModel(self):
    """Builds two chained stride-2 conv2d ops over a zero image.

    Creates the variables 'DW' (3x3x3x6) and 'DW2' (2x2x6x12).

    Returns:
      The output tensor of the second conv2d.
    """
    image = tf.zeros([2, 6, 6, 3])
    kernel = tf.get_variable(
        'DW', [3, 3, 3, 6],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
    kernel = tf.get_variable(
        'DW2', [2, 2, 6, 12],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    x = tf.nn.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME')
    return x

  def testDumpToFile(self):
    """Checks the parameter summary written when 'dump_to_file' is set."""
    # Work on a copy: the preset is a module-level dict that is also the
    # default argument of print_model_analysis, so editing it in place would
    # leak this test's settings into other tests and callers.
    opts = (
        tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
        .copy())
    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')

    with tf.Session() as sess:
      _ = self._BuildSmallModel()
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          sess.graph, tfprof_options=opts)

      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
        self.assertEqual('_TFProfRoot (--/450 params)\n'
                         '  DW (3x3x3x6, 162/162 params)\n'
                         '  DW2 (2x2x6x12, 288/288 params)\n',
                         f.read().decode('utf-8'))

  def testSelectEverything(self):
    """Checks the dump when all op types and most 'select' fields are on."""
    # Copy before overriding keys; a shallow copy is enough because the
    # overrides below rebind keys to new lists instead of mutating the
    # preset's own lists.
    opts = (
        tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
        .copy())
    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
    opts['account_type_regexes'] = ['.*']
    opts['select'] = [
        'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
    ]

    with tf.Session() as sess:
      x = self._BuildSmallModel()

      sess.run(tf.initialize_all_variables())
      # Run once with full tracing so run_meta carries memory/device stats.
      run_meta = tf.RunMetadata()
      _ = sess.run(x,
                   options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          sess.graph, run_meta, tfprof_options=opts)

      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
        # One adjacent literal per expected output line; the concatenation is
        # the exact expected dump.
        # pylint: disable=line-too-long
        self.assertEqual(
            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n'
            '  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n'
            '  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n'
            '  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n'
            '    DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n'
            '    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n'
            '      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n'
            '        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n'
            '        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            '        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n'
            '        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            '        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            '    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n'
            '  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n'
            '    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n'
            '    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n'
            '      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n'
            '        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n'
            '        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            '        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n'
            '        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            '        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n'
            '    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n'
            '  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n'
            '  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
            f.read().decode('utf-8'))
        # pylint: enable=line-too-long
 if __name__ == '__main__':
  tf.test.main()
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
@ -0,0 +1,227 @@
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """print_model_analysis test."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import tensorflow as tf
 from google.protobuf import text_format
 from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
 from tensorflow.tools.tfprof import tfprof_options_pb2
 from tensorflow.tools.tfprof import tfprof_output_pb2
 # pylint: disable=bad-whitespace
 # pylint: disable=bad-continuation
 # Option values used by PrintModelAnalysisTest. Each key is copied into the
 # OptionsProto field of the same name by testPrintModelAnalysis: scalar values
 # are assigned directly and list values are appended to the repeated fields.
 TEST_OPTIONS = {
    'max_depth': 10000,
    'min_bytes': 0,
    'min_micros': 0,
    'min_params': 0,
    'min_float_ops': 0,
    'device_regexes': ['.*'],
    'order_by': 'name',
    'account_type_regexes': ['.*'],
    'start_name_regexes': ['.*'],
    'trim_name_regexes': [],
    'show_name_regexes': ['.*'],
    'hide_name_regexes': [],
    'account_displayed_op_only': True,
    'select': ['params'],
    'viz': False
 }
 # pylint: enable=bad-whitespace
 # pylint: enable=bad-continuation
 class PrintModelAnalysisTest(tf.test.TestCase):
  """Checks the TFProfNode proto returned by the PrintModelAnalysis wrapper."""
  def _BuildSmallModel(self):
    """Builds a zeros input, a 'DW' variable and one conv2d; returns the conv output."""
    inputs = tf.zeros([2, 6, 6, 3])
    weights = tf.get_variable(
        'DW',
        shape=[6, 6, 3, 6],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.001))
    return tf.nn.conv2d(inputs, weights, [1, 2, 2, 1], padding='SAME')
  def testPrintModelAnalysis(self):
    """Runs the 'scope' command on the small model and compares the result proto."""
    opts = tfprof_options_pb2.OptionsProto()
    # Scalar options are assigned straight onto the proto.
    scalar_fields = ('max_depth', 'min_bytes', 'min_micros', 'min_params',
                     'min_float_ops', 'order_by', 'account_displayed_op_only',
                     'viz')
    for field in scalar_fields:
      setattr(opts, field, TEST_OPTIONS[field])
    # List-valued options are added to the matching repeated fields.
    repeated_fields = ('device_regexes', 'account_type_regexes',
                       'start_name_regexes', 'trim_name_regexes',
                       'show_name_regexes', 'hide_name_regexes', 'select')
    for field in repeated_fields:
      getattr(opts, field).extend(TEST_OPTIONS[field])
    with tf.Session() as sess:
      self._BuildSmallModel()
      serialized_graph = sess.graph.as_graph_def().SerializeToString()
      # The two empty byte strings are passed through as-is; presumably they
      # stand for "no run metadata" and "no op log" -- confirm against
      # print_model_analysis.h.
      serialized_result = print_mdl.PrintModelAnalysis(
          serialized_graph, b'', b'', b'scope', opts.SerializeToString())
      tfprof_pb = tfprof_output_pb2.TFProfNode()
      tfprof_pb.ParseFromString(serialized_result)
      expected_pb = tfprof_output_pb2.TFProfNode()
      text_format.Merge(r"""name: "_TFProfRoot"
              exec_micros: 0
              requested_bytes: 0
              total_exec_micros: 0
              total_requested_bytes: 0
              total_parameters: 648
              children {
                name: "Conv2D"
                exec_micros: 0
                requested_bytes: 0
                total_exec_micros: 0
                total_requested_bytes: 0
                total_parameters: 0
                float_ops: 0
                total_float_ops: 0
              }
              children {
                name: "DW"
                exec_micros: 0
                requested_bytes: 0
                parameters: 648
                total_exec_micros: 0
                total_requested_bytes: 0
                total_parameters: 648
                children {
                  name: "DW/Assign"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                children {
                  name: "DW/Initializer"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  children {
                    name: "DW/Initializer/random_normal"
                    exec_micros: 0
                    requested_bytes: 0
                    total_exec_micros: 0
                    total_requested_bytes: 0
                    total_parameters: 0
                    children {
                      name: "DW/Initializer/random_normal/RandomStandardNormal"
                      exec_micros: 0
                      requested_bytes: 0
                      total_exec_micros: 0
                      total_requested_bytes: 0
                      total_parameters: 0
                      float_ops: 0
                      total_float_ops: 0
                    }
                    children {
                      name: "DW/Initializer/random_normal/mean"
                      exec_micros: 0
                      requested_bytes: 0
                      total_exec_micros: 0
                      total_requested_bytes: 0
                      total_parameters: 0
                      float_ops: 0
                      total_float_ops: 0
                    }
                    children {
                      name: "DW/Initializer/random_normal/mul"
                      exec_micros: 0
                      requested_bytes: 0
                      total_exec_micros: 0
                      total_requested_bytes: 0
                      total_parameters: 0
                      float_ops: 0
                      total_float_ops: 0
                    }
                    children {
                      name: "DW/Initializer/random_normal/shape"
                      exec_micros: 0
                      requested_bytes: 0
                      total_exec_micros: 0
                      total_requested_bytes: 0
                      total_parameters: 0
                      float_ops: 0
                      total_float_ops: 0
                    }
                    children {
                      name: "DW/Initializer/random_normal/stddev"
                      exec_micros: 0
                      requested_bytes: 0
                      total_exec_micros: 0
                      total_requested_bytes: 0
                      total_parameters: 0
                      float_ops: 0
                      total_float_ops: 0
                    }
                    float_ops: 0
                    total_float_ops: 0
                  }
                  float_ops: 0
                  total_float_ops: 0
                }
                children {
                  name: "DW/read"
                  exec_micros: 0
                  requested_bytes: 0
                  total_exec_micros: 0
                  total_requested_bytes: 0
                  total_parameters: 0
                  float_ops: 0
                  total_float_ops: 0
                }
                float_ops: 0
                total_float_ops: 0
              }
              children {
                name: "zeros"
                exec_micros: 0
                requested_bytes: 0
                total_exec_micros: 0
                total_requested_bytes: 0
                total_parameters: 0
                float_ops: 0
                total_float_ops: 0
              }
              float_ops: 0
              total_float_ops: 0""", expected_pb)
      self.assertEqual(expected_pb, tfprof_pb)
 if __name__ == '__main__':
  tf.test.main()
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
@ -0,0 +1,43 @@
 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 %include "tensorflow/python/lib/core/strings.i"
 %include "tensorflow/python/platform/base.i"
 %{
 #include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
 #include "tensorflow/core/framework/types.h"
 %}
 // Typemaps for passing string references between Python and the wrapped
 // functions.
 //
 // Type check: reuse the existing `char *` typecheck typemap for
 // `const string &` parameters.
 %typemap(typecheck) const string & = char *;
 // Input: convert the Python object into the local `temp` string and hand the
 // wrapped function a pointer to it; a failed conversion returns NULL to the
 // interpreter.
 %typemap(in) const string& (string temp) {
  if (!_PyObjAs<string>($input, &temp)) return NULL;
  $1 = &temp;
 }
 // Output: return the string's raw contents as a bytes object.
 // PyBytes_FromStringAndSize is available on both Python 2.6+ (where it is an
 // alias of PyString_FromStringAndSize) and Python 3, whereas
 // PyString_FromStringAndSize does not exist in the Python 3 C API.
 %typemap(out) const string& {
  $result = PyBytes_FromStringAndSize($1->data(), $1->size());
 }
 // Apply the same conversions to non-const string references and pointers.
 %apply const string & {string &};
 %apply const string & {string *};
 %ignoreall
 %unignore tensorflow;
 %unignore tensorflow::tfprof;
 %unignore tensorflow::tfprof::PrintModelAnalysis;
 %include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
 %unignoreall
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@ -24,8 +24,8 @@ import os
 import sys
 import tensorflow as tf
 from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2
 from tensorflow.python.framework import ops
 from tensorflow.tools.tfprof import tfprof_log_pb2
 TRAINABLE_VARIABLES = '_trainable_variables'
 REGISTERED_FLOP_STATS = 'flops'
@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None):
    if node.name not in logged_ops:
      entry = tfprof_log_pb2.OpLogEntry()
      entry.name = node.name
-      entry.float_ops = stats.value
+      entry.float_ops = int(stats.value)
      logged_ops[entry.name] = entry
  for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@ -1317,7 +1317,7 @@ cc_library(
        "platform/regexp.h",
    ],
    visibility = [
-        "//tensorflow/contrib/tfprof:__subpackages__",
+        "//tensorflow/tools/tfprof:__subpackages__",
    ],
    deps = [":lib_internal"],
 )
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@ -1861,6 +1861,7 @@ tf_py_wrap_cc(
        "//tensorflow/c:tf_status_helper",
        "//tensorflow/core:lib",
        "//tensorflow/core/distributed_runtime:server_lib",
        "//tensorflow/tools/tfprof/internal:print_model_analysis",
        "//util/python:python_headers",
    ] + tf_additional_lib_deps(),
 )
--- a/tensorflow/contrib/tfprof/tools/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/tools/tfprof/BUILD
@ -26,13 +26,13 @@ cc_binary(
        ":protos_all_cc",
        "//tensorflow/c:c_api",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
        "//tensorflow/core:framework_headers_lib",
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/tools/tfprof/internal:tfprof_options",
        "//tensorflow/tools/tfprof/internal:tfprof_stats",
        "//tensorflow/tools/tfprof/internal:tfprof_utils",
        "@linenoise//:linenoise",
    ],
 )
--- a/tensorflow/tools/tfprof/README.md
+++ b/tensorflow/tools/tfprof/README.md
@ -0,0 +1,455 @@
 # tfprof: A Profiling Tool for TensorFlow Models
 Internal User Please Use: go/tfprof
 Author: Xin Pan (xpan@google.com, github: panyx0718)
 Consultants: Jon Shlens, Pete Warden
 ## Introduction
 tfprof is a profiling tool for TensorFlow that analyzes model architectures
 and measures system performance.
 ### Major Features
 1.  Measure model parameters, float operations, tensor shapes.
 2.  Measure op execution times, requested memory size and device placement.
 3.  Inspect checkpoint tensors' shapes and their values.
 4.  Explore model based on name scope or graph structure.
 5.  Selectively grouping/filtering/accounting/ordering ops.
 ### Interfaces
 [CLI Tutorials](#cli-tutorials):
 It supports interactive mode for exploration and single-shot mode for
 scripts. Outputs can be dumped to files or printed in terminal.
 Python API Tutorials: Python API is not released yet.
 ## CLI Tutorials
 Tutorials are based on a 32 layers ResNet.
 TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
 ### Examples
 1) Start `tfprof` command line tool
 ```shell
 # Build the tool.
 bazel build -c opt tensorflow/tools/tfprof/...
 # Help information, including detail 'option' instructions.
 bazel-bin/tensorflow/tools/tfprof/tfprof help
 #
 # The following commands will start tfprof interactive mode.
 #
 # Profile model shapes and parameters only.
 bazel-bin/tensorflow/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt
 #
 # Additionally profile checkpoint statistics and values.
 # Use '-account_type_regexes _checkpoint_variables' to select
 # checkpoint tensors.
 bazel-bin/tensorflow/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt \
    --checkpoint_path=model.ckpt
 #
 # Additionally profile ops requested memory and timing.
 # See CLI Input Files section on generating run_meta file.
 bazel-bin/tensorflow/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt \
    --run_meta_path=run_meta \
    --checkpoint_path=model.ckpt
 #
 # tfprof_log is used to define customized op types and float ops.
 # Use tfprof_logger.write_op_log() to create tfprof_log.
 # See 11) in Examples section on generating tfprof_log file.
 bazel-bin/tensorflow/tools/tfprof/tfprof \
    --graph_path=graph.pbtxt \
    --run_meta_path=run_meta \
    --op_log_path=tfprof_log \
    --checkpoint_path=model.ckpt
 ```
 Note that `graph.pbtxt` is an ASCII text format.
 2) Press enter to show the default options
 ```shell
 tfprof>
 tfprof>
 -max_depth                  4
 -min_bytes                  0
 -min_micros                 0
 -min_params                 0
 -min_float_ops              0
 -device_regexes             .*
 -order_by                   name
 -account_type_regexes       Variable
 -start_name_regexes         .*
 -trim_name_regexes
 -show_name_regexes          .*
 -hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
 -account_displayed_op_only  false
 # supported select fields. Availability depends on --[run_meta|checkpoint|op_log]_path.
 # [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
 -select                     params
 -viz                        false
 -dump_to_file
 ```
 3) I want to see the `BatchNorm`'s gamma value in checkpoint.
 ```shell
 # Requires --graph_path, --checkpoint_path.
 tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
 _TFProfRoot ()
  unit_1_0/shared_activation/init_bn/gamma ()
 [1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
  unit_1_0/sub2/bn2/gamma ()
 [1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
 ```
 4) I want to see my checkpoint tensors shape and number of parameters.
 ```shell
 # Requires --graph_path, --checkpoint_path.
 # Increase -max_depth to see all tensors.
 tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
 _TFProfRoot (--/930.58k params)
  global_step (0/0 params)
  init/init_conv/DW (3x3x3x16, 432/864 params)
  pool_logit/DW (64x10, 640/1.28k params)
    pool_logit/DW/Momentum (64x10, 640/640 params)
  pool_logit/biases (10, 10/20 params)
    pool_logit/biases/Momentum (10, 10/10 params)
  unit_last/final_bn/beta (64, 64/128 params)
  unit_last/final_bn/gamma (64, 64/128 params)
  unit_last/final_bn/moving_mean (64, 64/64 params)
  unit_last/final_bn/moving_variance (64, 64/64 params)
 ```
 5) I defined an op named ‘cost’ to calculate the loss. I want to know which of
 the ops it depends on take a long time to run. Hint: Use the ‘graph’ command to
 explore graph dependencies.
 ```shell
 # Requires --graph_path, --run_meta_path.
 tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
 _TFProfRoot (0us/3.61sec)
  init/init_conv/Conv2D (11.75ms/3.10sec)
    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
 ```
 6) I want to know the expensive operations during the back propagation.
 Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the ‘scope’
 command to explore based on name scope hierarchies.
 ```shell
 # Requires --graph_path, --run_meta_path.
 tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
 _TFProfRoot (0us/2.29sec)
  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
  ...
 ```
 7) Show the number of float operations in the model.
 Note: float operations calculation depends on
 1) op.RegisterStatistics. If an op doesn’t
 have RegisterStatistics defined, its float operations cannot be counted.
 2) fully defined shape is also necessary in order to calculate flops.
 float operations number is provided by tensorflow::tfprof::OpLog logged from
 Python API.
 ```shell
 # Requires --graph_path, --op_log_path.
 tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
 _TFProfRoot (0/17.63b flops)
  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
  init/init_conv/Conv2D (113.25m/113.25m flops)
  pool_logit/xw_plus_b (1.28k/165.12k flops)
    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
  ...
 ```
 8) Show the number of parameters of all `tf.trainable_variables()` in the model.
 ```shell
 # Requires --graph_path --op_log_path.
 # store option for future commands.
 tfprof> set -account_type_regexes _trainable_variables
 tfprof> scope -max_depth 4 -select params
 _TFProfRoot (--/464.15k params)
  init/init_conv/DW (3x3x3x16, 432/432 params)
  pool_logit/DW (64x10, 640/640 params)
  pool_logit/biases (10, 10/10 params)
  unit_last/final_bn/beta (64, 64/64 params)
  unit_last/final_bn/gamma (64, 64/64 params)
 ```
 Where does “_trainable_variables” come from? It is from the OpLog file
 generated by the write_op_log() Python API. write_op_log() helps users create some
 common op types implicitly. Users can define their own op types and log them
 through the write_op_log() API.
 9) What if I’m lazy and don’t want to define op type? I have given my ops
 well-defined names in my model’s code. And want to use names to select a group
 of ops. Let’s try it!
 ```shell
 tfprof> set -account_type_regexes .*
 tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
 _TFProfRoot (0/18.43k params)
  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
 ```
 The above command allows you to filter ops that match specific names.
 `-account_displayed_op_only` asks tfprof to only account ops displayed
 in terminal. Otherwise, tfprof accounts all ops matched by
 `-account_type_regexes` recursively even if they are hidden due to some
 options such as -max_depth.
 10) TensorFlow has built-in op types. For example, built-in op type `Variable`
 seems to include `Variable's` created by your model. However, be careful when
 depending on it because TensorFlow creates extra `Variable` ops implicitly and
 the implicitly created ops can have the same prefix as the `Variable's` you
 defined.
 In the following example, extra `Variables` are created and “/Momentum” is
 appended to their names. This might cause your “model capacity” calculation
 to be wrong.
 ```shell
 tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
 _TFProfRoot (--/930.58k params)
  global_step (1/1 params)
  init/init_conv/DW (3x3x3x16, 432/864 params)
  pool_logit/DW (64x10, 640/1.28k params)
    pool_logit/DW/Momentum (64x10, 640/640 params)
  pool_logit/biases (10, 10/20 params)
    pool_logit/biases/Momentum (10, 10/10 params)
  unit_last/final_bn/beta (64, 64/128 params)
  unit_last/final_bn/gamma (64, 64/128 params)
  unit_last/final_bn/moving_mean (64, 64/64 params)
  unit_last/final_bn/moving_variance (64, 64/64 params)
 ```
 11) An example of defining extra op types for ops using `OpLog`
 First, in Python code, create an `OpLog` proto and add op type
 information to it:
 ```python
 op_log = tfprof_log_pb2.OpLog()
 entry = op_log.log_entries.add()
 entry.name = 'pool_logit/DW'
 entry.types.append('pool_logit')
 entry = op_log.log_entries.add()
 entry.name = 'pool_logit/biases'
 # Alternatively:
 # var = tf.get_variable(xxx)
 # entry.name = var.op.name
 entry.types.append('pool_logit')
 ```
 Second, call write_op_log to write the OpLog proto.
 ```python
 tf.contrib.tfprof.tfprof_logger.write_op_log(
    sess.graph, '/tmp/my_op_log_dir', op_log)
 ```
 Third, when starting the tfprof tool, specify
 "--op_log_path /tmp/my_op_log_dir/op_log"
 ```shell
 tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
 _TFProfRoot (--/650 params)
  pool_logit/DW (64x10, 640/640 params)
  pool_logit/biases (10, 10/10 params)
 ```
 Note that when you call
 `tf.contrib.tfprof.tfprof_logger.write_op_log(...)`,
 the tool adds all `Variables` inside `tf.trainable_variables()` to
 `_trainable_variables`.
 12) Run tfprof in one-shot mode and dump result to file.
 ```shell
 # Printed to stdout if --dump_to_file is not set.
 tfprof scope --graph_path=graph.pbtxt  \
             --max_depth=3 \
             --dump_to_file="/tmp/dump"
 Reading Files...
 Parsing GraphDef...
 Preparing Views...
 cat /tmp/dump
 _TFProfRoot (--/930.58k params)
  global_step (0/0 params)
  pool_logit/DW (64x10, 640/1.28k params)
  pool_logit/biases (10, 10/20 params)
 ```
 13) Analyze how balanced Variables are on parameter servers.
 In this tutorial, I'm going to use a seq2seq model, which is split
 across several gpus at workers and several parameter servers.
 In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
 gpu0. They share an op_type called 'gpu0'.
 ```shell
 bazel-bin/tensorflow/tools/tfprof/tfprof \
  --graph_path ~/tfprof/textsum/graph.pbtxt  \
  --run_meta_path ~/tfprof/textsum/run_meta
 # Looks like ps task 1 is holding more than twice as many parameters as task 0.
 tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
 _TFProfRoot (--/25.81m params)
 tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
 _TFProfRoot (--/58.84m params)
 ```
 ### CLI Input Files
 tfprof command line interface (CLI) loads dumped files from a tensorflow model
 and converts them into in-memory data structures. To use it, users need to specify
 the locations of the dumped files. The following are the dumped files loaded
 by tfprof:
 <b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
 representation of the model. For example, graph.pbtxt written by tf.Supervisor
 is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
 using tf.Graph.as_graph_def() or other API.
 <b>--run_meta_path:</b> tensorflow::RunMetadata.
 Used to get the memory and time consumption of
 each op of the model. Users need to enable it. For example, the following code
 snippet writes a RunMetadata file:
 ```python
 run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
 run_metadata = config_pb2.RunMetadata()
 # Once in a while, call it to get the RunMeta.
 _ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
 with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
  f.write(run_metadata.SerializeToString())
 ```
 <b>--op_log_path:</b>
 tensorflow::tfprof::OpLog. A proto used to provide extra op information
 for ops. By giving a group of ops a type name, users can easily aggregate the
 statistics for those ops without accidentally missing or including extra ops.
 tfprof exposes the following Python API to add op information and logging.
 ```python
 tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
 ```
 <b>--checkpoint_path:</b>
 TensorFlow checkpoint. It defines the _checkpoint_variables op type. It also
 provides checkpointed tensors' values.
 ## Design
 ### In-memory representation
 <b>Scope:</b> This representation organizes ops based on name scope hierarchy,
 similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
 For example op1 with name “name1/name2” is a child of op2 with name “name1”.
 <b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
 a graph structure. The graph is a “directed acyclic graph” (hopefully), with
 direction from “output to input”. The direction is designed this way so that users
 can trace from “result” to its “sources”.
 ### Command line options
 tfprof’s major goals are to measure system performance and quickly analyze
 model architectures. Hence, its commands and options should allow users to achieve
 these 2 goals easily.
 <b>graph:</b> It is expected that users will mostly use graph representation to
 debug system performance. Hence, tfprof supports graph command, which pulls the
 graph in-memory representation described above.
 <b>scope:</b> It is expected that some users might want to explore their model
 statistics using the name scope information they defined in the Python codes.
 Hence, tfprof supports “scope” command, which pulls the tree in-memory
 representation.
 <b>set:</b> It is used to store the options so that user doesn’t need to
 re-type the same option again and again in the follow up command line. Note that
 tfprof has traditional terminal’s history and auto-complete support.
 <b>help:</b> print help information.
 <b>Options:</b> Run “tfprof help” to get detailed explanations.
 ```python
 "-max_depth",
 "-min_bytes",
 "-min_micros",
 "-min_params",
 "-min_float_ops",
 "-order_by",
 "-account_type_regexes",
 "-start_name_regexes",
 "-trim_name_regexes",
 "-show_name_regexes",
 "-hide_name_regexes",
 "-account_displayed_op_only",
 "-select",
 "-viz",  # Only supported for graph command.
 "-dump_to_file",
 ```
 A key design is that stats are aggregated from descendants up to ancestors.
 `-account_type_regexes` is used to decide which ops stat is accounted. It makes
 decision based on op type. Usually set it to `.*` if no extra type information
 is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
 `-min/max` and `-show/hide/trim/start` options are only used to optionally
 display or hide ops based on ops’ names and stats. However, they don’t prevent
 tfprof from accounting stats of hidden ops. Hence, the stat of an op can be
 aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
 an option to break this rule. When it is set, only displayed ops are accounted.
 Regexes are all comma-separated, for example `-show_name_regexes`
 `regex1.*,regex2.*`. It is designed this way because it is convenient and comma
 is not expected to show up in op names.
 `-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
 (notice the indent printed) are sorted according to order_by.
 ## Future Work
 * Load SummaryWriter event logs so that it can show the latest summary value.
 * Better sorting and aggregation of outputs. Easier comprehension.
 * Currently, shape information is based on `graph.pbtxt`. When the shape
 information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
 and `Checkpoint` to complete shape information.
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
@ -1,5 +1,9 @@
 package(
    default_visibility = ["//tensorflow:__subpackages__"],
    features = [
        "-layering_check",
        "-parse_headers",
    ],
 )
 licenses(["notice"])  # Apache 2.0
@ -18,10 +22,10 @@ cc_library(
        ":tfprof_show",
        ":tfprof_utils",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:regexp_internal",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -49,11 +53,11 @@ cc_library(
        ":tfprof_utils",
        "//tensorflow/c:c_api",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:regexp_internal",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -69,10 +73,10 @@ cc_library(
        ":tfprof_tensor",
        ":tfprof_utils",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:regexp_internal",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -87,10 +91,10 @@ cc_library(
        ":tfprof_tensor",
        ":tfprof_utils",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:regexp_internal",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -109,12 +113,12 @@ tf_cc_test(
        ":tfprof_stats",
        ":tfprof_utils",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -138,6 +142,18 @@ cc_library(
    deps = [
        "//tensorflow/core:framework_headers_lib",
        "//tensorflow/core:lib",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
 # Header-only target: it lists print_model_analysis.h under hdrs and has no
 # srcs, so depending on it provides the declaration without an implementation
 # from this rule.
 cc_library(
    name = "print_model_analysis_hdr",
    hdrs = [
        "print_model_analysis.h",
    ],
    deps = [
        "//tensorflow/core:framework_lite",
        "//tensorflow/core:protos_all_cc",
    ],
 )
@ -149,10 +165,11 @@ cc_library(
        ":tfprof_options",
        ":tfprof_stats",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
    alwayslink = 1,
 )
 tf_cc_test(
@ -170,12 +187,12 @@ tf_cc_test(
        ":tfprof_stats",
        ":tfprof_utils",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -185,9 +202,9 @@ cc_library(
    hdrs = ["tfprof_tensor.h"],
    copts = ["-Wno-sign-compare"],
    deps = [
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
@ -203,12 +220,12 @@ tf_cc_test(
        ":tfprof_stats",
        ":tfprof_utils",
        "//tensorflow/c:checkpoint_reader",
        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/tools/tfprof:protos_all_cc",
    ],
 )
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
@ -13,20 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
 #include <stdio.h>
 #include <memory>
 #include <utility>
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 #include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                          const string* op_log, const string* command,
-                          const Options* options) {
+                          const string* options) {
  CHECK(graph) << "graph mustn't be null";
  CHECK(command) << "command mustn't be null";
  CHECK(options) << "options mustn't be null";
@ -50,16 +56,18 @@ string PrintModelAnalysis(const string* graph, const string* run_meta,
  TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
                   std::move(op_log_ptr), std::move(ckpt_reader));
-  if (options->dump_to_file.empty()) {
+  Options opts = Options::FromProtoStr(*options);
  if (opts.dump_to_file.empty()) {
    printf("\n=========================Options=============================\n");
-    printf("%s", options->ToString().c_str());
+    printf("%s", opts.ToString().c_str());
    printf("\n==================Model Analysis Report======================\n");
-    TFProfNode root(tf_stats.PrintGraph(*command, *options));
+    TFProfNode root(tf_stats.PrintGraph(*command, opts));
    printf("\n======================End of Report==========================\n");
    fflush(stdout);
    return root.SerializeAsString();
  }
-  return tf_stats.PrintGraph(*command, *options).SerializeAsString();
+  return tf_stats.PrintGraph(*command, opts).SerializeAsString();
 }
 }  // namespace tfprof
 }  // namespace tensorflow
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
 #include <string>
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 namespace tensorflow {
 namespace tfprof {
-
+class Options;
-// ***This API is only for swig.***
+// ***This API is only for swig. Don't use it directly!***
 //
 // Interface defined for Python API swig. Calls the tfprof core API.
 // 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
@ -37,9 +32,9 @@ namespace tfprof {
 // if not available.
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                          const string* op_log, const string* command,
-                          const Options* options);
+                          const string* options);
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
 namespace tensorflow {
 namespace tfprof {
@ -34,4 +34,4 @@ static const char* const kCkptVarType = "_checkpoint_variables";
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
 #include <stdio.h>
 #include <utility>
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/regexp.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 namespace tensorflow {
 namespace tfprof {
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
@ -16,8 +16,8 @@ limitations under the License.
 // Build a graph structure based on op inputs/outputs. The graph is a directed
 // acyclic graph pointing *from outputs to inputs*.
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
 #include <deque>
 #include <map>
@ -27,13 +27,13 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_show.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
@ -113,4 +113,4 @@ class TFGraph : public TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/tensor_description.pb.h"
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
 #include <map>
 #include <set>
 #include <string>
 #include <vector>
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_description.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 namespace tensorflow {
 namespace tfprof {
@ -103,4 +103,4 @@ class TFNode {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
@ -13,13 +13,41 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/tools/tfprof/tfprof_options.pb.h"
 namespace tensorflow {
 namespace tfprof {
 // Builds an Options instance from a serialized OptionsProto.
 // CHECK-fails when 'opts_proto_str' cannot be parsed as an OptionsProto.
 Options Options::FromProtoStr(const string& opts_proto_str) {
  OptionsProto proto;
  CHECK(proto.ParseFromString(opts_proto_str));
  // Copy every repeated string field of the proto into its own vector so the
  // constructor call below stays readable.
  const std::vector<string> device_regexes(proto.device_regexes().begin(),
                                           proto.device_regexes().end());
  const std::vector<string> account_type_regexes(
      proto.account_type_regexes().begin(),
      proto.account_type_regexes().end());
  const std::vector<string> start_name_regexes(
      proto.start_name_regexes().begin(), proto.start_name_regexes().end());
  const std::vector<string> trim_name_regexes(
      proto.trim_name_regexes().begin(), proto.trim_name_regexes().end());
  const std::vector<string> show_name_regexes(
      proto.show_name_regexes().begin(), proto.show_name_regexes().end());
  const std::vector<string> hide_name_regexes(
      proto.hide_name_regexes().begin(), proto.hide_name_regexes().end());
  const std::vector<string> select(proto.select().begin(),
                                   proto.select().end());
  // Argument order mirrors the Options constructor: scalar thresholds first,
  // then the regex/ordering filters, then the output settings.
  Options parsed(proto.max_depth(), proto.min_bytes(), proto.min_micros(),
                 proto.min_params(), proto.min_float_ops(), device_regexes,
                 proto.order_by(), account_type_regexes, start_name_regexes,
                 trim_name_regexes, show_name_regexes, hide_name_regexes,
                 proto.account_displayed_op_only(), select, proto.viz(),
                 proto.dump_to_file());
  return parsed;
 }
 string Options::ToString() const {
  const string s = strings::Printf(
      "%-28s%d\n"
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
 #include <set>
 #include <string>
@ -22,8 +22,6 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 namespace tensorflow {
 namespace tfprof {
@ -62,6 +60,8 @@ static const char* const kCmds[] = {
 struct Options {
 public:
  static Options FromProtoStr(const string& opts_proto_str);
  virtual ~Options() {}
  Options(int max_depth, tensorflow::int64 min_bytes,
          tensorflow::int64 min_micros, tensorflow::int64 min_params,
@ -116,4 +116,4 @@ struct Options {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
@ -13,17 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
 #include <stdio.h>
 #include <utility>
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/regexp.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 namespace tensorflow {
 namespace tfprof {
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
@ -17,8 +17,8 @@ limitations under the License.
 // For example, 'name1/name2' is a child of 'name1'.
 // Stats are aggregated from descendants from ancestors.
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
 #include <map>
 #include <memory>
@ -26,13 +26,13 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_show.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
@ -85,4 +85,4 @@ class TFScope : public TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
 #include <memory>
 #include <set>
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
@ -15,23 +15,23 @@ limitations under the License.
 // Parent class and utilities for tfprof_graph and tfprof_scope.
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
 #include <algorithm>
 #include <string>
 #include <vector>
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
@ -124,4 +124,4 @@ class TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
@ -13,30 +13,30 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 #include <utility>
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
 class TFProfShowTest : public ::testing::Test {
 protected:
  TFProfShowTest() {
-    string graph_path = io::JoinPath(
+    string graph_path =
-        testing::TensorFlowSrcRoot(),
+        io::JoinPath(testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
    std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
@ -44,19 +44,18 @@ class TFProfShowTest : public ::testing::Test {
        new tensorflow::RunMetadata());
    string run_meta_path =
        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+                     "tools/tfprof/internal/testdata/run_meta");
    TF_CHECK_OK(
        ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
    std::unique_ptr<OpLog> op_log_pb(new OpLog());
-    string op_log_path = io::JoinPath(
+    string op_log_path =
-        testing::TensorFlowSrcRoot(),
+        io::JoinPath(testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+                     "tools/tfprof/internal/testdata/tfprof_log");
    TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
-    string ckpt_path =
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
-        io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
    TF_Status* status = TF_NewStatus();
    std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
        new checkpoint::CheckpointReader(ckpt_path, status));
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 #include <stdio.h>
 #include <utility>
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
@ -20,8 +20,8 @@ limitations under the License.
 // 3. Accept command and options to selectively aggregate stats for analysis
 //    and print out the results.
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
 #include <map>
 #include <memory>
@ -29,20 +29,20 @@ limitations under the License.
 #include <string>
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_show.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
@ -79,4 +79,4 @@ class TFStats {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
@ -13,31 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 #include <utility>
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
 class TFProfStatsTest : public ::testing::Test {
 protected:
  TFProfStatsTest() {
-    string graph_path = io::JoinPath(
+    string graph_path =
-        testing::TensorFlowSrcRoot(),
+        io::JoinPath(testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
    std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
@ -45,19 +45,18 @@ class TFProfStatsTest : public ::testing::Test {
        new tensorflow::RunMetadata());
    string run_meta_path =
        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+                     "tools/tfprof/internal/testdata/run_meta");
    TF_CHECK_OK(
        ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
    std::unique_ptr<OpLog> op_log_pb(new OpLog());
-    string op_log_path = io::JoinPath(
+    string op_log_path =
-        testing::TensorFlowSrcRoot(),
+        io::JoinPath(testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+                     "tools/tfprof/internal/testdata/tfprof_log");
    TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
-    string ckpt_path =
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
-        io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
    TF_Status* status = TF_NewStatus();
    std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
        new checkpoint::CheckpointReader(ckpt_path, status));
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 namespace tensorflow {
 namespace tfprof {
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
@ -19,16 +19,16 @@ limitations under the License.
 //    is not supported by TensorFlow CheckPointReader library, though it is
 //    supported in current code.
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
 #include <typeinfo>
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
@ -117,4 +117,4 @@ class TFProfTensor {
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
@ -14,34 +14,33 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 namespace tensorflow {
 namespace tfprof {
 class TFProfTensorTest : public ::testing::Test {
 protected:
  TFProfTensorTest() {
-    string graph_path = io::JoinPath(
+    string graph_path =
-        testing::TensorFlowSrcRoot(),
+        io::JoinPath(testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
    std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
    std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
    std::unique_ptr<OpLog> op_log_pb;
-    string ckpt_path =
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
-        io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
    TF_Status* status = TF_NewStatus();
    std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
        new checkpoint::CheckpointReader(ckpt_path, status));
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include <stdio.h>
 #include <algorithm>
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
 #include <string>
 #include <vector>
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 namespace tensorflow {
 namespace tfprof {
@ -47,4 +47,4 @@ void PrintHelp();
 }  // namespace tfprof
 }  // namespace tensorflow
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
--- a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
--- a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
@ -24,10 +24,6 @@ limitations under the License.
 #include "linenoise.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
@ -36,6 +32,10 @@ limitations under the License.
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/util/command_line_flags.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 #include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 #include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 using tensorflow::str_util::Split;
--- a/tensorflow/tools/tfprof/tfprof_options.proto
+++ b/tensorflow/tools/tfprof/tfprof_options.proto
@ -0,0 +1,24 @@
 syntax = "proto2";
 package tensorflow.tfprof;
 // Refer to tfprof_options.h/cc for the documentation of each option.
 // Only used to pass tfprof options from Python to C++.
 //
 // NOTE(review): the field names below presumably mirror the option names
 // documented in tfprof_options.h -- confirm against that header before
 // relying on any per-field meaning stated here.
 message OptionsProto {
  // Scalar int64 options. NOTE(review): the "max_"/"min_" prefixes suggest
  // display limits/thresholds -- confirm in tfprof_options.h.
  optional int64 max_depth = 1;
  optional int64 min_bytes = 2;
  optional int64 min_micros = 3;
  optional int64 min_params = 4;
  optional int64 min_float_ops = 5;
  // Zero or more strings; presumably regular expressions, per the field name.
  repeated string device_regexes = 6;
  optional string order_by = 7;
  // Repeated string lists; presumably regular expressions, per the
  // "_regexes" suffix -- confirm matching semantics in tfprof_options.h.
  repeated string account_type_regexes = 8;
  repeated string start_name_regexes = 9;
  repeated string trim_name_regexes = 10;
  repeated string show_name_regexes = 11;
  repeated string hide_name_regexes = 12;
  optional bool account_displayed_op_only = 13;
  // Repeated: more than one value may be supplied.
  repeated string select = 14;
  optional bool viz = 15;
  // NOTE(review): presumably an output file path -- confirm in
  // tfprof_options.h.
  optional string dump_to_file = 16;
 }
--- a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto