From 289ddb1cb6209d09e14ad44a4363f9a8b68e9006 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 14:02:41 -0800
Subject: [PATCH] Add tfprof python API to tf.contrib and move tfprof CLI to
 tensorflow/tools. Change: 137207286

---
 tensorflow/BUILD                              |   4 +-
 tensorflow/contrib/tfprof/BUILD               |   1 +
 tensorflow/contrib/tfprof/README.md           | 435 +----------------
 tensorflow/contrib/tfprof/__init__.py         |   1 +
 .../contrib/tfprof/python/tools/tfprof/BUILD  |  53 +-
 .../python/tools/tfprof/model_analyzer.py     | 187 +++++++
 .../tools/tfprof/model_analyzer_test.py       |  84 ++++
 .../tools/tfprof/print_model_analysis_test.py | 227 +++++++++
 .../pywrap_tensorflow_print_model_analysis.i  |  43 ++
 .../python/tools/tfprof/tfprof_logger.py      |   4 +-
 tensorflow/core/BUILD                         |   2 +-
 tensorflow/python/BUILD                       |   1 +
 .../{contrib/tfprof => }/tools/tfprof/BUILD   |   6 +-
 tensorflow/tools/tfprof/README.md             | 455 ++++++++++++++++++
 .../tfprof => }/tools/tfprof/internal/BUILD   |  35 +-
 .../tfprof/internal/print_model_analysis.cc   |  22 +-
 .../tfprof/internal/print_model_analysis.h    |  19 +-
 .../tools/tfprof/internal/testdata/ckpt       | Bin
 .../tfprof/internal/testdata/graph.pbtxt      |   0
 .../tools/tfprof/internal/testdata/run_meta   |   0
 .../tools/tfprof/internal/testdata/tfprof_log |   0
 .../tools/tfprof/internal/tfprof_constants.h  |   6 +-
 .../tools/tfprof/internal/tfprof_graph.cc     |   6 +-
 .../tools/tfprof/internal/tfprof_graph.h      |  16 +-
 .../tools/tfprof/internal/tfprof_node.cc      |   2 +-
 .../tools/tfprof/internal/tfprof_node.h       |   8 +-
 .../tools/tfprof/internal/tfprof_options.cc   |  30 +-
 .../tools/tfprof/internal/tfprof_options.h    |  10 +-
 .../tools/tfprof/internal/tfprof_scope.cc     |   6 +-
 .../tools/tfprof/internal/tfprof_scope.h      |  16 +-
 .../tools/tfprof/internal/tfprof_show.cc      |   2 +-
 .../tools/tfprof/internal/tfprof_show.h       |  18 +-
 .../tools/tfprof/internal/tfprof_show_test.cc |  31 +-
 .../tools/tfprof/internal/tfprof_stats.cc     |   2 +-
 .../tools/tfprof/internal/tfprof_stats.h      |  22 +-
 .../tfprof/internal/tfprof_stats_test.cc      |  31 +-
 .../tools/tfprof/internal/tfprof_tensor.cc    |   2 +-
 .../tools/tfprof/internal/tfprof_tensor.h     |   8 +-
 .../tfprof/internal/tfprof_tensor_test.cc     |  21 +-
 .../tools/tfprof/internal/tfprof_utils.cc     |   2 +-
 .../tools/tfprof/internal/tfprof_utils.h      |   8 +-
 .../tfprof => }/tools/tfprof/tfprof_log.proto |   0
 .../tfprof => }/tools/tfprof/tfprof_main.cc   |   8 +-
 tensorflow/tools/tfprof/tfprof_options.proto  |  24 +
 .../tools/tfprof/tfprof_output.proto          |   0
 45 files changed, 1275 insertions(+), 583 deletions(-)
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/BUILD (84%)
 create mode 100644 tensorflow/tools/tfprof/README.md
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/BUILD (86%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/print_model_analysis.cc (73%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/print_model_analysis.h (62%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/ckpt (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/graph.pbtxt (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/run_meta (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/tfprof_log (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_constants.h (84%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_graph.cc (97%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_graph.h (85%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_node.cc (95%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_node.h (90%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_options.cc (57%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_options.h (90%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_scope.cc (96%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_scope.h (80%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_show.cc (99%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_show.h (84%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_show_test.cc (76%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_stats.cc (98%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_stats.h (74%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_stats_test.cc (89%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_tensor.cc (97%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_tensor.h (92%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_tensor_test.cc (96%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_utils.cc (99%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_utils.h (81%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/tfprof_log.proto (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/tfprof_main.cc (97%)
 create mode 100644 tensorflow/tools/tfprof/tfprof_options.proto
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/tfprof_output.proto (100%)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 7a2105201f3..feb1d490f88 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -121,8 +121,6 @@ filegroup(
         "//tensorflow/contrib/tensorboard:all_files",
         "//tensorflow/contrib/testing:all_files",
         "//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
-        "//tensorflow/contrib/tfprof/tools/tfprof:all_files",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
         "//tensorflow/contrib/training:all_files",
         "//tensorflow/contrib/util:all_files",
         "//tensorflow/core:all_files",
@@ -180,6 +178,8 @@ filegroup(
         "//tensorflow/tools/proto_text:all_files",
         "//tensorflow/tools/quantization:all_files",
         "//tensorflow/tools/test:all_files",
+        "//tensorflow/tools/tfprof:all_files",
+        "//tensorflow/tools/tfprof/internal:all_files",
         "//tensorflow/user_ops:all_files",
         "//third_party/hadoop:all_files",
     ],
diff --git a/tensorflow/contrib/tfprof/BUILD b/tensorflow/contrib/tfprof/BUILD
index d55bda1bd05..e817cb86dfd 100644
--- a/tensorflow/contrib/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/BUILD
@@ -12,6 +12,7 @@ py_library(
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer",
         "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
     ],
 )
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
index 013be486767..e103cb21216 100644
--- a/tensorflow/contrib/tfprof/README.md
+++ b/tensorflow/contrib/tfprof/README.md
@@ -20,434 +20,9 @@ and measures system performance.
 4.  Explore model based on name scope or graph structure.
 5.  Selectively grouping/filtering/accounting/ordering ops.
 
-### Interfaces
+tfprof can be used through a command-line interface (CLI) or a Python API.
+The CLI is located in tensorflow/tools/tfprof.
+The Python API is located in tensorflow/contrib/tfprof.
+The tutorial is located in tensorflow/tools/tfprof/README.md.
 
-[CLI Tutorials](#cli-tutorials):
-It supports interactive mode for exploration and single-shot mode for
-scripts. Outputs can be dumped to files or printed in terminal.
-
-Python API Tutorials: Python API is not released yet.
-
-## CLI Tutorials
-
-Tutorials are based on a 32 layers ResNet.
-TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
-
-### Examples
-
-1) Start `tfprof` command line tool
-
-```shell
-# Build the tool.
-bazel build -c opt tensorflow/contrib/tfprof/...
-
-# Help information, including detail 'option' instructions.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
-#
-# The following commands will start tfprof interactive mode.
-#
-# Profile model shapes and parameters only.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=/graph.pbtxt
-#
-# Additionally profile checkpoint statistics and values.
-# Use '-account_type_regexes _checkpoint_variables' to select
-# checkpoint tensors.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=graph.pbtxt \
-    --checkpoint_path=model.ckpt
-#
-# Additionally profile ops requested memory and timing.
-# See CLI Input Files section on generating run_meta file.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=graph.pbtxt \
-    --run_meta_path=run_meta \
-    --checkpoint_path=model.ckpt
-#
-# tfprof_log is used to define customized op types and float ops.
-# Use tfprof_logger.write_op_log() to create tfprof_log.
-# See 11) in Examples section on generating tfprof_log file.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=graph.pbtxt \
-    --run_meta_path=run_meta \
-    --op_log_path=tfprof_log \
-    --checkpoint_path=model.ckpt
-```
-Note that `graph.pbtxt` is an ASCII text format.
-
-2) Press enter to show the default options
-
-```shell
-tfprof>
-tfprof>
--max_depth                  4
--min_bytes                  0
--min_micros                 0
--min_params                 0
--min_float_ops              0
--device_regexes             .*
--order_by                   name
--account_type_regexes       Variable
--start_name_regexes         .*
--trim_name_regexes
--show_name_regexes          .*
--hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
--account_displayed_op_only  false
-# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
-# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
--select                     params
--viz                        false
--dump_to_file
-```
-
-3) I want to see the `BatchNorm`'s gamma value in checkpoint.
-
-```shell
-# Requires --graph_path, --checkpoint_path.
-tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
-_TFProfRoot ()
-  unit_1_0/shared_activation/init_bn/gamma ()
-[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
-  unit_1_0/sub2/bn2/gamma ()
-[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
-```
-
-4) I want to see my checkpoint tensors shape and number of parameters.
-
-```shell
-# Requires --graph_path, --checkpoint_path.
-# Increase -max_depth to see all tensors.
-tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
-_TFProfRoot (--/930.58k params)
-  global_step (0/0 params)
-  init/init_conv/DW (3x3x3x16, 432/864 params)
-  pool_logit/DW (64x10, 640/1.28k params)
-    pool_logit/DW/Momentum (64x10, 640/640 params)
-  pool_logit/biases (10, 10/20 params)
-    pool_logit/biases/Momentum (10, 10/10 params)
-  unit_last/final_bn/beta (64, 64/128 params)
-  unit_last/final_bn/gamma (64, 64/128 params)
-  unit_last/final_bn/moving_mean (64, 64/64 params)
-  unit_last/final_bn/moving_variance (64, 64/64 params)
-```
-
-5) I defined an op named ‘cost’ to calculate the loss. I want to know what ops
-it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
-graph dependencies.
-
-```shell
-# Requires --graph_path, --run_meta_path.
-tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
-_TFProfRoot (0us/3.61sec)
-  init/init_conv/Conv2D (11.75ms/3.10sec)
-    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
-  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
-  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
-  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
-  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
-```
-
-6) I want to know the expensive operations during the back propagation.
-Hint: tensorflow prepend ‘gradient’ to your defined name scopes. Use the ‘scope’
-command to explore based on name scope hierarchies.
-
-```shell
-# Requires --graph_path, --run_meta_path.
-tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
-_TFProfRoot (0us/2.29sec)
-  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
-  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
-  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
-  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
-  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
-  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
-  ...
-```
-
-7) Show the number of float operations in the model.
-Note: float operations calculation depends on
-1) op.RegisterStatistics. If an op doesn’t
-have RegisterStatistics defined, its float operations cannot be counted.
-2) fully defined shape is also necessary in order to calculate flops.
-float operations number is provided by tensorflow::tfprof::OpLog logged from
-Python API.
-
-```shell
-# Requires --graph_path, --op_log_path.
-tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
-_TFProfRoot (0/17.63b flops)
-  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
-  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
-  init/init_conv/Conv2D (113.25m/113.25m flops)
-  pool_logit/xw_plus_b (1.28k/165.12k flops)
-    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
-  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
-  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
-  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
-  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
-  ...
-```
-
-8) Show the number of parameters of all `tf.trainable_variables()` in the model.
-
-```shell
-# Requires --graph_path --op_log_path.
-# store option for future commands.
-tfprof> set -account_type_regexes _trainable_variables
-tfprof> scope -max_depth 4 -select params
-_TFProfRoot (--/464.15k params)
-  init/init_conv/DW (3x3x3x16, 432/432 params)
-  pool_logit/DW (64x10, 640/640 params)
-  pool_logit/biases (10, 10/10 params)
-  unit_last/final_bn/beta (64, 64/64 params)
-  unit_last/final_bn/gamma (64, 64/64 params)
-```
-
-Where does “_trainable_variables” come from? It is from the OpLog file
-generated by write_op_log() Python API. write_op_log() help users create some
-common op types implicitly. Users can define their own op types and log it
-through the write_op_log() API.
-
-9) What if I’m lazy and don’t want to define op type? I have given my ops
-well-defined names in my model’s code. And want to use names to select a group
-of ops. Let’s try it!
-
-```shell
-tfprof> set -account_type_regexes .*
-tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
-_TFProfRoot (0/18.43k params)
-  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
-  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
-```
-
-The above command allows you to filter ops that match specific names.
-`-account_displayed_op_only` asks tfprof to only account ops displayed
-in terminal. Otherwise, tfprof accounts all ops matched by
-`-account_type_regexes` recursively even if they are hidden due to some
-options such as -max_depth.
-
-10) TensorFlow has built-in op types. For example, built-in op type `Variable`
-seems to include `Variable's` created by your model. However, be careful when
-depending on it because TensorFlow creates extra `Variable` ops implicitly and
-the implicitly created ops can have the same prefix as the `Variable's` you
-defined.
-
-In the following example, extra `Variables` are created and “/Momentum” is
-appended to their names. This might cause you “model capacity” calculation
-to get wrong.
-
-```shell
-tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
-_TFProfRoot (--/930.58k params)
-  global_step (1/1 params)
-  init/init_conv/DW (3x3x3x16, 432/864 params)
-  pool_logit/DW (64x10, 640/1.28k params)
-    pool_logit/DW/Momentum (64x10, 640/640 params)
-  pool_logit/biases (10, 10/20 params)
-    pool_logit/biases/Momentum (10, 10/10 params)
-  unit_last/final_bn/beta (64, 64/128 params)
-  unit_last/final_bn/gamma (64, 64/128 params)
-  unit_last/final_bn/moving_mean (64, 64/64 params)
-  unit_last/final_bn/moving_variance (64, 64/64 params)
-```
-
-
-11) A example of defining extra op type for ops using `OpLog`
-
-First, in Python code, create an `OpLog` proto and add op type
-information to it:
-
-```python
-
-op_log = tfprof_log_pb2.OpLog()
-entry = op_log.log_entries.add()
-entry.name = 'pool_logit/DW'
-entry.types.append('pool_logit')
-entry = op_log.log_entries.add()
-entry.name = 'pool_logit/biases'
-# Alternatively:
-# var = tf.get_variable(xxx)
-# entry.name = var.op.name
-entry.types.append('pool_logit')
-```
-
-Second, call write_op_log to write the OpLog proto.
-
-```python
-tf.tfprof.tfprof_logger.write_op_log(sess.graph, /tmp/my_op_log_dir, op_log)
-```
-
-Third, when starting the tfprof tool, specify
-"--op_log_path /tmp/my_op_log_dir/op_log"
-
-```shell
-tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
-_TFProfRoot (--/650 params)
-  pool_logit/DW (64x10, 640/640 params)
-  pool_logit/biases (10, 10/10 params)
-```
-
-Note that when you call
-`tf.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all `Variables`
-inside `tf.trainable_variables()` to `_trainable_variables`.
-
-12) Run tfprof in one-shot mode and dump result to file.
-
-```shell
-# Printed to stdout if --dump_to_file is not set.
-tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt  \
-             --max_depth 3 \
-             --dump_to_file "/tmp/dump"
-Reading Files...
-Parsing GraphDef...
-Preparing Views...
-
-cat /tmp/dump
-_TFProfRoot (--/930.58k params)
-  global_step (0/0 params)
-  pool_logit/DW (64x10, 640/1.28k params)
-  pool_logit/biases (10, 10/20 params)
-```
-
-13) Analyze how balanced Variable are on parameter servers.
-
-In this tutorial, I'm going to use a seq2seq model, which are split
-on several gpus at workers and several parameter servers.
-
-In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
-gpu0. They share an op_type called 'gpu0'.
-
-```shell
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-  --graph_path ~/tfprof/textsum/graph.pbtxt  \
-  --run_meta_path ~/tfprof/textsum/run_meta
-
-# Looks like ps task 1 is holding twice more parameters than task 0.
-tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
-_TFProfRoot (--/25.81m params)
-tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
-_TFProfRoot (--/58.84m params)
-```
-
-### CLI Input Files
-
-tfprof command line inference (CLI) loads dumped files from a tensorflow model.
-Convert them into in-memory data structures. To use it, users need to specify
-the locations of the dumped files. The following are the dumped files loaded
-by tfprof:
-
-<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
-representation of the model. For example, graph.pbtxt written by tf.Supervisor
-is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
-using tf.Graph.as_graph_def() or other API.
-
-<b>--run_meta_path:</b> tensorflow::RunMetadata.
-Used to get the memory and time consumption of
-each op of the model. Users need to enable it. For example, the following code
-snippet writes a RunMetadata file:
-
-```python
-run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
-run_metadata = config_pb2.RunMetadata()
-# Once a while, call it the get the RunMeta.
-_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
-with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
-  f.write(run_metadata.SerializeToString())
-```
-
-<b>--op_log_path:</b>
-tensorflow::tfprof::OpLog. A proto used to provide extra op information
-for ops. By giving a group of ops a type name, users can easily aggregate the
-statistics for those ops without accidently missing or including extra ops.
-tfprof exposes the following Python API to add op information and logging.
-
-```python
-tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
-```
-
-<b>--checkpoint_path:</b>
-TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
-provides checkpointed tensors' values.
-
-
-## Design
-
-
-### In-memory representation
-
-<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
-similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
-For example op1 with name “name1/name2” is a child of op2 with name “name1”.
-
-<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
-a graph structure. The graph is a “directed acyclic graph” (hopefully), with
-direction from “output to input”. The direction is design this way so that users
-can trace from “result” to its “sources”.
-
-### Command line options
-
-tfprof’s major goals are to measure system performance and quicly analyze
-model architectures. Hence, its commands and options should allow users to achieve
-these 2 goals easily.
-
-<b>graph:</b> It is expected that users will mostly use graph representation to
-debug system performance. Hence, tfprof supports graph command, which pulls the
-graph in-memory representation described above.
-
-<b>scope:</b> It is expected that some users might want to explore their model
-statistics using the name scope information they defined in the Python codes.
-Hence, tfprof supports “scope” command, which pulls the tree in-memory
-representation.
-
-<b>set:</b> It is used to store the options so that user doesn’t need to
-re-type the same option again and again in the follow up command line. Note that
-tfprof has traditional terminal’s history and auto-complete support.
-
-<b>help:</b> print help information.
-
-<b>Options:</b> Run “tfprof help” to get detailed explanations.
-
-```python
-"-max_depth",
-"-min_bytes",
-"-min_micros",
-"-min_params",
-"-min_float_ops",
-"-order_by",
-"-account_type_regexes",
-"-start_name_regexes",
-"-trim_name_regexes",
-"-show_name_regexes",
-"-hide_name_regexes",
-"-account_displayed_op_only",
-"-select",
-"-viz",  # Only supported for graph command.
-"-dump_to_file",
-```
-
-A key design is that stats are aggregated from descendants up to ancestors.
-`-account_type_regexes` is used to decide which ops stat is accounted. It makes
-decision based on op type. Usually set it to `.*` if no extra type information
-is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
-`-min/max` and `-show/hide/trim/start` options are only used the optionally
-displayed or hide ops based on ops’ name and stats. However, they don’t prevent
-tfprof from accounting stats of hidden ops. Hence, the stat of a op can be
-aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
-an option to break this rule. When it is set, only displayed ops are accounted.
-
-Regexes are all comma-separated, for example `-show_name_regexes`
-`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
-is not expected to show up in op names.
-
-`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
-(notice the indent printed) are sorted according to order_by.
-
-## Future Work
-
-* Load SummaryWriter event logs so that it can show the latest summary value.
-
-* Better sorting and aggregation of outputs. Easier comprehension.
-
-* Currently, shape information is based on `graph.pbtxt`. When the shape
-information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
-and `Checkpoint` to complete shape information.
+Enjoy!
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/__init__.py b/tensorflow/contrib/tfprof/__init__.py
index ce777979b96..129dad2726c 100644
--- a/tensorflow/contrib/tfprof/__init__.py
+++ b/tensorflow/contrib/tfprof/__init__.py
@@ -17,5 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer
 from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
 from tensorflow.python.util.all_util import make_all
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
index 87a8311486f..07677c6ed73 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
@@ -3,14 +3,36 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//visibility:public"])
 
 load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
+
+py_library(
+    name = "model_analyzer",
+    srcs = ["model_analyzer.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib",
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
+        "//tensorflow/tools/tfprof:protos_all_py",
+    ],
+)
+
+py_test(
+    name = "model_analyzer_test",
+    srcs = ["model_analyzer_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":model_analyzer",
+        "//tensorflow:tensorflow_py",
+    ],
+)
 
 py_library(
     name = "tfprof_logger",
     srcs = ["tfprof_logger.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/tools/tfprof:protos_all_py",
     ],
 )
 
@@ -20,7 +42,34 @@ tf_py_test(
     additional_deps = [
         ":tfprof_logger",
         "//tensorflow:tensorflow_py",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
+        "//tensorflow/tools/tfprof:protos_all_py",
+    ],
+)
+
+tf_py_wrap_cc(
+    name = "pywrap_tensorflow_print_model_analysis_lib",
+    srcs = ["pywrap_tensorflow_print_model_analysis.i"],
+    swig_includes = [
+        "//tensorflow/python:lib/core/strings.i",
+        "//tensorflow/python:platform/base.i",
+    ],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/tools/tfprof/internal:print_model_analysis_hdr",
+        "//util/python:python_headers",
+    ],
+)
+
+py_test(
+    name = "print_model_analysis_test",
+    srcs = ["print_model_analysis_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":pywrap_tensorflow_print_model_analysis_lib",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/tools/tfprof:protos_all_py",
     ],
 )
 
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
new file mode 100644
index 00000000000..92943b1adb4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
@@ -0,0 +1,187 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model Analyzer.
+
+Analyze model, including shape, params, time, memory, structure, etc.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
+from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
+from tensorflow.tools.tfprof import tfprof_options_pb2
+from tensorflow.tools.tfprof import tfprof_output_pb2
+
+# pylint: disable=bad-whitespace
+# pylint: disable=bad-continuation
+# Example tfprof_options for print_model_analysis API.
+#
+# Show the parameter statistics of trainable variables.
+TRAINABLE_VARS_PARAMS_STAT_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['params'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the number of float operations.
+FLOAT_OPS_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 1,
+    'device_regexes': ['.*'],
+    'order_by': 'float_ops',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['float_ops'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show number of parameters on parameter server 0.
+# It is recommended to provide the `run_meta` argument
+# to have complete device placement info.
+PRINT_PARAMS_ON_DEVICE = {
+    'max_depth': 1,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*ps.*task:0.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': False,
+    'select': ['device', 'params'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the timing stats and memory demands.
+PRINT_ALL_TIMING_MEMORY = {
+    'max_depth': 10000,
+    'min_bytes': 1,  # Only >=1
+    'min_micros': 1,  # Only >=1
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['micros', 'bytes'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# pylint: enable=bad-whitespace
+# pylint: enable=bad-continuation
+
+
+def print_model_analysis(graph,
+                         run_meta=None,
+                         op_log=None,
+                         tfprof_cmd='scope',
+                         tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS):
+  """Print model statistics.
+
+    Prints the model statistics to stdout. Also returns the results
+    in a TFProfNode proto. See tensorflow/tools/tfprof/README.md or run:
+    'bazel run tensorflow/tools/tfprof help'
+
+    Examples:
+      Show the parameter/shape statistics of tf.trainable_variables().
+        print_model_analysis(sess.graph).
+
+      Show number of float ops. Only ops with RegisterStatistics defined
+      are counted.
+        show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS
+        print_model_analysis(sess.graph, tfprof_options=show_float_op_opts)
+
+  Args:
+    graph: tf.Graph.
+    run_meta: tensorflow::RunMetadata proto. When provided, also shows valid
+              timing and memory information when 'select' option contains
+              'micros' and 'bytes'.
+    op_log: tensorflow::tfprof::OpLog proto. Users can use this proto to
+            group together ops and use an op_type to select the group.
+    tfprof_cmd: string. Either 'scope' or 'graph'. The 'scope' view organizes
+                ops by their name scopes. The 'graph' view organizes ops by
+                their graph inputs.
+    tfprof_options: dict with the same keys as the example option dicts in
+                    this module. It is only read, never modified. See
+                    'tfprof help' for details.
+  Returns:
+    TFProfNode proto. Side effect: a formatted output to stdout.
+  """
+  # pylint: disable=protected-access
+  op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta)
+  # pylint: enable=protected-access
+
+  # Copy the options dict field by field into an OptionsProto.
+  opts = tfprof_options_pb2.OptionsProto()
+  opts.max_depth = tfprof_options['max_depth']
+  opts.min_bytes = tfprof_options['min_bytes']
+  opts.min_micros = tfprof_options['min_micros']
+  opts.min_params = tfprof_options['min_params']
+  opts.min_float_ops = tfprof_options['min_float_ops']
+  opts.device_regexes.extend(tfprof_options['device_regexes'])
+  opts.order_by = tfprof_options['order_by']
+  opts.account_type_regexes.extend(tfprof_options['account_type_regexes'])
+  opts.start_name_regexes.extend(tfprof_options['start_name_regexes'])
+  opts.trim_name_regexes.extend(tfprof_options['trim_name_regexes'])
+  opts.show_name_regexes.extend(tfprof_options['show_name_regexes'])
+  opts.hide_name_regexes.extend(tfprof_options['hide_name_regexes'])
+  opts.account_displayed_op_only = tfprof_options['account_displayed_op_only']
+  opts.select.extend(tfprof_options['select'])
+  opts.viz = tfprof_options['viz']
+  opts.dump_to_file = tfprof_options['dump_to_file']
+
+  # The SWIG wrapper exchanges serialized protos; an absent proto is sent as
+  # an empty byte string.
+  run_meta_str = run_meta.SerializeToString() if run_meta else b''
+  op_log_str = op_log.SerializeToString() if op_log else b''
+
+  tfprof_node = tfprof_output_pb2.TFProfNode()
+  tfprof_node.ParseFromString(print_mdl.PrintModelAnalysis(
+      graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
+      tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
+  # Hand the parsed result back to the caller, as documented in Returns.
+  return tfprof_node
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
new file mode 100644
index 00000000000..2673a64d333
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
@@ -0,0 +1,84 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import tensorflow as tf
+
+
+class PrintModelAnalysisTest(tf.test.TestCase):
+
+  def _BuildSmallModel(self):
+    """Returns a zero image passed through two 'SAME'-padded convolutions."""
+    strides = [1, 2, 2, 1]
+    inputs = tf.zeros([2, 6, 6, 3])
+    first_filter = tf.get_variable(
+        'DW', [3, 3, 3, 6], tf.float32,
+        initializer=tf.random_normal_initializer(stddev=0.001))
+    hidden = tf.nn.conv2d(inputs, first_filter, strides, padding='SAME')
+    second_filter = tf.get_variable(
+        'DW2', [2, 2, 6, 12], tf.float32,
+        initializer=tf.random_normal_initializer(stddev=0.001))
+    # The output of the second convolution is the model's result.
+    return tf.nn.conv2d(hidden, second_filter, strides, padding='SAME')
+
+  def testDumpToFile(self):
+    """Checks the report that print_model_analysis writes to dump_to_file."""
+    analyzer = tf.contrib.tfprof.model_analyzer
+    # Copy first: writing into the shared module-level dict leaks across tests.
+    opts = dict(analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
+    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
+    with tf.Session() as sess:
+      _ = self._BuildSmallModel()
+      analyzer.print_model_analysis(sess.graph, tfprof_options=opts)
+      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
+        self.assertEqual('_TFProfRoot (--/450 params)\n'
+                         '  DW (3x3x3x6, 162/162 params)\n'
+                         '  DW2 (2x2x6x12, 288/288 params)\n',
+                         f.read().decode('utf-8'))
+
+  def testSelectEverything(self):
+    """Checks the dumped report with a run trace and many columns selected."""
+    analyzer = tf.contrib.tfprof.model_analyzer
+    # Copy first: writing into the shared module-level dict leaks across tests.
+    opts = dict(analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
+    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
+    opts['account_type_regexes'] = ['.*']
+    opts['select'] = [
+        'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
+    ]
+
+    with tf.Session() as sess:
+      x = self._BuildSmallModel()
+      sess.run(tf.initialize_all_variables())
+      run_meta = tf.RunMetadata()
+      _ = sess.run(x,
+                   options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
+                   run_metadata=run_meta)
+      analyzer.print_model_analysis(sess.graph, run_meta, tfprof_options=opts)
+
+      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
+        # pylint: disable=line-too-long
+        self.assertEqual(
+            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
+            f.read().decode('utf-8'))
+        # pylint: enable=line-too-long
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
new file mode 100644
index 00000000000..4000f0024e8
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
@@ -0,0 +1,227 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""print_model_analysis test."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from google.protobuf import text_format
+from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
+from tensorflow.tools.tfprof import tfprof_options_pb2
+from tensorflow.tools.tfprof import tfprof_output_pb2
+
+# pylint: disable=bad-whitespace
+# pylint: disable=bad-continuation
+TEST_OPTIONS = {  # Copied field by field into an OptionsProto by the test.
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['params'],
+    'viz': False
+}
+
+# pylint: enable=bad-whitespace
+# pylint: enable=bad-continuation
+
+
+class PrintModelAnalysisTest(tf.test.TestCase):
+
+  def _BuildSmallModel(self):
+    """Returns a zero image convolved with a single 'DW' filter variable."""
+    inputs = tf.zeros([2, 6, 6, 3])
+    conv_filter = tf.get_variable(
+        'DW', [6, 6, 3, 6], tf.float32,
+        initializer=tf.random_normal_initializer(stddev=0.001))
+    # The convolution output is the whole model.
+    return tf.nn.conv2d(inputs, conv_filter, [1, 2, 2, 1], padding='SAME')
+
+  def testPrintModelAnalysis(self):
+    """Compares the TFProfNode parsed from PrintModelAnalysis to a golden."""
+    opts = tfprof_options_pb2.OptionsProto()
+    opts.max_depth = TEST_OPTIONS['max_depth']
+    opts.min_bytes = TEST_OPTIONS['min_bytes']
+    opts.min_micros = TEST_OPTIONS['min_micros']
+    opts.min_params = TEST_OPTIONS['min_params']
+    opts.min_float_ops = TEST_OPTIONS['min_float_ops']
+    for p in TEST_OPTIONS['device_regexes']:
+      opts.device_regexes.append(p)
+    opts.order_by = TEST_OPTIONS['order_by']
+    for p in TEST_OPTIONS['account_type_regexes']:
+      opts.account_type_regexes.append(p)
+    for p in TEST_OPTIONS['start_name_regexes']:
+      opts.start_name_regexes.append(p)
+    for p in TEST_OPTIONS['trim_name_regexes']:
+      opts.trim_name_regexes.append(p)
+    for p in TEST_OPTIONS['show_name_regexes']:
+      opts.show_name_regexes.append(p)
+    for p in TEST_OPTIONS['hide_name_regexes']:
+      opts.hide_name_regexes.append(p)
+    opts.account_displayed_op_only = TEST_OPTIONS['account_displayed_op_only']
+    for p in TEST_OPTIONS['select']:
+      opts.select.append(p)
+    opts.viz = TEST_OPTIONS['viz']
+    # Build the graph, then call the wrapper with empty run_meta/op_log bytes.
+    with tf.Session() as sess:
+      _ = self._BuildSmallModel()
+      tfprof_pb = tfprof_output_pb2.TFProfNode()
+      tfprof_pb.ParseFromString(
+          print_mdl.PrintModelAnalysis(sess.graph.as_graph_def(
+          ).SerializeToString(), b'', b'', b'scope', opts.SerializeToString()))
+      expected_pb = tfprof_output_pb2.TFProfNode()
+      text_format.Merge(r"""name: "_TFProfRoot"
+              exec_micros: 0
+              requested_bytes: 0
+              total_exec_micros: 0
+              total_requested_bytes: 0
+              total_parameters: 648
+              children {
+                name: "Conv2D"
+                exec_micros: 0
+                requested_bytes: 0
+                total_exec_micros: 0
+                total_requested_bytes: 0
+                total_parameters: 0
+                float_ops: 0
+                total_float_ops: 0
+              }
+              children {
+                name: "DW"
+                exec_micros: 0
+                requested_bytes: 0
+                parameters: 648
+                total_exec_micros: 0
+                total_requested_bytes: 0
+                total_parameters: 648
+                children {
+                  name: "DW/Assign"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                children {
+                  name: "DW/Initializer"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  children {
+                    name: "DW/Initializer/random_normal"
+                    exec_micros: 0
+                    requested_bytes: 0
+                    total_exec_micros: 0
+                    total_requested_bytes: 0
+                    total_parameters: 0
+                    children {
+                      name: "DW/Initializer/random_normal/RandomStandardNormal"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/mean"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/mul"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/shape"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/stddev"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    float_ops: 0
+                    total_float_ops: 0
+                  }
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                children {
+                  name: "DW/read"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                float_ops: 0
+                total_float_ops: 0
+              }
+              children {
+                name: "zeros"
+                exec_micros: 0
+                requested_bytes: 0
+                total_exec_micros: 0
+                total_requested_bytes: 0
+                total_parameters: 0
+                float_ops: 0
+                total_float_ops: 0
+              }
+              float_ops: 0
+              total_float_ops: 0""", expected_pb)
+      self.assertEqual(expected_pb, tfprof_pb)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
new file mode 100644
index 00000000000..05b734a699f
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
@@ -0,0 +1,43 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%include "tensorflow/python/lib/core/strings.i"
+%include "tensorflow/python/platform/base.i"
+
+%{
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/core/framework/types.h"
+%}
+// Typemaps converting between Python objects and C++ string references.
+%typemap(typecheck) const string & = char *;
+%typemap(in) const string& (string temp) {
+  if (!_PyObjAs<string>($input, &temp)) return NULL;
+  $1 = &temp;
+}
+%typemap(out) const string& {
+  $result = PyString_FromStringAndSize($1->data(), $1->size());
+}
+%apply const string & {string &};
+%apply const string & {string *};
+// Ignore every symbol, then re-expose only PrintModelAnalysis and its scopes.
+%ignoreall
+
+%unignore tensorflow;
+%unignore tensorflow::tfprof;
+%unignore tensorflow::tfprof::PrintModelAnalysis;
+
+%include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
+
+%unignoreall
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
index 53dd2632b69..1f710bc970c 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@@ -24,8 +24,8 @@ import os
 import sys
 
 import tensorflow as tf
-from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2
 from tensorflow.python.framework import ops
+from tensorflow.tools.tfprof import tfprof_log_pb2
 
 TRAINABLE_VARIABLES = '_trainable_variables'
 REGISTERED_FLOP_STATS = 'flops'
@@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None):
     if node.name not in logged_ops:
       entry = tfprof_log_pb2.OpLogEntry()
       entry.name = node.name
-      entry.float_ops = stats.value
+      entry.float_ops = int(stats.value)
       logged_ops[entry.name] = entry
 
   for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0845028b5b7..92f41457a15 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1317,7 +1317,7 @@ cc_library(
         "platform/regexp.h",
     ],
     visibility = [
-        "//tensorflow/contrib/tfprof:__subpackages__",
+        "//tensorflow/tools/tfprof:__subpackages__",
     ],
     deps = [":lib_internal"],
 )
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9624f8507cc..5bcf94a735c 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1861,6 +1861,7 @@ tf_py_wrap_cc(
         "//tensorflow/c:tf_status_helper",
         "//tensorflow/core:lib",
         "//tensorflow/core/distributed_runtime:server_lib",
+        "//tensorflow/tools/tfprof/internal:print_model_analysis",
         "//util/python:python_headers",
     ] + tf_additional_lib_deps(),
 )
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/BUILD b/tensorflow/tools/tfprof/BUILD
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/BUILD
rename to tensorflow/tools/tfprof/BUILD
index da161b1ffa1..56e1fb7ae4d 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/BUILD
+++ b/tensorflow/tools/tfprof/BUILD
@@ -26,13 +26,13 @@ cc_binary(
         ":protos_all_cc",
         "//tensorflow/c:c_api",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/tools/tfprof/internal:tfprof_options",
+        "//tensorflow/tools/tfprof/internal:tfprof_stats",
+        "//tensorflow/tools/tfprof/internal:tfprof_utils",
         "@linenoise//:linenoise",
     ],
 )
diff --git a/tensorflow/tools/tfprof/README.md b/tensorflow/tools/tfprof/README.md
new file mode 100644
index 00000000000..8618abe0d5e
--- /dev/null
+++ b/tensorflow/tools/tfprof/README.md
@@ -0,0 +1,455 @@
+# tfprof: A Profiling Tool for TensorFlow Models
+
+Internal User Please Use: go/tfprof
+
+Author: Xin Pan (xpan@google.com, github: panyx0718)
+
+Consultants: Jon Shlens, Pete Warden
+
+
+## Introduction
+
+tfprof is a profiling tool for TensorFlow that analyzes model architectures
+and measures system performance.
+
+### Major Features
+
+1.  Measure model parameters, float operations, tensor shapes.
+2.  Measure op execution times, requested memory size and device placement.
+3.  Inspect checkpoint tensors' shapes and their values.
+4.  Explore model based on name scope or graph structure.
+5.  Selectively grouping/filtering/accounting/ordering ops.
+
+### Interfaces
+
+[CLI Tutorials](#cli-tutorials):
+It supports interactive mode for exploration and single-shot mode for
+scripts. Outputs can be dumped to files or printed in terminal.
+
+Python API Tutorials: Python API is not released yet.
+
+## CLI Tutorials
+
+Tutorials are based on a 32 layers ResNet.
+TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
+
+### Examples
+
+1) Start `tfprof` command line tool
+
+```shell
+# Build the tool.
+bazel build -c opt tensorflow/tools/tfprof/...
+
+# Help information, including detail 'option' instructions.
+bazel-bin/tensorflow/tools/tfprof/tfprof help
+#
+# The following commands will start tfprof interactive mode.
+#
+# Profile model shapes and parameters only.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt
+#
+# Additionally profile checkpoint statistics and values.
+# Use '-account_type_regexes _checkpoint_variables' to select
+# checkpoint tensors.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --checkpoint_path=model.ckpt
+#
+# Additionally profile ops requested memory and timing.
+# See CLI Input Files section on generating run_meta file.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --run_meta_path=run_meta \
+    --checkpoint_path=model.ckpt
+#
+# tfprof_log is used to define customized op types and float ops.
+# Use tfprof_logger.write_op_log() to create tfprof_log.
+# See 11) in Examples section on generating tfprof_log file.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --run_meta_path=run_meta \
+    --op_log_path=tfprof_log \
+    --checkpoint_path=model.ckpt
+```
+Note that `graph.pbtxt` is an ASCII text format.
+
+2) Press enter to show the default options
+
+```shell
+tfprof>
+tfprof>
+-max_depth                  4
+-min_bytes                  0
+-min_micros                 0
+-min_params                 0
+-min_float_ops              0
+-device_regexes             .*
+-order_by                   name
+-account_type_regexes       Variable
+-start_name_regexes         .*
+-trim_name_regexes
+-show_name_regexes          .*
+-hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
+-account_displayed_op_only  false
+# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
+# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
+-select                     params
+-viz                        false
+-dump_to_file
+```
+
+3) I want to see the `BatchNorm`'s gamma value in checkpoint.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
+_TFProfRoot ()
+  unit_1_0/shared_activation/init_bn/gamma ()
+[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
+  unit_1_0/sub2/bn2/gamma ()
+[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
+```
+
+4) I want to see my checkpoint tensors shape and number of parameters.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+# Increase -max_depth to see all tensors.
+tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
+_TFProfRoot (--/930.58k params)
+  global_step (0/0 params)
+  init/init_conv/DW (3x3x3x16, 432/864 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+    pool_logit/DW/Momentum (64x10, 640/640 params)
+  pool_logit/biases (10, 10/20 params)
+    pool_logit/biases/Momentum (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/128 params)
+  unit_last/final_bn/gamma (64, 64/128 params)
+  unit_last/final_bn/moving_mean (64, 64/64 params)
+  unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+5) I defined an op named ‘cost’ to calculate the loss. I want to know which of
+the ops it depends on take a long time to run. Hint: Use the ‘graph’ command to
+explore graph dependencies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/3.61sec)
+  init/init_conv/Conv2D (11.75ms/3.10sec)
+    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
+  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
+  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
+  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
+  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
+```
+
+6) I want to know the expensive operations during the back propagation.
+Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the
+‘scope’ command to explore based on name scope hierarchies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/2.29sec)
+  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
+  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
+  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
+  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
+  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
+  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
+  ...
+```
+
+7) Show the number of float operations in the model.
+Note: float operations calculation depends on
+1) op.RegisterStatistics. If an op doesn’t
+have RegisterStatistics defined, its float operations cannot be counted.
+2) fully defined shape is also necessary in order to calculate flops.
+float operations number is provided by tensorflow::tfprof::OpLog logged from
+Python API.
+
+```shell
+# Requires --graph_path, --op_log_path.
+tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
+_TFProfRoot (0/17.63b flops)
+  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
+  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
+  init/init_conv/Conv2D (113.25m/113.25m flops)
+  pool_logit/xw_plus_b (1.28k/165.12k flops)
+    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
+  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
+  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
+  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
+  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
+  ...
+```
+
+8) Show the number of parameters of all `tf.trainable_variables()` in the model.
+
+```shell
+# Requires --graph_path --op_log_path.
+# store option for future commands.
+tfprof> set -account_type_regexes _trainable_variables
+tfprof> scope -max_depth 4 -select params
+_TFProfRoot (--/464.15k params)
+  init/init_conv/DW (3x3x3x16, 432/432 params)
+  pool_logit/DW (64x10, 640/640 params)
+  pool_logit/biases (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/64 params)
+  unit_last/final_bn/gamma (64, 64/64 params)
+```
+
+Where does “_trainable_variables” come from? It is from the OpLog file
+generated by the write_op_log() Python API. write_op_log() helps users create
+some common op types implicitly. Users can define their own op types and log
+them through the write_op_log() API.
+
+9) What if I’m lazy and don’t want to define op types? I have given my ops
+well-defined names in my model’s code, and I want to use those names to select
+a group of ops. Let’s try it!
+
+```shell
+tfprof> set -account_type_regexes .*
+tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
+_TFProfRoot (0/18.43k params)
+  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
+  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
+```
+
+The above command allows you to filter ops that match specific names.
+`-account_displayed_op_only` asks tfprof to only account ops displayed
+in terminal. Otherwise, tfprof accounts all ops matched by
+`-account_type_regexes` recursively even if they are hidden due to some
+options such as -max_depth.
+
+10) TensorFlow has built-in op types. For example, built-in op type `Variable`
+seems to include `Variable's` created by your model. However, be careful when
+depending on it because TensorFlow creates extra `Variable` ops implicitly and
+the implicitly created ops can have the same prefix as the `Variable's` you
+defined.
+
+In the following example, extra `Variables` are created and “/Momentum” is
+appended to their names. This might cause your “model capacity” calculation
+to be wrong.
+
+```shell
+tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
+_TFProfRoot (--/930.58k params)
+  global_step (1/1 params)
+  init/init_conv/DW (3x3x3x16, 432/864 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+    pool_logit/DW/Momentum (64x10, 640/640 params)
+  pool_logit/biases (10, 10/20 params)
+    pool_logit/biases/Momentum (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/128 params)
+  unit_last/final_bn/gamma (64, 64/128 params)
+  unit_last/final_bn/moving_mean (64, 64/64 params)
+  unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+
+11) An example of defining an extra op type for ops using `OpLog`
+
+First, in Python code, create an `OpLog` proto and add op type
+information to it:
+
+```python
+
+op_log = tfprof_log_pb2.OpLog()
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/DW'
+entry.types.append('pool_logit')
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/biases'
+# Alternatively:
+# var = tf.get_variable(xxx)
+# entry.name = var.op.name
+entry.types.append('pool_logit')
+```
+
+Second, call write_op_log to write the OpLog proto.
+
+```python
+tf.contrib.tfprof.tfprof_logger.write_op_log(
+    sess.graph, '/tmp/my_op_log_dir', op_log)
+```
+
+Third, when starting the tfprof tool, specify
+"--op_log_path /tmp/my_op_log_dir/op_log"
+
+```shell
+tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
+_TFProfRoot (--/650 params)
+  pool_logit/DW (64x10, 640/640 params)
+  pool_logit/biases (10, 10/10 params)
+```
+
+Note that when you call
+`tf.contrib.tfprof.tfprof_logger.write_op_log(...)`,
+the tool adds all `Variables` inside `tf.trainable_variables()` to
+`_trainable_variables`.
+
+12) Run tfprof in one-shot mode and dump result to file.
+
+```shell
+# Printed to stdout if --dump_to_file is not set.
+tfprof scope --graph_path=graph.pbtxt  \
+             --max_depth=3 \
+             --dump_to_file="/tmp/dump"
+Reading Files...
+Parsing GraphDef...
+Preparing Views...
+
+cat /tmp/dump
+_TFProfRoot (--/930.58k params)
+  global_step (0/0 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+  pool_logit/biases (10, 10/20 params)
+```
+
+13) Analyze how balanced Variables are on parameter servers.
+
+In this tutorial, I'm going to use a seq2seq model, which is split
+across several gpus at workers and several parameter servers.
+
+In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
+gpu0, they share an op_type called 'gpu0'.
+
+```shell
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+  --graph_path ~/tfprof/textsum/graph.pbtxt  \
+  --run_meta_path ~/tfprof/textsum/run_meta
+
+# Looks like ps task 1 is holding more than twice as many parameters as task 0.
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
+_TFProfRoot (--/25.81m params)
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
+_TFProfRoot (--/58.84m params)
+```
+
+### CLI Input Files
+
+The tfprof command line interface (CLI) loads dumped files from a TensorFlow
+model and converts them into in-memory data structures. To use it, users need
+to specify the locations of the dumped files. The following are the dumped
+files loaded by tfprof:
+
+<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
+representation of the model. For example, graph.pbtxt written by tf.Supervisor
+is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
+using tf.Graph.as_graph_def() or other API.
+
+<b>--run_meta_path:</b> tensorflow::RunMetadata.
+Used to get the memory and time consumption of
+each op of the model. Users need to enable it. For example, the following code
+snippet writes a RunMetadata file:
+
+```python
+run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
+run_metadata = config_pb2.RunMetadata()
+# Once in a while, call it to get the RunMeta.
+_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
+with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
+  f.write(run_metadata.SerializeToString())
+```
+
+<b>--op_log_path:</b>
+tensorflow::tfprof::OpLog. A proto used to provide extra op information
+for ops. By giving a group of ops a type name, users can easily aggregate the
+statistics for those ops without accidentally missing or including extra ops.
+tfprof exposes the following Python API to add op information and logging.
+
+```python
+tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
+```
+
+<b>--checkpoint_path:</b>
+TensorFlow checkpoint. It defines _checkpoint_variables op type. It also
+provides checkpointed tensors' values.
+
+
+## Design
+
+
+### In-memory representation
+
+<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
+similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
+For example op1 with name “name1/name2” is a child of op2 with name “name1”.
+
+<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
+a graph structure. The graph is a “directed acyclic graph” (hopefully), with
+direction from “output to input”. The direction is designed this way so that
+users can trace from “result” to its “sources”.
+
+### Command line options
+
+tfprof’s major goals are to measure system performance and quickly analyze
+model architectures. Hence, its commands and options should allow users to achieve
+these 2 goals easily.
+
+<b>graph:</b> It is expected that users will mostly use graph representation to
+debug system performance. Hence, tfprof supports graph command, which pulls the
+graph in-memory representation described above.
+
+<b>scope:</b> It is expected that some users might want to explore their model
+statistics using the name scope information they defined in the Python code.
+Hence, tfprof supports “scope” command, which pulls the tree in-memory
+representation.
+
+<b>set:</b> It is used to store the options so that users don’t need to
+re-type the same options again and again in follow-up commands. Note that
+tfprof has traditional terminal’s history and auto-complete support.
+
+<b>help:</b> print help information.
+
+<b>Options:</b> Run “tfprof help” to get detailed explanations.
+
+```python
+"-max_depth",
+"-min_bytes",
+"-min_micros",
+"-min_params",
+"-min_float_ops",
+"-order_by",
+"-account_type_regexes",
+"-start_name_regexes",
+"-trim_name_regexes",
+"-show_name_regexes",
+"-hide_name_regexes",
+"-account_displayed_op_only",
+"-select",
+"-viz",  # Only supported for graph command.
+"-dump_to_file",
+```
+
+A key design is that stats are aggregated from descendants up to ancestors.
+`-account_type_regexes` is used to decide which ops’ stats are accounted. It
+makes the decision based on op type. Usually set it to `.*` if no extra type
+information is added to the ops using OpLog. Intuitively, only accounted ops are
+displayed. `-min/max` and `-show/hide/trim/start` options are only used to
+optionally display or hide ops based on ops’ names and stats. However, they don’t
+prevent tfprof from accounting stats of hidden ops. Hence, the stats of an op can
+be aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
+an option to break this rule. When it is set, only displayed ops are accounted.
+
+Regexes are all comma-separated, for example `-show_name_regexes`
+`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
+is not expected to show up in op names.
+
+`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
+(notice the indent printed) are sorted according to order_by.
+
+## Future Work
+
+* Load SummaryWriter event logs so that it can show the latest summary value.
+
+* Better sorting and aggregation of outputs. Easier comprehension.
+
+* Currently, shape information is based on `graph.pbtxt`. When the shape
+information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
+and `Checkpoint` to complete shape information.
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD b/tensorflow/tools/tfprof/internal/BUILD
similarity index 86%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
rename to tensorflow/tools/tfprof/internal/BUILD
index 7a4b4c0c98f..7476a5ad412 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
+++ b/tensorflow/tools/tfprof/internal/BUILD
@@ -1,5 +1,9 @@
 package(
     default_visibility = ["//tensorflow:__subpackages__"],
+    features = [
+        "-layering_check",
+        "-parse_headers",
+    ],
 )
 
 licenses(["notice"])  # Apache 2.0
@@ -18,10 +22,10 @@ cc_library(
         ":tfprof_show",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -49,11 +53,11 @@ cc_library(
         ":tfprof_utils",
         "//tensorflow/c:c_api",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -69,10 +73,10 @@ cc_library(
         ":tfprof_tensor",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -87,10 +91,10 @@ cc_library(
         ":tfprof_tensor",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -109,12 +113,12 @@ tf_cc_test(
         ":tfprof_stats",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -138,6 +142,18 @@ cc_library(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "print_model_analysis_hdr",
+    hdrs = [
+        "print_model_analysis.h",
+    ],
+    deps = [
+        "//tensorflow/core:framework_lite",
+        "//tensorflow/core:protos_all_cc",
     ],
 )
 
@@ -149,10 +165,11 @@ cc_library(
         ":tfprof_options",
         ":tfprof_stats",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
+    alwayslink = 1,
 )
 
 tf_cc_test(
@@ -170,12 +187,12 @@ tf_cc_test(
         ":tfprof_stats",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -185,9 +202,9 @@ cc_library(
     hdrs = ["tfprof_tensor.h"],
     copts = ["-Wno-sign-compare"],
     deps = [
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -203,12 +220,12 @@ tf_cc_test(
         ":tfprof_stats",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc b/tensorflow/tools/tfprof/internal/print_model_analysis.cc
similarity index 73%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
rename to tensorflow/tools/tfprof/internal/print_model_analysis.cc
index ab1e47b32dd..dfe4019fbb4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
+++ b/tensorflow/tools/tfprof/internal/print_model_analysis.cc
@@ -13,20 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
 
 #include <stdio.h>
 #include <memory>
 #include <utility>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                           const string* op_log, const string* command,
-                          const Options* options) {
+                          const string* options) {
   CHECK(graph) << "graph mustn't be null";
   CHECK(command) << "command mustn't be null";
   CHECK(options) << "options mustn't be null";
@@ -50,16 +56,18 @@ string PrintModelAnalysis(const string* graph, const string* run_meta,
   TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
                    std::move(op_log_ptr), std::move(ckpt_reader));
 
-  if (options->dump_to_file.empty()) {
+  Options opts = Options::FromProtoStr(*options);
+
+  if (opts.dump_to_file.empty()) {
     printf("\n=========================Options=============================\n");
-    printf("%s", options->ToString().c_str());
+    printf("%s", opts.ToString().c_str());
     printf("\n==================Model Analysis Report======================\n");
-    TFProfNode root(tf_stats.PrintGraph(*command, *options));
+    TFProfNode root(tf_stats.PrintGraph(*command, opts));
     printf("\n======================End of Report==========================\n");
     fflush(stdout);
     return root.SerializeAsString();
   }
-  return tf_stats.PrintGraph(*command, *options).SerializeAsString();
+  return tf_stats.PrintGraph(*command, opts).SerializeAsString();
 }
 }  // namespace tfprof
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h b/tensorflow/tools/tfprof/internal/print_model_analysis.h
similarity index 62%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
rename to tensorflow/tools/tfprof/internal/print_model_analysis.h
index 579147f1641..071ac7102ca 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
+++ b/tensorflow/tools/tfprof/internal/print_model_analysis.h
@@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
 
 #include <string>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/framework/types.h"
 
 namespace tensorflow {
 namespace tfprof {
-
-// ***This API is only for swig.***
+class Options;
+// ***This API is only for swig. Don't use it directly!***
 //
 // Interface defined for Python API swig. Calls the tfprof core API.
 // 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
@@ -37,9 +32,9 @@ namespace tfprof {
 // if not available.
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                           const string* op_log, const string* command,
-                          const Options* options);
+                          const string* options);
 
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt b/tensorflow/tools/tfprof/internal/testdata/ckpt
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
rename to tensorflow/tools/tfprof/internal/testdata/ckpt
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt b/tensorflow/tools/tfprof/internal/testdata/graph.pbtxt
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
rename to tensorflow/tools/tfprof/internal/testdata/graph.pbtxt
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta b/tensorflow/tools/tfprof/internal/testdata/run_meta
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
rename to tensorflow/tools/tfprof/internal/testdata/run_meta
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log b/tensorflow/tools/tfprof/internal/testdata/tfprof_log
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
rename to tensorflow/tools/tfprof/internal/testdata/tfprof_log
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h b/tensorflow/tools/tfprof/internal/tfprof_constants.h
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
rename to tensorflow/tools/tfprof/internal/tfprof_constants.h
index 169ebae4a75..e495128728b 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_constants.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
 
 namespace tensorflow {
 namespace tfprof {
@@ -34,4 +34,4 @@ static const char* const kCkptVarType = "_checkpoint_variables";
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc b/tensorflow/tools/tfprof/internal/tfprof_graph.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
rename to tensorflow/tools/tfprof/internal/tfprof_graph.cc
index 287fd78d46c..469b258f98b 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_graph.cc
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
 
 #include <stdio.h>
 #include <utility>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/regexp.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 
 namespace tensorflow {
 namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h b/tensorflow/tools/tfprof/internal/tfprof_graph.h
similarity index 85%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
rename to tensorflow/tools/tfprof/internal/tfprof_graph.h
index ee54534f56b..b16f80b33db 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_graph.h
@@ -16,8 +16,8 @@ limitations under the License.
 // Build a graph structure based on op inputs/outputs. The graph is a directed
 // acyclic graph pointing *from outputs to inputs*.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
 
 #include <deque>
 #include <map>
@@ -27,13 +27,13 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -113,4 +113,4 @@ class TFGraph : public TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc b/tensorflow/tools/tfprof/internal/tfprof_node.cc
similarity index 95%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
rename to tensorflow/tools/tfprof/internal/tfprof_node.cc
index 0e8ab366cbb..0e77439231d 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_node.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/tensor_description.pb.h"
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h b/tensorflow/tools/tfprof/internal/tfprof_node.h
similarity index 90%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
rename to tensorflow/tools/tfprof/internal/tfprof_node.h
index c8a8f5e7ec4..6ffb85506cc 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_node.h
@@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
 
 #include <map>
 #include <set>
 #include <string>
 #include <vector>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_description.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -103,4 +103,4 @@ class TFNode {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc b/tensorflow/tools/tfprof/internal/tfprof_options.cc
similarity index 57%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
rename to tensorflow/tools/tfprof/internal/tfprof_options.cc
index 2574415fdd4..03282533ffd 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_options.cc
@@ -13,13 +13,41 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/tools/tfprof/tfprof_options.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 
+Options Options::FromProtoStr(const string& opts_proto_str) {
+  OptionsProto opts_pb;
+  CHECK(opts_pb.ParseFromString(opts_proto_str));
+  Options opts(
+      opts_pb.max_depth(), opts_pb.min_bytes(), opts_pb.min_micros(),
+      opts_pb.min_params(), opts_pb.min_float_ops(),
+      std::vector<string>(opts_pb.device_regexes().begin(),
+                          opts_pb.device_regexes().end()),
+      opts_pb.order_by(),
+      std::vector<string>(opts_pb.account_type_regexes().begin(),
+                          opts_pb.account_type_regexes().end()),
+      std::vector<string>(opts_pb.start_name_regexes().begin(),
+                          opts_pb.start_name_regexes().end()),
+      std::vector<string>(opts_pb.trim_name_regexes().begin(),
+                          opts_pb.trim_name_regexes().end()),
+      std::vector<string>(opts_pb.show_name_regexes().begin(),
+                          opts_pb.show_name_regexes().end()),
+      std::vector<string>(opts_pb.hide_name_regexes().begin(),
+                          opts_pb.hide_name_regexes().end()),
+      opts_pb.account_displayed_op_only(),
+      std::vector<string>(opts_pb.select().begin(), opts_pb.select().end()),
+      opts_pb.viz(), opts_pb.dump_to_file());
+  return opts;
+}
+
 string Options::ToString() const {
   const string s = strings::Printf(
       "%-28s%d\n"
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h b/tensorflow/tools/tfprof/internal/tfprof_options.h
similarity index 90%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
rename to tensorflow/tools/tfprof/internal/tfprof_options.h
index a0c52e6d1af..a5b55e77fac 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_options.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
 
 #include <set>
 #include <string>
@@ -22,8 +22,6 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -62,6 +60,8 @@ static const char* const kCmds[] = {
 
 struct Options {
  public:
+  static Options FromProtoStr(const string& opts_proto_str);
+
   virtual ~Options() {}
   Options(int max_depth, tensorflow::int64 min_bytes,
           tensorflow::int64 min_micros, tensorflow::int64 min_params,
@@ -116,4 +116,4 @@ struct Options {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc b/tensorflow/tools/tfprof/internal/tfprof_scope.cc
similarity index 96%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
rename to tensorflow/tools/tfprof/internal/tfprof_scope.cc
index 6b2bc298ccb..949d2d54e42 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_scope.cc
@@ -13,17 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
 
 #include <stdio.h>
 #include <utility>
 
 #include "tensorflow/c/c_api.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/regexp.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 
 namespace tensorflow {
 namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h b/tensorflow/tools/tfprof/internal/tfprof_scope.h
similarity index 80%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
rename to tensorflow/tools/tfprof/internal/tfprof_scope.h
index 3a8ca52b43c..a7c58920a24 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_scope.h
@@ -17,8 +17,8 @@ limitations under the License.
 // For example, 'name1/name2' is a child of 'name1'.
 // Stats are aggregated from descendants from ancestors.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
 
 #include <map>
 #include <memory>
@@ -26,13 +26,13 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -85,4 +85,4 @@ class TFScope : public TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc b/tensorflow/tools/tfprof/internal/tfprof_show.cc
similarity index 99%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
rename to tensorflow/tools/tfprof/internal/tfprof_show.cc
index f7275d8ae4d..a8f1ac6ae94 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_show.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
 
 #include <memory>
 #include <set>
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h b/tensorflow/tools/tfprof/internal/tfprof_show.h
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
rename to tensorflow/tools/tfprof/internal/tfprof_show.h
index 4b5d6592e5a..a17358bb6b4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_show.h
@@ -15,23 +15,23 @@ limitations under the License.
 
 // Parent class and utilities for tfprof_graph and tfprof_scope.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
 
 #include <algorithm>
 #include <string>
 #include <vector>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -124,4 +124,4 @@ class TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
similarity index 76%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
rename to tensorflow/tools/tfprof/internal/tfprof_show_test.cc
index 81396e31cca..15794727649 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
@@ -13,30 +13,30 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 
 #include <utility>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 class TFProfShowTest : public ::testing::Test {
  protected:
   TFProfShowTest() {
-    string graph_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    string graph_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
     std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
     TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
 
@@ -44,19 +44,18 @@ class TFProfShowTest : public ::testing::Test {
         new tensorflow::RunMetadata());
     string run_meta_path =
         io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+                     "tools/tfprof/internal/testdata/run_meta");
     TF_CHECK_OK(
         ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
 
     std::unique_ptr<OpLog> op_log_pb(new OpLog());
-    string op_log_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    string op_log_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/tfprof_log");
     TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
 
-    string ckpt_path =
-        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
     TF_Status* status = TF_NewStatus();
     std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
         new checkpoint::CheckpointReader(ckpt_path, status));
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc b/tensorflow/tools/tfprof/internal/tfprof_stats.cc
similarity index 98%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
rename to tensorflow/tools/tfprof/internal/tfprof_stats.cc
index 54fce4772bd..4bb3a07eafa 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 
 #include <stdio.h>
 #include <utility>
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h b/tensorflow/tools/tfprof/internal/tfprof_stats.h
similarity index 74%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
rename to tensorflow/tools/tfprof/internal/tfprof_stats.h
index 1246a2fae2f..3a8b46ae315 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats.h
@@ -20,8 +20,8 @@ limitations under the License.
 // 3. Accept command and options to selectively aggregate stats for analysis
 //    and print out the results.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
 
 #include <map>
 #include <memory>
@@ -29,20 +29,20 @@ limitations under the License.
 #include <string>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -79,4 +79,4 @@ class TFStats {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
similarity index 89%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
rename to tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
index 06b288fdce7..a6fcadbe95a 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
@@ -13,31 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 
 #include <utility>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 class TFProfStatsTest : public ::testing::Test {
  protected:
   TFProfStatsTest() {
-    string graph_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    string graph_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
     std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
     TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
 
@@ -45,19 +45,18 @@ class TFProfStatsTest : public ::testing::Test {
         new tensorflow::RunMetadata());
     string run_meta_path =
         io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+                     "tools/tfprof/internal/testdata/run_meta");
     TF_CHECK_OK(
         ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
 
     std::unique_ptr<OpLog> op_log_pb(new OpLog());
-    string op_log_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    string op_log_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/tfprof_log");
     TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
 
-    string ckpt_path =
-        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
     TF_Status* status = TF_NewStatus();
     std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
         new checkpoint::CheckpointReader(ckpt_path, status));
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc b/tensorflow/tools/tfprof/internal/tfprof_tensor.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
rename to tensorflow/tools/tfprof/internal/tfprof_tensor.cc
index c21626919fa..297258fee11 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 
 namespace tensorflow {
 namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h b/tensorflow/tools/tfprof/internal/tfprof_tensor.h
similarity index 92%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
rename to tensorflow/tools/tfprof/internal/tfprof_tensor.h
index 471a1db4172..4f6fffd6504 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor.h
@@ -19,16 +19,16 @@ limitations under the License.
 //    is not supported by TensorFlow CheckPointReader library, though it is
 //    supported in current code.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
 
 #include <typeinfo>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -117,4 +117,4 @@ class TFProfTensor {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
similarity index 96%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
rename to tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
index d3f1e3c7b70..1066e6208a4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
@@ -14,34 +14,33 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 class TFProfTensorTest : public ::testing::Test {
  protected:
   TFProfTensorTest() {
-    string graph_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    string graph_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
     std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
     TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
 
     std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
     std::unique_ptr<OpLog> op_log_pb;
 
-    string ckpt_path =
-        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
     TF_Status* status = TF_NewStatus();
     std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
         new checkpoint::CheckpointReader(ckpt_path, status));
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc b/tensorflow/tools/tfprof/internal/tfprof_utils.cc
similarity index 99%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
rename to tensorflow/tools/tfprof/internal/tfprof_utils.cc
index 7610729a118..5783b9f4759 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_utils.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 
 #include <stdio.h>
 #include <algorithm>
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h b/tensorflow/tools/tfprof/internal/tfprof_utils.h
similarity index 81%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
rename to tensorflow/tools/tfprof/internal/tfprof_utils.h
index 6c1bba04fc2..13077a8fc5c 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_utils.h
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
 
 #include <string>
 #include <vector>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -47,4 +47,4 @@ void PrintHelp();
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto b/tensorflow/tools/tfprof/tfprof_log.proto
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
rename to tensorflow/tools/tfprof/tfprof_log.proto
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc b/tensorflow/tools/tfprof/tfprof_main.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
rename to tensorflow/tools/tfprof/tfprof_main.cc
index 38b1588d72d..f72797f0a23 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
+++ b/tensorflow/tools/tfprof/tfprof_main.cc
@@ -24,10 +24,6 @@ limitations under the License.
 #include "linenoise.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -36,6 +32,10 @@ limitations under the License.
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 
 using tensorflow::str_util::Split;
 
diff --git a/tensorflow/tools/tfprof/tfprof_options.proto b/tensorflow/tools/tfprof/tfprof_options.proto
new file mode 100644
index 00000000000..0d8e6880390
--- /dev/null
+++ b/tensorflow/tools/tfprof/tfprof_options.proto
@@ -0,0 +1,24 @@
+syntax = "proto2";
+
+package tensorflow.tfprof;
+
+// Refer to tfprof_options.h/cc for documentation.
+// Only used to pass tfprof options from Python to C++.
+message OptionsProto {
+  optional int64 max_depth = 1;
+  optional int64 min_bytes = 2;
+  optional int64 min_micros = 3;  // presumably microseconds -- TODO confirm
+  optional int64 min_params = 4;
+  optional int64 min_float_ops = 5;
+  repeated string device_regexes = 6;
+  optional string order_by = 7;
+  repeated string account_type_regexes = 8;
+  repeated string start_name_regexes = 9;
+  repeated string trim_name_regexes = 10;
+  repeated string show_name_regexes = 11;
+  repeated string hide_name_regexes = 12;
+  optional bool account_displayed_op_only = 13;
+  repeated string select = 14;
+  optional bool viz = 15;
+  optional string dump_to_file = 16;  // presumably a file path -- TODO confirm
+}
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto b/tensorflow/tools/tfprof/tfprof_output.proto
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
rename to tensorflow/tools/tfprof/tfprof_output.proto