diff --git a/linenoise.BUILD b/linenoise.BUILD
new file mode 100644
index 00000000000..9924a620f24
--- /dev/null
+++ b/linenoise.BUILD
@@ -0,0 +1,13 @@
+licenses(["notice"])  # 2-clause BSD
+
+exports_files(["LICENSE"])
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "linenoise",
+    srcs = ["linenoise.c"],
+    hdrs = ["linenoise.h"],
+)
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 284be34a784..b70337bb917 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -124,6 +124,9 @@ filegroup(
         "//tensorflow/contrib/tensor_forest/hybrid:all_files",
         "//tensorflow/contrib/tensorboard:all_files",
         "//tensorflow/contrib/testing:all_files",
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
+        "//tensorflow/contrib/tfprof/tools/tfprof:all_files",
+        "//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
         "//tensorflow/contrib/training:all_files",
         "//tensorflow/contrib/util:all_files",
         "//tensorflow/core:all_files",
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
new file mode 100644
index 00000000000..0e6420134a5
--- /dev/null
+++ b/tensorflow/contrib/tfprof/README.md
@@ -0,0 +1,453 @@
+# tfprof: A Profiling Tool for TensorFlow Models
+
+go/tfprof
+
+Author: Xin Pan (xpan@google.com, github: panyx0718)
+
+Consultants: Jon Shlens (shlens@google.com), Pete Warden (petewarden@google.com)
+
+[TOC]
+
+## Introduction
+
+tfprof is a profiling tool for TensorFlow that analyzes model architectures
+and measures system performance.
+
+### Major Features
+
+1.  Measure model parameters, float operations, tensor shapes.
+2.  Measure op execution times, requested memory size and device placement.
+3.  Inspect checkpoint tensors' shapes and their values.
+4.  Explore model based on name scope or graph structure.
+5.  Selectively grouping/filtering/accounting/ordering ops.
+
+### Interfaces
+
+[CLI Tutorials](#cli-tutorials):
+It supports interactive mode for exploration and single-shot mode for
+scripts. Outputs can be dumped to files or printed in terminal.
+
+Python API Tutorials: Python API is not released yet.
+
+## CLI Tutorials
+
+Tutorials are based on a 32-layer ResNet.
+TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
+
+### Examples
+
+1) Start `tfprof` command line tool
+
+```shell
+# Build the tool.
+bazel build -c opt tensorflow/contrib/tfprof/...
+
+# Help information, including detail 'option' instructions.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
+#
+# The following commands will start tfprof interactive mode.
+#
+# Profile model shapes and parameters only.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt
+#
+# Additionally profile checkpoint statistics and values.
+# Use '-account_type_regexes _checkpoint_variables' to select
+# checkpoint tensors.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --checkpoint_path=model.ckpt
+#
+# Additionally profile ops requested memory and timing.
+# See CLI Input Files section on generating run_meta file.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --run_meta_path=run_meta \
+    --checkpoint_path=model.ckpt
+#
+# tfprof_log is used to define customized op types and float ops.
+# Use tfprof_logger.write_op_log() to create tfprof_log.
+# See 11) in Examples section on generating tfprof_log file.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --run_meta_path=run_meta \
+    --op_log_path=tfprof_log \
+    --checkpoint_path=model.ckpt
+```
+Note that `graph.pbtxt` is an ASCII text format.
+
+2) Press enter to show the default options
+
+```shell
+tfprof>
+tfprof>
+-max_depth                  4
+-min_bytes                  0
+-min_micros                 0
+-min_params                 0
+-min_float_ops              0
+-device_regexes             .*
+-order_by                   name
+-account_type_regexes       Variable
+-start_name_regexes         .*
+-trim_name_regexes
+-show_name_regexes          .*
+-hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
+-account_displayed_op_only  false
+# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
+# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
+-select                     params
+-viz                        false
+-dump_to_file
+```
+
+3) I want to see the `BatchNorm`'s gamma value in checkpoint.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
+_TFProfRoot ()
+  unit_1_0/shared_activation/init_bn/gamma ()
+[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
+  unit_1_0/sub2/bn2/gamma ()
+[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
+```
+
+4) I want to see my checkpoint tensors shape and number of parameters.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+# Increase -max_depth to see all tensors.
+tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
+_TFProfRoot (--/930.58k params)
+  global_step (0/0 params)
+  init/init_conv/DW (3x3x3x16, 432/864 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+    pool_logit/DW/Momentum (64x10, 640/640 params)
+  pool_logit/biases (10, 10/20 params)
+    pool_logit/biases/Momentum (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/128 params)
+  unit_last/final_bn/gamma (64, 64/128 params)
+  unit_last/final_bn/moving_mean (64, 64/64 params)
+  unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+5) I defined an op named ‘cost’ to calculate the loss. I want to know which of
+the ops it depends on take a long time to run. Hint: Use the ‘graph’ command to
+explore graph dependencies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/3.61sec)
+  init/init_conv/Conv2D (11.75ms/3.10sec)
+    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
+  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
+  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
+  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
+  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
+```
+
+6) I want to know the expensive operations during the back propagation.
+Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the
+‘scope’ command to explore based on name scope hierarchies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/2.29sec)
+  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
+  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
+  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
+  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
+  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
+  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
+  ...
+```
+
+7) Show the number of float operations in the model.
+Note: float operations calculation depends on
+1) op.RegisterStatistics. If an op doesn’t
+have RegisterStatistics defined, its float operations cannot be counted.
+2) fully defined shape is also necessary in order to calculate flops.
+float operations number is provided by tensorflow::tfprof::OpLog logged from
+Python API.
+
+```shell
+# Requires --graph_path, --op_log_path.
+tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
+_TFProfRoot (0/17.63b flops)
+  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
+  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
+  init/init_conv/Conv2D (113.25m/113.25m flops)
+  pool_logit/xw_plus_b (1.28k/165.12k flops)
+    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
+  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
+  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
+  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
+  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
+  ...
+```
+
+8) Show the number of parameters of all `tf.trainable_variables()` in the model.
+
+```shell
+# Requires --graph_path --op_log_path.
+# store option for future commands.
+tfprof> set -account_type_regexes _trainable_variables
+tfprof> scope -max_depth 4 -select params
+_TFProfRoot (--/464.15k params)
+  init/init_conv/DW (3x3x3x16, 432/432 params)
+  pool_logit/DW (64x10, 640/640 params)
+  pool_logit/biases (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/64 params)
+  unit_last/final_bn/gamma (64, 64/64 params)
+```
+
+Where does “_trainable_variables” come from? It is from the OpLog file
+generated by the write_op_log() Python API. write_op_log() helps users create
+some common op types implicitly. Users can define their own op types and log
+them through the write_op_log() API.
+
+9) What if I’m lazy and don’t want to define op type? I have given my ops
+well-defined names in my model’s code. And want to use names to select a group
+of ops. Let’s try it!
+
+```shell
+tfprof> set -account_type_regexes .*
+tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
+_TFProfRoot (0/18.43k params)
+  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
+  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
+```
+
+The above command allows you to filter ops that match specific names.
+`-account_displayed_op_only` asks tfprof to only account ops displayed
+in terminal. Otherwise, tfprof accounts all ops matched by
+`-account_type_regexes` recursively even if they are hidden due to some
+options such as -max_depth.
+
+10) TensorFlow has built-in op types. For example, built-in op type `Variable`
+seems to include the `Variable`s created by your model. However, be careful
+when depending on it because TensorFlow creates extra `Variable` ops implicitly
+and the implicitly created ops can have the same prefix as the `Variable`s you
+defined.
+
+In the following example, extra `Variables` are created and “/Momentum” is
+appended to their names. This might cause your “model capacity” calculation
+to be wrong.
+
+```shell
+tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
+_TFProfRoot (--/930.58k params)
+  global_step (1/1 params)
+  init/init_conv/DW (3x3x3x16, 432/864 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+    pool_logit/DW/Momentum (64x10, 640/640 params)
+  pool_logit/biases (10, 10/20 params)
+    pool_logit/biases/Momentum (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/128 params)
+  unit_last/final_bn/gamma (64, 64/128 params)
+  unit_last/final_bn/moving_mean (64, 64/64 params)
+  unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+
+11) An example of defining extra op types for ops using `OpLog`
+
+First, in Python code, create an `OpLog` proto and add op type
+information to it:
+
+```python
+op_log = tfprof_log_pb2.OpLog()
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/DW'
+entry.types.append('pool_logit')
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/biases'
+# Alternatively:
+# var = tf.get_variable(xxx)
+# entry.name = var.op.name
+entry.types.append('pool_logit')
+```
+
+Second, call write_op_log to write the OpLog proto.
+
+```python
+tfprof_logger.write_op_log(sess.graph, '/tmp/my_op_log_dir', op_log)
+```
+
+Third, when starting the tfprof tool, specify
+"--op_log_path /tmp/my_op_log_dir/tfprof_log"
+
+```shell
+tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
+_TFProfRoot (--/650 params)
+  pool_logit/DW (64x10, 640/640 params)
+  pool_logit/biases (10, 10/10 params)
+```
+
+Note that when you call
+`tfprof_logger.write_op_log(...)`, the tool adds all `Variables` inside
+`tf.trainable_variables()` to `_trainable_variables`.
+
+12) Run tfprof in one-shot mode and dump result to file.
+
+```shell
+# Printed to stdout if --dump_to_file is not set.
+tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt  \
+             --max_depth 3 \
+             --dump_to_file "/tmp/dump"
+Reading Files...
+Parsing GraphDef...
+Preparing Views...
+
+cat /tmp/dump
+_TFProfRoot (--/930.58k params)
+  global_step (0/0 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+  pool_logit/biases (10, 10/20 params)
+```
+
+13) Analyze how balanced Variables are on parameter servers.
+
+In this tutorial, I'm going to use a seq2seq model, which is split
+across several GPUs at workers and several parameter servers.
+
+In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
+gpu0, they share an op_type called 'gpu0'.
+
+```shell
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+  --graph_path ~/tfprof/textsum/graph.pbtxt  \
+  --run_meta_path ~/tfprof/textsum/run_meta
+
+# Looks like ps task 1 is holding more than twice as many parameters as task 0.
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
+_TFProfRoot (--/25.81m params)
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
+_TFProfRoot (--/58.84m params)
+```
+
+### CLI Input Files
+
+The tfprof command line interface (CLI) loads dumped files from a TensorFlow
+model and converts them into in-memory data structures. To use it, users need
+to specify the locations of the dumped files. The following are the dumped
+files loaded by tfprof:
+
+<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
+representation of the model. For example, graph.pbtxt written by tf.Supervisor
+is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
+using tf.Graph.as_graph_def() or other API.
+
+<b>--run_meta_path:</b> tensorflow::RunMetadata.
+Used to get the memory and time consumption of
+each op of the model. Users need to enable it. For example, the following code
+snippet writes a RunMetadata file:
+
+```python
+run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
+run_metadata = config_pb2.RunMetadata()
+# Once in a while, call it to get the RunMeta.
+_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
+with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
+  f.write(run_metadata.SerializeToString())
+```
+
+<b>--op_log_path:</b>
+tensorflow::tfprof::OpLog. A proto used to provide extra op information
+for ops. By giving a group of ops a type name, users can easily aggregate the
+statistics for those ops without accidentally missing or including extra ops.
+tfprof exposes the following Python API to add op information and logging.
+
+```python
+  def write_op_log(graph, log_dir, op_log=None)
+```
+
+<b>--checkpoint_path:</b>
+TensorFlow checkpoint. It defines the _checkpoint_variables op type. It also
+provides checkpointed tensors' values.
+
+
+## Design
+
+
+### In-memory representation
+
+<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
+similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
+For example op1 with name “name1/name2” is a child of op2 with name “name1”.
+
+<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
+a graph structure. The graph is a “directed acyclic graph” (hopefully), with
+direction from “output to input”. The direction is designed this way so that
+users can trace from “result” to its “sources”.
+
+### Command line options
+
+tfprof’s major goals are to measure system performance and quickly analyze
+model architectures. Hence, its commands and options should allow users to achieve
+these 2 goals easily.
+
+<b>graph:</b> It is expected that users will mostly use graph representation to
+debug system performance. Hence, tfprof supports graph command, which pulls the
+graph in-memory representation described above.
+
+<b>scope:</b> It is expected that some users might want to explore their model
+statistics using the name scope information they defined in the Python codes.
+Hence, tfprof supports “scope” command, which pulls the tree in-memory
+representation.
+
+<b>set:</b> It is used to store the options so that users don’t need to
+re-type the same options again and again in follow-up command lines. Note that
+tfprof has traditional terminal’s history and auto-complete support.
+
+<b>help:</b> print help information.
+
+<b>Options:</b> Run “tfprof help” to get detailed explanations.
+
+```python
+"-max_depth",
+"-min_bytes",
+"-min_micros",
+"-min_params",
+"-min_float_ops",
+"-order_by",
+"-account_type_regexes",
+"-start_name_regexes",
+"-trim_name_regexes",
+"-show_name_regexes",
+"-hide_name_regexes",
+"-account_displayed_op_only",
+"-select",
+"-viz",  # Only supported for graph command.
+"-dump_to_file",
+```
+
+A key design is that stats are aggregated from descendants up to ancestors.
+`-account_type_regexes` is used to decide which ops’ stats are accounted. It makes
+the decision based on op type. Usually set it to `.*` if no extra type information
+is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
+`-min/max` and `-show/hide/trim/start` options are only used to optionally
+display or hide ops based on ops’ names and stats. However, they don’t prevent
+tfprof from accounting stats of hidden ops. Hence, the stats of an op can be
+aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
+an option to break this rule. When it is set, only displayed ops are accounted.
+
+Regexes are all comma-separated, for example `-show_name_regexes`
+`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
+is not expected to show up in op names.
+
+`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
+(notice the indent printed) are sorted according to order_by.
+
+## Future Work
+
+* Load SummaryWriter event logs so that it can show the latest summary value.
+
+* Better sorting and aggregation of outputs. Easier comprehension.
+
+* Currently, shape information is based on `graph.pbtxt`. When the shape
+information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
+and `Checkpoint` to complete shape information.
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
new file mode 100644
index 00000000000..d78020bbd87
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
@@ -0,0 +1,31 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+py_library(
+    name = "tfprof_logger",
+    srcs = ["tfprof_logger.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
+        "//tensorflow/python:framework_for_generated_wrappers",
+    ],
+)
+
+# -----------------------------------------------------------------------------
+# Google-internal targets.  These must be at the end for syncrepo.
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/__init__.py b/tensorflow/contrib/tfprof/python/tools/tfprof/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
new file mode 100644
index 00000000000..4a487461a38
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@@ -0,0 +1,114 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Logging tensorflow::tfprof::OpLog.
+
+OpLog is used to add extra model information for offline analysis by tfprof.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import tensorflow as tf
+from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_log_pb2
+from tensorflow.python.framework import ops
+
+TRAINABLE_VARIABLES = '_trainable_variables'
+REGISTERED_FLOP_STATS = 'flops'
+
+
+def _get_logged_ops(graph):
+  """Extract trainable model parameters and FLOPs for ops from a Graph.
+
+  Args:
+    graph: tf.Graph.
+  Returns:
+    logged_ops: dict mapping from op_name to OpLogEntry.
+  """
+  logged_ops = {}
+
+  graph_def = graph.as_graph_def()
+  for node in graph_def.node:
+    try:
+      stats = ops.get_stats_for_node_def(graph, node, REGISTERED_FLOP_STATS)
+    except ValueError:
+      # Catch Exception When shape is incomplete. Skip it.
+      stats = None
+
+    if not stats or not stats.value:
+      continue
+    if node.name not in logged_ops:
+      entry = tfprof_log_pb2.OpLogEntry()
+      entry.name = node.name
+      entry.float_ops = stats.value
+      logged_ops[entry.name] = entry
+
+  for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
+    if v.op.name not in logged_ops:
+      entry = tfprof_log_pb2.OpLogEntry()
+      entry.name = v.op.name
+      entry.types.append(TRAINABLE_VARIABLES)
+      logged_ops[entry.name] = entry
+    else:
+      logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)
+  return logged_ops
+
+
+def _merge_default_with_oplog(graph, op_log=None):
+  """Merge the tfprof default extra info with caller's op_log.
+
+  Args:
+    graph: tf.Graph.
+    op_log: OpLog proto.
+  Returns:
+    tmp_op_log: Merged OpLog proto.
+  """
+  tmp_op_log = tfprof_log_pb2.OpLog()
+  logged_ops = _get_logged_ops(graph)
+  if not op_log:
+    tmp_op_log.log_entries.extend(logged_ops.values())
+  else:
+    all_ops = dict()
+    for entry in op_log.log_entries:
+      all_ops[entry.name] = entry
+    for op_name, entry in logged_ops.iteritems():
+      if op_name in all_ops:
+        all_ops[op_name].types.extend(entry.types)
+        if entry.float_ops > 0 and all_ops[op_name].float_ops == 0:
+          all_ops[op_name].float_ops = entry.float_ops
+      else:
+        all_ops[op_name] = entry
+    tmp_op_log.log_entries.extend(all_ops.values())
+  return tmp_op_log
+
+
+def write_op_log(graph, log_dir, op_log=None):
+  """Log provided 'op_log', and add additional model information below.
+
+    The API also assigns ops in tf.trainable_variables() an op type called
+    '_trainable_variables'.
+    The API also logs 'flops' statistics for ops with op.RegisterStatistics()
+    defined.
+
+  Args:
+    graph: tf.Graph.
+    log_dir: directory to write the log file.
+    op_log: OpLog proto.
+  """
+  op_log = _merge_default_with_oplog(graph, op_log)
+
+  with tf.gfile.Open(os.path.join(log_dir, 'tfprof_log'), 'w') as log:
+    log.write(op_log.SerializeToString())
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/tools/tfprof/BUILD
new file mode 100644
index 00000000000..da161b1ffa1
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/BUILD
@@ -0,0 +1,52 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
+
+cc_binary(
+    name = "tfprof",
+    srcs = ["tfprof_main.cc"],
+    deps = [
+        ":protos_all_cc",
+        "//tensorflow/c:c_api",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
+        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
+        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "@linenoise//:linenoise",
+    ],
+)
+
+tf_proto_library(
+    name = "protos_all",
+    srcs = glob(
+        ["**/*.proto"],
+    ),
+    cc_api_version = 2,
+    cc_libs = ["//tensorflow/core:protos_all_cc"],
+    go_api_version = 2,
+    java_api_version = 2,
+    visibility = ["//visibility:public"],
+)
+
+# -----------------------------------------------------------------------------
+# Google-internal targets.  These must be at the end for syncrepo.
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD b/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
new file mode 100644
index 00000000000..42812b345dc
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
@@ -0,0 +1,227 @@
+package(
+    default_visibility = ["//tensorflow:__subpackages__"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+
+cc_library(
+    name = "tfprof_stats",
+    srcs = ["tfprof_stats.cc"],
+    hdrs = ["tfprof_stats.h"],
+    deps = [
+        ":tfprof_graph",
+        ":tfprof_node",
+        ":tfprof_options",
+        ":tfprof_scope",
+        ":tfprof_show",
+        ":tfprof_utils",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "tfprof_node",
+    srcs = ["tfprof_node.cc"],
+    hdrs = ["tfprof_node.h"],
+    deps = [
+        ":tfprof_options",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "tfprof_scope",
+    srcs = ["tfprof_scope.cc"],
+    hdrs = ["tfprof_scope.h"],
+    deps = [
+        ":tfprof_constants",
+        ":tfprof_node",
+        ":tfprof_options",
+        ":tfprof_show",
+        ":tfprof_tensor",
+        ":tfprof_utils",
+        "//tensorflow/c:c_api",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "tfprof_graph",
+    srcs = ["tfprof_graph.cc"],
+    hdrs = ["tfprof_graph.h"],
+    deps = [
+        ":tfprof_constants",
+        ":tfprof_node",
+        ":tfprof_options",
+        ":tfprof_show",
+        ":tfprof_tensor",
+        ":tfprof_utils",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "tfprof_show",
+    srcs = ["tfprof_show.cc"],
+    hdrs = ["tfprof_show.h"],
+    deps = [
+        ":tfprof_constants",
+        ":tfprof_node",
+        ":tfprof_options",
+        ":tfprof_tensor",
+        ":tfprof_utils",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_test(
+    name = "tfprof_show_test",
+    srcs = ["tfprof_show_test.cc"],
+    data = [
+        "testdata/ckpt",
+        "testdata/graph.pbtxt",
+        "testdata/run_meta",
+        "testdata/tfprof_log",
+    ],
+    deps = [
+        ":tfprof_constants",
+        ":tfprof_options",
+        ":tfprof_stats",
+        ":tfprof_utils",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
+cc_library(
+    name = "tfprof_utils",
+    srcs = ["tfprof_utils.cc"],
+    hdrs = ["tfprof_utils.h"],
+    deps = [
+        ":tfprof_options",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "tfprof_options",
+    srcs = ["tfprof_options.cc"],
+    hdrs = ["tfprof_options.h"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib",
+    ],
+)
+
+cc_library(
+    name = "print_model_analysis",
+    srcs = ["print_model_analysis.cc"],
+    hdrs = ["print_model_analysis.h"],
+    deps = [
+        ":tfprof_options",
+        ":tfprof_stats",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_test(
+    name = "tfprof_stats_test",
+    srcs = ["tfprof_stats_test.cc"],
+    data = [
+        "testdata/ckpt",
+        "testdata/graph.pbtxt",
+        "testdata/run_meta",
+        "testdata/tfprof_log",
+    ],
+    deps = [
+        ":tfprof_constants",
+        ":tfprof_options",
+        ":tfprof_stats",
+        ":tfprof_utils",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
+cc_library(
+    name = "tfprof_tensor",
+    srcs = ["tfprof_tensor.cc"],
+    hdrs = ["tfprof_tensor.h"],
+    deps = [
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "tfprof_tensor_test",
+    srcs = ["tfprof_tensor_test.cc"],
+    data = [
+        "testdata/ckpt",
+        "testdata/graph.pbtxt",
+    ],
+    deps = [
+        ":tfprof_options",
+        ":tfprof_stats",
+        ":tfprof_utils",
+        "//tensorflow/c:checkpoint_reader",
+        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
+cc_library(
+    name = "tfprof_constants",
+    hdrs = ["tfprof_constants.h"],
+    deps = [
+    ],
+)
+# -----------------------------------------------------------------------------
+# Google-internal targets.  These must be at the end for syncrepo.
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
new file mode 100644
index 00000000000..ab1e47b32dd
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
@@ -0,0 +1,65 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
+
+#include <stdio.h>
+#include <memory>
+#include <utility>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Runs 'command' over the serialized inputs; returns a serialized TFProfNode.
+string PrintModelAnalysis(const string* graph, const string* run_meta,
+                          const string* op_log, const string* command,
+                          const Options* options) {
+  CHECK(graph) << "graph mustn't be null";
+  CHECK(command) << "command mustn't be null";
+  CHECK(options) << "options mustn't be null";
+  std::unique_ptr<GraphDef> graph_def(new GraphDef());
+  graph_def->ParseFromString(*graph);
+  // run_meta and op_log are optional; their protos stay null when absent.
+  std::unique_ptr<RunMetadata> run_metadata;
+  if (run_meta != nullptr) {
+    run_metadata.reset(new RunMetadata());
+    run_metadata->ParseFromString(*run_meta);
+  }
+  std::unique_ptr<OpLog> op_log_proto;
+  if (op_log != nullptr) {
+    op_log_proto.reset(new OpLog());
+    op_log_proto->ParseFromString(*op_log);
+  }
+  std::unique_ptr<checkpoint::CheckpointReader> no_ckpt;  // Always null here.
+  TFStats tf_stats(std::move(graph_def), std::move(run_metadata),
+                   std::move(op_log_proto), std::move(no_ckpt));
+
+  const bool to_stdout = options->dump_to_file.empty();
+  if (to_stdout) {
+    printf("\n=========================Options=============================\n");
+    printf("%s", options->ToString().c_str());
+    printf("\n==================Model Analysis Report======================\n");
+  }
+  TFProfNode root(tf_stats.PrintGraph(*command, *options));
+  if (to_stdout) {
+    printf("\n======================End of Report==========================\n");
+    fflush(stdout);
+  }
+  return root.SerializeAsString();
+}
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
new file mode 100644
index 00000000000..579147f1641
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
@@ -0,0 +1,45 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+
+#include <string>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// ***This API is only for swig.***
+//
+// Interface defined for the Python API swig wrapper; calls the tfprof core
+// API. 'graph', 'run_meta' and 'op_log' are serialized GraphDef, RunMetadata
+// and OpLog strings, respectively. 'graph', 'command' and 'options' are
+// required (CHECK-failure if null); the others may be nullptr if unavailable.
+// Returns the resulting TFProfNode proto serialized as a string.
+string PrintModelAnalysis(const string* graph, const string* run_meta,
+                          const string* op_log, const string* command,
+                          const Options* options);
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
new file mode 100644
index 00000000000..2f59f071c59
Binary files /dev/null and b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt differ
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
new file mode 100644
index 00000000000..fd54551776c
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
@@ -0,0 +1,636 @@
+node {
+  name: "zeros"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 3
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "DW"
+  op: "Variable"
+  attr {
+    key: "container"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 3
+        }
+        dim {
+          size: 3
+        }
+        dim {
+          size: 6
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\003\000\000\000\003\000\000\000\003\000\000\000\006\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/mean"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/stddev"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/RandomStandardNormal"
+  op: "RandomStandardNormal"
+  input: "DW/Initializer/random_normal/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 87654321
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 5
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/mul"
+  op: "Mul"
+  input: "DW/Initializer/random_normal/RandomStandardNormal"
+  input: "DW/Initializer/random_normal/stddev"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal"
+  op: "Add"
+  input: "DW/Initializer/random_normal/mul"
+  input: "DW/Initializer/random_normal/mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+}
+node {
+  name: "DW/Assign"
+  op: "Assign"
+  input: "DW"
+  input: "DW/Initializer/random_normal"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "DW/read"
+  op: "Identity"
+  input: "DW"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+}
+node {
+  name: "Conv2D"
+  op: "Conv2D"
+  input: "zeros"
+  input: "DW/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 2
+        i: 2
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "DW2"
+  op: "Variable"
+  attr {
+    key: "container"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 2
+        }
+        dim {
+          size: 2
+        }
+        dim {
+          size: 6
+        }
+        dim {
+          size: 12
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "DW2/Initializer/random_normal/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\002\000\000\000\002\000\000\000\006\000\000\000\014\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "DW2/Initializer/random_normal/mean"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "DW2/Initializer/random_normal/stddev"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "DW2/Initializer/random_normal/RandomStandardNormal"
+  op: "RandomStandardNormal"
+  input: "DW2/Initializer/random_normal/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 87654321
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 15
+    }
+  }
+}
+node {
+  name: "DW2/Initializer/random_normal/mul"
+  op: "Mul"
+  input: "DW2/Initializer/random_normal/RandomStandardNormal"
+  input: "DW2/Initializer/random_normal/stddev"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+}
+node {
+  name: "DW2/Initializer/random_normal"
+  op: "Add"
+  input: "DW2/Initializer/random_normal/mul"
+  input: "DW2/Initializer/random_normal/mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+}
+node {
+  name: "DW2/Assign"
+  op: "Assign"
+  input: "DW2"
+  input: "DW2/Initializer/random_normal"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "DW2/read"
+  op: "Identity"
+  input: "DW2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+}
+node {
+  name: "Conv2D_1"
+  op: "Conv2D"
+  input: "Conv2D"
+  input: "DW2/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 2
+        i: 2
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+versions {
+  producer: 13
+}
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
new file mode 100644
index 00000000000..2d5bb7ddaff
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
@@ -0,0 +1,22 @@
+
+�
+�
+%/job:localhost/replica:0/task:0/cpu:0:
+_SOURCE������� (2
+cpuB_SOURCE = NoOp()H塈����a
+zeros������� (2
+cpu:(&"�cpu0������Bzeros = Const()H�������^
+DW������� (2
+cpu:(&"�cpu0ੀ���BDW = Variable()H�������`
+DW2������� (2
+cpu:(&"�	cpu0������BDW2 = Variable()H�������j
+DW/read������� (2
+cpu:(&"�cpu0ੀ���BDW/read = Identity(DW)H�������m
+DW2/read������� (2
+cpu:(&"�	cpu0������BDW2/read = Identity(DW2)H�������s
+Conv2D������� P(U2
+cpu�:(&"�cpu0ી���BConv2D = Conv2D(zeros, DW/read)H�������{
+Conv2D_1������� (2
+cpu�:(&"�cpu0฀���B#Conv2D_1 = Conv2D(Conv2D, DW2/read)H�������6
+_SINK������� (2
+cpuB_SINK = NoOp()H�������
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
new file mode 100644
index 00000000000..c35d4338e97
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
@@ -0,0 +1,9 @@
+
+
+Conv2D_1�$
+
+DW2_trainable_variables
+
+DW_trainable_variables
+
+Conv2D�-
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
new file mode 100644
index 00000000000..169ebae4a75
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
@@ -0,0 +1,37 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+
+namespace tensorflow {
+namespace tfprof {
+
+// Name of the root node of everything. Aggregates all stats.
+static const char* const kTFProfRoot = "_TFProfRoot";
+// Op types for nodes that don't represent a physical node in the
+// TensorFlow model. They only exist as placeholders to aggregate children.
+// For example, the kTFProfRoot node has one of these types.
+static const char* const kTFGraphParent = "_TFGraphParent";
+static const char* const kTFScopeParent = "_kTFScopeParent";
+// Op type for tf.trainable_variables().
+static const char* const kTrainableVarType = "_trainable_variables";
+// Op type for tensors in the checkpoint file.
+static const char* const kCkptVarType = "_checkpoint_variables";
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
new file mode 100644
index 00000000000..287fd78d46c
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
@@ -0,0 +1,222 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+
+#include <stdio.h>
+#include <utility>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Creates (or replaces) the placeholder node 'name'; returns its GraphNode.
+GraphNode* TFGraph::CreateParentNode(const string& name) {
+  node_defs_.push_back(std::unique_ptr<NodeDef>(new NodeDef()));
+  NodeDef* def = node_defs_.back().get();
+  def->set_name(name);
+  def->set_op(kTFGraphParent);
+  parent_nodes_[name].reset(new TFNode(def));
+  nodes_map_[name].reset(new GraphNode(parent_nodes_[name].get()));
+  return nodes_map_[name].get();
+}
+
+// Registers 'node' under its NodeDef name, replacing any same-named entry.
+void TFGraph::AddNode(TFNode* node) {
+  nodes_map_[node->node_def()->name()].reset(new GraphNode(node));
+}
+
+// Links every node to the nodes it takes as inputs and records as roots the
+// nodes that are not an input of any other node. No-op once roots exist.
+void TFGraph::Build() {
+  if (!roots_.empty()) return;
+
+  // Names that appear as an input somewhere, hence cannot be roots.
+  std::set<string> input_names;
+  for (const auto& entry : nodes_map_) {
+    GraphNode* parent = entry.second.get();
+    for (const auto& input : parent->node->inputs()) {
+      input_names.insert(input.first);
+      // Inputs without a GraphNode of their own are skipped as children.
+      const auto found = nodes_map_.find(input.first);
+      if (found == nodes_map_.end()) continue;
+      parent->children.push_back(found->second.get());
+    }
+  }
+  for (const auto& entry : nodes_map_) {
+    if (input_names.count(entry.first) == 0) {
+      roots_.push_back(entry.second.get());
+    }
+  }
+}
+
+// Hangs the start nodes under a fresh kTFProfRoot, accounts stats and formats.
+const ShowNode* TFGraph::ShowInternal(const Options& opts) {
+  // A lone ".*" start regex means "start from the roots found by Build()".
+  const bool match_all = opts.start_name_regexes.size() == 1 &&
+                         opts.start_name_regexes[0] == ".*";
+  std::vector<GraphNode*> start_nodes = roots_;
+  if (!match_all) {
+    std::set<string> seen;
+    start_nodes = SearchRoot(start_nodes, opts.start_name_regexes, &seen);
+  }
+
+  GraphNode* root = CreateParentNode(kTFProfRoot);
+  root->children.assign(start_nodes.begin(), start_nodes.end());
+
+  std::map<string, int64> account_visits;
+  Account({root}, opts, &account_visits);
+
+  if (opts.viz) printf("Visualizing feature disabled...\n");
+  std::set<string> visits;
+  return PrintGraph({root}, opts, 1, 0, 0, &visits)[0];
+}
+
+// Depth-first search for the nodes whose name fully matches any of 'regexes'.
+// 'visited' holds the names already examined, so each node is tried once.
+std::vector<GraphNode*> TFGraph::SearchRoot(
+    const std::vector<GraphNode*>& roots, const std::vector<string>& regexes,
+    std::set<string>* visited) {
+  std::vector<GraphNode*> matches;
+
+  for (GraphNode* candidate : roots) {
+    // insert().second is false when the name was already in the set.
+    if (!visited->insert(candidate->name()).second) continue;
+
+    bool is_start_node = false;
+    for (const string& regex : regexes) {
+      is_start_node = RE2::FullMatch(candidate->name(), regex);
+      if (is_start_node) break;
+    }
+    if (is_start_node) {
+      // A matched node is a start point: its children are not searched from
+      // here, although they can still be reached through another route.
+      matches.push_back(candidate);
+      continue;
+    }
+
+    // No match: keep looking among the nodes below this one.
+    const std::vector<GraphNode*> below =
+        SearchRoot(candidate->children, regexes, visited);
+    matches.insert(matches.end(), below.begin(), below.end());
+  }
+  return matches;
+}
+
+// Formats 'roots' and everything reachable from them, each node at most once
+// ('visits' holds the names already handled). 'hidden' is the number of
+// consecutive unshown ancestors and 'last_ident' the indentation width.
+// Returns the shown nodes; an unshown node is replaced by its shown descendants.
+std::vector<GraphNode*> TFGraph::PrintGraph(const std::vector<GraphNode*> roots,
+                                            const Options& opts, int depth,
+                                            int hidden, int last_ident,
+                                            std::set<string>* visits) {
+  std::vector<GraphNode*> show_nodes;
+  const bool print_hidden =
+      hidden && opts.select.find(kShown[4]) != opts.select.end();
+  for (GraphNode* node : roots) {
+    if (!visits->insert(node->name()).second) continue;
+
+    const bool show = ShouldShow(node, opts, depth);
+    int child_hidden = hidden + 1;
+    int child_ident = last_ident;
+    if (show) {
+      node->formatted_str.clear();
+      if (opts.account_displayed_op_only) {
+        node->ResetTotalStats();
+        node->AddSelfToTotalStats();
+      }
+      child_hidden = 0;
+      child_ident = last_ident + (print_hidden ? 4 : 2);
+    }
+
+    std::vector<GraphNode*> shown_below;
+    if (!ShouldTrim(node, opts.trim_name_regexes)) {
+      shown_below = PrintGraph(node->children, opts, depth + 1, child_hidden,
+                               child_ident, visits);
+    }
+    if (!show) {
+      // An unshown node passes its shown descendants up to the caller.
+      show_nodes.insert(show_nodes.end(), shown_below.begin(),
+                        shown_below.end());
+      continue;
+    }
+
+    shown_below = SortNodes(shown_below, opts);
+    string children_str;
+    for (GraphNode* child : shown_below) {
+      children_str += child->formatted_str;
+      node->mutable_proto()->add_children()->MergeFrom(child->proto());
+      if (opts.account_displayed_op_only) {
+        node->AggregateTotalStats(child);
+      }
+    }
+    const string indent(last_ident, ' ');
+    if (print_hidden) {  // Prefix a "...hidden N..." line.
+      node->formatted_str =
+          strings::Printf("%s...hidden %d...\n", indent.c_str(), hidden);
+      node->formatted_str += strings::Printf("  %s%s\n", indent.c_str(),
+                                             node->Format(opts).c_str());
+    } else {
+      node->formatted_str = strings::Printf("%s%s\n", indent.c_str(),
+                                            node->Format(opts).c_str());
+    }
+    if (opts.select.find(kShown[5]) != opts.select.end()) {
+      std::unique_ptr<TFProfTensor> tfprof_tensor;
+      if (LookUpCheckPoint(node->name(), &tfprof_tensor)) {
+        string value_str;
+        tfprof_tensor->Display(&value_str,
+                               node->mutable_proto()->mutable_tensor_value());
+        node->formatted_str += value_str;
+      }
+    }
+    node->formatted_str += children_str;
+    show_nodes.push_back(node);
+  }
+  return show_nodes;
+}
+
+// Computes total stats bottom-up. 'visits' maps a node name to 1 once the node
+// has been traversed, and is bumped each time a parent looks at that child.
+void TFGraph::Account(const std::vector<GraphNode*>& roots, const Options& opts,
+                      std::map<string, int64>* visits) {
+  for (GraphNode* node : roots) {
+    if (visits->count(node->name()) > 0) continue;
+    (*visits)[node->name()] = 1;
+    node->ResetTotalStats();
+    // Depth-first: children are fully accounted before this node.
+    Account(node->children, opts, visits);
+
+    node->account = ShouldAccount(node, opts);
+    if (node->account) node->AddSelfToTotalStats();
+
+    // Fold in the children's totals.
+    for (GraphNode* child : node->children) {
+      // A child can hang under several parents but is aggregated only once:
+      // its count is 1 after its own traversal, so only the first parent to
+      // reach it (count becomes 2) adds its totals.
+      int64& seen = (*visits)[child->name()];
+      seen += 1;
+      if (seen > 2) continue;
+      node->AggregateTotalStats(child);
+    }
+  }
+}
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
new file mode 100644
index 00000000000..ee54534f56b
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
@@ -0,0 +1,116 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Build a graph structure based on op inputs/outputs. The graph is a directed
+// acyclic graph pointing *from outputs to inputs*.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tfprof {
+// ShowNode used by TFGraph; also keeps input counts in its proto.
+class GraphNode : public ShowNode {
+ public:
+  explicit GraphNode(TFNode* node) : ShowNode(node) {
+    mutable_proto()->set_inputs(node->inputs().size());
+    mutable_proto()->set_total_inputs(0);
+  }
+  // Merges the totals of 'node', counting 'node' itself as one more input.
+  void AggregateTotalStats(GraphNode* node) {
+    ShowNode::AggregateTotalStats(node);
+    mutable_proto()->set_total_inputs(proto().total_inputs() +
+                                      node->proto().total_inputs() + 1);
+  }
+  // Adds this node's own input count to its total input count.
+  void AddSelfToTotalStats() {
+    ShowNode::AddSelfToTotalStats();
+    mutable_proto()->set_total_inputs(proto().total_inputs() +
+                                      proto().inputs());
+  }
+  // Resets the base-class totals and zeroes the total input count.
+  void ResetTotalStats() {
+    ShowNode::ResetTotalStats();
+    mutable_proto()->set_total_inputs(0);
+  }
+
+  // Nodes below this one; TFGraph::Build() fills it with the node's inputs.
+  std::vector<GraphNode*> children;
+};
+
+// Organize tensorflow ops in a graph structure, pointing from output ops
+// to input ops.
+class TFGraph : public TFShow {
+ public:
+  explicit TFGraph(checkpoint::CheckpointReader* ckpt_reader)
+      : TFShow(ckpt_reader) {}
+  ~TFGraph() override {}
+  // Registers 'node' in the graph, keyed by its NodeDef name.
+  void AddNode(TFNode* node) override;
+  // Links nodes to their inputs and collects the roots; no-op once built.
+  void Build() override;
+
+ private:
+  const ShowNode* ShowInternal(const Options& opts) override;
+  // The graph view applies no extra filtering: always returns true.
+  bool ShouldShowIfExtra(ShowNode* node, const Options& opts,
+                         int depth) override {
+    return true;
+  }
+  // Creates (or replaces) a placeholder node of op type kTFGraphParent.
+  GraphNode* CreateParentNode(const string& name);
+  // Finds the nodes reachable from 'roots' whose name fully matches a regex.
+  std::vector<GraphNode*> SearchRoot(const std::vector<GraphNode*>& roots,
+                                     const std::vector<string>& regexes,
+                                     std::set<string>* visited);
+  // Recursively formats the nodes to show; returns the shown nodes.
+  std::vector<GraphNode*> PrintGraph(const std::vector<GraphNode*> roots,
+                                     const Options& opts, int depth, int hidden,
+                                     int last_ident, std::set<string>* visits);
+  // NOTE(review): the two methods below have no definition in tfprof_graph.cc.
+  void VisualizeGraph(GraphNode* root, const Options& opts);
+
+  std::vector<GraphNode*> GenerateGraphDot(
+      GraphNode* root, GraphNode* last_shown, const Options& opts, int depth,
+      int hidden, std::set<string>* declared_nodes,
+      std::set<string>* declared_edges, TFProfNode* parent);
+  // Aggregates total stats from children into their parents, depth-first.
+  void Account(const std::vector<GraphNode*>& roots, const Options& opts,
+               std::map<string, int64>* visits);
+  // roots_ is filled by Build(); both maps are keyed by node name.
+  std::vector<GraphNode*> roots_;
+  std::vector<std::unique_ptr<NodeDef>> node_defs_;
+  std::map<string, std::unique_ptr<TFNode>> parent_nodes_;
+  std::map<string, std::unique_ptr<GraphNode>> nodes_map_;
+};
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
new file mode 100644
index 00000000000..0e8ab366cbb
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
@@ -0,0 +1,47 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+
+#include "tensorflow/core/framework/allocation_description.pb.h"
+#include "tensorflow/core/framework/tensor_description.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Attaches the execution statistics recorded for this node and refreshes the
+// timing and memory counters derived from them.
+//
+// `device`: when non-empty, replaces the device taken from the GraphDef.
+// `step_stat`: stored as a raw pointer and dereferenced immediately, so it
+//     must be non-null.
+void TFNode::AddStepStat(const string& device, const NodeExecStats* step_stat) {
+  // The device reported at run time wins over the one from the GraphDef.
+  if (!device.empty()) device_ = device;
+
+  step_stat_ = step_stat;
+  op_start_micros_ = step_stat_->all_start_micros();
+
+  // The op execution time is only derived when both relative timestamps are
+  // non-zero; otherwise the previous value is left untouched.
+  const auto rel_end = step_stat_->op_end_rel_micros();
+  if (rel_end) {
+    const auto rel_start = step_stat_->op_start_rel_micros();
+    if (rel_start) op_exec_micros_ = rel_end - rel_start;
+  }
+  all_spent_micros_ = step_stat_->all_end_rel_micros();
+
+  // Add the requested bytes of every output that carries an allocation
+  // description on top of the running total.
+  for (const auto& output : step_stat_->output()) {
+    if (!output.has_tensor_description()) continue;
+    const auto& description = output.tensor_description();
+    if (!description.has_allocation_description()) continue;
+    requested_bytes_ += description.allocation_description().requested_bytes();
+  }
+}
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
new file mode 100644
index 00000000000..c8a8f5e7ec4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
@@ -0,0 +1,106 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/core/framework/allocation_description.pb.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/step_stats.pb.h"
+#include "tensorflow/core/framework/tensor_description.pb.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// Per-op record that combines the static NodeDef with optional run-time
+// statistics.
+//
+// The NodeDef, the NodeExecStats and the input TFNodes are held as raw
+// pointers that this class never deletes; they must outlive the TFNode.
+class TFNode {
+ public:
+  // Builds a node from `node`. When `node` is null every counter is zero and
+  // shape, op types and device stay empty. Otherwise the op name becomes the
+  // first op type, the device is copied, and the first "shape" attribute
+  // that holds a shape provides the initial shape.
+  TFNode(const NodeDef* node)
+      : node_(node),
+        step_stat_(nullptr),
+        op_start_micros_(0),
+        op_exec_micros_(0),
+        all_spent_micros_(0),
+        requested_bytes_(0),
+        float_ops_(0) {
+    if (!node) return;
+
+    for (const auto& attr : node->attr()) {
+      // TODO(xpan): Also consider _output_shapes.
+      if (attr.first != "shape" || !attr.second.has_shape()) continue;
+      if (!shape_.empty()) {
+        // Only the first non-empty shape is kept.
+        fprintf(stderr, "Found duplicated shapes!\n");
+        continue;
+      }
+      std::vector<int64> shape_vec;
+      for (const auto& d : attr.second.shape().dim()) {
+        shape_vec.push_back(d.size());
+      }
+      update_shape(shape_vec);
+    }
+    op_types_.insert(node->op());
+    device_ = node->device();
+  }
+
+  // Equivalent to TFNode(nullptr).
+  TFNode() : TFNode(nullptr) {}
+
+  // Registers `input` under its NodeDef name; an input with the same name
+  // replaces the previous entry. `input` must have a non-null NodeDef.
+  void AddInput(TFNode* input) { inputs_[input->node_def()->name()] = input; }
+
+  // Adds an extra op type on top of the op name taken from the NodeDef.
+  void AddOpType(const string& op_type) { op_types_.insert(op_type); }
+
+  // Attaches run-time statistics; defined in tfprof_node.cc.
+  void AddStepStat(const string& device, const NodeExecStats* step_stat);
+
+  // Despite the name this overwrites the stored float-op count.
+  void AddFloatOps(int64 float_ops) { float_ops_ = float_ops; }
+
+  // Read-only accessors. They are const so they can be called through a
+  // const TFNode as well.
+  const NodeDef* node_def() const { return node_; }
+  const std::map<string, TFNode*>& inputs() const { return inputs_; }
+  int64 op_start_micros() const { return op_start_micros_; }
+  int64 op_exec_micros() const { return op_exec_micros_; }
+  int64 all_spent_micros() const { return all_spent_micros_; }
+  int64 requested_bytes() const { return requested_bytes_; }
+  // Misspelt original name, kept so existing callers keep compiling.
+  // Prefer requested_bytes().
+  int64 requested_byptes() const { return requested_bytes_; }
+  int64 float_ops() const { return float_ops_; }
+  string device() const { return device_; }
+  const std::set<string>& op_types() const { return op_types_; }
+
+  const std::vector<int64>& shape() const { return shape_; }
+  // Replaces the stored shape.
+  void update_shape(const std::vector<int64>& shape) { shape_ = shape; }
+
+ private:
+  // Inputs keyed by NodeDef name (non-owning).
+  std::map<string, TFNode*> inputs_;
+  // May be null for a default-constructed node (non-owning).
+  const NodeDef* node_;
+  // Null until AddStepStat() is called (non-owning).
+  const NodeExecStats* step_stat_;
+
+  std::vector<int64> shape_;
+  std::set<string> op_types_;
+  string device_;
+  int64 op_start_micros_;
+  int64 op_exec_micros_;
+  int64 all_spent_micros_;
+  int64 requested_bytes_;
+  int64 float_ops_;
+};
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
new file mode 100644
index 00000000000..2574415fdd4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
@@ -0,0 +1,57 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// Renders every option as one "<flag padded to 28 columns><value>" line, in
+// the order of kOptions. List-valued options are joined with ",".
+string Options::ToString() const {
+  // Numeric options.
+  string out = strings::Printf("%-28s%d\n", kOptions[0], max_depth);
+  out += strings::Printf("%-28s%lld\n", kOptions[1], min_bytes);
+  out += strings::Printf("%-28s%lld\n", kOptions[2], min_micros);
+  out += strings::Printf("%-28s%lld\n", kOptions[3], min_params);
+  out += strings::Printf("%-28s%lld\n", kOptions[4], min_float_ops);
+
+  // Everything else is printed as text.
+  const auto add_text_row = [&out](const char* flag, const string& value) {
+    out += strings::Printf("%-28s%s\n", flag, value.c_str());
+  };
+  add_text_row(kOptions[5], str_util::Join(device_regexes, ","));
+  add_text_row(kOptions[6], order_by);
+  add_text_row(kOptions[7], str_util::Join(account_type_regexes, ","));
+  add_text_row(kOptions[8], str_util::Join(start_name_regexes, ","));
+  add_text_row(kOptions[9], str_util::Join(trim_name_regexes, ","));
+  add_text_row(kOptions[10], str_util::Join(show_name_regexes, ","));
+  add_text_row(kOptions[11], str_util::Join(hide_name_regexes, ","));
+  add_text_row(kOptions[12], account_displayed_op_only ? "true" : "false");
+  add_text_row(kOptions[13], str_util::Join(select, ","));
+  add_text_row(kOptions[14], viz ? "true" : "false");
+  add_text_row(kOptions[15], dump_to_file);
+  return out;
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
new file mode 100644
index 00000000000..a0c52e6d1af
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
@@ -0,0 +1,119 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Command-line flag names, one per Options field. Options::ToString() reads
+// this table by position (kOptions[0] .. kOptions[15]), so the order here
+// must stay in sync with that function.
+static const char* const kOptions[] = {
+    "-max_depth",
+    "-min_bytes",
+    "-min_micros",
+    "-min_params",
+    "-min_float_ops",
+    "-device_regexes",
+    "-order_by",
+    "-account_type_regexes",
+    "-start_name_regexes",
+    "-trim_name_regexes",
+    "-show_name_regexes",
+    "-hide_name_regexes",
+    "-account_displayed_op_only",
+    "-select",
+    "-viz",
+    "-dump_to_file",
+};
+
+// NOTE(review): presumably the accepted values of -order_by -- confirm
+// against the code that validates or sorts by it.
+static const char* const kOrderBy[] = {
+    "name", "bytes", "micros", "params", "float_ops",
+};
+
+// Append Only.
+// Column names that can appear in Options::select. Callers refer to entries
+// by index (for example kShown[2] for "params" and kShown[5] for
+// "tensor_value"), which is why existing positions must not change.
+static const char* const kShown[] = {
+    "bytes",          "micros",       "params", "float_ops",
+    "num_hidden_ops", "tensor_value", "device", "op_types",
+};
+
+// NOTE(review): presumably the command names accepted by the tool -- confirm
+// against the command-line front end.
+static const char* const kCmds[] = {
+    "scope", "graph", "set", "help",
+};
+
+// Value bundle holding every setting of one tfprof query. Each field
+// corresponds to the kOptions entry at the same position; ToString() prints
+// them in that order.
+struct Options {
+ public:
+  virtual ~Options() {}
+  // Copies every argument into the field of the same name. `select` is
+  // converted from a vector into a set, so duplicate entries collapse.
+  // `dump_to_file` defaults to the empty string.
+  Options(int max_depth, tensorflow::int64 min_bytes,
+          tensorflow::int64 min_micros, tensorflow::int64 min_params,
+          tensorflow::int64 min_float_ops,
+          const std::vector<string>& device_regexes, const string& order_by,
+          const std::vector<string>& account_type_regexes,
+          const std::vector<string>& start_name_regexes,
+          const std::vector<string>& trim_name_regexes,
+          const std::vector<string>& show_name_regexes,
+          const std::vector<string>& hide_name_regexes,
+          bool account_displayed_op_only, const std::vector<string>& select,
+          bool viz, const string& dump_to_file = "")
+      : max_depth(max_depth),
+        min_bytes(min_bytes),
+        min_micros(min_micros),
+        min_params(min_params),
+        min_float_ops(min_float_ops),
+        device_regexes(device_regexes),
+        order_by(order_by),
+        account_type_regexes(account_type_regexes),
+        start_name_regexes(start_name_regexes),
+        trim_name_regexes(trim_name_regexes),
+        show_name_regexes(show_name_regexes),
+        hide_name_regexes(hide_name_regexes),
+        account_displayed_op_only(account_displayed_op_only),
+        select(select.begin(), select.end()),
+        viz(viz),
+        dump_to_file(dump_to_file) {}
+
+  // Returns one "<flag><value>" line per option; see tfprof_options.cc.
+  string ToString() const;
+
+  // Display thresholds: TFShow::ShouldShow() hides a node that is deeper than
+  // max_depth or whose own bytes / micros / params / float ops fall below the
+  // matching minimum.
+  int max_depth;
+  tensorflow::int64 min_bytes;
+  tensorflow::int64 min_micros;
+  tensorflow::int64 min_params;
+  tensorflow::int64 min_float_ops;
+  // A node is shown only if its device fully matches one of these regexes.
+  // The single entry ".*" is treated as "match everything" without running
+  // the regex.
+  std::vector<string> device_regexes;
+  // NOTE(review): presumably one of kOrderBy -- confirm where it is consumed.
+  string order_by;
+
+  // A node is accounted if one of its op types or its device fully matches
+  // one of these regexes (see TFShow::ShouldAccount()).
+  std::vector<string> account_type_regexes;
+  // Name regexes selecting where display starts, which subtrees are cut off,
+  // and which nodes are shown or hidden. All are matched against the full
+  // node name.
+  std::vector<string> start_name_regexes;
+  std::vector<string> trim_name_regexes;
+  std::vector<string> show_name_regexes;
+  std::vector<string> hide_name_regexes;
+  // When true, totals are rebuilt from displayed nodes only.
+  bool account_displayed_op_only;
+
+  // Names of the columns to print; entries are compared against kShown.
+  std::set<string> select;
+  bool viz;
+  // When empty TFShow::Show() prints to stdout, otherwise it writes the
+  // report to this path.
+  string dump_to_file;
+};
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
new file mode 100644
index 00000000000..6b2bc298ccb
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
@@ -0,0 +1,191 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+
+#include <stdio.h>
+#include <utility>
+
+#include "tensorflow/c/c_api.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Returns the ScopeNode registered under `name`. When none exists yet, a
+// placeholder is created: a synthetic NodeDef with op kTFScopeParent, a
+// TFNode wrapping it and a ScopeNode wrapping that, all owned by this object.
+ScopeNode* TFScope::CreateParentNode(const string& name) {
+  const auto existing = nodes_map_.find(name);
+  if (existing != nodes_map_.end()) return existing->second.get();
+
+  // Synthetic definition standing in for a scope that has no op of its own.
+  std::unique_ptr<NodeDef> def(new NodeDef());
+  def->set_name(name);
+  def->set_op(kTFScopeParent);
+  node_defs_.push_back(std::move(def));
+
+  std::unique_ptr<TFNode>& parent = parent_nodes_[name];
+  parent.reset(new TFNode(node_defs_.back().get()));
+
+  std::unique_ptr<ScopeNode>& scope_node = nodes_map_[name];
+  scope_node.reset(new ScopeNode(parent.get()));
+  return scope_node.get();
+}
+
+// Registers `node` under its NodeDef name, unless that name is already taken,
+// and makes sure every enclosing scope ("a/b" and "a" for "a/b/c") has an
+// entry, creating placeholders where needed.
+void TFScope::AddNode(TFNode* node) {
+  string scope = node->node_def()->name();
+  if (nodes_map_.count(scope) == 0) {
+    nodes_map_[scope] = std::unique_ptr<ScopeNode>(new ScopeNode(node));
+  }
+
+  // Strip one trailing "/component" per iteration until no slash is left.
+  for (auto slash = scope.find_last_of("/"); slash != string::npos;
+       slash = scope.find_last_of("/")) {
+    scope.erase(slash);
+    if (nodes_map_.count(scope) == 0) {
+      CHECK(CreateParentNode(scope));
+    }
+  }
+}
+
+// Links the registered nodes into a tree. Names without "/" become roots;
+// every other node is appended to the children of the node named by the text
+// before its last "/". Does nothing once roots have been collected.
+void TFScope::Build() {
+  if (!roots_.empty()) return;
+
+  for (auto& entry : nodes_map_) {
+    ScopeNode* const current = entry.second.get();
+    const string& full_name = current->name();
+    const auto last_slash = full_name.find_last_of("/");
+    if (last_slash != string::npos) {
+      // AddNode() registers every enclosing scope, so the parent entry is
+      // expected to exist already.
+      nodes_map_[full_name.substr(0, last_slash)]->children.push_back(current);
+      continue;
+    }
+    roots_.push_back(current);
+  }
+}
+
+// Produces the report for `opts` and returns the synthetic kTFProfRoot node
+// that carries it.
+const ShowNode* TFScope::ShowInternal(const Options& opts) {
+  // Unless start_name_regexes is exactly {".*"}, display starts at the
+  // topmost nodes whose names match instead of at the real roots.
+  const bool start_at_roots = opts.start_name_regexes.size() == 1 &&
+                              opts.start_name_regexes[0] == ".*";
+  const std::vector<ScopeNode*> start_nodes =
+      start_at_roots ? roots_ : SearchRoot(roots_, opts.start_name_regexes);
+
+  // Hang the start nodes under the synthetic root and compute the totals.
+  ScopeNode* top = CreateParentNode(kTFProfRoot);
+  top->children.assign(start_nodes.begin(), start_nodes.end());
+  Account({top}, opts);
+
+  const std::vector<ScopeNode*> shown = PrintScope({top}, opts, 1, 0);
+  top = shown[0];
+  return top;
+}
+
+// Depth-first search for start nodes. On each branch the first node whose
+// name fully matches any of `regexes` is collected and its subtree is not
+// searched further.
+std::vector<ScopeNode*> TFScope::SearchRoot(
+    std::vector<ScopeNode*> roots, const std::vector<string>& regexes) {
+  const auto is_start_node = [&regexes](ScopeNode* candidate) -> bool {
+    for (const string& regex : regexes) {
+      if (RE2::FullMatch(candidate->name(), regex)) return true;
+    }
+    return false;
+  };
+
+  std::vector<ScopeNode*> found;
+  for (ScopeNode* candidate : roots) {
+    if (is_start_node(candidate)) {
+      found.push_back(candidate);
+    } else {
+      // No match here, so look for start nodes further down this branch.
+      const std::vector<ScopeNode*> below =
+          SearchRoot(candidate->children, regexes);
+      found.insert(found.end(), below.begin(), below.end());
+    }
+  }
+  return found;
+}
+
+// Formats `roots` and their descendants and returns the nodes that are
+// visible at this level.
+//
+// For every node that passes ShouldShow(), formatted_str is rebuilt as the
+// node's own line (indented by `last_ident` spaces), optionally its
+// checkpoint tensor value, then the text of its visible descendants. Its
+// proto receives one child entry per visible descendant. A hidden node
+// contributes nothing itself; its visible descendants are handed to the
+// caller instead. Descendants of a node matching trim_name_regexes are not
+// visited.
+//
+// `depth` is the depth of `roots`; `last_ident` is the indentation, in
+// spaces, used for nodes shown at this level.
+std::vector<ScopeNode*> TFScope::PrintScope(const std::vector<ScopeNode*> roots,
+                                            const Options& opts, int depth,
+                                            int last_ident) {
+  std::vector<ScopeNode*> show_nodes;
+
+  for (ScopeNode* node : roots) {
+    int nlast_ident = last_ident;
+    bool show = ShouldShow(node, opts, depth);
+    if (show) {
+      node->formatted_str.clear();
+      // Nodes outlive a single call, so drop the child protos merged in by an
+      // earlier call; otherwise every repeated Show() appends duplicates.
+      node->mutable_proto()->clear_children();
+      if (opts.account_displayed_op_only) {
+        // Totals are rebuilt below from the displayed descendants only.
+        node->ResetTotalStats();
+        node->AddSelfToTotalStats();
+      }
+      // Children of a shown node are indented two more spaces.
+      nlast_ident += 2;
+    }
+
+    std::vector<ScopeNode*> show_cnodes;
+    if (!ShouldTrim(node, opts.trim_name_regexes)) {
+      show_cnodes = PrintScope(node->children, opts, depth + 1, nlast_ident);
+    }
+    if (show) {
+      show_cnodes = SortNodes(show_cnodes, opts);
+      string children_str;
+      for (ScopeNode* sc : show_cnodes) {
+        children_str += sc->formatted_str;
+        node->mutable_proto()->add_children()->MergeFrom(sc->proto());
+        if (opts.account_displayed_op_only) {
+          node->AggregateTotalStats(sc);
+        }
+      }
+
+      // Format after aggregation so the line reports the final totals.
+      node->formatted_str =
+          strings::Printf("%s%s\n", string(last_ident, ' ').c_str(),
+                          node->Format(opts).c_str());
+
+      // kShown[5] is "tensor_value": append the checkpoint value if present.
+      if (opts.select.find(kShown[5]) != opts.select.end()) {
+        std::unique_ptr<TFProfTensor> tfprof_tensor;
+        if (LookUpCheckPoint(node->name(), &tfprof_tensor)) {
+          string value_str;
+          tfprof_tensor->Display(&value_str,
+                                 node->mutable_proto()->mutable_tensor_value());
+          node->formatted_str += value_str;
+        }
+      }
+
+      node->formatted_str += children_str;
+      show_nodes.push_back(node);
+    } else {
+      // Hidden node: its visible descendants move up to the caller's level.
+      show_nodes.insert(show_nodes.end(), show_cnodes.begin(),
+                        show_cnodes.end());
+    }
+  }
+  return show_nodes;
+}
+
+// Recomputes the total_* statistics of `roots` and everything below them.
+// Children are processed first; a node's total is its own stats (only when
+// ShouldAccount() accepts it) plus the totals of all its children.
+void TFScope::Account(const std::vector<ScopeNode*>& roots,
+                      const Options& opts) {
+  for (ScopeNode* current : roots) {
+    current->ResetTotalStats();
+    Account(current->children, opts);
+
+    const bool accounted = ShouldAccount(current, opts);
+    current->account = accounted;
+    if (accounted) current->AddSelfToTotalStats();
+
+    for (ScopeNode* child : current->children) {
+      current->AggregateTotalStats(child);
+    }
+  }
+}
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
new file mode 100644
index 00000000000..3a8ca52b43c
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
@@ -0,0 +1,88 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Build a tree structure based on the TensorFlow op names.
+// For example, 'name1/name2' is a child of 'name1'.
+// Stats are aggregated from descendants to ancestors.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// ShowNode used by the scope view: adds the list of child scopes to the
+// per-node display state kept by ShowNode.
+class ScopeNode : public ShowNode {
+ public:
+  explicit ScopeNode(TFNode* node) : ShowNode(node) {}
+  ~ScopeNode() override {}
+
+  // The three methods below only forward to the ShowNode implementation.
+  // NOTE(review): presumably they exist to make the base-class methods
+  // callable on ScopeNode -- confirm against the ShowNode declaration.
+  void AggregateTotalStats(ScopeNode* node) {
+    ShowNode::AggregateTotalStats(node);
+  }
+
+  void AddSelfToTotalStats() { ShowNode::AddSelfToTotalStats(); }
+
+  void ResetTotalStats() { ShowNode::ResetTotalStats(); }
+
+  // Direct child scopes (non-owning); filled in by TFScope.
+  std::vector<ScopeNode*> children;
+};
+
+// TFShow implementation that arranges ops into a tree by the "/"-separated
+// components of their names.
+class TFScope : public TFShow {
+ public:
+  // Passes the checkpoint reader straight through to the TFShow base class.
+  explicit TFScope(checkpoint::CheckpointReader* ckpt_reader)
+      : TFShow(ckpt_reader) {}
+  ~TFScope() override {}
+
+  // Registers `node` and creates placeholder entries for every enclosing
+  // scope that does not exist yet.
+  void AddNode(TFNode* node) override;
+
+  // Links registered nodes to their parents and collects the roots. Does
+  // nothing once roots have been collected.
+  void Build() override;
+
+ private:
+  // Builds the report under a synthetic kTFProfRoot node and returns it.
+  const ShowNode* ShowInternal(const Options& opts) override;
+
+  // Returns the node registered as `name`, creating a placeholder if needed.
+  ScopeNode* CreateParentNode(const string& name);
+
+  // Returns, per branch, the topmost nodes whose names fully match one of
+  // `regexes`. `roots` is taken by value.
+  std::vector<ScopeNode*> SearchRoot(std::vector<ScopeNode*> roots,
+                                     const std::vector<string>& regexes);
+
+  // Formats `roots` recursively and returns the nodes visible at this level.
+  // `last_ident` is the indentation, in spaces, of nodes shown at this level.
+  std::vector<ScopeNode*> PrintScope(const std::vector<ScopeNode*> roots,
+                                     const Options& opts, int depth,
+                                     int last_ident);
+
+  // Recomputes the total statistics of `roots` and their descendants.
+  void Account(const std::vector<ScopeNode*>& roots, const Options& opts);
+
+  // Nodes whose names contain no "/" (non-owning).
+  std::vector<ScopeNode*> roots_;
+  // Storage for the NodeDefs synthesized by CreateParentNode().
+  std::vector<std::unique_ptr<NodeDef>> node_defs_;
+  // Storage for the TFNodes synthesized by CreateParentNode(), keyed by name.
+  std::map<string, std::unique_ptr<TFNode>> parent_nodes_;
+  // Every ScopeNode, real or placeholder, keyed by node name.
+  std::map<string, std::unique_ptr<ScopeNode>> nodes_map_;
+};
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
new file mode 100644
index 00000000000..f7275d8ae4d
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
@@ -0,0 +1,266 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+
+#include <memory>
+#include <set>
+
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Seeds the display proto from `node`: name, device (when non-empty),
+// execution time, requested bytes and float ops. When the node has a shape
+// with no negative dimension, the product of its dimensions is recorded as
+// the parameter count; a shape with an unknown (negative) dimension records
+// nothing and is reported on stderr.
+//
+// `node` must be non-null and must have a NodeDef, because name() reads it.
+ShowNode::ShowNode(TFNode* node) : node(node), account(true) {
+  mutable_proto()->set_name(name());
+  if (!node->device().empty()) {
+    mutable_proto()->set_device(node->device());
+  }
+  mutable_proto()->set_exec_micros(node->op_exec_micros());
+  mutable_proto()->set_requested_bytes(node->requested_byptes());
+  mutable_proto()->set_float_ops(node->float_ops());
+
+  if (!node->shape().empty()) {
+    int64 params = 1;
+    bool complete_shape = true;
+    for (int64 d : node->shape()) {
+      // Sometimes parameters could be <0 when a dim is unknown.
+      if (d < 0) {
+        complete_shape = false;
+        break;
+      }
+      params *= d;
+    }
+    if (complete_shape) {
+      mutable_proto()->set_parameters(proto_.parameters() + params);
+    } else {
+      // End the line so consecutive diagnostics do not run together.
+      fprintf(stderr, "Incomplete shape.\n");
+    }
+  }
+}
+
+// Returns the display line for this node: the bare name when no column is
+// selected, otherwise "name (meta)" with the selected columns.
+string ShowNode::Format(const Options& opts) {
+  return opts.select.empty()
+             ? name()
+             : strings::Printf("%s (%s)", name().c_str(),
+                               FormatMeta(opts).c_str());
+}
+
+// Builds the ", "-separated column list for this node. Columns appear in a
+// fixed order (params, float ops, bytes, micros, device, op types) and only
+// when their name is in opts.select. Numeric columns read "self/total",
+// where self is replaced by "--" for a node that is not accounted.
+string ShowNode::FormatMeta(const Options& opts) {
+  const auto selected = [&opts](const char* column) -> bool {
+    return opts.select.find(column) != opts.select.end();
+  };
+
+  std::vector<string> columns;
+
+  // kShown[2]: "params", preceded by the shape when there is one.
+  if (selected(kShown[2])) {
+    const string shape = FormatShapes(node->shape());
+    if (!shape.empty()) columns.push_back(shape);
+    const string total = FormatNumber(proto().total_parameters()) + " params";
+    columns.push_back(account
+                          ? FormatNumber(proto().parameters()) + "/" + total
+                          : "--/" + total);
+  }
+
+  // kShown[3]: "float_ops".
+  if (selected(kShown[3])) {
+    const string total = FormatNumber(proto().total_float_ops()) + " flops";
+    columns.push_back(account
+                          ? FormatNumber(proto().float_ops()) + "/" + total
+                          : "--/" + total);
+  }
+
+  // kShown[0]: "bytes".
+  if (selected(kShown[0])) {
+    const string total = FormatMemory(proto().total_requested_bytes());
+    columns.push_back(account
+                          ? FormatMemory(proto().requested_bytes()) + "/" + total
+                          : "--/" + total);
+  }
+
+  // kShown[1]: "micros".
+  if (selected(kShown[1])) {
+    const string total = FormatTime(proto().total_exec_micros());
+    columns.push_back(account
+                          ? FormatTime(proto().exec_micros()) + "/" + total
+                          : "--/" + total);
+  }
+
+  // kShown[6]: "device", omitted when the node has none.
+  if (selected(kShown[6]) && !proto().device().empty()) {
+    columns.push_back(proto().device());
+  }
+
+  // kShown[7]: "op_types", joined with "|".
+  if (selected(kShown[7])) {
+    std::set<string> types = node->op_types();
+    // Device is considered a type.
+    if (!proto().device().empty()) types.insert(proto().device());
+    columns.push_back(str_util::Join(types, "|"));
+  }
+
+  return str_util::Join(columns, ", ");
+}
+
+// Returns a writable pointer to the display proto owned by this node.
+TFProfNode* ShowNode::mutable_proto() { return &proto_; }
+
+// Returns a read-only reference to the display proto owned by this node.
+const TFProfNode& ShowNode::proto() const { return proto_; }
+
+// Adds the total_* statistics of `node` to the totals of this node.
+void ShowNode::AggregateTotalStats(ShowNode* node) {
+  const TFProfNode* other = node->mutable_proto();
+  TFProfNode* mine = mutable_proto();
+
+  mine->set_total_exec_micros(mine->total_exec_micros() +
+                              other->total_exec_micros());
+  mine->set_total_requested_bytes(mine->total_requested_bytes() +
+                                  other->total_requested_bytes());
+  mine->set_total_parameters(mine->total_parameters() +
+                             other->total_parameters());
+  mine->set_total_float_ops(mine->total_float_ops() +
+                            other->total_float_ops());
+}
+
+// Adds this node's own statistics to its total_* counters.
+void ShowNode::AddSelfToTotalStats() {
+  TFProfNode* pb = mutable_proto();
+
+  pb->set_total_exec_micros(pb->total_exec_micros() + pb->exec_micros());
+  pb->set_total_requested_bytes(pb->total_requested_bytes() +
+                                pb->requested_bytes());
+  pb->set_total_parameters(pb->total_parameters() + pb->parameters());
+  pb->set_total_float_ops(pb->total_float_ops() + pb->float_ops());
+}
+
+// Sets every total_* counter back to zero; the node's own statistics are
+// left untouched.
+void ShowNode::ResetTotalStats() {
+  TFProfNode* pb = mutable_proto();
+  pb->set_total_exec_micros(0);
+  pb->set_total_requested_bytes(0);
+  pb->set_total_parameters(0);
+  pb->set_total_float_ops(0);
+}
+
+// Runs the view for `opts`, emits the formatted report and returns the proto
+// of the root node. The report goes to stdout when opts.dump_to_file is
+// empty, otherwise to that file; a failed write is reported on stderr and
+// does not change the return value.
+const TFProfNode& TFShow::Show(const Options& opts) {
+  const ShowNode* root = ShowInternal(opts);
+  const auto& report = root->formatted_str;
+
+  if (!opts.dump_to_file.empty()) {
+    const Status written =
+        WriteStringToFile(Env::Default(), opts.dump_to_file, report);
+    if (!written.ok()) {
+      fprintf(stderr, "%s\n", written.ToString().c_str());
+    }
+  } else {
+    printf("%s", report.c_str());
+    fflush(stdout);
+  }
+  return root->proto();
+}
+
+// Reads the tensor called `name` from the checkpoint into `*tensor`.
+// Returns false, leaving `*tensor` untouched, for the synthetic root name,
+// when no checkpoint reader or output pointer is available, or when the
+// reader reports an error (which is printed to stderr).
+bool TFShow::LookUpCheckPoint(const string& name,
+                              std::unique_ptr<TFProfTensor>* tensor) {
+  const bool can_look_up = name != kTFProfRoot && ckpt_reader_ && tensor;
+  if (!can_look_up) return false;
+
+  std::unique_ptr<Tensor> out_tensor;
+  TF_Status* status = TF_NewStatus();
+  ckpt_reader_->GetTensor(name, &out_tensor, status);
+
+  const bool ok = TF_GetCode(status) == TF_OK;
+  if (ok) {
+    tensor->reset(new TFProfTensor(std::move(out_tensor)));
+  } else {
+    fprintf(stderr, "%s\n", TF_Message(status));
+  }
+  // Single release point for the status object on both outcomes.
+  TF_DeleteStatus(status);
+  return ok;
+}
+
+// Decides whether `node`, located at `depth`, is displayed.
+// The synthetic root is always shown. Any other node must be accounted, meet
+// every min_* threshold, be no deeper than max_depth, pass
+// ShouldShowIfExtra(), have a device accepted by device_regexes and a name
+// accepted by show_name_regexes, and match none of hide_name_regexes.
+bool TFShow::ShouldShow(ShowNode* node, const Options& opts, int depth) {
+  // Always show kTFProfRoot.
+  if (node->name() == kTFProfRoot) return true;
+  if (!node->account) return false;
+
+  const TFProfNode& stats = node->proto();
+  const bool below_threshold = stats.requested_bytes() < opts.min_bytes ||
+                               stats.exec_micros() < opts.min_micros ||
+                               stats.parameters() < opts.min_params ||
+                               stats.float_ops() < opts.min_float_ops;
+  if (below_threshold || depth > opts.max_depth ||
+      !ShouldShowIfExtra(node, opts, depth)) {
+    return false;
+  }
+
+  // True when `text` fully matches one of `regexes`; the single pattern ".*"
+  // accepts everything without running the regex.
+  const auto accepts = [](const std::vector<string>& regexes,
+                          const string& text) -> bool {
+    if (regexes.size() == 1 && regexes[0] == ".*") return true;
+    for (const string& regex : regexes) {
+      if (RE2::FullMatch(text, regex)) return true;
+    }
+    return false;
+  };
+
+  // Don't show if device_regexes don't cover it.
+  if (!accepts(opts.device_regexes, stats.device())) return false;
+  // Don't show if show_name_regexes don't cover it.
+  if (!accepts(opts.show_name_regexes, node->name())) return false;
+
+  // Don't show if hide_name_regexes cover it.
+  for (const string& hidden : opts.hide_name_regexes) {
+    if (RE2::FullMatch(node->name(), hidden)) return false;
+  }
+  return true;
+}
+
+// Returns true when the name of `node` fully matches any of `regexes`.
+bool TFShow::ShouldTrim(ShowNode* node, const std::vector<string>& regexes) {
+  return std::any_of(regexes.begin(), regexes.end(),
+                     [node](const string& regex) {
+                       return RE2::FullMatch(node->name(), regex);
+                     });
+}
+
+// Returns true when `node` takes part in accounting: either
+// account_type_regexes is exactly {".*"}, or one of its regexes fully
+// matches one of the node's op types or the node's device.
+bool TFShow::ShouldAccount(ShowNode* node, const Options& opts) {
+  const std::vector<string>& regexes = opts.account_type_regexes;
+  if (regexes.size() == 1 && regexes[0] == ".*") return true;
+
+  for (const string& regex : regexes) {
+    const std::set<string>& types = node->node->op_types();
+    const bool type_matches =
+        std::any_of(types.begin(), types.end(), [&regex](const string& type) {
+          return RE2::FullMatch(type, regex);
+        });
+    // The device is treated like one more type.
+    if (type_matches || RE2::FullMatch(node->proto().device(), regex)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
new file mode 100644
index 00000000000..4b5d6592e5a
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
@@ -0,0 +1,127 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Parent class and utilities for tfprof_graph and tfprof_scope.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace tfprof {
+class ShowNode {  // Wraps a TFNode together with its TFProfNode proto.
+ public:
+  explicit ShowNode(TFNode* node);
+  virtual ~ShowNode() {}
+  // Name of the wrapped graph node, read from its NodeDef.
+  const string& name() const { return node->node_def()->name(); }
+  TFProfNode* mutable_proto();
+  const TFProfNode& proto() const;
+  // NOTE(review): presumably renders this node's stats as text -- confirm.
+  string Format(const Options& opts);
+
+  string FormatMeta(const Options& opts);
+  // Underlying tfprof node; raw pointer, presumably not owned -- confirm.
+  TFNode* node;
+  bool account;
+  string formatted_str;
+
+ protected:
+  void AggregateTotalStats(ShowNode* node);
+
+  void AddSelfToTotalStats();
+
+  void ResetTotalStats();
+  // Presumably the proto exposed by proto()/mutable_proto() -- confirm.
+  TFProfNode proto_;
+};
+
+class TFShow {  // Parent class of the tfprof views.
+ public:
+  explicit TFShow(checkpoint::CheckpointReader* ckpt_reader)
+      : ckpt_reader_(ckpt_reader) {}
+  virtual ~TFShow() {}
+  virtual void AddNode(TFNode* node) = 0;
+  virtual void Build() = 0;
+  const TFProfNode& Show(const Options& opts);
+
+ protected:
+  virtual const ShowNode* ShowInternal(const Options& opts) = 0;
+
+  bool LookUpCheckPoint(const string& name,
+                        std::unique_ptr<TFProfTensor>* tensor);
+
+  // Overridden by subclass if extra requirements need to be met.
+  virtual bool ShouldShowIfExtra(ShowNode* node, const Options& opts,
+                                 int depth) {
+    return true;
+  }
+
+  bool ShouldShow(ShowNode* node, const Options& opts, int depth);
+  bool ShouldTrim(ShowNode* node, const std::vector<string>& regexes);
+  bool ShouldAccount(ShowNode* node, const Options& opts);
+
+  // Returns `nodes` sorted by opts.order_by (unsorted if it or `nodes` is
+  // empty): kTFProfRoot first, then ascending name or descending total_* stat.
+  template <typename T>
+  std::vector<T*> SortNodes(const std::vector<T*>& nodes, const Options& opts) {
+    if (opts.order_by.empty() || nodes.empty()) return nodes;
+    std::vector<T*> sorted_nodes = nodes;
+    std::sort(sorted_nodes.begin(), sorted_nodes.end(), [&opts](const T* n1,
+                                                                const T* n2) {
+      // The root sorts first, but must not compare less than itself:
+      // std::sort requires a strict weak ordering (comp(x, x) == false).
+      if (n1->name() == kTFProfRoot) return n2->name() != kTFProfRoot;
+      if (n2->name() == kTFProfRoot) return false;
+      bool name_cmp = n1->name() < n2->name();
+      if (opts.order_by == kOrderBy[0]) {
+        return name_cmp;
+      } else if (opts.order_by == kOrderBy[1]) {
+        return n1->proto().total_requested_bytes() >
+               n2->proto().total_requested_bytes();
+      } else if (opts.order_by == kOrderBy[2]) {
+        return n1->proto().total_exec_micros() >
+               n2->proto().total_exec_micros();
+      } else if (opts.order_by == kOrderBy[3]) {
+        return n1->proto().total_parameters() > n2->proto().total_parameters();
+      } else if (opts.order_by == kOrderBy[4]) {
+        return n1->proto().total_float_ops() > n2->proto().total_float_ops();
+      }
+      return name_cmp;
+    });
+    return sorted_nodes;
+  }
+
+  checkpoint::CheckpointReader* ckpt_reader_;  // Not deleted by this class.
+};
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
new file mode 100644
index 00000000000..81396e31cca
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
@@ -0,0 +1,92 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+#include <utility>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+class TFProfShowTest : public ::testing::Test {
+ protected:
+  TFProfShowTest() {  // Loads testdata from the source tree into tf_stats_.
+    string graph_path = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
+    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+    // Unlike the text GraphDef, RunMetadata is read as a binary proto.
+    std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
+        new tensorflow::RunMetadata());
+    string run_meta_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+    TF_CHECK_OK(
+        ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
+    // The OpLog is read as a binary proto as well.
+    std::unique_ptr<OpLog> op_log_pb(new OpLog());
+    string op_log_path = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
+    // The checkpoint reader reports failure via TF_Status, checked below.
+    string ckpt_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    TF_Status* status = TF_NewStatus();
+    std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
+        new checkpoint::CheckpointReader(ckpt_path, status));
+    CHECK(TF_GetCode(status) == TF_OK);
+    TF_DeleteStatus(status);
+    // Ownership of all four inputs is handed over to TFStats.
+    tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
+                                std::move(op_log_pb), std::move(ckpt_reader)));
+  }
+
+  std::unique_ptr<TFStats> tf_stats_;
+};
+
+TEST_F(TFProfShowTest, DumpScopeMode) {
+  string dump_file = io::JoinPath(testing::TmpDir(), "dump");
+  Options opts(5, 0, 0, 0, 0, {".*"}, "name",
+               {"Variable"},  // account_type_regexes
+               {".*"}, {""}, {".*"}, {""}, false,
+               {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+               false, dump_file);
+  tf_stats_->PrintGraph("scope", opts);
+  // Compare the text found in dump_file against the golden output.
+  string dump_str;
+  TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
+  EXPECT_EQ(
+      "_TFProfRoot (--/450 params, --/0 flops, --/1.80KB, --/0us)\n  DW "
+      "(3x3x3x6, 162/162 params, 0/0 flops, 648B/648B, 0us/0us)\n  DW2 "
+      "(2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/1.15KB, 0us/0us)\n",
+      dump_str);
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
new file mode 100644
index 00000000000..54fce4772bd
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
@@ -0,0 +1,130 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+#include <stdio.h>
+#include <utility>
+
+#include "tensorflow/core/framework/step_stats.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+TFStats::TFStats(std::unique_ptr<GraphDef> graph,
+                 std::unique_ptr<RunMetadata> run_meta,
+                 std::unique_ptr<OpLog> op_log,
+                 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
+    : graph_(std::move(graph)),
+      run_meta_(std::move(run_meta)),
+      op_log_(std::move(op_log)),
+      ckpt_reader_(std::move(ckpt_reader)) {
+  // Only the GraphDef is mandatory; null optional inputs are skipped.
+  CHECK(graph_) << "Must at least have GraphDef";
+
+  printf("Parsing GraphDef...\n");
+  ParseGraph();
+  if (run_meta_) {
+    printf("Parsing RunMetadata...\n");
+    ParseRunMeta();
+  }
+  if (op_log_) {
+    printf("Parsing OpLog...\n");
+    ParseOpLog();
+  }
+  if (ckpt_reader_) {
+    printf("Parsing Checkpoint...\n");
+    // Tag every graph node that has a same-named checkpoint variable.
+    for (const auto& var_and_shape : ckpt_reader_->GetVariableToShapeMap()) {
+      auto found = nodes_map_.find(var_and_shape.first);
+      if (found == nodes_map_.end()) continue;
+      found->second.AddOpType("_checkpoint_variables");
+    }
+  }
+
+  printf("Preparing Views...\n");
+  scope_view_.reset(new TFScope(ckpt_reader_.get()));
+  graph_view_.reset(new TFGraph(ckpt_reader_.get()));
+  for (auto& name_and_node : nodes_map_) {
+    scope_view_->AddNode(&name_and_node.second);
+    graph_view_->AddNode(&name_and_node.second);
+  }
+  scope_view_->Build();
+  graph_view_->Build();
+}
+
+const TFProfNode& TFStats::PrintGraph(const string& cmd, const Options& opts) {
+  // kCmds[0] selects the scope view and kCmds[1] the graph view.
+  if (cmd == kCmds[0]) return scope_view_->Show(opts);
+  if (cmd == kCmds[1]) return graph_view_->Show(opts);
+
+  // Any other command is reported on stderr and yields the empty
+  // placeholder node so callers still receive a valid reference.
+  fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
+  return empty_node_;
+}
+
+// Builds nodes_map_ from the GraphDef and links each node to its inputs.
+void TFStats::ParseGraph() {
+  for (const NodeDef& node : graph_->node()) {
+    CHECK(nodes_map_.find(node.name()) == nodes_map_.end());
+    nodes_map_[node.name()] = TFNode(&node);
+  }
+  for (auto it = nodes_map_.begin(); it != nodes_map_.end(); ++it) {
+    const NodeDef* node_def = it->second.node_def();
+    for (string node_input : node_def->input()) {
+      // Input name format can be "^node:src_output". Strip the ":src_output"
+      // suffix and the "^" prefix so that only the node name is looked up.
+      auto prefix_pos = node_input.find(":");
+      if (prefix_pos != node_input.npos) {
+        // substr() returns a new string; it must be assigned back.
+        node_input = node_input.substr(0, prefix_pos);
+      }
+      if (node_input.substr(0, 1) == "^") node_input = node_input.substr(1);
+      auto input_node = nodes_map_.find(node_input);
+      // Inputs that do not name a node of this graph are skipped.
+      if (input_node == nodes_map_.end()) continue;
+      it->second.AddInput(&input_node->second);
+    }
+  }
+}
+
+void TFStats::ParseOpLog() {
+  // Copy each log entry's op types and float-op count onto the graph node
+  // of the same name; entries naming unknown nodes are ignored.
+  for (const OpLogEntry& log_entry : op_log_->log_entries()) {
+    auto found = nodes_map_.find(log_entry.name());
+    if (found != nodes_map_.end()) {
+      TFNode& target = found->second;
+      for (const string& op_type : log_entry.types()) target.AddOpType(op_type);
+      if (log_entry.float_ops()) target.AddFloatOps(log_entry.float_ops());
+    }
+  }
+}
+
+void TFStats::ParseRunMeta() {
+  // Nothing to attach when the RunMetadata carries no step stats.
+  if (!run_meta_->has_step_stats()) return;
+  const auto& step_stats = run_meta_->step_stats();
+  for (const auto& device_stats : step_stats.dev_stats()) {
+    for (const auto& stat : device_stats.node_stats()) {
+      auto found = nodes_map_.find(stat.node_name());
+      // Stats for nodes that are not part of the graph are dropped.
+      if (found == nodes_map_.end()) continue;
+      found->second.AddStepStat(device_stats.device(), &stat);
+    }
+  }
+}
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
new file mode 100644
index 00000000000..1246a2fae2f
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
@@ -0,0 +1,82 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Core API of tfprof.
+// 1. Load protos generated from a tensorflow model.
+// 2. Build in-memory representations of the tensorflow model, annotate the
+//    representation with various stats, such as params, times, memory, etc.
+// 3. Accept command and options to selectively aggregate stats for analysis
+//    and print out the results.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/step_stats.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+class TFStats {  // Owns the parsed inputs; only `graph` is required.
+ public:
+  TFStats(std::unique_ptr<GraphDef> graph,
+          std::unique_ptr<RunMetadata> run_meta, std::unique_ptr<OpLog> op_log,
+          std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader);
+  ~TFStats() {}
+
+  // Prints the results to stdout and returns the printed output in a proto.
+  // `cmd` selects the view; an unknown `cmd` returns an empty proto.
+  const TFProfNode& PrintGraph(const string& cmd, const Options& opts);
+
+ private:
+  void ParseGraph();  // Fills nodes_map_ from graph_ and links node inputs.
+
+  void ParseOpLog();  // Adds op types and float ops from op_log_.
+
+  void ParseRunMeta();  // Adds per-device step stats from run_meta_.
+
+  std::unique_ptr<TFScope> scope_view_;
+  std::unique_ptr<TFGraph> graph_view_;
+  std::unique_ptr<GraphDef> graph_;
+  std::unique_ptr<RunMetadata> run_meta_;
+  std::unique_ptr<OpLog> op_log_;
+  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader_;
+  // Store TFNode instead of TFNode* to avoid large number of dynamic alloc.
+  std::map<string, TFNode> nodes_map_;
+  TFProfNode empty_node_;  // Returned by PrintGraph for an unknown command.
+};
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
new file mode 100644
index 00000000000..06b288fdce7
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
@@ -0,0 +1,194 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+#include <utility>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+class TFProfStatsTest : public ::testing::Test {
+ protected:
+  TFProfStatsTest() {  // Loads testdata from the source tree into tf_stats_.
+    string graph_path = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
+    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+    // Unlike the text GraphDef, RunMetadata is read as a binary proto.
+    std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
+        new tensorflow::RunMetadata());
+    string run_meta_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+    TF_CHECK_OK(
+        ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
+    // The OpLog is read as a binary proto as well.
+    std::unique_ptr<OpLog> op_log_pb(new OpLog());
+    string op_log_path = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
+    // The checkpoint reader reports failure via TF_Status, checked below.
+    string ckpt_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    TF_Status* status = TF_NewStatus();
+    std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
+        new checkpoint::CheckpointReader(ckpt_path, status));
+    CHECK(TF_GetCode(status) == TF_OK);
+    TF_DeleteStatus(status);
+    // Ownership of all four inputs is handed over to TFStats.
+    tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
+                                std::move(op_log_pb), std::move(ckpt_reader)));
+  }
+
+  std::unique_ptr<TFStats> tf_stats_;
+};
+
+TEST_F(TFProfStatsTest, CustomOpType) {
+  Options opts(3, 0, 0, 0, 0, {".*"}, "name",
+               {kTrainableVarType},  // account_type_regexes
+               {".*"}, {""}, {".*"}, {""}, false,
+               {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+               false);
+  const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
+  // Golden output: the root with children DW and DW2.
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 1800\ntotal_parameters: "
+      "450\nchildren {\n  name: \"DW\"\n  exec_micros: 0\n  requested_bytes: "
+      "648\n  parameters: 162\n  total_exec_micros: 0\n  "
+      "total_requested_bytes: 648\n  total_parameters: 162\n  device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 0\n  "
+      "total_float_ops: 0\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 0\n  "
+      "requested_bytes: 1152\n  parameters: 288\n  total_exec_micros: 0\n  "
+      "total_requested_bytes: 1152\n  total_parameters: 288\n  device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 0\n  "
+      "total_float_ops: 0\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());
+}
+
+TEST_F(TFProfStatsTest, CheckPointOpType) {
+  Options opts(
+      3, 0, 0, 0, 0, {".*"}, "name", {kCkptVarType},  // account_type_regexes
+      {".*"}, {""}, {".*"}, {""}, false,
+      {"params", "bytes", "micros", "float_ops", "num_hidden_ops"}, false);
+  const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
+  // Golden output: the root with children DW and DW2.
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 1800\ntotal_parameters: "
+      "450\nchildren {\n  name: \"DW\"\n  exec_micros: 0\n  requested_bytes: "
+      "648\n  parameters: 162\n  total_exec_micros: 0\n  "
+      "total_requested_bytes: 648\n  total_parameters: 162\n  device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 0\n  "
+      "total_float_ops: 0\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 0\n  "
+      "requested_bytes: 1152\n  parameters: 288\n  total_exec_micros: 0\n  "
+      "total_requested_bytes: 1152\n  total_parameters: 288\n  device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 0\n  "
+      "total_float_ops: 0\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());
+}
+
+TEST_F(TFProfStatsTest, TestGraph) {
+  Options opts(100, 0, 10000, 0, 0, {".*"}, "name", {".*"},
+               {"cost.*"},  // start_name_regexes
+               {""}, {".*"}, {""}, false,
+               {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+               false);
+  const TFProfNode& root = tf_stats_->PrintGraph("graph", opts);
+  // Golden output: a root node with all-zero stats and no children.
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: 0\ninputs: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
+      "0\ntotal_inputs: 0\nfloat_ops: 0\ntotal_float_ops: 0\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());
+}
+
+TEST_F(TFProfStatsTest, TestFloatOps) {
+  Options opts(10, 0, 0, 0, 1, {".*"}, "name", {".*"}, {".*"}, {""}, {".*"},
+               {""}, false, {"float_ops"}, false);
+  const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
+  // Golden output: Conv2D and Conv2D_1 under the root, with float_ops set.
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 11\ntotal_requested_bytes: "
+      "5280\ntotal_parameters: 450\nchildren {\n  name: \"Conv2D\"\n  "
+      "exec_micros: 0\n  requested_bytes: 432\n  total_exec_micros: 0\n  "
+      "total_requested_bytes: 432\n  total_parameters: 0\n  device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 5832\n  "
+      "total_float_ops: 5832\n}\nchildren {\n  name: \"Conv2D_1\"\n  "
+      "exec_micros: 10\n  requested_bytes: 384\n  total_exec_micros: 10\n  "
+      "total_requested_bytes: 384\n  total_parameters: 0\n  device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 4608\n  "
+      "total_float_ops: 4608\n}\nfloat_ops: 0\ntotal_float_ops: 10440\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());
+}
+
+TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
+  Options opts(100, 0, 0, 0, 0, {".*"}, "name", {".*"}, {".*"}, {""},
+               {"unit_2_1.*DW"},  // show_name_regexes.
+               {""}, true,        // account_displayed_op_only.
+               {"params"}, false);
+  const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
+  // Golden output: a root node with all-zero stats and no children.
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
+      "0\nfloat_ops: 0\ntotal_float_ops: 0\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());
+}
+
+TEST_F(TFProfStatsTest, TestShowTensorValue) {
+  Options opts(10, 0, 0, 0, 0, {".*"}, "name", {".*"}, {".*"}, {""},
+               {"unit_1_0.*gamma"}, {""}, false,
+               {"tensor_value"},  // Show tensor value from checkpoint.
+               false);
+  const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
+  TFProfNode expected;  // Golden root: totals only, no children.
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 11\ntotal_requested_bytes: "
+      "5280\ntotal_parameters: 450\nfloat_ops: 0\ntotal_float_ops: 10440\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
new file mode 100644
index 00000000000..c21626919fa
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
@@ -0,0 +1,78 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+
+namespace tensorflow {
+namespace tfprof {
+void TFProfTensor::Display(string* formatted_str,
+                           TFProfTensorProto* tfprof_tensor_pb) {
+  // Both outputs are optional; a null pointer skips that output.
+  if (formatted_str != nullptr) {
+    // Text at or past the display limit gets an "omitted" marker appended.
+    const bool too_long = formatted_str_.length() >= kTFProfTenosrMaxDisplayLen;
+    *formatted_str =
+        too_long
+            ? strings::StrCat(formatted_str_, "...omitted from display\n\n")
+            : formatted_str_;
+  }
+  if (tfprof_tensor_pb == nullptr) return;
+  tfprof_tensor_pb->MergeFrom(tfprof_tensor_pb_);
+}
+
+void TFProfTensor::Build() {
+  const DataType dtype = tensor_->dtype();
+  tfprof_tensor_pb_.set_dtype(dtype);
+  // Floats are widened to double and integers to int64 before output.
+  switch (dtype) {
+    case DataType::DT_FLOAT: {
+      std::vector<double> widened;
+      GetValueVec<float, double>(&widened);
+      BuildOutput<double>(0, 0, widened, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_DOUBLE: {
+      std::vector<double> widened;
+      GetValueVec<double, double>(&widened);
+      BuildOutput<double>(0, 0, widened, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_INT32: {
+      std::vector<int64> widened;
+      GetValueVec<int32, int64>(&widened);
+      BuildOutput<int64>(0, 0, widened, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_INT64: {
+      std::vector<int64> widened;
+      GetValueVec<int64, int64>(&widened);
+      BuildOutput<int64>(0, 0, widened, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_STRING: {
+      // Not supported by TensorFlow.
+      std::vector<string> strings_vec;
+      GetValueVec<string, string>(&strings_vec);
+      BuildOutput<string>(0, 0, strings_vec, &tfprof_tensor_pb_);
+      break;
+    }
+    default:
+      fprintf(stderr, "Not Supported type %d\n", dtype);
+      break;
+  }
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
new file mode 100644
index 00000000000..471a1db4172
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
@@ -0,0 +1,120 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// TFProf representation of a Tensor's value.
+// 1. Multi-dimensional tensors are flattened row-major and stored in proto.
+// 2. Integers are up-cast to int64 and floats to double. Strings are not
+//    supported by the TensorFlow CheckpointReader library, though they are
+//    handled by the code here.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+
+#include <typeinfo>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+class TFProfTensor {
+ public:
+  explicit TFProfTensor(std::unique_ptr<Tensor> tensor)
+      : tensor_(std::move(tensor)) {
+    Build();  // Eagerly fill the proto and the display string.
+  }
+
+  // If pointers are provided, they are filled by the method.
+  void Display(string* formatted_str, TFProfTensorProto* tfprof_tensor_pb);
+
+ private:
+  // Max length of tensor value displayed to CLI.
+  const int64 kTFProfTenosrMaxDisplayLen = 10000;
+  // Max length after which a latency warning will be printed.
+  const int64 kTFProfTensorMaxWarnLen = 100000;
+
+  void Build();
+
+  // Records one element in `dim` and appends it to the display string.
+  template <typename T>
+  void AppendValue(const T& value, TFProfTensorProto* dim) {
+    std::ostringstream sstream;
+    sstream << value;  // Text form lets every branch compile for any T.
+    if (typeid(value) == typeid(double)) {
+      double double_val;
+      CHECK(strings::safe_strtod(sstream.str().c_str(), &double_val));
+      dim->add_value_double(double_val);
+      formatted_str_ += strings::Printf("%.2f ", double_val);
+    } else if (typeid(value) == typeid(int64)) {
+      int64 int64_val;
+      CHECK(strings::safe_strto64(sstream.str().c_str(), &int64_val));
+      dim->add_value_int64(int64_val);
+      formatted_str_ += strings::Printf("%lld ", int64_val);
+    } else if (typeid(value) == typeid(string)) {
+      dim->add_value_str(sstream.str());
+      formatted_str_ += "'" + sstream.str() + "' ";
+    } else {
+      CHECK(false) << "Unsupported type: " << typeid(value).name();
+    }
+  }
+
+  // Emits dimension `depth`, reading `values` (assumed flattened in row-major
+  // order) from index `start`, and returns the index of the next unread value.
+  // TODO(xpan): Further verifying the row-major assumption.
+  template <typename T>
+  int64 BuildOutput(int64 start, int depth, const std::vector<T>& values,
+                    TFProfTensorProto* dim) {
+    formatted_str_ += "[";
+    int64 nstart = start;
+    const bool scalar = tensor_->dims() == 0;  // Rank 0: one value, no dims.
+    const int64 count = scalar ? 1 : tensor_->dim_size(depth);
+    for (int64 i = 0; i < count; i++) {
+      if (scalar || depth == tensor_->dims() - 1) {
+        AppendValue(values[nstart++], dim);  // Last dimension: pull values.
+      } else {
+        nstart = BuildOutput<T>(nstart, depth + 1, values, dim);  // Drill down.
+      }
+    }
+    if (formatted_str_.length() > kTFProfTenosrMaxDisplayLen) {
+      formatted_str_ = formatted_str_.substr(0, kTFProfTenosrMaxDisplayLen);
+    }
+    formatted_str_ += "],\n";
+    return nstart;
+  }
+
+  template <typename T, typename U>
+  void GetValueVec(std::vector<U>* value_vec) {  // Appends all elements as U.
+    // TODO(xpan): Address the huge tensor problem.
+    if (tensor_->NumElements() > kTFProfTensorMaxWarnLen) {
+      fprintf(stderr, "Showing huge tensor, the tool might halt...\n");
+    }
+    auto values = tensor_->flat<T>();
+    for (int64 i = 0; i < tensor_->NumElements(); i++) {
+      value_vec->push_back(static_cast<U>(values(i)));
+    }
+  }
+
+  TFProfTensorProto tfprof_tensor_pb_;
+  std::unique_ptr<Tensor> tensor_;
+  string formatted_str_;
+};
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
new file mode 100644
index 00000000000..d3f1e3c7b70
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
@@ -0,0 +1,306 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+class TFProfTensorTest : public ::testing::Test {
+ protected:
+  // Builds the TFStats under test from the testdata graph and checkpoint;
+  // run metadata and op log are intentionally left null.
+  TFProfTensorTest() {
+    const string src_root = testing::TensorFlowSrcRoot();
+    const string graph_file = io::JoinPath(
+        src_root, "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    const string ckpt_file = io::JoinPath(
+        src_root, "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+
+    std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef());
+    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_file, graph.get()));
+
+    TF_Status* reader_status = TF_NewStatus();
+    std::unique_ptr<checkpoint::CheckpointReader> reader(
+        new checkpoint::CheckpointReader(ckpt_file, reader_status));
+    CHECK(TF_GetCode(reader_status) == TF_OK);
+    TF_DeleteStatus(reader_status);
+    std::unique_ptr<tensorflow::RunMetadata> run_meta;
+    std::unique_ptr<OpLog> op_log;
+    tf_stats_.reset(new TFStats(std::move(graph), std::move(run_meta),
+                                std::move(op_log), std::move(reader)));
+  }
+
+  std::unique_ptr<TFStats> tf_stats_;
+};
+
+TEST_F(TFProfTensorTest, Basics) {  // Golden-output test for tensor values.
+  Options opts(3, 0, 0, 0, 0, {".*"}, "name", {"Variable"}, {".*"}, {""},
+               {".*"}, {""}, false, {"tensor_value"},  // Show tensor values.
+               false);
+  const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);
+
+  TFProfNode expected;  // Golden proto, parsed from the text below.
+  CHECK(protobuf::TextFormat::ParseFromString(
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
+      "450\nchildren {\n  name: \"DW\"\n  exec_micros: 0\n  requested_bytes: "
+      "0\n  parameters: 162\n  total_exec_micros: 0\n  total_requested_bytes: "
+      "0\n  total_parameters: 162\n  float_ops: 0\n  total_float_ops: 0\n  "
+      "tensor_value {\n    dtype: DT_FLOAT\n    value_double: -0.00117808\n    "
+      "value_double: -0.000709941\n    value_double: -0.00174816\n    "
+      "value_double: -0.000495372\n    value_double: 0.000243039\n    "
+      "value_double: -0.000126313\n    value_double: -0.000663929\n    "
+      "value_double: -0.000495198\n    value_double: -0.000893934\n    "
+      "value_double: -0.00179659\n    value_double: 0.000408874\n    "
+      "value_double: -0.00120166\n    value_double: -0.00109484\n    "
+      "value_double: -0.000200362\n    value_double: 0.000726721\n    "
+      "value_double: -0.000277568\n    value_double: 0.00180584\n    "
+      "value_double: 0.000997271\n    value_double: -0.00185987\n    "
+      "value_double: -0.00113401\n    value_double: -0.000528852\n    "
+      "value_double: -0.000197412\n    value_double: 1.32871e-05\n    "
+      "value_double: -0.000285896\n    value_double: -0.000428898\n    "
+      "value_double: -0.000424633\n    value_double: 2.15488e-05\n    "
+      "value_double: 0.00149753\n    value_double: -0.000884576\n    "
+      "value_double: -0.0013795\n    value_double: -0.000650125\n    "
+      "value_double: 0.00191612\n    value_double: 4.71838e-05\n    "
+      "value_double: 0.000400201\n    value_double: 0.00239555\n    "
+      "value_double: -0.00177706\n    value_double: -0.000781899\n    "
+      "value_double: -0.00145247\n    value_double: 0.0020025\n    "
+      "value_double: 0.000597419\n    value_double: 0.00135456\n    "
+      "value_double: 0.0015876\n    value_double: -0.000993568\n    "
+      "value_double: 0.0006509\n    value_double: -0.000894533\n    "
+      "value_double: -0.00129322\n    value_double: 0.0003859\n    "
+      "value_double: 0.000415186\n    value_double: -0.000439212\n    "
+      "value_double: 0.000442138\n    value_double: 0.00212353\n    "
+      "value_double: 0.000702953\n    value_double: 0.000713424\n    "
+      "value_double: -0.000304877\n    value_double: -9.17046e-05\n    "
+      "value_double: -0.000801103\n    value_double: 0.000304854\n    "
+      "value_double: -0.00070527\n    value_double: -0.00106408\n    "
+      "value_double: -0.000909906\n    value_double: -4.49183e-05\n    "
+      "value_double: 0.000104172\n    value_double: -0.000438067\n    "
+      "value_double: -0.000317689\n    value_double: -0.000769914\n    "
+      "value_double: -0.00157729\n    value_double: 0.000220733\n    "
+      "value_double: 0.00107268\n    value_double: -0.000186449\n    "
+      "value_double: -0.000807328\n    value_double: 0.000456308\n    "
+      "value_double: -0.000593729\n    value_double: -0.000954873\n    "
+      "value_double: -0.000268676\n    value_double: 9.06328e-05\n    "
+      "value_double: -0.000323473\n    value_double: -0.000628768\n    "
+      "value_double: 0.000664985\n    value_double: 0.0020999\n    "
+      "value_double: -0.000932228\n    value_double: -0.00203203\n    "
+      "value_double: 0.000565405\n    value_double: 0.000167899\n    "
+      "value_double: 0.00054897\n    value_double: 0.000612407\n    "
+      "value_double: -0.000619301\n    value_double: 0.00169361\n    "
+      "value_double: -0.000188057\n    value_double: 0.000267652\n    "
+      "value_double: -0.00127341\n    value_double: -0.000218836\n    "
+      "value_double: -0.000431722\n    value_double: 5.41867e-05\n    "
+      "value_double: 0.000296628\n    value_double: 0.000819415\n    "
+      "value_double: -0.000758993\n    value_double: -0.000114477\n    "
+      "value_double: 6.29219e-05\n    value_double: 0.000726988\n    "
+      "value_double: -0.00135974\n    value_double: 2.28447e-05\n    "
+      "value_double: 0.00120547\n    value_double: -0.00136907\n    "
+      "value_double: -0.00140188\n    value_double: 0.000201145\n    "
+      "value_double: -0.000774109\n    value_double: 0.000798465\n    "
+      "value_double: -0.00131861\n    value_double: 3.08996e-05\n    "
+      "value_double: -0.000637026\n    value_double: 0.00228975\n    "
+      "value_double: -0.000633757\n    value_double: -0.00116047\n    "
+      "value_double: 7.66039e-05\n    value_double: 2.09167e-06\n    "
+      "value_double: -0.000296448\n    value_double: 0.000206795\n    "
+      "value_double: 0.000674405\n    value_double: -0.000722742\n    "
+      "value_double: -9.32443e-05\n    value_double: -0.00170917\n    "
+      "value_double: -0.000505279\n    value_double: 0.000628132\n    "
+      "value_double: -0.00145929\n    value_double: 0.00106077\n    "
+      "value_double: -0.000796743\n    value_double: 0.000498275\n    "
+      "value_double: -0.0002914\n    value_double: -0.00230622\n    "
+      "value_double: -9.42872e-05\n    value_double: 0.000200359\n    "
+      "value_double: -0.00305027\n    value_double: -0.0016218\n    "
+      "value_double: 0.00137126\n    value_double: -0.00215436\n    "
+      "value_double: -0.000743827\n    value_double: -0.00090007\n    "
+      "value_double: -0.000762207\n    value_double: -0.000149951\n    "
+      "value_double: -0.0013102\n    value_double: 0.00165781\n    "
+      "value_double: 0.000343809\n    value_double: -0.000826069\n    "
+      "value_double: -4.67404e-05\n    value_double: 0.0023931\n    "
+      "value_double: 0.00165338\n    value_double: -0.00050529\n    "
+      "value_double: 0.000178771\n    value_double: -0.000858287\n    "
+      "value_double: -0.00157031\n    value_double: -0.00165846\n    "
+      "value_double: -0.000713672\n    value_double: 0.00014357\n    "
+      "value_double: 0.00203632\n    value_double: -0.0010973\n    "
+      "value_double: -9.89852e-05\n    value_double: 0.000558808\n    "
+      "value_double: 0.00087211\n    value_double: 0.000661239\n    "
+      "value_double: 0.000389605\n    value_double: 0.00060653\n    "
+      "value_double: -0.000330104\n  }\n}\nchildren {\n  name: \"DW2\"\n  "
+      "exec_micros: 0\n  requested_bytes: 0\n  parameters: 288\n  "
+      "total_exec_micros: 0\n  total_requested_bytes: 0\n  total_parameters: "
+      "288\n  float_ops: 0\n  total_float_ops: 0\n  tensor_value {\n    dtype: "
+      "DT_FLOAT\n    value_double: 0.000704577\n    value_double: "
+      "0.000127421\n    value_double: 0.00105952\n    value_double: "
+      "0.000423765\n    value_double: -0.00025461\n    value_double: "
+      "-0.000857203\n    value_double: 0.000693494\n    value_double: "
+      "0.000282214\n    value_double: 0.00106185\n    value_double: "
+      "-0.000836552\n    value_double: -0.00116766\n    value_double: "
+      "0.000733674\n    value_double: -0.000669601\n    value_double: "
+      "-0.000275175\n    value_double: -0.000428215\n    value_double: "
+      "-0.000495715\n    value_double: -0.000125887\n    value_double: "
+      "-0.000715204\n    value_double: -0.00108936\n    value_double: "
+      "0.000738267\n    value_double: 0.000376081\n    value_double: "
+      "0.00191442\n    value_double: 0.001423\n    value_double: -0.00093811\n "
+      "   value_double: -5.91421e-05\n    value_double: -0.000221507\n    "
+      "value_double: -0.000104555\n    value_double: -0.00069682\n    "
+      "value_double: -0.000278325\n    value_double: -0.00122748\n    "
+      "value_double: -0.00112411\n    value_double: -0.000440511\n    "
+      "value_double: -0.000392247\n    value_double: -0.000419606\n    "
+      "value_double: -0.00167063\n    value_double: -0.000988578\n    "
+      "value_double: -0.00040159\n    value_double: 0.00238918\n    "
+      "value_double: -0.000892898\n    value_double: -0.000875976\n    "
+      "value_double: 0.00154401\n    value_double: -0.000719911\n    "
+      "value_double: 0.000753941\n    value_double: -0.000119961\n    "
+      "value_double: -0.000305115\n    value_double: 9.97947e-05\n    "
+      "value_double: -0.00128908\n    value_double: -0.000584184\n    "
+      "value_double: -0.000734685\n    value_double: -0.00146612\n    "
+      "value_double: 0.000670802\n    value_double: 0.000924219\n    "
+      "value_double: -0.000154409\n    value_double: 0.000198231\n    "
+      "value_double: -0.000340742\n    value_double: -0.00159646\n    "
+      "value_double: -1.19382e-05\n    value_double: 0.00165203\n    "
+      "value_double: 0.0017085\n    value_double: -0.000199614\n    "
+      "value_double: 0.000529526\n    value_double: 0.000769364\n    "
+      "value_double: 0.00135369\n    value_double: 0.00132873\n    "
+      "value_double: 0.000451174\n    value_double: 0.000255218\n    "
+      "value_double: 0.00102891\n    value_double: -0.00160068\n    "
+      "value_double: 0.000324269\n    value_double: -0.000492347\n    "
+      "value_double: 0.000925301\n    value_double: 0.00281998\n    "
+      "value_double: -0.000826404\n    value_double: -0.000602903\n    "
+      "value_double: 0.00126559\n    value_double: 0.000924364\n    "
+      "value_double: -9.19827e-05\n    value_double: -5.59275e-05\n    "
+      "value_double: 0.00107971\n    value_double: -9.91756e-05\n    "
+      "value_double: 0.000864708\n    value_double: 0.00121747\n    "
+      "value_double: 0.00146338\n    value_double: 0.000186883\n    "
+      "value_double: -0.00168195\n    value_double: -0.00062029\n    "
+      "value_double: 0.000658127\n    value_double: 0.00115682\n    "
+      "value_double: -0.00178359\n    value_double: 0.000685606\n    "
+      "value_double: -0.000503373\n    value_double: -0.000312999\n    "
+      "value_double: 0.000335383\n    value_double: -1.08597e-05\n    "
+      "value_double: -8.2499e-05\n    value_double: -0.000469726\n    "
+      "value_double: -0.00170868\n    value_double: 0.000118957\n    "
+      "value_double: -0.000460736\n    value_double: -5.56372e-05\n    "
+      "value_double: -0.00110148\n    value_double: 0.00059123\n    "
+      "value_double: 0.000386339\n    value_double: -0.00139967\n    "
+      "value_double: -0.000835664\n    value_double: 0.00103421\n    "
+      "value_double: -0.00104296\n    value_double: -0.000687497\n    "
+      "value_double: 1.1338e-05\n    value_double: 0.00176484\n    "
+      "value_double: 0.000531523\n    value_double: -0.000986387\n    "
+      "value_double: -0.00114152\n    value_double: 0.000256744\n    "
+      "value_double: 0.000228425\n    value_double: 0.00116583\n    "
+      "value_double: 0.0002726\n    value_double: -0.00100828\n    "
+      "value_double: -0.000950376\n    value_double: -0.00229074\n    "
+      "value_double: -0.000348272\n    value_double: -0.000526032\n    "
+      "value_double: -0.000133703\n    value_double: 0.000310979\n    "
+      "value_double: -0.00199278\n    value_double: -0.000874469\n    "
+      "value_double: -0.000631466\n    value_double: 0.0010534\n    "
+      "value_double: 0.00134646\n    value_double: -0.00172743\n    "
+      "value_double: 0.00131031\n    value_double: -0.000697506\n    "
+      "value_double: 0.000286747\n    value_double: 0.000140759\n    "
+      "value_double: 0.000568707\n    value_double: 0.000108177\n    "
+      "value_double: -0.00207337\n    value_double: -0.00138146\n    "
+      "value_double: 0.000483162\n    value_double: -0.00167096\n    "
+      "value_double: -0.000465813\n    value_double: 0.00067724\n    "
+      "value_double: 2.08388e-05\n    value_double: -0.00203279\n    "
+      "value_double: 7.8429e-05\n    value_double: 0.00161337\n    "
+      "value_double: -0.000269005\n    value_double: 0.000217822\n    "
+      "value_double: 0.000599886\n    value_double: 0.000317549\n    "
+      "value_double: 0.00146597\n    value_double: -0.00210947\n    "
+      "value_double: -0.000823917\n    value_double: -6.83766e-05\n    "
+      "value_double: 0.000656085\n    value_double: 0.000117134\n    "
+      "value_double: -0.000390405\n    value_double: 2.39565e-05\n    "
+      "value_double: 0.00104837\n    value_double: -0.000563671\n    "
+      "value_double: 0.000634073\n    value_double: -0.000554531\n    "
+      "value_double: 0.000677971\n    value_double: -0.000596207\n    "
+      "value_double: -0.00103335\n    value_double: 0.000645199\n    "
+      "value_double: 0.00162195\n    value_double: 0.000239246\n    "
+      "value_double: 0.00113519\n    value_double: 0.000787431\n    "
+      "value_double: -0.000471688\n    value_double: -0.000216625\n    "
+      "value_double: -0.000537156\n    value_double: 0.000551816\n    "
+      "value_double: 0.00094337\n    value_double: -0.000708127\n    "
+      "value_double: 0.000956955\n    value_double: -0.000904936\n    "
+      "value_double: -0.000424413\n    value_double: 0.000106455\n    "
+      "value_double: -0.000443952\n    value_double: 0.000185436\n    "
+      "value_double: 0.000944397\n    value_double: -0.000760572\n    "
+      "value_double: 0.000560002\n    value_double: 4.09886e-05\n    "
+      "value_double: -0.00075076\n    value_double: -0.000701856\n    "
+      "value_double: -0.000234851\n    value_double: -0.000131515\n    "
+      "value_double: -0.000761718\n    value_double: -0.000267808\n    "
+      "value_double: -0.00039682\n    value_double: 0.000542953\n    "
+      "value_double: -0.000817685\n    value_double: 0.00103851\n    "
+      "value_double: -0.000427176\n    value_double: 0.000517784\n    "
+      "value_double: -0.000823552\n    value_double: -0.000742637\n    "
+      "value_double: 0.000529213\n    value_double: -0.000372805\n    "
+      "value_double: 1.85745e-05\n    value_double: 0.00139891\n    "
+      "value_double: -0.000128417\n    value_double: -0.000404316\n    "
+      "value_double: -0.000671571\n    value_double: 0.000490311\n    "
+      "value_double: -0.00118493\n    value_double: -0.000897118\n    "
+      "value_double: 0.000939601\n    value_double: 0.000376399\n    "
+      "value_double: 0.0014709\n    value_double: 0.000134806\n    "
+      "value_double: -0.000294469\n    value_double: -0.000569142\n    "
+      "value_double: 0.00127266\n    value_double: -0.00140936\n    "
+      "value_double: 0.000870083\n    value_double: 0.000287246\n    "
+      "value_double: 0.000537685\n    value_double: 0.000125569\n    "
+      "value_double: 0.000360276\n    value_double: -0.000186268\n    "
+      "value_double: 0.0011141\n    value_double: -0.000605185\n    "
+      "value_double: -0.0016281\n    value_double: -0.000552758\n    "
+      "value_double: -0.000196755\n    value_double: -0.00265188\n    "
+      "value_double: 0.000480997\n    value_double: 0.00018776\n    "
+      "value_double: -0.00199234\n    value_double: 0.000959982\n    "
+      "value_double: 0.00040334\n    value_double: -0.000693596\n    "
+      "value_double: 0.00157678\n    value_double: -0.00134499\n    "
+      "value_double: 0.00121909\n    value_double: -0.000328734\n    "
+      "value_double: 0.000148554\n    value_double: -0.000209509\n    "
+      "value_double: -0.000266303\n    value_double: -0.00134084\n    "
+      "value_double: 5.21371e-05\n    value_double: 0.0005329\n    "
+      "value_double: -0.000168858\n    value_double: -0.00074875\n    "
+      "value_double: 0.000959397\n    value_double: -0.00159476\n    "
+      "value_double: -0.000368838\n    value_double: 0.0006077\n    "
+      "value_double: -0.00117243\n    value_double: -0.00146013\n    "
+      "value_double: 0.00031519\n    value_double: -0.000167911\n    "
+      "value_double: 0.000482571\n    value_double: -0.000752268\n    "
+      "value_double: -0.00042363\n    value_double: 0.00121219\n    "
+      "value_double: -0.000208159\n    value_double: 0.000128531\n    "
+      "value_double: -0.000406308\n    value_double: -0.000242663\n    "
+      "value_double: -3.96673e-05\n    value_double: 0.00144854\n    "
+      "value_double: -0.000787328\n    value_double: -0.000401958\n    "
+      "value_double: 0.00114091\n    value_double: -0.000739546\n    "
+      "value_double: 0.000483236\n    value_double: -0.000916945\n    "
+      "value_double: -0.00129577\n    value_double: -0.00186504\n    "
+      "value_double: 0.000806804\n    value_double: -0.000152251\n    "
+      "value_double: 0.000662576\n    value_double: -0.000533236\n    "
+      "value_double: 0.00151019\n    value_double: 0.00127805\n    "
+      "value_double: 0.00115399\n    value_double: -0.00130876\n    "
+      "value_double: 2.99457e-06\n    value_double: 0.000820777\n    "
+      "value_double: 0.000878393\n    value_double: -0.000562642\n    "
+      "value_double: -0.00070442\n    value_double: -0.00066277\n  "
+      "}\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
+      &expected));
+  EXPECT_EQ(expected.DebugString(), root.DebugString());  // Compare as text.
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
new file mode 100644
index 00000000000..7610729a118
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
@@ -0,0 +1,350 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+
+#include <stdio.h>
+#include <algorithm>
+#include <memory>
+#include <set>
+
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Formats a count for display. Values below 1000 are printed verbatim;
+// larger ones are scaled and printed with two decimals and a suffix:
+// k (thousands), m (millions) or b (billions).
+string FormatNumber(int64 n) {
+  const double count = static_cast<double>(n);
+  // Test the largest unit first so each check only needs a lower bound.
+  if (n >= 1000000000) return strings::Printf("%.2fb", count / 1000000000.0);
+  if (n >= 1000000) return strings::Printf("%.2fm", count / 1000000.0);
+  if (n >= 1000) return strings::Printf("%.2fk", count / 1000.0);
+  return strings::Printf("%lld", n);
+}
+
+// Formats a duration given in microseconds using the largest fitting unit
+// (us, ms or sec). Scaled values are printed with two decimals; values
+// below 1000 are printed verbatim in microseconds.
+string FormatTime(int64 micros) {
+  const double us = static_cast<double>(micros);
+  if (micros >= 1000000) return strings::Printf("%.2fsec", us / 1000000.0);
+  if (micros >= 1000) return strings::Printf("%.2fms", us / 1000.0);
+  return strings::Printf("%lldus", micros);
+}
+
+// Formats a byte count as B, KB or MB using decimal (1000-based) units.
+// Scaled values are printed with two decimals; MB is the largest unit, so
+// anything from one million bytes upwards is expressed in MB.
+string FormatMemory(int64 bytes) {
+  const double size = static_cast<double>(bytes);
+  if (bytes >= 1000000) return strings::Printf("%.2fMB", size / 1000000.0);
+  if (bytes >= 1000) return strings::Printf("%.2fKB", size / 1000.0);
+  return strings::Printf("%lldB", bytes);
+}
+
+string FormatShapes(const std::vector<int64>& shape) {
+  return str_util::Join(shape, "x");  // Dimension sizes joined with "x".
+}
+
+string StringReplace(const string& str, const string& oldsub,
+                     const string& newsub) {
+  string result(str);  // Edited in place below; the input stays untouched.
+  RE2::GlobalReplace(&result, oldsub, newsub);  // `oldsub` is an RE2 pattern.
+  return result;
+}
+
+Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def) {
+  string text;  // Raw text-format proto read from `fname`.
+  Status read_status = ReadFileToString(env, fname, &text);
+  if (!read_status.ok()) return read_status;
+  if (!protobuf::TextFormat::ParseFromString(text, graph_def)) {
+    return errors::InvalidArgument("Cannot parse proto string.");
+  }
+  return Status();
+}
+
+namespace {
+string StripQuote(const string& s) {
+  // Strips leading/trailing quote characters; "" if nothing else remains.
+  const string::size_type start = s.find_first_not_of("\"\'");
+  const string::size_type end = s.find_last_not_of("\"\'");
+  if (start == string::npos || end == string::npos) return "";
+  return s.substr(start, end - start + 1);
+}
+
+// Builds an INVALID_ARGUMENT status naming option pieces[idx] and its value.
+tensorflow::Status ReturnError(const std::vector<string>& pieces, int idx) {
+  // The option's value, if present, is the token following its name.
+  const size_t val_idx = static_cast<size_t>(idx) + 1;
+  const string val = val_idx < pieces.size() ? pieces[val_idx] : string();
+  return tensorflow::Status(
+      tensorflow::error::INVALID_ARGUMENT,
+      strings::StrCat("Invalid option '", pieces[idx], "' value: '", val, "'"));
+}
+
+bool CaseEqual(StringPiece s1, StringPiece s2) {
+  return s1.size() == s2.size() &&  // Cheap reject before lowercasing.
+         str_util::Lowercase(s1) == str_util::Lowercase(s2);
+}
+
+bool StringToBool(StringPiece str, bool* value) {
+  CHECK(value != NULL) << "NULL output boolean given.";
+  // Accepted spellings, compared case-insensitively; entries pair up by index.
+  static const char* const kTrue[] = {"true", "t", "yes", "y", "1"};
+  static const char* const kFalse[] = {"false", "f", "no", "n", "0"};
+  for (int i = 0; i < 5; ++i) {
+    const bool is_true = CaseEqual(str, kTrue[i]);
+    if (is_true || CaseEqual(str, kFalse[i])) {
+      *value = is_true;
+      return true;
+    }
+  }
+  return false;
+}
+}  // namespace
+
+tensorflow::Status ParseCmdLine(const string& line, string* cmd,
+                                tensorflow::tfprof::Options* opts) {
+  std::vector<string> pieces =
+      str_util::Split(line, ' ', str_util::SkipEmpty());
+
+  std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
+  if (std::find(cmds_str.begin(), cmds_str.end(), pieces[0]) ==
+      cmds_str.end()) {
+    return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT,
+                              "First string must be a valid command.");
+  }
+  *cmd = pieces[0];
+
+  for (int i = 1; i < pieces.size(); ++i) {
+    if (pieces[i] == string(tensorflow::tfprof::kOptions[0])) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto32(pieces[i + 1], &opts->max_depth)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[1]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_bytes)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[2]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_micros)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[3]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_params)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[4]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_float_ops)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[5]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->device_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
+                                             str_util::SkipEmpty());
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[6]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      std::set<string> order_by_set(
+          kOrderBy, kOrderBy + sizeof(kOrderBy) / sizeof(*kOrderBy));
+      auto order_by = order_by_set.find(pieces[i + 1]);
+      if (order_by == order_by_set.end()) {
+        return ReturnError(pieces, i);
+      }
+      opts->order_by = *order_by;
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[7]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->account_type_regexes = str_util::Split(StripQuote(pieces[i + 1]),
+                                                   ',', str_util::SkipEmpty());
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[8]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->start_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
+                                                 str_util::SkipEmpty());
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[9]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->trim_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
+                                                str_util::SkipEmpty());
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[10]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->show_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
+                                                str_util::SkipEmpty());
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[11]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->hide_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
+                                                str_util::SkipEmpty());
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[12]) {
+      if ((pieces.size() > i + 1 && pieces[i + 1].find("-") == 0) ||
+          pieces.size() == i + 1) {
+        opts->account_displayed_op_only = true;
+      } else if (!StringToBool(pieces[i + 1],
+                               &opts->account_displayed_op_only)) {
+        return ReturnError(pieces, i);
+      } else {
+        ++i;
+      }
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[13]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      std::set<string> shown_set(kShown,
+                                 kShown + sizeof(kShown) / sizeof(*kShown));
+      std::vector<string> requested_vector = str_util::Split(
+          StripQuote(pieces[i + 1]), ',', str_util::SkipEmpty());
+      std::set<string> requested_set(requested_vector.begin(),
+                                     requested_vector.end());
+      for (const string& requested : requested_set) {
+        if (shown_set.find(requested) == shown_set.end()) {
+          return ReturnError(pieces, i);
+        }
+      }
+      opts->select = requested_set;
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[14]) {
+      if ((pieces.size() > i + 1 && pieces[i + 1].find("-") == 0) ||
+          pieces.size() == i + 1) {
+        opts->viz = true;
+      } else if (!StringToBool(pieces[i + 1], &opts->viz)) {
+        return ReturnError(pieces, i);
+      } else {
+        ++i;
+      }
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[15]) {
+      if (pieces.size() <= i + 1) {
+        return ReturnError(pieces, i);
+      }
+      opts->dump_to_file = StripQuote(pieces[i + 1]);
+      ++i;
+    } else {
+      return ReturnError(pieces, i);
+    }
+  }
+  return tensorflow::Status::OK();
+}
+
+// Prints the tfprof help text (commands, options, evaluation notes and a
+// worked example) to stdout, then flushes stdout so the text shows up
+// immediately in the interactive prompt.
+void PrintHelp() {
+  printf(
+      "\nSee go/tfprof for detailed tutorial.\n"
+      "\nCommands\n\n"
+      "  scope: Each op has its op name in TensorFlow, such as 'n1', 'n1/n2', "
+      "'n1/n2/n3'. 'n1/n2' is a child of 'n1'. 'scope' command builds "
+      "a name scope tree and aggregates statistics based on it.\n\n"
+      "  graph: ops in TensorFlow are organized as a graph based on their "
+      "source (inputs) and sink (outputs). 'graph' command builds "
+      "a graph pointing *from output to input*, and aggregates "
+      "statistics based on it.\n\n"
+      "  set: Set options that will be default for follow up commands.\n\n"
+      "  help: Show help.\n"
+      "\nOptions\n\n"
+      "Press Enter in CLI to see default option values.\n\n"
+      "  -max_depth: Show ops that are at most this number of hops from "
+      "starting op in the tree/graph structure.\n\n"
+      "  -min_bytes: Show ops that request at least this number of bytes.\n\n"
+      "  -min_micros: Show ops that spend at least this number of micros to "
+      "run.\n\n"
+      "  -min_params: Show ops that contain at least this number of "
+      "parameters.\n\n"
+      "  -min_float_ops: Show ops that contain at least this number of "
+      "float operations. Only available if an op has "
+      "op.RegisterStatistics() defined and OpLog is "
+      "provided.\n\n"
+      "  -device_regexes: Show ops that are placed on the specified devices. "
+      "regexes are comma-separated.\n\n"
+      "  -order_by: Order the results by [name|depth|bytes|micros|params|"
+      "float_ops]\n\n"
+      "  -account_type_regexes: Account and display the ops whose types match "
+      "one of the type regexes specified. tfprof "
+      "allows users to define extra op types for ops "
+      "through tensorflow.tfprof.OpLog proto. regexes "
+      "are comma-separated.\n\n"
+      "  -start_name_regexes: Show ops starting from the ops that match the "
+      "regexes, recursively. regexes are "
+      "comma-separated.\n\n"
+      "  -trim_name_regexes: Hide ops starting from the ops that match the "
+      "regexes, recursively. regexes are comma-separated."
+      "\n\n"
+      "  -show_name_regexes: Show ops that match the regexes. regexes are "
+      "comma-separated.\n\n"
+      "  -hide_name_regexes: Hide ops that match the regexes. regexes are "
+      "comma-separated.\n\n"
+      ""
+      "  Notes: For each op, -account_type_regexes is first evaluated, "
+      "only ops with types matching the specified regexes are accounted and "
+      "selected for display. -start/trim/show/hide_name_regexes are used "
+      "to further filter ops for display. -start_name_regexes is evaluated "
+      "first to search the starting ops to display. Descendants of starting "
+      "ops are then evaluated against show/hide_name_regexes to make display "
+      "decision. If an op matches trim_name_regexes, all its descendants are "
+      "hidden.\n"
+      "Ops statistics are *accounted even if they are hidden* as long as "
+      "they match the -account_xxx options.\n\n"
+      "  -account_displayed_op_only: If True, only account the statistics of "
+      "ops eventually displayed. If False, account all "
+      "op statistics matching -account_type_regexes recursively.\n\n"
+      "  -select: Comma-separated list of metrics to show: [bytes|micros|"
+      "params|float_ops|num_hidden_ops|tensor_value|device|op_types]."
+      "\n\n"
+      "  -dump_to_file: Dump the output to a file, instead of terminal.\n\n"
+      ""
+      "Examples\n"
+      "  Assuming a toy model:\n"
+      "    input(typeB)->conv2d_1(typeA)->conv2d_2(typeA)->"
+      "fc(typeA)->cost(typeA)->summarize(typeC)\n"
+      "  Command:\n"
+      "    tfprof> graph -account_type_regexes typeA -start_name_regexes "
+      "cost.* -show_name_regexes conv2d.* -max_depth 10\n\n"
+      "  The above command only aggregates statistics of all ops of typeA ("
+      "hence ignoring input(typeB)). It will start looking for candidate to "
+      "display from cost.* and finally displays conv2d_1 and conv2d_2.\n\n");
+  fflush(stdout);
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
new file mode 100644
index 00000000000..6c1bba04fc2
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
@@ -0,0 +1,50 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Returns a display string for the count `n`.
+// NOTE(review): the exact format is defined in tfprof_utils.cc - confirm there.
+string FormatNumber(int64 n);
+
+// Returns a display string for a duration given in microseconds.
+// NOTE(review): unit scaling/suffixes are defined in tfprof_utils.cc - confirm.
+string FormatTime(int64 micros);
+
+// Returns a display string for a size given in bytes.
+// NOTE(review): unit scaling/suffixes are defined in tfprof_utils.cc - confirm.
+string FormatMemory(int64 bytes);
+
+// Returns a display string for the dimension list `shapes`.
+// NOTE(review): separator/format is defined in tfprof_utils.cc - confirm.
+string FormatShapes(const std::vector<int64>& shapes);
+
+// Parses one tfprof command line `line` into the command `*cmd` and the
+// options `*opts`. Returns a non-OK Status when an option is unknown, lacks
+// its value, or has a value that fails to parse. Options are written into
+// `*opts` as they are parsed, so `*opts` may be partially updated when an
+// error is returned; callers that need the old values should pass a copy.
+tensorflow::Status ParseCmdLine(const string& line, string* cmd,
+                                tensorflow::tfprof::Options* opts);
+
+// Presumably returns a copy of `str` with `oldsub` replaced by `newsub`.
+// NOTE(review): confirm whether every occurrence or only the first is replaced.
+string StringReplace(const string& str, const string& oldsub,
+                     const string& newsub);
+
+// Loads the file `fname` via `env` into `*graph_def`.
+// NOTE(review): the name and the CLI usage text suggest a text-format GraphDef
+// proto is expected - confirm against the implementation.
+Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def);
+
+// Prints the tfprof help text (commands, options, example) to stdout.
+void PrintHelp();
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
new file mode 100644
index 00000000000..cae6e1e3a8c
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
@@ -0,0 +1,19 @@
+syntax = "proto2";
+
+package tensorflow.tfprof;
+
+// One per-op record: the op's name plus an optional float-operation count and
+// optional user-defined type labels.
+message OpLogEntry {
+  // op name.
+  optional string name = 1;
+  // float_ops is filled by tfprof Python API when called. It requires the
+  // op has RegisterStatistics defined. Currently, Conv2D, MatMul, etc, are
+  // implemented.
+  optional int64 float_ops = 2;
+  // User can define extra op type information for an op. This allows the user
+  // to select a group of ops precisely using op_type as a key.
+  repeated string types = 3;
+}
+
+// Collection of OpLogEntry records. The tfprof binary reads this message as a
+// binary proto from the file given by --op_log_path.
+message OpLog {
+  repeated OpLogEntry log_entries = 1;
+}
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
new file mode 100644
index 00000000000..d9080242d6b
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
@@ -0,0 +1,236 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "linenoise.h"
+#include "tensorflow/c/c_api.h"
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+using tensorflow::str_util::Split;
+
+// linenoise tab-completion callback.
+//
+// `buf` is the text typed so far and `lc` collects the candidates. While the
+// text contains no space the user is still typing the command, so every entry
+// of kCmds that starts with the text is offered. Once a space is present, the
+// token after the last space is matched against kOptions and every match is
+// offered with the text before it kept as a prefix.
+void completion(const char* buf, linenoiseCompletions* lc) {
+  const tensorflow::string typed(buf);
+
+  // Keep the position in the string's own size_type: storing it in an int
+  // truncates the value and makes the npos comparison a signed/unsigned mix.
+  const tensorflow::string::size_type last_space = typed.find_last_of(' ');
+  if (last_space == tensorflow::string::npos) {
+    for (const char* cmd : tensorflow::tfprof::kCmds) {
+      if (tensorflow::string(cmd).find(typed) == 0) {
+        linenoiseAddCompletion(lc, cmd);
+      }
+    }
+    return;
+  }
+
+  // Everything up to and including the last space is echoed back unchanged;
+  // only the trailing token is completed.
+  const tensorflow::string prefix = typed.substr(0, last_space + 1);
+  const tensorflow::string partial = typed.substr(last_space + 1);
+  for (const char* opt : tensorflow::tfprof::kOptions) {
+    if (tensorflow::string(opt).find(partial) == 0) {
+      linenoiseAddCompletion(lc, (prefix + opt).c_str());
+    }
+  }
+}
+
+// Entry point of the tfprof command line tool.
+//
+// Flow:
+//   1. Parse the --flags into local FLAGS_* variables (defaults below).
+//   2. With no arguments and no --graph_path, print a short usage and exit.
+//   3. Load the graph (required, aborts on failure) and the optional run
+//      metadata, op log and checkpoint.
+//   4. If argv[1] is kCmds[0] or kCmds[1], run that command once and exit;
+//      if it is kCmds[3], print the help and exit.
+//   5. Otherwise run the interactive linenoise prompt until end of input.
+//
+// Returns 0 on normal exit and 1 when the checkpoint cannot be opened.
+int main(int argc, char** argv) {
+  tensorflow::string FLAGS_graph_path = "";
+  tensorflow::string FLAGS_run_meta_path = "";
+  tensorflow::string FLAGS_op_log_path = "";
+  tensorflow::string FLAGS_checkpoint_path = "";
+  tensorflow::int32 FLAGS_max_depth = 4;
+  tensorflow::int64 FLAGS_min_bytes = 0;
+  tensorflow::int64 FLAGS_min_micros = 0;
+  tensorflow::int64 FLAGS_min_params = 0;
+  tensorflow::int64 FLAGS_min_float_ops = 0;
+  tensorflow::string FLAGS_device_regexes = ".*";
+  tensorflow::string FLAGS_order_by = "name";
+  tensorflow::string FLAGS_account_type_regexes = "Variable";
+  tensorflow::string FLAGS_start_name_regexes = ".*";
+  tensorflow::string FLAGS_trim_name_regexes = "";
+  tensorflow::string FLAGS_show_name_regexes = ".*";
+  tensorflow::string FLAGS_hide_name_regexes;
+  bool FLAGS_account_displayed_op_only = false;
+  tensorflow::string FLAGS_select = "params";
+  bool FLAGS_viz = false;
+  tensorflow::string FLAGS_dump_to_file = "";
+  for (int i = 0; i < argc; i++) {
+    fprintf(stderr, "%s\n", argv[i]);
+  }
+
+  // Every FLAGS_* variable above must be bound to the flag of the same name;
+  // a mismatched binding silently routes one option's value into another.
+  CHECK(tensorflow::ParseFlags(
+      &argc, argv,
+      {tensorflow::Flag("graph_path", &FLAGS_graph_path),
+       tensorflow::Flag("run_meta_path", &FLAGS_run_meta_path),
+       tensorflow::Flag("op_log_path", &FLAGS_op_log_path),
+       tensorflow::Flag("checkpoint_path", &FLAGS_checkpoint_path),
+       tensorflow::Flag("max_depth", &FLAGS_max_depth),
+       tensorflow::Flag("min_bytes", &FLAGS_min_bytes),
+       tensorflow::Flag("min_micros", &FLAGS_min_micros),
+       tensorflow::Flag("min_params", &FLAGS_min_params),
+       tensorflow::Flag("min_float_ops", &FLAGS_min_float_ops),
+       tensorflow::Flag("device_regexes", &FLAGS_device_regexes),
+       tensorflow::Flag("order_by", &FLAGS_order_by),
+       tensorflow::Flag("account_type_regexes", &FLAGS_account_type_regexes),
+       tensorflow::Flag("start_name_regexes", &FLAGS_start_name_regexes),
+       tensorflow::Flag("trim_name_regexes", &FLAGS_trim_name_regexes),
+       tensorflow::Flag("show_name_regexes", &FLAGS_show_name_regexes),
+       tensorflow::Flag("hide_name_regexes", &FLAGS_hide_name_regexes),
+       tensorflow::Flag("account_displayed_op_only",
+                        &FLAGS_account_displayed_op_only),
+       tensorflow::Flag("select", &FLAGS_select),
+       tensorflow::Flag("viz", &FLAGS_viz),
+       tensorflow::Flag("dump_to_file", &FLAGS_dump_to_file)}));
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+
+  fprintf(stderr, "%s\n", FLAGS_graph_path.c_str());
+
+  // Comma-separated flag values become lists; empty items are dropped.
+  std::vector<tensorflow::string> device_regexes =
+      Split(FLAGS_device_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> account_type_regexes =
+      Split(FLAGS_account_type_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> start_name_regexes =
+      Split(FLAGS_start_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> trim_name_regexes =
+      Split(FLAGS_trim_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> show_name_regexes =
+      Split(FLAGS_show_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> hide_name_regexes =
+      Split(FLAGS_hide_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> select =
+      Split(FLAGS_select, ',', tensorflow::str_util::SkipEmpty());
+
+  tensorflow::string cmd = "";
+  if (argc == 1 && FLAGS_graph_path.empty()) {
+    printf("1) go/tfprof: Tutorial.\n");
+    printf("2) tfprof help: Detail help information.\n");
+    printf(
+        "3) tfprof --graph_path <GraphDef proto text file>: "
+        "Profiling model structure, tensor shape and # parameters.\n");
+    printf(
+        "4) tfprof --graph_path <GraphDef proto text file> \\\n"
+        "          --run_meta_path <RunMetadata proto binary file> \\\n"
+        "          --op_log_path <tensorflow::tfprof::OpLog proto binary file> "
+        "\\\n"
+        "          --checkpoint_path <TensorFlow Checkpoint file>: "
+        "Profiling everything!\n");
+    return 0;
+  } else if (argc > 1) {
+    // kCmds[3] prints the help; kCmds[0]/kCmds[1] are run once, without
+    // entering the interactive prompt. Any other argv[1] is ignored.
+    if (tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[3]) {
+      tensorflow::tfprof::PrintHelp();
+      return 0;
+    }
+    if (tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[0] ||
+        tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[1]) {
+      cmd = argv[1];
+    }
+  }
+
+  printf("Reading Files...\n");
+  std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef());
+  TF_CHECK_OK(tensorflow::tfprof::ReadGraphDefText(
+      tensorflow::Env::Default(), FLAGS_graph_path, graph.get()));
+
+  // Run metadata and op log are optional: when a file cannot be read the
+  // pointer is cleared so TFStats receives null. reset() frees the object;
+  // release() would only drop ownership and leak it.
+  std::unique_ptr<tensorflow::RunMetadata> run_meta(
+      new tensorflow::RunMetadata());
+  if (!ReadBinaryProto(tensorflow::Env::Default(), FLAGS_run_meta_path,
+                       run_meta.get())
+           .ok()) {
+    run_meta.reset();
+  }
+
+  std::unique_ptr<tensorflow::tfprof::OpLog> op_log(
+      new tensorflow::tfprof::OpLog());
+  if (!ReadBinaryProto(tensorflow::Env::Default(), FLAGS_op_log_path,
+                       op_log.get())
+           .ok()) {
+    op_log.reset();
+  }
+
+  std::unique_ptr<tensorflow::checkpoint::CheckpointReader> ckpt_reader;
+  if (!FLAGS_checkpoint_path.empty()) {
+    // The status object is created only when it is needed and deleted on
+    // both the error and the success path, so it cannot leak.
+    TF_Status* status = TF_NewStatus();
+    ckpt_reader.reset(new tensorflow::checkpoint::CheckpointReader(
+        FLAGS_checkpoint_path, status));
+    if (TF_GetCode(status) != TF_OK) {
+      fprintf(stderr, "%s\n", TF_Message(status));
+      TF_DeleteStatus(status);
+      return 1;
+    }
+    TF_DeleteStatus(status);
+  }
+
+  tensorflow::tfprof::TFStats tf_stat(std::move(graph), std::move(run_meta),
+                                      std::move(op_log),
+                                      std::move(ckpt_reader));
+  tensorflow::tfprof::Options opts(
+      FLAGS_max_depth, FLAGS_min_bytes, FLAGS_min_micros, FLAGS_min_params,
+      FLAGS_min_float_ops, device_regexes, FLAGS_order_by, account_type_regexes,
+      start_name_regexes, trim_name_regexes, show_name_regexes,
+      hide_name_regexes, FLAGS_account_displayed_op_only, select, FLAGS_viz,
+      FLAGS_dump_to_file);
+
+  // One-shot mode: a command was given on the command line.
+  if (!cmd.empty()) {
+    tf_stat.PrintGraph(cmd, opts);
+    return 0;
+  }
+
+  linenoiseSetCompletionCallback(completion);
+  linenoiseHistoryLoad(".tfprof_history.txt");
+
+  // Interactive mode: linenoise() returns nullptr at end of input.
+  for (char* line = nullptr; (line = linenoise("tfprof> ")) != nullptr;) {
+    tensorflow::string line_s = tensorflow::string(line);
+    free(line);
+
+    // An empty line shows the current default option values.
+    if (line_s.empty()) {
+      printf("%s", opts.ToString().c_str());
+      continue;
+    }
+    linenoiseHistoryAdd(line_s.c_str());
+    linenoiseHistorySave(".tfprof_history.txt");
+
+    // Parse into a copy so a failed or one-off command leaves the defaults
+    // untouched; only kCmds[2] commits the parsed options as new defaults.
+    tensorflow::tfprof::Options new_opts = opts;
+    tensorflow::Status s =
+        tensorflow::tfprof::ParseCmdLine(line_s, &cmd, &new_opts);
+    if (!s.ok()) {
+      fprintf(stderr, "E: %s\n", s.ToString().c_str());
+      continue;
+    }
+    if (cmd == tensorflow::tfprof::kCmds[2]) {
+      opts = new_opts;
+    } else if (cmd == tensorflow::tfprof::kCmds[3]) {
+      tensorflow::tfprof::PrintHelp();
+    } else {
+      tf_stat.PrintGraph(cmd, new_opts);
+    }
+  }
+  return 0;
+}
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
new file mode 100644
index 00000000000..9afd41046e4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
@@ -0,0 +1,49 @@
+syntax = "proto2";
+
+import "tensorflow/core/framework/tensor_shape.proto";
+import "tensorflow/core/framework/types.proto";
+
+package tensorflow.tfprof;
+
+// Tensor contents: the element type plus the values as one flat list.
+message TFProfTensorProto {
+  // Element type of the tensor.
+  optional DataType dtype = 1;
+  // Flatten tensor in row-major.
+  // Only one of the following array is set.
+  repeated double value_double = 2;
+  repeated int64 value_int64 = 3;
+  repeated string value_str = 4;
+}
+
+// One node of the tfprof output tree: the op's own statistics, the totals
+// aggregated over the op and its accounted descendants, and the child nodes.
+message TFProfNode {
+  // op name.
+  optional string name = 1;
+  // tensor value restored from checkpoint.
+  optional TFProfTensorProto tensor_value = 15;
+  // op execution time.
+  optional int64 exec_micros = 2;
+  // Total requested bytes by the op.
+  optional int64 requested_bytes = 3;
+  // Number of parameters if available.
+  optional int64 parameters = 4;
+  // Number of float operations.
+  optional int64 float_ops = 13;
+  // Number of inputs to the op.
+  optional int64 inputs = 5;
+  // Device the op is assigned to.
+  optional string device = 10;
+
+  // The following are the aggregated stats from all accounted descendants and
+  // the op itself. The actual descendants depend on the data structure used
+  // (scope, graph).
+  optional int64 total_exec_micros = 6;
+  optional int64 total_requested_bytes = 7;
+  optional int64 total_parameters = 8;
+  optional int64 total_float_ops = 14;
+  optional int64 total_inputs = 9;
+
+  // shape information, if available.
+  repeated TensorShapeProto shapes = 11;
+  // Descendants of the graph. The actual descendants depend on the data
+  // structure used (scope, graph).
+  repeated TFProfNode children = 12;
+}
\ No newline at end of file
diff --git a/tensorflow/core/util/command_line_flags.cc b/tensorflow/core/util/command_line_flags.cc
index 8927a265444..2048126338a 100644
--- a/tensorflow/core/util/command_line_flags.cc
+++ b/tensorflow/core/util/command_line_flags.cc
@@ -47,6 +47,22 @@ bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
   return false;
 }
 
+// Tries to read `arg` as "--<flag>=<64-bit integer>".
+//
+// Returns false when `arg` does not start with "--<flag>=", true otherwise.
+// `*value_parsing_ok` is cleared only when the prefix matched but the rest
+// is not exactly one integer; an error is logged in that case. Whatever
+// number sscanf manages to convert is written to `*dst`.
+bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+                    tensorflow::int64* dst, bool* value_parsing_ok) {
+  *value_parsing_ok = true;
+  const bool names_this_flag =
+      arg.Consume("--") && arg.Consume(flag) && arg.Consume("=");
+  if (!names_this_flag) {
+    return false;
+  }
+
+  // The trailing %c converts only when characters follow the number, so any
+  // count other than 1 means "no number" or "number plus trailing text".
+  char trailing;
+  const int converted = sscanf(arg.data(), "%lld%c", dst, &trailing);
+  if (converted != 1) {
+    LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
+               << ".";
+    *value_parsing_ok = false;
+  }
+  return true;
+}
+
 bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                    bool* dst, bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -78,6 +94,9 @@ bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
 Flag::Flag(const char* name, tensorflow::int32* dst)
     : name_(name), type_(TYPE_INT), int_value_(dst) {}
 
+// Binds the flag `name` to a 64-bit integer destination. The TYPE_INT64 tag
+// makes Flag::Parse() route matching arguments to ParseInt64Flag.
+Flag::Flag(const char* name, tensorflow::int64* dst)
+    : name_(name), type_(TYPE_INT64), int64_value_(dst) {}
+
 Flag::Flag(const char* name, bool* dst)
     : name_(name), type_(TYPE_BOOL), bool_value_(dst) {}
 
@@ -88,6 +107,8 @@ bool Flag::Parse(string arg, bool* value_parsing_ok) const {
   bool result = false;
   if (type_ == TYPE_INT) {
     result = ParseInt32Flag(arg, name_, int_value_, value_parsing_ok);
+  } else if (type_ == TYPE_INT64) {
+    result = ParseInt64Flag(arg, name_, int64_value_, value_parsing_ok);
   } else if (type_ == TYPE_BOOL) {
     result = ParseBoolFlag(arg, name_, bool_value_, value_parsing_ok);
   } else if (type_ == TYPE_STRING) {
diff --git a/tensorflow/core/util/command_line_flags.h b/tensorflow/core/util/command_line_flags.h
index 7e74240e538..9297fb066d1 100644
--- a/tensorflow/core/util/command_line_flags.h
+++ b/tensorflow/core/util/command_line_flags.h
@@ -49,6 +49,7 @@ namespace tensorflow {
 class Flag {
  public:
   Flag(const char* name, int32* dst1);
+  Flag(const char* name, int64* dst1);
   Flag(const char* name, bool* dst);
   Flag(const char* name, string* dst);
 
@@ -56,8 +57,9 @@ class Flag {
 
  private:
   string name_;
-  enum { TYPE_INT, TYPE_BOOL, TYPE_STRING } type_;
+  enum { TYPE_INT, TYPE_INT64, TYPE_BOOL, TYPE_STRING } type_;
   int* int_value_;
+  int64* int64_value_;
   bool* bool_value_;
   string* string_value_;
 };
diff --git a/tensorflow/core/util/command_line_flags_test.cc b/tensorflow/core/util/command_line_flags_test.cc
index 1cdddf363db..bc38fff8fde 100644
--- a/tensorflow/core/util/command_line_flags_test.cc
+++ b/tensorflow/core/util/command_line_flags_test.cc
@@ -33,19 +33,21 @@ std::vector<char*> CharPointerVectorFromStrings(
 
 TEST(CommandLineFlagsTest, BasicUsage) {
   int some_int = 10;
+  int64 some_int64 = 21474836470;  // max int32 is 2147483647
   bool some_switch = false;
   tensorflow::string some_name = "something";
-  int argc = 4;
+  int argc = 5;
   std::vector<tensorflow::string> argv_strings = {
-      "program_name", "--some_int=20", "--some_switch",
-      "--some_name=somethingelse"};
+      "program_name", "--some_int=20", "--some_int64=214748364700",
+      "--some_switch", "--some_name=somethingelse"};
   std::vector<char*> argv_array = CharPointerVectorFromStrings(argv_strings);
-  bool parsed_ok =
-      ParseFlags(&argc, argv_array.data(), {Flag("some_int", &some_int),
-                                            Flag("some_switch", &some_switch),
-                                            Flag("some_name", &some_name)});
+  bool parsed_ok = ParseFlags(
+      &argc, argv_array.data(),
+      {Flag("some_int", &some_int), Flag("some_int64", &some_int64),
+       Flag("some_switch", &some_switch), Flag("some_name", &some_name)});
   EXPECT_EQ(true, parsed_ok);
   EXPECT_EQ(20, some_int);
+  EXPECT_EQ(214748364700, some_int64);
   EXPECT_EQ(true, some_switch);
   EXPECT_EQ("somethingelse", some_name);
   EXPECT_EQ(argc, 1);
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 4cb540dc1dc..b0e0e462ba2 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -152,6 +152,14 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     actual = "@grpc//:grpc++_unsecure",
   )
 
+  native.new_git_repository(
+    name = "linenoise",
+    commit = "c894b9e59f02203dbe4e2be657572cf88c4230c3",
+    init_submodules = True,
+    remote = "https://github.com/antirez/linenoise.git",
+    build_file = str(Label("//:linenoise.BUILD")),
+  )
+
   native.new_http_archive(
     name = "jsoncpp_git",
     url = "http://github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz",