From 1848070f47cfb6810554d9c1e6c32bf4da7a85bd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 2 Aug 2017 21:29:03 -0700
Subject: [PATCH] Fine-grained memory profiling

Add residual_bytes, peak_bytes and output_bytes.
Allow ordering/selecting/filtering by
accelerator_micros/cpu_micros/peak_bytes/residual_bytes/output_bytes

Also updated the testdata.

PiperOrigin-RevId: 164079214
---
 tensorflow/core/profiler/README.md            |    6 +-
 tensorflow/core/profiler/g3doc/options.md     |   17 +-
 .../core/profiler/g3doc/profile_memory.md     |    7 +-
 .../advisor/expensive_operation_checker.h     |   12 +-
 .../testdata/ckpt.data-00000-of-00001         |  Bin 1480 -> 1804 bytes
 .../profiler/internal/testdata/ckpt.index     |  Bin 239 -> 194 bytes
 .../core/profiler/internal/testdata/ckpt.meta |  Bin 11285 -> 8435 bytes
 .../profiler/internal/testdata/graph.pbtxt    | 1903 ++++++++---------
 .../core/profiler/internal/testdata/run_meta  |  Bin 3444 -> 5539 bytes
 .../profiler/internal/testdata/tfprof_log     |   26 +-
 .../core/profiler/internal/tfprof_code.cc     |   62 +-
 .../core/profiler/internal/tfprof_code.h      |    3 +-
 .../core/profiler/internal/tfprof_node.cc     |   16 +-
 .../core/profiler/internal/tfprof_node.h      |   79 +-
 .../profiler/internal/tfprof_node_show.cc     |   43 +
 .../core/profiler/internal/tfprof_op.cc       |   48 +-
 tensorflow/core/profiler/internal/tfprof_op.h |    2 +
 .../core/profiler/internal/tfprof_options.cc  |   38 +-
 .../core/profiler/internal/tfprof_options.h   |   41 +-
 .../core/profiler/internal/tfprof_show.cc     |   53 +-
 .../core/profiler/internal/tfprof_show.h      |   17 +-
 .../profiler/internal/tfprof_show_multi.cc    |   15 +
 .../profiler/internal/tfprof_show_multi.h     |   19 +-
 .../profiler/internal/tfprof_show_test.cc     |  166 +-
 .../profiler/internal/tfprof_stats_test.cc    |  383 ++--
 .../core/profiler/internal/tfprof_tensor.h    |  113 +-
 .../profiler/internal/tfprof_tensor_test.cc   |  245 +--
 .../core/profiler/internal/tfprof_timeline.cc |    4 +-
 .../profiler/internal/tfprof_timeline_test.cc |   10 +-
 .../core/profiler/internal/tfprof_utils.cc    |   57 +-
 tensorflow/core/profiler/profiler.cc          |   26 +-
 tensorflow/core/profiler/tfprof_options.proto |    5 +
 tensorflow/core/profiler/tfprof_output.proto  |   24 +-
 tensorflow/python/profiler/model_analyzer.py  |    5 +
 .../python/profiler/model_analyzer_test.py    |  128 +-
 tensorflow/python/profiler/option_builder.py  |   65 +-
 tensorflow/python/profiler/profiler_test.py   |    2 +-
 ...ensorflow.profiler.-graph-node-proto.pbtxt |   24 +
 ...low.profiler.-multi-graph-node-proto.pbtxt |   24 +
 ...low.profiler.-profile-option-builder.pbtxt |    6 +-
 40 files changed, 1949 insertions(+), 1745 deletions(-)

diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md
index e748daba7ad..6db38a59aef 100644
--- a/tensorflow/core/profiler/README.md
+++ b/tensorflow/core/profiler/README.md
@@ -106,7 +106,7 @@ _TFProfRoot (--/930.58k params)
 ### Show the most expensive operation types.
 ```
 tfprof> op -select micros,bytes,occurrence -order_by micros
-node name | output bytes | total execution time | accelerator execution time | cpu execution time | op occurrence (run|defined)
+node name | requested bytes | total execution time | accelerator execution time | cpu execution time | op occurrence (run|defined)
 SoftmaxCrossEntropyWithLogits      36.58MB (100.00%, 0.05%),      1.37sec (100.00%, 26.68%),           0us (100.00%, 0.00%),      1.37sec (100.00%, 30.75%),      30|30
 MatMul                        2720.57MB (99.95%, 3.66%),      708.14ms (73.32%, 13.83%),     280.76ms (100.00%, 41.42%),       427.39ms (69.25%, 9.62%),  2694|3450
 ConcatV2                       741.37MB (96.29%, 1.00%),       389.63ms (59.49%, 7.61%),        31.80ms (58.58%, 4.69%),       357.83ms (59.63%, 8.05%),  4801|6098
@@ -192,7 +192,7 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
 ******************************************************
 ```
 <left>
-[Timeline](g3doc/graph_timeline.png)
+![Timeline](g3doc/graph_timeline.png)
 </left>
 
 ```
@@ -213,7 +213,7 @@ pprof -png --nodecount=20 --sample_index=1 <filename>
 ```
 
 <left>
-[PprofGraph](g3doc/pprof.jpg)
+![PprofGraph](g3doc/pprof.jpg)
 </left>
 
 ### Feature Request and Bug Report
diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md
index 95083793245..bdcc6b2bd84 100644
--- a/tensorflow/core/profiler/g3doc/options.md
+++ b/tensorflow/core/profiler/g3doc/options.md
@@ -48,7 +48,18 @@ In graph view, in means the number of hops in the <b>graph</b>.
 
 `-min_bytes`: Show nodes that request at least this number of bytes.
 
-`-min_micros`: Show nodes that spend at least this number of microseconds to run.
+`-min_peak_bytes`: Show nodes that use at least this number of bytes during peak memory usage.
+
+`-min_residual_bytes`: Show nodes that have at least this number of bytes not being de-allocated after Compute.
+
+`-min_output_bytes`: Show nodes that have at least this number of bytes output (not necessarily allocated by the nodes).
+
+`-min_micros`: Show nodes that spend at least this number of microseconds to run. It sums
+accelerator_micros and cpu_micros. Note: cpu and accelerator can run in parallel.
+
+`-min_accelerator_micros`: Show nodes that spend at least this number of microseconds to run on accelerator (e.g. GPU).
+
+`-min_cpu_micros`: Show nodes that spend at least this number of microseconds to run on CPU.
 
 `-min_params`: Show nodes that contains at least this number of parameters.
 
@@ -58,7 +69,7 @@ In graph view, in means the number of hops in the <b>graph</b>.
 
 `-step`: Show the stats of the this step when multiple steps of RunMetadata were added. By default, show the average of all steps."
 
-`-order_by`: Order the results by [name|depth|bytes|micros|accelerator_micros|cpu_micros|params|float_ops|occurrence]
+`-order_by`: Order the results by [name|depth|bytes|peak_bytes|residual_bytes|output_bytes|micros|accelerator_micros|cpu_micros|params|float_ops|occurrence]
 
 `-account_type_regexes`: Account and display the nodes whose types match one of the type regexes specified. tfprof allow user to define extra operation types for graph nodes through tensorflow.tfprof.OpLogProto proto. regexes are comma-sperated.
 
@@ -76,7 +87,7 @@ In graph view, in means the number of hops in the <b>graph</b>.
 Notes: See <b>overview</b> sesion on how does above options play with each other to decide the output and counting.
 
 `-select`: Comma-separated list of attributes to show. Supported attributes:
-[bytes|micros|accelerator_micros|cpu_micros|params|float_ops|occurrence|tensor_value|device|op_types|input_shapes].
+[bytes|peak_bytes|residual_bytes|output_bytes|micros|accelerator_micros|cpu_micros|params|float_ops|occurrence|tensor_value|device|op_types|input_shapes].
 
 `-output`: Output results as stdout, file or timeline.
 The format is ```output_type:key=value,key=value```.
diff --git a/tensorflow/core/profiler/g3doc/profile_memory.md b/tensorflow/core/profiler/g3doc/profile_memory.md
index e897967d3b7..a00683d0626 100644
--- a/tensorflow/core/profiler/g3doc/profile_memory.md
+++ b/tensorflow/core/profiler/g3doc/profile_memory.md
@@ -15,7 +15,6 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
 ```
 
 <left>
-TODO(xpan): Show the image correctly in github.
 ![Timeline](graph_timeline.png)
 </left>
 
@@ -26,7 +25,7 @@ TODO(xpan): Show the image correctly in github.
 # With op view, it shows you the aggregated output tensor bytes of each
 # operation type.
 tfprof> op -select bytes -order_by bytes
-node name | output bytes
+node name | requested bytes
 Identity                   32515.37MB (100.00%, 27.02%)
 FusedBatchNormGrad           10802.14MB (72.98%, 8.98%)
 FusedBatchNorm               10517.52MB (64.01%, 8.74%)
@@ -41,7 +40,7 @@ AddN                           2741.49MB (8.56%, 2.28%)
 
 # With scope view, you can see the operations that outputs largest tensors.
 tfprof> scope -order_by bytes -select bytes -min_bytes 100000000
-node name | output bytes
+node name | requested bytes
 _TFProfRoot (--/120356.38MB)
   tower_3/SepConv2d_2b_3x3/separable_conv2d (346.85MB/854.00MB)
     tower_3/SepConv2d_2b_3x3/separable_conv2d/depthwise (507.15MB/507.15MB)
@@ -61,7 +60,7 @@ _TFProfRoot (--/120356.38MB)
 
 # code view.
 tfprof> code  -max_depth 10 -select bytes -order_by bytes -start_name_regexes .*seq2seq.* -min_bytes 1
-node name | output bytes
+node name | requested bytes
 _TFProfRoot (--/74148.60MB)
   seq2seq_attention.py'>:168:run_filename_from...:none (0B/74148.60MB)
     seq2seq_attention.py'>:33:_run_code_in_main:none (0B/74148.60MB)
diff --git a/tensorflow/core/profiler/internal/advisor/expensive_operation_checker.h b/tensorflow/core/profiler/internal/advisor/expensive_operation_checker.h
index 85b99dc9519..8b4b90b6330 100644
--- a/tensorflow/core/profiler/internal/advisor/expensive_operation_checker.h
+++ b/tensorflow/core/profiler/internal/advisor/expensive_operation_checker.h
@@ -47,8 +47,8 @@ class ExpensiveOperationChecker : public Checker {
       fprintf(stderr, "Missing run_meta for %s\n", name().c_str());
       return;
     }
-    Options opts(3, 0, 1, 0, 0, 0, -1, "micros", {".*"}, {".*"}, {}, {".*"}, {},
-                 false, {"micros", "occurrence"}, "none", {});
+    Options opts(3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, -1, "micros", {".*"}, {".*"},
+                 {}, {".*"}, {}, false, {"micros", "occurrence"}, "none", {});
     const MultiGraphNodeProto root = stats->ShowMultiGraphNode("op", opts);
     if (root.children_size() == 0) {
       return;
@@ -74,8 +74,8 @@ class ExpensiveOperationChecker : public Checker {
       fprintf(stderr, "Missing op_log (code traces) for %s\n", name().c_str());
       return;
     }
-    Options opts(100, 0, 1, 0, 0, 0, -1, "micros", {".*"}, {".*"}, {}, {".*"},
-                 {}, false, {"micros"}, "none", {});
+    Options opts(100, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, -1, "micros", {".*"},
+                 {".*"}, {}, {".*"}, {}, false, {"micros"}, "none", {});
     const MultiGraphNodeProto root = stats->ShowMultiGraphNode("code", opts);
     const MultiGraphNodeProto* node = &root;
     // A trick here is: Usually, codes in library file are usually referenced
@@ -93,8 +93,8 @@ class ExpensiveOperationChecker : public Checker {
   }
 
   void CheckScopeView(const TFStats* stats) {
-    Options opts(100, 0, 100, 0, 0, 0, -1, "micros", {".*"}, {".*"}, {}, {".*"},
-                 {}, false, {"micros"}, "none", {});
+    Options opts(100, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, -1, "micros", {".*"},
+                 {".*"}, {}, {".*"}, {}, false, {"micros"}, "none", {});
     const GraphNodeProto root = stats->ShowGraphNode("scope", opts);
     if (root.children_size() == 0) {
       return;
diff --git a/tensorflow/core/profiler/internal/testdata/ckpt.data-00000-of-00001 b/tensorflow/core/profiler/internal/testdata/ckpt.data-00000-of-00001
index 045063943f9711d25028437e1d3d38cc40674cf9..067f866c8227b9ce0561429ef61eed6337467545 100644
GIT binary patch
literal 1804
zcmeYj<k|J?QP!^a2X5_D^t@^5wcN*QO|Q@{N7k~Pi}QIb`)+^TrSjF+>TkV))d@Fk
ztNh3!E3enfc7FJBa_0u?Wjki=eq=7R;JDS{%_}VC`%CPy5^1muU-@+B8L8(xB+lAf
zN%b$><?&J7YO&eIUB+kIc1+Biy{oGI|IQ|Behcn(=XYH>dB>{j%O$J5nX<do%CxLL
zN=(>w{z%L&j_D%1m(Fe7#hyB8*P)FYcO`a}Zdd(fu)WW;aM$!JM%L03!YxWlgm!;u
zZ{FGZ;?1tEOOm_U&rI3Hy=d~T)hw~QHrMy>P|&Ei^wODQwQKikE1fV;tNE*T?Kozp
zvddax%8rAQ0=xei2=0!SKDTqbsk)_sf9kGDENgcCdb4pC%k0;?I+9y<=~qA8VX;kd
zmxI;bo!ZR3JAYS*?i3Cbw^C{0u{sjA%*yoof?fV5;ybp)$XRO1^jh_;nqjrEY~QXQ
zD@rUEo(kUSwBnW3!PQ5stUsjha_YEd$?f&ioOvzFu5`~M+qoqV@7$)HWW}oR#7g=>
z_0HvddRB|)ZrF9Ee!A76?SfVx7e2ST^OSwp&!QG9t@Z4d*DVWnaaL{Fb?eCST{Gs*
zv<MUN*jbRKwsT%e(T<%`23A`NPVCY@GSlj4-pQSd=RUW}Xt&$hzVYKu&ixuz;h)_t
z0`gjRaamln%02nTYK2g<)#>%~cQppCwXzf6Xtn7~x7DmowwBl8&sgo<W@NESq~6k|
z;^D4^?WsHOPnvDDX04{RsGEk>MUAz)7REi?WwIsHN@L@+UDEe_cRtmgU{$(ep5-?U
zL#wIcldaAqv0KTWUT3wtZjNPy`+lnpofmdFZVTI$^YD*_oScHy=go_(&NO&fm1ZBY
ztPnf0%Q0oyPEmo|JOBQc+qHFR*RBKBtX670X}fGcuHJd{YyB?W6E;>|pO~#v<PPn6
zcw?94?(3RXeTfHm9k~2wXUqHOo&JoAc6n^E-t}umyyaR=&RvOnBP<VW>)%y&Lf+c$
zZ^useoolU><NodXbbxu+=a$_TK3YbWI=QO5{!Ux6^G@YAi+R(J?X*4e-^%;k!JVD=
zlC3uPM_6`c?zVb!@0{iD*t?eJ42rETikz}4owmfPVUORgyK{xCE^ZXswO)Cid0XHV
ztD+o6Yk^CPcX<{j?YcC7xn=A7$-9!C?zH;Qdc?9PLwEZ-W941bSQc6ydUky0#eRKD
z=j?Nq;;y1Q+qZM>T*~%i=iD1-cdf{`+j(Pks+CT$iRG7lhE@lIg{=NBH`}%L!7t0W
z{M9=yII`_j7Ja)zC3x>nTNx{><j-DKVTI{C|F~)Hy4BORW4r!FE7tjZR)wh-cl}yW
zu$}GY>78?~x9<|$?Y2u(b){wQVTN7p?8duVRIl#bS2f%6P0!R_+dlJ}AJf0O>za47
zRWS2Es~2UumNN>(t#qbs+Np1}+2W}B%w6^`<*n_0D(-%@X_2{Nl<=<3-aR|g{ip5{
zcTV4VbPo6KGkY%Vdbw!B&XltUcYUc;+7<Yxe8+7OPAh{`%eGG|ma&+qBV;9!w$n<#
zw`bQ`m1)~6ryjIY`uAbm&h=WmmTb9Tx#@i5u1$*~tq#^-+qvmUn$@8XS-T$pUS=+`
z`HhveT9;Mew@<q`K5e%=uzQWA^{l*I3|ul+v(I(xn#&Qo%iGpySEu$(i|f-ItxBe8
z?)?A4W4pqsZY!s19jg^OWxI`?KJGlheR${f&V?39tsy(zn;R`<I2(3F^|9^R(_y-+
zRw=^DQN_n9@X5bjA`HD&pY@Whu1$MvmHE=$>Y>-3T_w93cm1B5XSLo##Y#Eb$*Swt
zAB#JAJv-AYBCR&O&fV24`eWyb%9NeeH@8{kiI?o~IFz?DfNP<Z**PvN1EX&{6vDN3
zsz2=9*{Jl~GJCTAPNqo*cedUCZ@Ef$ou$R&*E_alN$)xo+H4g#^~}zve`>7`ZsxFx
zIlN@2&!+fYZIy~UcV6M%shXH!WqVq8=j>Ojt(qtA*vY1PVpp>U!*1y;^IZ!pPFwA~
z-)zzGNqX1xzgMg>cg)>&_s9mT*hGFS=PA3bKIA^#sW<ES&e-+SEghzRu&jK|xU0KZ
z&q}@C*DBvwZD*xOv6cC*nY;8P=4^L0+Py37)RUcxBE`Gn!e?0&oR`@papS?x7Yk-u
z9T%Hwm9b50mvh1ttB7-}cYQ0GxAWPRR*R`Tc~(MtR=YBagLWN#FwrWAch$}<p|LxE
z-+pPSxq)++gqx=&S8}|?@5M%ztc~neXH*<4mWtoEvfC_gX)<HhF6Cv_R_Ek*Sh?*f
zwMb8Rux;wZ(>tGQX<6E8HSL=GpkWv1EQZ}4E1z1Pt)IP9<>Ty~vJ2d-F4Rg{>9Ols
zxux-0T@Y-tniev3r@;(GtDQ4Dc8Shs*fm!w)2jH3$}Xk1vX*zgCRnYKE4JuQ-)glo
tQQmq-;^Cdv&!Tq9H!ZYEOy{+FzEyTtrON3Yns3{ze)6jA>fOg;4FF^Fd{h7c

literal 1480
zcmZR;0%W#6-|KLz)o#s~7`q;oa=TTRp6~s&H*(*)3p@8ROxS8uQWS3|Fe}4OYwo?h
z85_&&?tSLncfe9)-+_6ud(PZBX3Nl`Yn`&Ud!NGI5StB>_O{aNv-a^g%i2wI*WI@-
z{rO&%wLx||EG>2+f_H2yo`>w4bx7Jy&9~34s8-ajfotpDIc?f@I?FHG-r$e3RrtnX
z%NM9@m(WmS_uyEKP08v_wx#=<_Zf8h*e(#eWfO3Bt8K>4MSB~xt9PHX*4($i^`6a^
z@~O5WbA9b*#GbXi#52h*QpjK*L;K7<Rlnl*J&4J-lbCbg_R6JNyPK?O`!1|JV0*y*
zru910*?T|xTkPWq>fQHiPph5AnYO)F%ro~Id|kaaifiTG8Pj^Q%`}C)2ACu(f
zy=A{G_c_fh+!x@n!e+};Te}U5d-n<)vDiC*uI#=8PoC^?sg<w`SUq`9XQIeHgGu7H
zA*<)uZm=w{z2%a!Pay2D%@3KIdu#kJ?!BO`yl=tLeS0qaeqcM#MsVMsXFPTX%>V73
z_B(W6MBHrKfTwZ$j>P)yYmoBTr}@!#UxS~swcNkHeLEic*m(pT+sk^jVxNRSsom7C
zh5KB(b?w$|W3+3SW@Yyw_W2&GV-xIZTQ&FX2yWhc<Cfh%jbm)Kv-G>|ro^qaO=-Ei
z*N3gvZlAxaT^^5tovOOSK8Ga@cDsDL?G`yY+uc~4XjlB9X`cxDW7{JtVs^)lYuoX3
z*V|!9E({Fy_jk3u<+VF$p0-cz@Y=oimgw2-G8MPW*^#+d>>v9+RTYDMZky}vUIk0q
z6}%9&&Ens?H+ie+zBPP$`&dd1_c_Qu+AEYRW_RGWwk?ODg6*ocJo~Qgi?mbl?A-f=
zW9r@m9gp_hD~h*E2rRJ`*kix<g5KFZx6TIaTb!A{@6<gmy90Gr`=r&I?Cu;|Zrfol
zw(sIm-+dcA8utCUmcH-AWF6appTf3IqK^A+6mHwI!k2lU`d#sT4bOP?u}st2D^i!e
zcfpfiwgPclwrtm`>>BPDTPyhR?|W<7V8>}vvd`)3q`d)m1ok|+$7Q#{$7}CWlP0?>
zM=kfoXusaedBR|y14qB@qlp{$%Ju8m-F_>*Pd6ZEpR-NVKC93-dkuWFZO^3o?6Y9f
z-e<Dge&2#63vAfx{OtTph4=k1(zcsr%d)R*ZMNN&jbHbk>G-+#fuqx&2GLhG7j%pF
zdSx5$TQxIc9}lm#-M*gKeFywE?@1D>+Go&YZ0DiaX6y4_d#^~-<-Gxi*6z(wS-Y2^
z!C<#Vwdg)S*SLKwFZS<UR8nYH@NL^(2A)W(2Ti>;*_ZS8&DpYXZ^d0UyFD?Sb|=)-
z_N}_evd<xCj_rhl3U+n@iFQrK7W;B$bl6_XinRN1`^er4H=gb_`dDvw;prRO1oN|d
zB~~)n@hUX$6A(XRn-ZjL=dkeU?tNd%?F{&~+D_Sj+*V@3-#se2x_dvwhS)tQd1;&Y
zrP0oA{rbHPZmD(=hc4M_C|<VxcgS>~MepT35t~`}<y=wT$8-JsUWT=^Y}l$*>^N%d
zZCO0r?A#{p+FKB8Vw-%ZY2QSqvV9$%QTt{Yo7mkradyv!z=O8h&pr0N2vE0sFwtPo
ziRgyCk5p>*8BA}t_4)qIwu+_L?!ue3wvQ4z_C7ctXglYq@V-WY!hJfu>3hHI-D3M!
zDt6zho16A->g?UykS()YB23&a<H5hZzrF?9O(-kbtMK8r&5v2@Z5NpA-5a*!$X<a@
zx^_Rl=-N%uW7*5)^nI`0UGIHiJ(hM0M1^g*E;sLUInT9E(@}C?fQ!-|rphF{f`6>G
z9?ul^dTg%VE7w|M_u{<%zTy(8ebsk)_sMrv?c4Hsp{?DCNV@{91-79YX8U5EYVNBw
z)Y!M?yN2D4*iyR%cjNZWmrLCDWcf<lhD;;tgh?!WU3i-Jnf)uWOPjfUZ_VfOy#cnl
Wb}S43?wxRRy{*NH9J?1;p0)s(8?x>I

diff --git a/tensorflow/core/profiler/internal/testdata/ckpt.index b/tensorflow/core/profiler/internal/testdata/ckpt.index
index 908198167da66769efd31c90ef7c63ab0fe1a91b..2097de8da2ed1d268ab0835d37f62bd70df59064 100644
GIT binary patch
literal 194
zcmZQzVB=tvV&Y(AV3KhO=U@~P5MttBM#5|w9jvBva}`cAG0Gc3<(Y(-I6#<9h>3$o
zp@UVUfz$N7aX~u+J6~{eVoqWa$RGxV4n7SQQ|0Bel^7Tp7#Kmslic~HAR5d$@M+-&
g21ce3hFrLi;34BVMI7~fAOS)+{C7jQN~!y80HmiIGXMYp

literal 239
zcmZQzVB=tvV&Y(A;O0-x&nq)Z(ND@uEaqSoVi98EVAT*YO-f<#XJ?bkPA$qy&Ea4a
z5)fkIU`E2M3L+YFSWNrXb;Q`%h2jlS^eb#&ff~WdCXdw!R**FcM_4pk*i8?J|8-|z
zU|?Vb5wZ3U4})kh=fJ0h8yFawQy6A~xnSbOy5e2)7&%rm@`IG&$Kk&lx>ZWuZvz1S
C-6oCz

diff --git a/tensorflow/core/profiler/internal/testdata/ckpt.meta b/tensorflow/core/profiler/internal/testdata/ckpt.meta
index 94fe29ad5c8ca902e5173b68fa599e7e1ba9ab7e..b907e4ab508769596bb81bbfd4cf6fe7ddebe15e 100644
GIT binary patch
literal 8435
zcmd;@$05|s!DY$C?3j`w#Ky%~p~M&hqAQgcL!{Wa7^{>RLzJYr7(;|uN-7Idt@yck
zjktvwnON9ZnK?Ox*tsgW*c^+CGt={gxVe~%Qqz<eLmC(vg}As_%Mx=+Q$fZEadG9B
zmK2nh#KYy4IN?SrNpbO&CFW$NB$lMc7iT0EqzbVl<>%)}F=;R=iF0w67N^GN<R@om
z=A|PfW-u~xS#YsB=jRodNbz&ALCjZTO#wMli63gY5L-!VUU7br5-%4-2I2{>6O3GJ
z&iQ#|MlK+CX66-?mVms?&Bd0MnNyNl1adSt+)&04B|*4rt=PC&jF^NOl|;DMi%W_!
zQ&NkCICC<KOEfa`N;H*ZxCB7%O)gEz%Zty?i%%~oML1eXiHp4;F(n1$6E=|MymU}_
z3b6z``nn3ShB^9px+v*#ai=7fB*v%Z7v(0F2tn0Ju?eyGd4xM#@p17$SVAm*&K}`h
zdR!cyDXDoSnI)CTevv|PJ|eKWY`B<xOLIUmOngiWf}?r@BO@0Z7mHuMe*squBbP`}
zVqQvqZg5FrUP@w7iXX_iIUpy1qYxB=SfYicI5jmzh&eN_M2bm*K}mp%6(nGU5D`O(
zC@Xd@RwHI%Mkb61VCL*#WaP@{;tWbHF3B%S4Kotr;$kZ(N=?hGkYE=Q;NpSAe0*MF
zZfdavyO0Q%0N7#iiFqmU#W|VDAW<nnE_R5zVk0(i(kWJw;DWM*xWLg0N>G{_j0TJ#
z3v;;Gf)mTgv`vUq+lslk!V-%z6O(dM!;GZ(A=wL(kQ*2omH4<=!Cn%AP-ux<Nt}x_
zIX|x?F*7f<2%e0Xgcy`0xwwln5{pt(z=4A-)xpT9#cHT$q-SWvUYeJkmtUSI)MCz+
z#l>2cT9jWb1j%n&s<7-O<0QqwC?p`n#K9!Q#K8u|%vvlA3=9lf+_1Q0GGNqF<Kl~l
zWKu{TFBTHv60*Y3%GJimr5>D|n3Gr(uJ4(bS(2HUlUbEoq+bM1ZSi^FWTp@GH#qwv
z+rz9Sfng6D7mF2>5QCN=%+(xDY#hu&AR#F(w)o_n#NuKh0WLlvo}Bz-D+j3MT<wfp
zYEU*FhvcRv=D~tR6lMXB6Bh@g5Cb$A@K}QEbVe;P45zbjF<CKibuw~k5Ozs%NlHp;
z8E&`a3pHD5QS6pmj9g}f-Ga4f5WyxYMQmh)f(lYrFi9|IadAOP5)hjmR5mjiFyr+c
zj<5xp3eNXJOa_cxry03a341QLG)D+jhDwnc5K=@(rWVv|jO2KYYYQWnB0=8>f$DuJ
z;(RSdWPYH`+3k#6>`-S5Ar*fpF{Q<iT(*I7GB>O&WU^u)C$zK#Q42*TAqK8hj9h$B
zm*_*AIMAk!6wHNEghLn+8X_3&93~C?Q7eer`T;4&k&wCKxOiZ0ElN#H5rVf0;BLiL
zz91Zkr*z<&#K@)OLSB(9g;B0?aIsmj2r+T6pjIW!LSR890cHjU1{h`oRVAYEf|{F)
zQ-~d0P`iY4)p9A~brTV#Gj@|vQyZ=@ujf+6Ydva_j8YxqFj5P%4gr_j*j>qWn~}>9
zuM4mhxkA_rVj^M}HB>-ZE;Tj9NC;F2V=a)eyBbeZfLVwMR5ODy8`p70E(N^4gO|jl
z2Z9taF^9+*6#8TXBbN+5pFm1sf}WKkoZ7W0b1B@%Tyq(@m|Vh1D1oJ6B`~B_hZ2>z
zT!mTa5}i1fF>-Obgrk?sOfKP4ctctXdm)S?)UcGlxZAO)K897@T=87&F5&uUWipdX
zI6SCv1rCumajigfW}%&0DM)i&3T83FDq%#Ym4k~-iiMGhiBU@wt;Y%JU*d@gP-7qF
zTQ)5*EL~nuO(lrb*@bj*xu!6JN=zd>?P-*TJxV_VUO6x^Fff2HsAAv&RSshC%7KTA
zONawnIT#_AR-ncqdjA7n331hfsy2MCBBCB4!V$Pz_>GKQDlXypT!31Y;HV`ia>G4F
zE+hPIAfYl*#%~fhma#YDarpsvZUCi55C(NWd5|NW5yXM_89=RjMf}c!*C%8IgcMN`
zNFY-oya6jdv~Yw6sC6%k*Ef(lg|LsMl<*p(4@&!p0tAP1i1ioOd`2#2mvAGLdIh5~
zk5-j{LIs2&5lo>Aur%~R$pp~}T*b)61qu<g7C*?<Qusq05lYxA8C)(Wsg?l+1PFrz
zgDZ)P1LRh;+6ClRgkx}7OJt7XTEoc20U0cfHxz=^FfbZ!J;FZf)iEG{f-ny$b&MGo
zOJ-hXi4b_8Qi?wg)+>U<rxaHlywxDZ9R~_Lhy+&v7gup&S*kvGObIrOA_g1M;dSEX
zU>9O=V$ID@NzFk{G3>bdRa}K!5?~V{BZm5rfkyC{8+;@UsZYSJg}a}`jM`6P2ltbh
zotRw0oj`Hz1nZ7+Wpc?NTL~^_kw!dW-jalQOTdYb17;e76N3}VpdDTpan*5gBUvQ`
z9T$~C3PdTK-j~AbN-0n)4&*OH*HsWcC<`7DW@KdKs^t;~+YcJ!EXvP`Pf0CE%}Ytm
zORhxf`y#uBCl0J05*x7O%C02~Z#@We2@3IpTMuA`;DmrPa?`k^!K&cn&sgJE92U@g
zPCU?nW&}kWY68L?@r+y<T=Ho4;EGg~Mgb&J8HtZjMy?5rTzsfj2_Z(;QQ{S!&!q@C
zSBoDWm#kdOMvRQOhHNDmxn?m!6B#6r3c-e7KnVoRt*{7Xq*n`{E0aqWHMrspv8DkD
zSQ_AW;)SOHQ1T`|5oB>GVpv71WH619O90(6TnPbx07wybG%1N;0V5YL*kO>w0IG~2
zEGbZ6N?|yfP>R4+FH%q)=VBxYBdke+$YP&SO9qy9gq;N8X$M}~V<rnD$jk_qBA?RI
z0ZXzlA~so|1_DO1z;rb3A^{`8aLqtV7Dn(=0v1jfNy7+78G&c~6Vz-3P3-1CC!#?e
zTwG&#u(msw8#E^sLFOpq6LWH)%@Sx`1WmtukeUg?i#L=)<r^t6aY#tHaPfc>Ke$%1
zG7ypk^DrwvD+8!^kt|hUHDz#+akF4js^;P>gU%=x3*`yraA|XKLK{d{2100~-BNHq
zsC{W=z$L@Q;u3CUAcPorkpeNHvhrN4F5yO220}>9XDKinqClyYkxRIwC^0h+G@%(k
HVD10_#A@K`

literal 11285
zcmd<O#VNFjlgpBe*)b(Wh>eS}LWwa1L{};?hDfn-F;*!thA2sKF@^}SlvEa^TJdx7
z8gUCVGO@6+GIMeWv2#^$u{jnOXQt-~adR;jrKTw{hBPoT3UP6<mL=wtrh<$S;^N9L
zEh#81iHFN8al(yMlH%ekOU%hkNi0c?FV09TNEKp9%FoY{V$xt#66fMBEl!Qk$xqJC
z%u7c|%wS~Xs^(&M%1kT<IULD0Ax<uqq|C%(kj30wY!Hh<o{)vxXeGoYV8qAB#L3Je
z$jZhc#LdGitfb4uosw9R7@wA3l$%&0#8zBVl$n<<#U{k!=MnB~#mB`1VF|JLIeUb2
zS#YsB=jRodNb$q%U`+vqkP<)C=R#~Hsd>fuMM}I}5E)1aa-C%4Vsp;VD>HHd1yW{S
zL1_sn%(=PP(lT>OQj0JgE`)G6I~S`FvoIr*k_Z<&$bl)T#X_7pnZ+d<nRz9eN-|sm
zpwLP#P07oP&(DibFDOL>nUWG0dqHAK3Md?*z5~Uk5KFM5ud5Jin4^!U3#p-?$Hn29
zlA2eNSyGAY87Y*2M?^K34HvU-X$~lT5ueTk!3k{wBO@0Z7mHuMe*sr57jIBvUP^v$
zXkI2Lx<FQg6Amcou*3&TacXLc5OZc;i4>CrgOUIjD@ecyAtHtnLssxm7DR+R8yAZa
z6EkNABO_N17iUmvaY=qrYM7A_7Z+PWQEFOdg#^2h02dD=%H#7Ab5n~Y*o8#61i%i9
zPs~e+FV4wK28l`ua<N0y6&taE(@e3F1Q(Pg#08E{P)gF&U^HOl%H(1TPAntSDj|}r
zGUZ|pE=|H2J7S2?;O62qVi#s)0u=#V#avuriA9--Nja%uMpFEcQUH?V8yFdt__$cX
zVITyd(6WP)I2UJfeqKppW?pI$Jl8P^F(^rLaTjMK7Nw?uLlar5gOO2-)lkns&%j8g
z3{2}5B^&7)n(C&Tm?tGAni=b+WEPcF3LW+2%Hm?JN-fGS7J_6~EpAv;GZ`>yslu|V
zjFS`xqmY0Q69<zJ69)$rGi$LhFfcG^sd4eeL&_0I`BW?<!X;#dp_*$YBbQz>C}*eW
zXQvkBrRL~+=4F;-Cgx;Tr55QIfs<@}DJ0G6L&F4Ih#<R)SxbS7Ej~FXvA9@Bj7vmF
zC?`MJ$^pe#RChBAu?R77uqX*IGcYi~Fe}L2QW);$;9|1^tLB=_$fbi~C_cC5X69iH
zKf*2*g@rJW6Bh@g5Q7$r)!E7Ww8Su6#=^y9#XzE~5-X_bDm$F6`oPF#Ms!%AmQ1Jx
zl@y7|15e6h7GmOnVOHey!wBL)$~Ptn1}!cwNa+V+vx71)lK~MajO!99$)mV5NeEPO
zNRg1D5-UhiotZ~Ou7G-v5obKUA;nX<r8z>N%1VmNs3*aD#idE4cn^1R1WOPgnw9Gw
zBbNrT$yNx|{v*N3xurRzs3tA`i1pxRMlN0y4+<ewL|XjF6$_IPzG55{UNDT7nWSO0
z7o@<4sRGfgL`0e(Y9+=b#K3i#kxK-{)%wu(GqfEoh3Y3MqT^6Y1fwC%q`^p>BSFnZ
zSR)&x37(gTjf)0GE+JH36{RMo2*I0&XucxCRU|v1i;+tf>I6^+0Hs0$SF>1a3psdg
zE5aotB!FDof=tnpgw<dIPJA4U;JTI-+AhFdzp`>oXXN5Wat!{IiN_#NX%2293V|9d
z{IJvrsm58g2)Q0h`rC$xDUb)zQy;RcrSOL*js!=9n{XyKJR!lA$0dLiqWWm*3)$TS
zg9V<p@Yz?$B>}Y$MCIp{mSpDV=?8;*sF<xuW@@y`3b_<Ob{VDUgD9AlDVaHmAiLv>
z5=&CCBw}VQSy&<!aT4NyGzgiLI2aii7(iGHw=bEv4j_^PlAS`(E~*ry^(=)YZ{um4
zfEv+kLQEVW%&H}d*0YB6qqW4abXGw{j}W4p$`0+8Y6&B{yBu6>QY?&&jEq{aNaSi~
z<l+Ol-6%yLHXr~S8Gt(+Bo7TwDU4)<%ZV@>i1jMhT1GAdkWEG@@rD%j0hHjSk_4BS
zkO)$#1UHyKdIR+UKp0Qtvw%t|Xp<CfEK0G7)@Q-h4<N?1m_31|MlHB|0@LArDdJp)
zqc1?V%W(GwzA<uHkPyds`vW8=7Cb2t)Fc66Rv{)1v_=d!Y{Uc93&7eVAR?T&Zjlxx
z@LmB)DJ-!9)=45R-ZS&Gh;Tc+tiahf_(GcJ;JpK~lLATJ1oaR|@+RSC8#tM8J!9n3
zB0e!gdI==OXK4;e9?c~+K8f|{c1A9KG>_tMlM!h!YWoaFZvh<EM5R+K?X}yCTw-W0
zM{l#C`%H?2IK|P3BPO*8q7Kf3DsLicOi&pG!r(yRYJ>NN5cwUgWryxNq5_g+hx9RW
z$-{#g+%QHSyu;mpz&!>QZ>SG4huj{-Y<TY$Y7qYPio1=6RvBRQAqe{bORC(59B&|R
zqNhn{FBa-{DMDe3BN-CqHnMsXC9wV~)P?B%2~_vv>90cdBdQoYF~L>LB?V1nMkq~u
za(fk^79I%WZRHhnDPgsgru~Yuhy;S-Rd~+=t;sG09TY(=2Jp0ZK%ETmST?x5L-S5X
zFC(n`8E=Hy&44)`#jCK+2Q-C%Qy7I^4Y-H70=T$}6U$Qdag|K$T4JzHA+HlR2fGl1
z6KigMN@@<yCeUm~E(x%akohKk$Q%`T_66JY3A>g*%mOzj7Y=st&<KkYazES&nu(xY
zAs81L9!?kqJMNsr!j;V>gX}o)^a;|m8kTM`yOuP}fkI9Ka0fCtfg#=;vyzdE8_6~y
z=oFk3QjkmG3@a(T4wXU<Ahc027#A8qQWyaw1fM44;9@fZcZ0cVxx~Rv1<j%r<>$nw
zq!y&+rKIL1S0atRAv>HW4y+y$%UZJVo~AIDpb$T}rwNt^CoEX021-rnlVB|1DVuyQ
zX|QtmlqlhJqX3H*aVJq|v@oKi8$5Bt$d$n*k7glJX#u7G2}!JsP7HW$n!w1#hia7&
zV&)emE#M0RDMHTG;)lm7D;Kj7BcqlC#`G8)7mF2>1S8i1Si(dbGe${NXpY8FwbE*s
zu9!;}HPGS>2`339Sdx%*5{D-V)Y1cQRAq4~VpvM6WHF7AO90(6TnPhzP)HGWG%1N=
z3nLdV*kO>w5pO7jV@d(T`8bjY?$#HyMAxFM3jpp0aA77GBgzsCwD=@3!5BfN<O$U&
zWRxRV5{wbC2?jMNFcJ)=qe)3HE6@^*5qg4whNKjFf-xdF!9Y#In_zHP|Eyf47^R9a
z;Y6YWD^;YOB;lnBMx~0|T1GC+a>baQ$;6o0WP%zP7|8_F(WE4k9canK7@kbf!xAH*
z7~@DN^cr3WgBI0AkOf=ui8(n!;6-Fo&{hznsFdP^w22_RctbS45gOl^D}#$AGcU6Q
zYL;jmq<;l16`@^WDPfo}QvX;=94d(D=Rho#f*AtMDNsX{m^ehGT)23^<vO@YW@R8G
z3Fcuo_^b?|5rE`f1y)lA2N^dDCZ%2`F3vLOLakz<|BOO^7`bd=qhe5}TNwyJyE0G#
zs54-_Gngo7h{no*%N#xd5Avdwfe>t-2*ibHMG=E)bmkI-1Q|RStPF&340B0gh(is5
sJ02Qj2**POU@m}q2__2Bt~8yAOSq&cF*6UeaxH!|WD<+PJya_L04VlPO#lD@

diff --git a/tensorflow/core/profiler/internal/testdata/graph.pbtxt b/tensorflow/core/profiler/internal/testdata/graph.pbtxt
index e6fae2c4cf6..62bba4a7bf6 100644
--- a/tensorflow/core/profiler/internal/testdata/graph.pbtxt
+++ b/tensorflow/core/profiler/internal/testdata/graph.pbtxt
@@ -1,6 +1,27 @@
 node {
   name: "zeros"
   op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
   attr {
     key: "dtype"
     value {
@@ -17,10 +38,10 @@ node {
             size: 2
           }
           dim {
-            size: 8
+            size: 6
           }
           dim {
-            size: 8
+            size: 6
           }
           dim {
             size: 3
@@ -32,13 +53,24 @@ node {
   }
 }
 node {
-  name: "conv2d/kernel/Initializer/random_uniform/shape"
+  name: "ScalarW/Initializer/random_normal/shape"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+          }
+        }
       }
     }
   }
@@ -55,22 +87,29 @@ node {
         dtype: DT_INT32
         tensor_shape {
           dim {
-            size: 4
           }
         }
-        tensor_content: "\003\000\000\000\003\000\000\000\003\000\000\000\005\000\000\000"
       }
     }
   }
 }
 node {
-  name: "conv2d/kernel/Initializer/random_uniform/min"
+  name: "ScalarW/Initializer/random_normal/mean"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -87,19 +126,28 @@ node {
         dtype: DT_FLOAT
         tensor_shape {
         }
-        float_val: -0.288675129414
+        float_val: 0.0
       }
     }
   }
 }
 node {
-  name: "conv2d/kernel/Initializer/random_uniform/max"
+  name: "ScalarW/Initializer/random_normal/stddev"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -116,15 +164,15 @@ node {
         dtype: DT_FLOAT
         tensor_shape {
         }
-        float_val: 0.288675129414
+        float_val: 0.0010000000475
       }
     }
   }
 }
 node {
-  name: "conv2d/kernel/Initializer/random_uniform/RandomUniform"
-  op: "RandomUniform"
-  input: "conv2d/kernel/Initializer/random_uniform/shape"
+  name: "ScalarW/Initializer/random_normal/RandomStandardNormal"
+  op: "RandomStandardNormal"
+  input: "ScalarW/Initializer/random_normal/shape"
   attr {
     key: "T"
     value {
@@ -135,7 +183,16 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -159,30 +216,10 @@ node {
   }
 }
 node {
-  name: "conv2d/kernel/Initializer/random_uniform/sub"
-  op: "Sub"
-  input: "conv2d/kernel/Initializer/random_uniform/max"
-  input: "conv2d/kernel/Initializer/random_uniform/min"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d/kernel/Initializer/random_uniform/mul"
+  name: "ScalarW/Initializer/random_normal/mul"
   op: "Mul"
-  input: "conv2d/kernel/Initializer/random_uniform/RandomUniform"
-  input: "conv2d/kernel/Initializer/random_uniform/sub"
+  input: "ScalarW/Initializer/random_normal/RandomStandardNormal"
+  input: "ScalarW/Initializer/random_normal/stddev"
   attr {
     key: "T"
     value {
@@ -193,16 +230,25 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
 }
 node {
-  name: "conv2d/kernel/Initializer/random_uniform"
+  name: "ScalarW/Initializer/random_normal"
   op: "Add"
-  input: "conv2d/kernel/Initializer/random_uniform/mul"
-  input: "conv2d/kernel/Initializer/random_uniform/min"
+  input: "ScalarW/Initializer/random_normal/mul"
+  input: "ScalarW/Initializer/random_normal/mean"
   attr {
     key: "T"
     value {
@@ -213,19 +259,37 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
 }
 node {
-  name: "conv2d/kernel"
+  name: "ScalarW"
   op: "VariableV2"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -245,18 +309,6 @@ node {
     key: "shape"
     value {
       shape {
-        dim {
-          size: 3
-        }
-        dim {
-          size: 3
-        }
-        dim {
-          size: 3
-        }
-        dim {
-          size: 5
-        }
       }
     }
   }
@@ -268,10 +320,10 @@ node {
   }
 }
 node {
-  name: "conv2d/kernel/Assign"
+  name: "ScalarW/Assign"
   op: "Assign"
-  input: "conv2d/kernel"
-  input: "conv2d/kernel/Initializer/random_uniform"
+  input: "ScalarW"
+  input: "ScalarW/Initializer/random_normal"
   attr {
     key: "T"
     value {
@@ -282,7 +334,16 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -300,9 +361,9 @@ node {
   }
 }
 node {
-  name: "conv2d/kernel/read"
+  name: "ScalarW/read"
   op: "Identity"
-  input: "conv2d/kernel"
+  input: "ScalarW"
   attr {
     key: "T"
     value {
@@ -313,137 +374,43 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/kernel"
+        s: "loc:@ScalarW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
 }
 node {
-  name: "conv2d/bias/Initializer/Const"
+  name: "DW/Initializer/random_normal/shape"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d/bias"
+        s: "loc:@DW"
       }
     }
   }
   attr {
-    key: "dtype"
+    key: "_output_shapes"
     value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_FLOAT
-        tensor_shape {
+      list {
+        shape {
           dim {
-            size: 5
+            size: 4
           }
         }
-        float_val: 0.0
       }
     }
   }
-}
-node {
-  name: "conv2d/bias"
-  op: "VariableV2"
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d/bias"
-      }
-    }
-  }
-  attr {
-    key: "container"
-    value {
-      s: ""
-    }
-  }
-  attr {
-    key: "dtype"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "shape"
-    value {
-      shape {
-        dim {
-          size: 5
-        }
-      }
-    }
-  }
-  attr {
-    key: "shared_name"
-    value {
-      s: ""
-    }
-  }
-}
-node {
-  name: "conv2d/bias/Assign"
-  op: "Assign"
-  input: "conv2d/bias"
-  input: "conv2d/bias/Initializer/Const"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d/bias"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "conv2d/bias/read"
-  op: "Identity"
-  input: "conv2d/bias"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d/bias"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d/convolution/Shape"
-  op: "Const"
   attr {
     key: "dtype"
     value {
@@ -460,46 +427,422 @@ node {
             size: 4
           }
         }
-        tensor_content: "\003\000\000\000\003\000\000\000\003\000\000\000\005\000\000\000"
+        tensor_content: "\003\000\000\000\003\000\000\000\003\000\000\000\006\000\000\000"
       }
     }
   }
 }
 node {
-  name: "conv2d/convolution/dilation_rate"
+  name: "DW/Initializer/random_normal/mean"
   op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
+      }
+    }
+  }
   attr {
     key: "dtype"
     value {
-      type: DT_INT32
+      type: DT_FLOAT
     }
   }
   attr {
     key: "value"
     value {
       tensor {
-        dtype: DT_INT32
+        dtype: DT_FLOAT
         tensor_shape {
-          dim {
-            size: 2
-          }
         }
-        tensor_content: "\001\000\000\000\001\000\000\000"
+        float_val: 0.0
       }
     }
   }
 }
 node {
-  name: "conv2d/convolution"
+  name: "DW/Initializer/random_normal/stddev"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/RandomStandardNormal"
+  op: "RandomStandardNormal"
+  input: "DW/Initializer/random_normal/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal/mul"
+  op: "Mul"
+  input: "DW/Initializer/random_normal/RandomStandardNormal"
+  input: "DW/Initializer/random_normal/stddev"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "DW/Initializer/random_normal"
+  op: "Add"
+  input: "DW/Initializer/random_normal/mul"
+  input: "DW/Initializer/random_normal/mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "DW"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 3
+        }
+        dim {
+          size: 3
+        }
+        dim {
+          size: 6
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "DW/Assign"
+  op: "Assign"
+  input: "DW"
+  input: "DW/Initializer/random_normal"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "DW/read"
+  op: "Identity"
+  input: "DW"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Conv2D"
   op: "Conv2D"
   input: "zeros"
-  input: "conv2d/kernel/read"
+  input: "DW/read"
   attr {
     key: "T"
     value {
       type: DT_FLOAT
     }
   }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 6
+          }
+        }
+      }
+    }
+  }
   attr {
     key: "data_format"
     value {
@@ -509,7 +852,7 @@ node {
   attr {
     key: "padding"
     value {
-      s: "VALID"
+      s: "SAME"
     }
   }
   attr {
@@ -517,8 +860,8 @@ node {
     value {
       list {
         i: 1
-        i: 1
-        i: 1
+        i: 2
+        i: 2
         i: 1
       }
     }
@@ -531,31 +874,25 @@ node {
   }
 }
 node {
-  name: "conv2d/BiasAdd"
-  op: "BiasAdd"
-  input: "conv2d/convolution"
-  input: "conv2d/bias/read"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "data_format"
-    value {
-      s: "NHWC"
-    }
-  }
-}
-node {
-  name: "conv2d_1/kernel/Initializer/random_uniform/shape"
+  name: "DW2/Initializer/random_normal/shape"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d_1/kernel"
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
       }
     }
   }
@@ -575,19 +912,28 @@ node {
             size: 4
           }
         }
-        tensor_content: "\003\000\000\000\003\000\000\000\005\000\000\000\005\000\000\000"
+        tensor_content: "\002\000\000\000\002\000\000\000\006\000\000\000\014\000\000\000"
       }
     }
   }
 }
 node {
-  name: "conv2d_1/kernel/Initializer/random_uniform/min"
+  name: "DW2/Initializer/random_normal/mean"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d_1/kernel"
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -604,19 +950,28 @@ node {
         dtype: DT_FLOAT
         tensor_shape {
         }
-        float_val: -0.25819888711
+        float_val: 0.0
       }
     }
   }
 }
 node {
-  name: "conv2d_1/kernel/Initializer/random_uniform/max"
+  name: "DW2/Initializer/random_normal/stddev"
   op: "Const"
   attr {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d_1/kernel"
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+        }
       }
     }
   }
@@ -633,15 +988,15 @@ node {
         dtype: DT_FLOAT
         tensor_shape {
         }
-        float_val: 0.25819888711
+        float_val: 0.0010000000475
       }
     }
   }
 }
 node {
-  name: "conv2d_1/kernel/Initializer/random_uniform/RandomUniform"
-  op: "RandomUniform"
-  input: "conv2d_1/kernel/Initializer/random_uniform/shape"
+  name: "DW2/Initializer/random_normal/RandomStandardNormal"
+  op: "RandomStandardNormal"
+  input: "DW2/Initializer/random_normal/shape"
   attr {
     key: "T"
     value {
@@ -652,7 +1007,28 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d_1/kernel"
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 12
+          }
+        }
       }
     }
   }
@@ -676,30 +1052,10 @@ node {
   }
 }
 node {
-  name: "conv2d_1/kernel/Initializer/random_uniform/sub"
-  op: "Sub"
-  input: "conv2d_1/kernel/Initializer/random_uniform/max"
-  input: "conv2d_1/kernel/Initializer/random_uniform/min"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d_1/kernel/Initializer/random_uniform/mul"
+  name: "DW2/Initializer/random_normal/mul"
   op: "Mul"
-  input: "conv2d_1/kernel/Initializer/random_uniform/RandomUniform"
-  input: "conv2d_1/kernel/Initializer/random_uniform/sub"
+  input: "DW2/Initializer/random_normal/RandomStandardNormal"
+  input: "DW2/Initializer/random_normal/stddev"
   attr {
     key: "T"
     value {
@@ -710,313 +1066,268 @@ node {
     key: "_class"
     value {
       list {
-        s: "loc:@conv2d_1/kernel"
+        s: "loc:@DW2"
       }
     }
   }
-}
-node {
-  name: "conv2d_1/kernel/Initializer/random_uniform"
-  op: "Add"
-  input: "conv2d_1/kernel/Initializer/random_uniform/mul"
-  input: "conv2d_1/kernel/Initializer/random_uniform/min"
   attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
+    key: "_output_shapes"
     value {
       list {
-        s: "loc:@conv2d_1/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d_1/kernel"
-  op: "VariableV2"
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/kernel"
-      }
-    }
-  }
-  attr {
-    key: "container"
-    value {
-      s: ""
-    }
-  }
-  attr {
-    key: "dtype"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "shape"
-    value {
-      shape {
-        dim {
-          size: 3
-        }
-        dim {
-          size: 3
-        }
-        dim {
-          size: 5
-        }
-        dim {
-          size: 5
-        }
-      }
-    }
-  }
-  attr {
-    key: "shared_name"
-    value {
-      s: ""
-    }
-  }
-}
-node {
-  name: "conv2d_1/kernel/Assign"
-  op: "Assign"
-  input: "conv2d_1/kernel"
-  input: "conv2d_1/kernel/Initializer/random_uniform"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/kernel"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "conv2d_1/kernel/read"
-  op: "Identity"
-  input: "conv2d_1/kernel"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d_1/bias/Initializer/Const"
-  op: "Const"
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/bias"
-      }
-    }
-  }
-  attr {
-    key: "dtype"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_FLOAT
-        tensor_shape {
-          dim {
-            size: 5
-          }
-        }
-        float_val: 0.0
-      }
-    }
-  }
-}
-node {
-  name: "conv2d_1/bias"
-  op: "VariableV2"
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/bias"
-      }
-    }
-  }
-  attr {
-    key: "container"
-    value {
-      s: ""
-    }
-  }
-  attr {
-    key: "dtype"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "shape"
-    value {
-      shape {
-        dim {
-          size: 5
-        }
-      }
-    }
-  }
-  attr {
-    key: "shared_name"
-    value {
-      s: ""
-    }
-  }
-}
-node {
-  name: "conv2d_1/bias/Assign"
-  op: "Assign"
-  input: "conv2d_1/bias"
-  input: "conv2d_1/bias/Initializer/Const"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/bias"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "conv2d_1/bias/read"
-  op: "Identity"
-  input: "conv2d_1/bias"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/bias"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d_2/convolution/Shape"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_INT32
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_INT32
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        tensor_content: "\003\000\000\000\003\000\000\000\005\000\000\000\005\000\000\000"
-      }
-    }
-  }
-}
-node {
-  name: "conv2d_2/convolution/dilation_rate"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_INT32
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_INT32
-        tensor_shape {
+        shape {
           dim {
             size: 2
           }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 12
+          }
         }
-        tensor_content: "\001\000\000\000\001\000\000\000"
       }
     }
   }
 }
 node {
-  name: "conv2d_2/convolution"
-  op: "Conv2D"
-  input: "conv2d/BiasAdd"
-  input: "conv2d_1/kernel/read"
+  name: "DW2/Initializer/random_normal"
+  op: "Add"
+  input: "DW2/Initializer/random_normal/mul"
+  input: "DW2/Initializer/random_normal/mean"
   attr {
     key: "T"
     value {
       type: DT_FLOAT
     }
   }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 12
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "DW2"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 12
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 2
+        }
+        dim {
+          size: 2
+        }
+        dim {
+          size: 6
+        }
+        dim {
+          size: 12
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "DW2/Assign"
+  op: "Assign"
+  input: "DW2"
+  input: "DW2/Initializer/random_normal"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 12
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "DW2/read"
+  op: "Identity"
+  input: "DW2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@DW2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 6
+          }
+          dim {
+            size: 12
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Conv2D_1"
+  op: "Conv2D"
+  input: "Conv2D"
+  input: "DW2/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 12
+          }
+        }
+      }
+    }
+  }
   attr {
     key: "data_format"
     value {
@@ -1026,7 +1337,7 @@ node {
   attr {
     key: "padding"
     value {
-      s: "VALID"
+      s: "SAME"
     }
   }
   attr {
@@ -1034,8 +1345,8 @@ node {
     value {
       list {
         i: 1
-        i: 1
-        i: 1
+        i: 2
+        i: 2
         i: 1
       }
     }
@@ -1047,537 +1358,13 @@ node {
     }
   }
 }
-node {
-  name: "conv2d_2/BiasAdd"
-  op: "BiasAdd"
-  input: "conv2d_2/convolution"
-  input: "conv2d_1/bias/read"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "data_format"
-    value {
-      s: "NHWC"
-    }
-  }
-}
-node {
-  name: "save/Const"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-        }
-        string_val: "model"
-      }
-    }
-  }
-}
-node {
-  name: "save/SaveV2/tensor_names"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        string_val: "conv2d/bias"
-        string_val: "conv2d/kernel"
-        string_val: "conv2d_1/bias"
-        string_val: "conv2d_1/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "save/SaveV2/shape_and_slices"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        string_val: ""
-        string_val: ""
-        string_val: ""
-        string_val: ""
-      }
-    }
-  }
-}
-node {
-  name: "save/SaveV2"
-  op: "SaveV2"
-  input: "save/Const"
-  input: "save/SaveV2/tensor_names"
-  input: "save/SaveV2/shape_and_slices"
-  input: "conv2d/bias"
-  input: "conv2d/kernel"
-  input: "conv2d_1/bias"
-  input: "conv2d_1/kernel"
-  attr {
-    key: "dtypes"
-    value {
-      list {
-        type: DT_FLOAT
-        type: DT_FLOAT
-        type: DT_FLOAT
-        type: DT_FLOAT
-      }
-    }
-  }
-}
-node {
-  name: "save/control_dependency"
-  op: "Identity"
-  input: "save/Const"
-  input: "^save/SaveV2"
-  attr {
-    key: "T"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@save/Const"
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2/tensor_names"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: "conv2d/bias"
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2/shape_and_slices"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: ""
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2"
-  op: "RestoreV2"
-  input: "save/Const"
-  input: "save/RestoreV2/tensor_names"
-  input: "save/RestoreV2/shape_and_slices"
-  attr {
-    key: "dtypes"
-    value {
-      list {
-        type: DT_FLOAT
-      }
-    }
-  }
-}
-node {
-  name: "save/Assign"
-  op: "Assign"
-  input: "conv2d/bias"
-  input: "save/RestoreV2"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d/bias"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_1/tensor_names"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: "conv2d/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_1/shape_and_slices"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: ""
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_1"
-  op: "RestoreV2"
-  input: "save/Const"
-  input: "save/RestoreV2_1/tensor_names"
-  input: "save/RestoreV2_1/shape_and_slices"
-  attr {
-    key: "dtypes"
-    value {
-      list {
-        type: DT_FLOAT
-      }
-    }
-  }
-}
-node {
-  name: "save/Assign_1"
-  op: "Assign"
-  input: "conv2d/kernel"
-  input: "save/RestoreV2_1"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d/kernel"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_2/tensor_names"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: "conv2d_1/bias"
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_2/shape_and_slices"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: ""
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_2"
-  op: "RestoreV2"
-  input: "save/Const"
-  input: "save/RestoreV2_2/tensor_names"
-  input: "save/RestoreV2_2/shape_and_slices"
-  attr {
-    key: "dtypes"
-    value {
-      list {
-        type: DT_FLOAT
-      }
-    }
-  }
-}
-node {
-  name: "save/Assign_2"
-  op: "Assign"
-  input: "conv2d_1/bias"
-  input: "save/RestoreV2_2"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/bias"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_3/tensor_names"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: "conv2d_1/kernel"
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_3/shape_and_slices"
-  op: "Const"
-  attr {
-    key: "dtype"
-    value {
-      type: DT_STRING
-    }
-  }
-  attr {
-    key: "value"
-    value {
-      tensor {
-        dtype: DT_STRING
-        tensor_shape {
-          dim {
-            size: 1
-          }
-        }
-        string_val: ""
-      }
-    }
-  }
-}
-node {
-  name: "save/RestoreV2_3"
-  op: "RestoreV2"
-  input: "save/Const"
-  input: "save/RestoreV2_3/tensor_names"
-  input: "save/RestoreV2_3/shape_and_slices"
-  attr {
-    key: "dtypes"
-    value {
-      list {
-        type: DT_FLOAT
-      }
-    }
-  }
-}
-node {
-  name: "save/Assign_3"
-  op: "Assign"
-  input: "conv2d_1/kernel"
-  input: "save/RestoreV2_3"
-  attr {
-    key: "T"
-    value {
-      type: DT_FLOAT
-    }
-  }
-  attr {
-    key: "_class"
-    value {
-      list {
-        s: "loc:@conv2d_1/kernel"
-      }
-    }
-  }
-  attr {
-    key: "use_locking"
-    value {
-      b: true
-    }
-  }
-  attr {
-    key: "validate_shape"
-    value {
-      b: true
-    }
-  }
-}
-node {
-  name: "save/restore_all"
-  op: "NoOp"
-  input: "^save/Assign"
-  input: "^save/Assign_1"
-  input: "^save/Assign_2"
-  input: "^save/Assign_3"
-}
 node {
   name: "init"
   op: "NoOp"
-  input: "^conv2d/kernel/Assign"
-  input: "^conv2d/bias/Assign"
-  input: "^conv2d_1/kernel/Assign"
-  input: "^conv2d_1/bias/Assign"
+  input: "^ScalarW/Assign"
+  input: "^DW/Assign"
+  input: "^DW2/Assign"
 }
 versions {
-  producer: 21
+  producer: 24
 }
diff --git a/tensorflow/core/profiler/internal/testdata/run_meta b/tensorflow/core/profiler/internal/testdata/run_meta
index 6e9e0c38729e81ab892faf194d8a5d94037855d5..ae76acb743fc517239206228369b175c00c1c248 100644
GIT binary patch
literal 5539
zcmd;Dpv|?6jZ0NOD?iC9CqFqcCnLYOM87DtASW|9(aJ!-B(XT#%0NH8pw!Ah$d!vd
zKG;7r$k|n3^@C5XJ?pPBNw6t!X$TqdadEl_gvJ}hC#5B8G?+LEK-DSOD){C57iegD
zyu1(5ki;O=!N|qz5^f}L6lMma0=EV~x*1j`Qbrt%LIOfe986HmCd9<Sqa?%8z$wtc
zC55n2fm?&opkc+2xf?i~gj~Xn6l@j35{ohulX6nSj5IVo_ChU6;#X+kGUECV1B^l~
zj9g4E;R4s73KW<$nDMxSS%`@PhS`*4I67F-+`$8K$271z1YN?>opA+f3#K#5xLB)F
zi}H&FUPF~gFe)%<uwppHNXh^yNI<~>!puri91qw88aR-HgwLR16WAgAV3QSW6`b?)
zic2&!J#IqnNMaD0z{thsoS#=_<Rb79Dyr~EUgN90kvLL>3N(00H0UWbuxT_{p~VVv
zXl!5>XkbHj0zW7;7J(fh1+fL<iZUY?4X~qh6kNhJJw8BfPGS~la1olp$i)Fs9B(M_
zvf(o%l`$$TX4Y8FjOHv2i3SCQ1{RG5J2Yp(5)3%8Fbg!WAUjJO<g8s_XUV~ALO2W}
z0&<y=rpGg={UE2QaGho(oKcd&8N~*XQ8q$-C&8qkqM>fY%Eg>qP>Lmgd_?4r6fTkY
zqSTVI#GH7DYvK*#4dM+1`dZ<kp{bz_GfEqqS_NB$_#m(m8ZaaD;|(=EI-y1+F>syb
z=MseEYyIMqqSVA(tHhifp(uEQxbqN_VHG$vIGs!(rdUBJg`(8-Vk<*)h2o4{D+7h}
zqRbR4LmfjMLxrTA{N!vaLjxld5Z7b+!{Z%&-@d;R3AlQV1<CBzM^LlbHQ2G6Z9&lN
zNq^2@HG4YDY&H!xa?M_{_#Bei(eN<Y_!sJK1q}r!Ge}^dhKYd**x^RT1}2!{VxeQ9
zW8v}X%ls}xpoGH>pYjiCILP5f*bKJ>n+*+cV^9b~1KbkC_V|8u1(Lytm^=<MSW!a}
zYlxUZLfgd5$O4z6ANQ?BGCLCP=sEwPj+WPucQV1^Xd@IyBc~EmLnE-WoBwP;GCCG+
z^ii16DjF(I7FdinhZt>PVur=phGsfo<Z*1zW+dYgE}sX@+X}K8vShk^@$Vf-Mk8E)
z3fXA#U4C)bZY1Lo@%aX3JckAc*7!7p#3wYNAcr|TXP(=&2g&R>q%yam5mL%1fbym}
zB-rB(QByhLoPXy3J{;!vz|03Fa~$RqcK_{%`*D~*ohs)4-h2Sb{CK4BISw;lTtgg|
zEYZV<h*DtYzC%bBL?c<y-vo^SRt;8I!bG<KlJcPuYzE3k(2@XB?%um|9LexRB*Rz0
z4A<AtcY;PLn&HrFWoTq!1hoJ;1zG4A8|fGudE8iV63L2KBr6)4q3)N`kb)JGXjTv|
z6wY2bhh#jm50=1;2jyG5J|Ivq+<kii$qM8sZft@2fU+oVTyzb|0^~e47iIxPdEnEI
z>j?9?&hT>ypw{PxMo3leXmt*2hKyF{79OM3IZ~MkYlw_i=Qtbkqt$svt`)vU{b+TL
zwI>M6TBFrD;U+C=b^e`^ix*m*=ceW+7gP!*a&h^(`Z@<hdX(h52=sn{cD$H0m|)FN
zXlqCzuQWG4sj?)s*h<0B&`2RAwYVfRFR>&uKhH|RKq0ZXGB4RmAuTbdIMrjxy`7zi
z&S(=O7eB-fmy&!BN035+<FK|0iw28R2&_M#A8%k4ADo((Vr7+@lAaoGXaFCpu(AT%
vfb0rm3lki!C@CsU^*BH27E(i|go_{Qap<^&IUT&tHJuTo`D7wA7{UVpJ}GQo

literal 3444
zcmd=3&&TzLk4sfQD?iC9CqFqcCnLYOM87DtASW|9(aJ!-B(XT#%0NH4pw!Ah$c&3U
zKG;7r$k|ok%)+G;?!3Lkq`_pw%Eg>qQ0gQAl~S-(@XPlv(9rZa2vsAL#Kl^bT9jWb
za1$z`!3@=9r7NY)!6+mk#KgfQ#Kge?#mq{g91T1I4LnjHqZQ;08V;<y-SA4@i63l^
zf~|sceqM2jhNj0ksEI<gT)fHod1Xc^`q`;Pd8s)9H=r^Sj0%hztT6lZrF5b8F$*zq
zz%Z+lIL90ofjKM?`(!m34H}j<FZ};f-bn_<R0Ugwu*9Ow#H5_mFe433j|)&sgz{n5
z#~bP=WhNF2+=a@3t%rM8QA(bJQHVu|iGx*1ltV;71Y)5S$ifB9lYS#CgqnlX!fY;X
zsLR0C-9omG4dz=M)=5IFYy9(4-bo6{6r7e#VB`{ls)K~Ceo<;-ioiW&OW}S*L@6wI
zK{Uv}$UzJbU>!_@6>JqeQ&RIvGD|8oP-HbdV8JTX0Cy5Jq##au2z8PIBRpkV$xF$?
z16>#?&<z@(K@N2SvO$<mfXZrmJb+pu)X2!i4{=_+AuQG1fl5j+DlloV!{Pxm)v<z;
zfH+4ByFd#&BoWAgVr4~h6DU^X(9FddGc{ZS5T)Q$2ML2G2sbbyavF|MkOYMSI3>V>
z8kC;E24RK)*b<}=n8L^<0<j(*0$`m2&!J9GKym<Px_|_NC`uq0G(dyE0Gq*>PJ&yk
z>G2q9sn8;L+Rz6XlAlvrl9`_;fD|q^8umuq;Bre~0k^~gZYx9#fyyB^AtnwG29*m7
zxCIt)Lt;n)6hjAEz~zD&$Yi4wWD6k0Qkjv92Dp^cQGhBz$}*ZBummc!gprF6s=z5T
zvDh&sMPS(?NTI60qrpdp^AthOI|Ff^9jeg^whB;7V6hC6hXxGXq41d3^jHjaiqLvQ
z6dNIXR$v=cg#xbzKg_WWtP%~ZXkiH|>_8Y4mJO@|4Xlu`R02760i=v|206wkB_72B
zL})@}^`RbsxfUMr;1s9nu?6ZLp*4)KJP~gM(<E>lDyzVx!9s>>l|imu1#ztl#61YZ
zF#;FWBS;QL3gDwq2MN9B<`RL|s|NpJfRRg*i#0yj)6ZLi5lPgEnSp_y3s#{D5m#|b
zFli})>Qk86{~4t)YFq^rTNoL*a$q$Yv}hyNE@mwyP+f{<7pMw=VOE7yWFIjycrbFM
zW2<0@wUR}NiGx`SRE2~61+xc4v!eLRgORHYZV@ONp*et9i&&MIIGB`}I9RllI2LdV
zp!pG{<UqC8hAS4Oz9!Z}HZ4#q0m&Y4PQns?5wJ85E~3F%f>=A)m6$l#w757#q#%YP
zCj<{ht|Iheidb_wlt8T<CJuH|1B-_%52Iow)>2L_B~W#O6k?#14oV3i46bl6LXU|n
z6MHpAtfgE^OdOnApvoL2{XuL)3(g9-RVbw$u~u;_F>!zbmJ5`lS+S)+P$YsdTFU3;
ziic%rXiJ7zD|v`bgHf=o1+{>rRLHBu#KD6*6_&u#H=@`j*8O}+OdOzK<0Uo7n7Bf?
iB;t!x^HSnLWd*o`f>uKD2E^LVuf)W`hg95IFaiKZ*>Vg3

diff --git a/tensorflow/core/profiler/internal/testdata/tfprof_log b/tensorflow/core/profiler/internal/testdata/tfprof_log
index 2a317207c4e..e1c3693d2be 100644
--- a/tensorflow/core/profiler/internal/testdata/tfprof_log
+++ b/tensorflow/core/profiler/internal/testdata/tfprof_log
@@ -1,17 +1,11 @@
 
-
-conv2d_2/BiasAdd�
-
-conv2d/BiasAdd�
-%
-conv2d_1/bias_trainable_variables
-
-conv2d_2/convolution�p
-
-conv2d/convolution�
-#
-conv2d/bias_trainable_variables
-'
-conv2d_1/kernel_trainable_variables
-%
-conv2d/kernel_trainable_variables
\ No newline at end of file
+
+DW2_trainable_variables
+
+ScalarW_trainable_variables
+
+DW_trainable_variables
+
+Conv2D�-
+
+Conv2D_1�$
\ No newline at end of file
diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc
index 17c51bed9f5..1c512a7ca18 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.cc
+++ b/tensorflow/core/profiler/internal/tfprof_code.cc
@@ -191,6 +191,13 @@ class Samples {
         } else if (type == kShown[0]) {
           sample_pb->mutable_value()->Add(
               gn->requested_bytes(node->node->step()));
+        } else if (type == kShown[11]) {
+          sample_pb->mutable_value()->Add(gn->peak_bytes(node->node->step()));
+        } else if (type == kShown[12]) {
+          sample_pb->mutable_value()->Add(
+              gn->residual_bytes(node->node->step()));
+        } else if (type == kShown[13]) {
+          sample_pb->mutable_value()->Add(gn->output_bytes(node->node->step()));
         } else if (type == kShown[2]) {
           sample_pb->mutable_value()->Add(gn->parameters());
         } else if (type == kShown[3]) {
@@ -296,9 +303,21 @@ class PprofProfileImpl : public PprofProfile {
             string_table_.GetIndex("CPU execution time."));
       }
     } else if (type == kShown[0]) {
-      sample_type->set_unit(string_table_.GetIndex("bytes"));
+      sample_type->set_unit(string_table_.GetIndex("requested bytes"));
       profile_pb->mutable_comment()->Add(
-          string_table_.GetIndex("Sum of operation output memory."));
+          string_table_.GetIndex("Sum of operation total requested memory."));
+    } else if (type == kShown[11]) {
+      sample_type->set_unit(string_table_.GetIndex("peak bytes"));
+      profile_pb->mutable_comment()->Add(
+          string_table_.GetIndex("Sum of operation peak memory usage."));
+    } else if (type == kShown[12]) {
+      sample_type->set_unit(string_table_.GetIndex("residual bytes"));
+      profile_pb->mutable_comment()->Add(string_table_.GetIndex(
+          "Sum of operation allocated memory after finish."));
+    } else if (type == kShown[13]) {
+      sample_type->set_unit(string_table_.GetIndex("output bytes"));
+      profile_pb->mutable_comment()->Add(
+          string_table_.GetIndex("Sum of operation output size."));
     } else if (type == kShown[2]) {
       sample_type->set_unit(string_table_.GetIndex("count"));
       profile_pb->mutable_comment()->Add(
@@ -370,7 +389,8 @@ const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
     }
     string select = *opts.select.begin();
     if (select != kShown[0] && select != kShown[1] && select != kShown[2] &&
-        select != kShown[3] && select != kShown[9] && select != kShown[10]) {
+        select != kShown[3] && select != kShown[9] && select != kShown[10] &&
+        select != kShown[11] && select != kShown[12] && select != kShown[13]) {
       fprintf(stderr, "pprof doesn't support -select=%s\n", select.c_str());
       return root_.get();
     }
@@ -522,17 +542,37 @@ std::vector<CodeNode*> TFCode::Account(const std::vector<CodeNode*>& roots,
   return act_nodes;
 }
 
-string TFCode::FormatNode(CodeNode* node, const Options& opts, int64 indent) {
+// Returns "<bytes>/<total_bytes>" rendered through FormatMemory; the self
+// part becomes "--" when the node is not accounted.
+string TFCode::FormatNodeMemory(CodeNode* node, int64 bytes,
+                                int64 total_bytes) const {
+  const string total_str = FormatMemory(total_bytes);
+  if (!node->account) {
+    return "--/" + total_str;
+  }
+  return FormatMemory(bytes) + "/" + total_str;
+}
+
+string TFCode::FormatNode(CodeNode* node, const Options& opts,
+                          int64 indent) const {
   std::vector<string> attrs;
   if (opts.select.find(kShown[0]) != opts.select.end()) {
-    string memory = FormatMemory(node->proto().total_requested_bytes());
-    if (node->account) {
-      memory = FormatMemory(node->proto().requested_bytes()) + "/" + memory;
-    } else {
-      memory = "--/" + memory;
-    }
-    attrs.push_back(memory);
+    attrs.push_back(FormatNodeMemory(node, node->proto().requested_bytes(),
+                                     node->proto().total_requested_bytes()));
   }
+  if (opts.select.find(kShown[11]) != opts.select.end()) {
+    attrs.push_back(FormatNodeMemory(node, node->proto().peak_bytes(),
+                                     node->proto().total_peak_bytes()));
+  }
+  if (opts.select.find(kShown[12]) != opts.select.end()) {
+    attrs.push_back(FormatNodeMemory(node, node->proto().residual_bytes(),
+                                     node->proto().total_residual_bytes()));
+  }
+  if (opts.select.find(kShown[13]) != opts.select.end()) {
+    attrs.push_back(FormatNodeMemory(node, node->proto().output_bytes(),
+                                     node->proto().total_output_bytes()));
+  }
+
   std::vector<string> time_attrs = FormatTimes(node, opts);
   attrs.insert(attrs.end(), time_attrs.begin(), time_attrs.end());
 
diff --git a/tensorflow/core/profiler/internal/tfprof_code.h b/tensorflow/core/profiler/internal/tfprof_code.h
index 7583a43a26f..5e64104d9fa 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.h
+++ b/tensorflow/core/profiler/internal/tfprof_code.h
@@ -79,7 +79,8 @@ class TFCode : public TFMultiShow {
               const Options& opts, string* display_str,
               MultiGraphNodeProto* proto, std::vector<uint64>* call_ids);
 
-  string FormatNode(CodeNode* node, const Options& opts, int64 indent);
+  string FormatNode(CodeNode* node, const Options& opts, int64 indent) const;
+  string FormatNodeMemory(CodeNode* node, int64 bytes, int64 total_bytes) const;
 
   std::unique_ptr<CodeNode> root_;
   std::unique_ptr<TFMultiGraphNode> graph_root_;
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index bc1765e704a..70b91c37e4b 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -110,9 +110,11 @@ void ExecStep::AddMemoryStats(const string& dev,
       uint64 output_ptr =
           output.tensor_description().allocation_description().ptr();
       total_output_bytes += output_bytes;
-      output_bytes_[output.slot()] = std::make_pair(output_bytes, output_ptr);
+      output_memory_[output.slot()] = std::make_pair(output_bytes, output_ptr);
     }
   }
+  output_bytes_ = total_output_bytes;
+
   if (step_stat.has_memory_stats()) {
     host_temp_bytes_ += step_stat.memory_stats().host_temp_memory_size();
     host_persistent_bytes_ +=
@@ -122,7 +124,17 @@ void ExecStep::AddMemoryStats(const string& dev,
     accelerator_persistent_bytes_ +=
         step_stat.memory_stats().device_persistent_memory_size();
   }
-  requested_bytes_ = total_output_bytes;
+  int64 residual_bytes = 0;
+  int64 requested_bytes = 0;
+  int64 peak_bytes = 0;
+  for (const auto& mem : step_stat.memory()) {
+    residual_bytes += mem.live_bytes();
+    requested_bytes += mem.total_bytes();
+    peak_bytes += mem.peak_bytes();
+  }
+  requested_bytes_ = requested_bytes;
+  residual_bytes_ = residual_bytes;
+  peak_bytes_ = peak_bytes;
 }
 
 void TFGraphNode::AddStepStat(int64 step, const string& device,
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index 929ee3f50c1..5ec3da12cfc 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -51,6 +51,9 @@ class ExecStep {
         latest_end_micros_(0),
         mem_initiated_(false),
         requested_bytes_(0),
+        peak_bytes_(0),
+        residual_bytes_(0),
+        output_bytes_(0),
         host_temp_bytes_(0),
         host_persistent_bytes_(0),
         accelerator_temp_bytes_(0),
@@ -78,14 +81,17 @@ class ExecStep {
   int64 latest_end_micros() const { return latest_end_micros_; }
 
   int64 requested_bytes() const { return requested_bytes_; }
+  int64 peak_bytes() const { return peak_bytes_; }
+  int64 residual_bytes() const { return residual_bytes_; }
+  int64 output_bytes() const { return output_bytes_; }
   int64 accelerator_temp_bytes() const { return accelerator_temp_bytes_; }
   int64 host_temp_bytes() const { return host_temp_bytes_; }
   int64 accelerator_persistent_bytes() const {
     return accelerator_persistent_bytes_;
   }
   int64 host_persistent_bytes() const { return host_persistent_bytes_; }
-  const std::map<int64, std::pair<int64, uint64>>& output_bytes() const {
-    return output_bytes_;
+  const std::map<int64, std::pair<int64, uint64>>& output_memory() const {
+    return output_memory_;
   }
   int64 allocator_bytes_in_use() const { return allocator_bytes_in_use_; }
 
@@ -111,8 +117,14 @@ class ExecStep {
   std::set<string> devices_;
 
   bool mem_initiated_;
-  // Total output bytes requested by the op.
+  // Total bytes requested by the op.
   int64 requested_bytes_;
+  // Peak bytes held by the op, summed over the step's memory records.
+  int64 peak_bytes_;
+  // Total bytes requested by the op and not released after op end.
+  int64 residual_bytes_;
+  // Total bytes output by the op (not necessarily requested by the op).
+  int64 output_bytes_;
   // Total temporary bytes allocated and released by the op.
   int64 host_temp_bytes_;
   // Total persistent bytes (e.g. variable) allocated by the op.
@@ -122,9 +134,27 @@ class ExecStep {
   // The total number of bytes currently allocated by the allocator if >0.
   int64 allocator_bytes_in_use_;
   // output_idx -> {output_bytes, memory_ptr}
-  std::map<int64, std::pair<int64, uint64>> output_bytes_;
+  std::map<int64, std::pair<int64, uint64>> output_memory_;
 };
 
+// Body of a TFGraphNode `<type>_bytes(int64 step)` getter: 0 when no step
+// was recorded, the given step's value when step >= 0 (CHECK-fails if that
+// step is unknown), otherwise the sum over all steps / number of steps.
+#define GRAPH_NODE_BYTES(type)                                \
+  do {                                                        \
+    if (execs_.empty()) return 0;                             \
+    if (step >= 0) {                                          \
+      auto exec = execs_.find(step);                          \
+      CHECK(exec != execs_.end()) << "unknown step " << step; \
+      return exec->second.type##_bytes();                     \
+    }                                                         \
+    int64 total = 0;                                          \
+    for (const auto& step_exec : execs_) {                    \
+      total += step_exec.second.type##_bytes();               \
+    }                                                         \
+    return total / execs_.size();                             \
+  } while (0)
+
 class TFGraphNode {
  public:
   TFGraphNode(const NodeDef* node)
@@ -270,22 +300,10 @@ class TFGraphNode {
     return total_micros / execs_.size();
   }
 
-  int64 requested_bytes(int64 step) const {
-    if (execs_.empty()) {
-      return 0;
-    }
-    if (step >= 0) {
-      auto exec = execs_.find(step);
-      CHECK(exec != execs_.end()) << "unknown step " << step;
-      return exec->second.requested_bytes();
-    }
-
-    int64 requested_bytes = 0;
-    for (const auto& exec : execs_) {
-      requested_bytes += exec.second.requested_bytes();
-    }
-    return requested_bytes / execs_.size();
-  }
+  int64 requested_bytes(int64 step) const { GRAPH_NODE_BYTES(requested); }
+  int64 peak_bytes(int64 step) const { GRAPH_NODE_BYTES(peak); }
+  int64 residual_bytes(int64 step) const { GRAPH_NODE_BYTES(residual); }
+  int64 output_bytes(int64 step) const { GRAPH_NODE_BYTES(output); }
 
   int64 all_start_micros(int64 step) const {
     auto exec = execs_.find(step);
@@ -328,11 +346,11 @@ class TFGraphNode {
     CHECK(exec != execs_.end()) << "unknown step " << step;
     return exec->second.host_persistent_bytes();
   }
-  const std::map<int64, std::pair<int64, uint64>>& output_bytes(
+  const std::map<int64, std::pair<int64, uint64>>& output_memory(
       int64 step) const {
     auto exec = execs_.find(step);
     CHECK(exec != execs_.end()) << "unknown step " << step;
-    return exec->second.output_bytes();
+    return exec->second.output_memory();
   }
   int64 allocator_bytes_in_use(int64 step) const {
     auto exec = execs_.find(step);
@@ -427,6 +445,9 @@ class TFMultiGraphNode {
         accelerator_exec_micros_(0),
         cpu_exec_micros_(0),
         requested_bytes_(0),
+        peak_bytes_(0),
+        residual_bytes_(0),
+        output_bytes_(0),
         float_ops_(0),
         parameters_(0) {}
 
@@ -437,6 +458,10 @@ class TFMultiGraphNode {
     cpu_exec_micros_ = 0;
 
     requested_bytes_ = 0;
+    peak_bytes_ = 0;
+    residual_bytes_ = 0;
+    output_bytes_ = 0;
+
     float_ops_ = 0;
     parameters_ = 0;
     op_types_.clear();
@@ -460,6 +485,10 @@ class TFMultiGraphNode {
       cpu_exec_micros_ += node->cpu_exec_micros(step);
 
       requested_bytes_ += node->requested_bytes(step);
+      peak_bytes_ += node->peak_bytes(step);
+      residual_bytes_ += node->residual_bytes(step);
+      output_bytes_ += node->output_bytes(step);
+
       float_ops_ += node->float_ops(step);
       parameters_ += node->parameters();
       if (node->shape().size() > 0) {
@@ -492,6 +521,9 @@ class TFMultiGraphNode {
   int64 cpu_exec_micros() const { return cpu_exec_micros_; }
 
   int64 requested_bytes() const { return requested_bytes_; }
+  int64 peak_bytes() const { return peak_bytes_; }
+  int64 residual_bytes() const { return residual_bytes_; }
+  int64 output_bytes() const { return output_bytes_; }
 
   int64 float_ops() const { return float_ops_; }
 
@@ -540,6 +572,9 @@ class TFMultiGraphNode {
   int64 cpu_exec_micros_;
 
   int64 requested_bytes_;
+  int64 peak_bytes_;
+  int64 residual_bytes_;
+  int64 output_bytes_;
   int64 float_ops_;
   int64 parameters_;
   std::set<string> devices_;
diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.cc b/tensorflow/core/profiler/internal/tfprof_node_show.cc
index 16b94fdfa1e..b0f8dcbf3b5 100644
--- a/tensorflow/core/profiler/internal/tfprof_node_show.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node_show.cc
@@ -38,6 +38,10 @@ void ShowNode::ReInit(int64 step) {
   mutable_proto()->set_cpu_exec_micros(node->cpu_exec_micros(step));
 
   mutable_proto()->set_requested_bytes(node->requested_bytes(step));
+  mutable_proto()->set_peak_bytes(node->peak_bytes(step));
+  mutable_proto()->set_residual_bytes(node->residual_bytes(step));
+  mutable_proto()->set_output_bytes(node->output_bytes(step));
+
   mutable_proto()->set_float_ops(node->float_ops(step));
 
   mutable_proto()->clear_input_shapes();
@@ -68,6 +72,12 @@ void ShowNode::AggregateTotalStats(ShowNode* node) {
 
   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
                                              node_pb->total_requested_bytes());
+  mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
+                                        node_pb->total_peak_bytes());
+  mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
+                                            node_pb->total_residual_bytes());
+  mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
+                                          node_pb->total_output_bytes());
   mutable_proto()->set_total_parameters(proto().total_parameters() +
                                         node_pb->total_parameters());
   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
@@ -89,6 +99,13 @@ void ShowNode::AddSelfToTotalStats() {
 
   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
                                              proto().requested_bytes());
+  mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
+                                        proto().peak_bytes());
+  mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
+                                            proto().residual_bytes());
+  mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
+                                          proto().output_bytes());
+
   mutable_proto()->set_total_parameters(proto().total_parameters() +
                                         proto().parameters());
   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
@@ -105,6 +122,10 @@ void ShowNode::ResetTotalStats() {
   mutable_proto()->set_total_cpu_exec_micros(0);
 
   mutable_proto()->set_total_requested_bytes(0);
+  mutable_proto()->set_total_peak_bytes(0);
+  mutable_proto()->set_total_residual_bytes(0);
+  mutable_proto()->set_total_output_bytes(0);
+
   mutable_proto()->set_total_parameters(0);
   mutable_proto()->set_total_float_ops(0);
   mutable_proto()->mutable_children()->Clear();
@@ -135,6 +156,10 @@ bool ShowMultiNode::ReInit(int64 step,
   mutable_proto()->set_cpu_exec_micros(node->cpu_exec_micros());
 
   mutable_proto()->set_requested_bytes(node->requested_bytes());
+  mutable_proto()->set_peak_bytes(node->peak_bytes());
+  mutable_proto()->set_residual_bytes(node->residual_bytes());
+  mutable_proto()->set_output_bytes(node->output_bytes());
+
   mutable_proto()->set_float_ops(node->float_ops());
 
   mutable_proto()->set_parameters(node->parameters());
@@ -157,6 +182,13 @@ void ShowMultiNode::AggregateTotalStats(ShowMultiNode* node) {
 
   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
                                              node_pb->total_requested_bytes());
+  mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
+                                        node_pb->total_peak_bytes());
+  mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
+                                            node_pb->total_residual_bytes());
+  mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
+                                          node_pb->total_output_bytes());
+
   mutable_proto()->set_total_parameters(proto().total_parameters() +
                                         node_pb->total_parameters());
   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
@@ -174,6 +206,13 @@ void ShowMultiNode::AddSelfToTotalStats() {
 
   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
                                              proto().requested_bytes());
+  mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
+                                        proto().peak_bytes());
+  mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
+                                            proto().residual_bytes());
+  mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
+                                          proto().output_bytes());
+
   mutable_proto()->set_total_parameters(proto().total_parameters() +
                                         proto().parameters());
   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
@@ -187,6 +226,10 @@ void ShowMultiNode::ResetTotalStats() {
   mutable_proto()->set_total_cpu_exec_micros(0);
 
   mutable_proto()->set_total_requested_bytes(0);
+  mutable_proto()->set_total_peak_bytes(0);
+  mutable_proto()->set_total_residual_bytes(0);
+  mutable_proto()->set_total_output_bytes(0);
+
   mutable_proto()->set_total_parameters(0);
   mutable_proto()->set_total_float_ops(0);
   mutable_proto()->mutable_children()->Clear();
diff --git a/tensorflow/core/profiler/internal/tfprof_op.cc b/tensorflow/core/profiler/internal/tfprof_op.cc
index ab013506ece..c04b0ea0c62 100644
--- a/tensorflow/core/profiler/internal/tfprof_op.cc
+++ b/tensorflow/core/profiler/internal/tfprof_op.cc
@@ -211,24 +211,44 @@ int64 TFOp::SearchRoot(const std::vector<OpNode*> nodes,
   return i;
 }
 
+// Formats an op-view memory cell "<node_bytes> (<accu %>, <self %>)",
+// right-justified in 30 columns. Percentages are taken against
+// root_total_bytes and are left at 0 unless node_bytes > 0.
+string TFOp::FormatMemoryNode(int64 node_total_bytes, int64 root_total_bytes,
+                              int64 node_bytes) const {
+  const bool has_bytes = node_bytes > 0;
+  const double accu_pct =
+      has_bytes ? 100.0 * node_total_bytes / root_total_bytes : 0.0;
+  const double pct = has_bytes ? 100.0 * node_bytes / root_total_bytes : 0.0;
+  const string cell = strings::Printf(
+      "%s (%.2f%%, %.2f%%)", FormatMemory(node_bytes).c_str(), accu_pct, pct);
+  return strings::Printf("%30s", cell.c_str());
+}
+
 string TFOp::FormatNode(OpNode* node, OpNode* root, const Options& opts) const {
   std::vector<string> attrs;
 
   if (opts.select.find(kShown[0]) != opts.select.end()) {
-    double accu_pct = 0.0;
-    double pct = 0.0;
-    if (node->proto().requested_bytes() > 0) {
-      accu_pct = 100.0 * node->proto().total_requested_bytes() /
-          root->proto().total_requested_bytes();
-      pct = 100.0 * node->proto().requested_bytes() /
-          root->proto().total_requested_bytes();
-    }
-    attrs.push_back(strings::Printf(
-        "%30s",
-        strings::Printf("%s (%.2f%%, %.2f%%)",
-                        FormatMemory(node->proto().requested_bytes()).c_str(),
-                        accu_pct, pct)
-            .c_str()));
+    attrs.push_back(FormatMemoryNode(node->proto().total_requested_bytes(),
+                                     root->proto().total_requested_bytes(),
+                                     node->proto().requested_bytes()));
+  }
+
+  if (opts.select.find(kShown[11]) != opts.select.end()) {
+    attrs.push_back(FormatMemoryNode(node->proto().total_peak_bytes(),
+                                     root->proto().total_peak_bytes(),
+                                     node->proto().peak_bytes()));
+  }
+
+  if (opts.select.find(kShown[12]) != opts.select.end()) {
+    attrs.push_back(FormatMemoryNode(node->proto().total_residual_bytes(),
+                                     root->proto().total_residual_bytes(),
+                                     node->proto().residual_bytes()));
+  }
+  if (opts.select.find(kShown[13]) != opts.select.end()) {
+    attrs.push_back(FormatMemoryNode(node->proto().total_output_bytes(),
+                                     root->proto().total_output_bytes(),
+                                     node->proto().output_bytes()));
   }
 
   if (opts.select.find(kShown[1]) != opts.select.end()) {
diff --git a/tensorflow/core/profiler/internal/tfprof_op.h b/tensorflow/core/profiler/internal/tfprof_op.h
index 9e20f5c3f49..55a346c7e8d 100644
--- a/tensorflow/core/profiler/internal/tfprof_op.h
+++ b/tensorflow/core/profiler/internal/tfprof_op.h
@@ -65,6 +65,8 @@ class TFOp : public TFMultiShow {
   }
 
   string FormatNode(OpNode* node, OpNode* root, const Options& opts) const;
+  string FormatMemoryNode(int64 node_total_bytes, int64 root_total_bytes,
+                          int64 node_bytes) const;
 
   std::unique_ptr<OpNode> root_;
   std::map<string, std::unique_ptr<OpNode>> cnodes_map_;
diff --git a/tensorflow/core/profiler/internal/tfprof_options.cc b/tensorflow/core/profiler/internal/tfprof_options.cc
index 2b5e340cecb..66342725418 100644
--- a/tensorflow/core/profiler/internal/tfprof_options.cc
+++ b/tensorflow/core/profiler/internal/tfprof_options.cc
@@ -151,9 +151,11 @@ tensorflow::Status Options::FromProtoStr(const string& opts_proto_str,
   }
 
   *opts = Options(
-      opts_pb.max_depth(), opts_pb.min_bytes(), opts_pb.min_micros(),
-      opts_pb.min_params(), opts_pb.min_float_ops(), opts_pb.min_occurrence(),
-      opts_pb.step(), opts_pb.order_by(),
+      opts_pb.max_depth(), opts_pb.min_bytes(), opts_pb.min_peak_bytes(),
+      opts_pb.min_residual_bytes(), opts_pb.min_output_bytes(),
+      opts_pb.min_micros(), opts_pb.min_accelerator_micros(),
+      opts_pb.min_cpu_micros(), opts_pb.min_params(), opts_pb.min_float_ops(),
+      opts_pb.min_occurrence(), opts_pb.step(), opts_pb.order_by(),
       std::vector<string>(opts_pb.account_type_regexes().begin(),
                           opts_pb.account_type_regexes().end()),
       std::vector<string>(opts_pb.start_name_regexes().begin(),
@@ -179,6 +181,11 @@ string Options::ToString() const {
       "%-28s%lld\n"
       "%-28s%lld\n"
       "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
       "%-28s%s\n"
       "%-28s%s\n"
       "%-28s%s\n"
@@ -188,17 +195,20 @@ string Options::ToString() const {
       "%-28s%s\n"
       "%-28s%s\n"
       "%-28s%s:%s\n",
-      kOptions[0], max_depth, kOptions[1], min_bytes, kOptions[2], min_micros,
-      kOptions[3], min_params, kOptions[4], min_float_ops, kOptions[5],
-      min_occurrence, kOptions[6], step, kOptions[7], order_by.c_str(),
-      kOptions[8], str_util::Join(account_type_regexes, ",").c_str(),
-      kOptions[9], str_util::Join(start_name_regexes, ",").c_str(),
-      kOptions[10], str_util::Join(trim_name_regexes, ",").c_str(),
-      kOptions[11], str_util::Join(show_name_regexes, ",").c_str(),
-      kOptions[12], str_util::Join(hide_name_regexes, ",").c_str(),
-      kOptions[13], (account_displayed_op_only ? "true" : "false"),
-      kOptions[14], str_util::Join(select, ",").c_str(), kOptions[15],
-      output_type.c_str(), KeyValueToStr(output_options).c_str());
+      kOptions[0], max_depth, kOptions[1], min_bytes, kOptions[2],
+      min_peak_bytes, kOptions[3], min_residual_bytes, kOptions[4],
+      min_output_bytes, kOptions[5], min_micros, kOptions[6],
+      min_accelerator_micros, kOptions[7], min_cpu_micros, kOptions[8],
+      min_params, kOptions[9], min_float_ops, kOptions[10], min_occurrence,
+      kOptions[11], step, kOptions[12], order_by.c_str(), kOptions[13],
+      str_util::Join(account_type_regexes, ",").c_str(), kOptions[14],
+      str_util::Join(start_name_regexes, ",").c_str(), kOptions[15],
+      str_util::Join(trim_name_regexes, ",").c_str(), kOptions[16],
+      str_util::Join(show_name_regexes, ",").c_str(), kOptions[17],
+      str_util::Join(hide_name_regexes, ",").c_str(), kOptions[18],
+      (account_displayed_op_only ? "true" : "false"), kOptions[19],
+      str_util::Join(select, ",").c_str(), kOptions[20], output_type.c_str(),
+      KeyValueToStr(output_options).c_str());
   return s;
 }
 
diff --git a/tensorflow/core/profiler/internal/tfprof_options.h b/tensorflow/core/profiler/internal/tfprof_options.h
index 8e78ee74639..463f5b3c3a6 100644
--- a/tensorflow/core/profiler/internal/tfprof_options.h
+++ b/tensorflow/core/profiler/internal/tfprof_options.h
@@ -29,7 +29,12 @@ namespace tfprof {
 static const char* const kOptions[] = {
     "-max_depth",
     "-min_bytes",
+    "-min_peak_bytes",
+    "-min_residual_bytes",
+    "-min_output_bytes",
     "-min_micros",
+    "-min_accelerator_micros",
+    "-min_cpu_micros",
     "-min_params",
     "-min_float_ops",
     "-min_occurrence",
@@ -46,17 +51,21 @@ static const char* const kOptions[] = {
 };
 
 static const char* const kOrderBy[] = {
-    "name",       "bytes",  "micros",    "accelerator_micros",
-    "cpu_micros", "params", "float_ops", "occurrence",
+    "name",         "bytes",     "peak_bytes",         "residual_bytes",
+    "output_bytes", "micros",    "accelerator_micros", "cpu_micros",
+    "params",       "float_ops", "occurrence",
 };
 
 // Append Only.
 // TODO(xpan): As we are adding more fields to be selected, we
 // need to have a way to tell users what fields are available in which view.
-static const char* const kShown[] = {
-    "bytes",     "micros",   "params",     "float_ops",    "tensor_value",
-    "device",    "op_types", "occurrence", "input_shapes", "accelerator_micros",
-    "cpu_micros"};
+static const char* const kShown[] = {"bytes",          "micros",
+                                     "params",         "float_ops",
+                                     "tensor_value",   "device",
+                                     "op_types",       "occurrence",
+                                     "input_shapes",   "accelerator_micros",
+                                     "cpu_micros",     "peak_bytes",
+                                     "residual_bytes", "output_bytes"};
 
 static const char* const kCmds[] = {
     "scope", "graph", "code", "op", "advise", "set", "help",
@@ -94,11 +103,15 @@ struct Options {
 
   virtual ~Options() {}
   Options()
-      : Options(0, 0, 0, 0, 0, 0, 0, "", {}, {}, {}, {}, {}, false, {}, "",
-                {}) {}
+      : Options(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", {}, {}, {}, {}, {},
+                false, {}, "", {}) {}
 
   Options(int max_depth, tensorflow::int64 min_bytes,
-          tensorflow::int64 min_micros, tensorflow::int64 min_params,
+          tensorflow::int64 min_peak_bytes,
+          tensorflow::int64 min_residual_bytes,
+          tensorflow::int64 min_output_bytes, tensorflow::int64 min_micros,
+          tensorflow::int64 min_accelerator_micros,
+          tensorflow::int64 min_cpu_micros, tensorflow::int64 min_params,
           tensorflow::int64 min_float_ops, tensorflow::int64 min_occurrence,
           tensorflow::int64 step, const string& order_by,
           const std::vector<string>& account_type_regexes,
@@ -111,7 +124,12 @@ struct Options {
           const std::map<string, string>& output_options)
       : max_depth(max_depth),
         min_bytes(min_bytes),
+        min_peak_bytes(min_peak_bytes),
+        min_residual_bytes(min_residual_bytes),
+        min_output_bytes(min_output_bytes),
         min_micros(min_micros),
+        min_accelerator_micros(min_accelerator_micros),
+        min_cpu_micros(min_cpu_micros),
         min_params(min_params),
         min_float_ops(min_float_ops),
         min_occurrence(min_occurrence),
@@ -131,7 +149,12 @@ struct Options {
 
   int max_depth;
   tensorflow::int64 min_bytes;
+  tensorflow::int64 min_peak_bytes;
+  tensorflow::int64 min_residual_bytes;
+  tensorflow::int64 min_output_bytes;
   tensorflow::int64 min_micros;
+  tensorflow::int64 min_accelerator_micros;
+  tensorflow::int64 min_cpu_micros;
   tensorflow::int64 min_params;
   tensorflow::int64 min_float_ops;
   tensorflow::int64 min_occurrence;
diff --git a/tensorflow/core/profiler/internal/tfprof_show.cc b/tensorflow/core/profiler/internal/tfprof_show.cc
index 630eba4ff2e..cf28876089d 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show.cc
@@ -73,8 +73,14 @@ bool TFShow::ShouldShow(const ShowNode* node, const Options& opts,
   // Always show kTFProfRoot.
   if (node->name() == kTFProfRoot) return true;
 
-  if (node->proto().requested_bytes() < opts.min_bytes ||
-      node->proto().exec_micros() < opts.min_micros ||
+  if (node->proto().total_requested_bytes() < opts.min_bytes ||
+      node->proto().total_peak_bytes() < opts.min_peak_bytes ||
+      node->proto().total_residual_bytes() < opts.min_residual_bytes ||
+      node->proto().total_output_bytes() < opts.min_output_bytes ||
+      node->proto().total_exec_micros() < opts.min_micros ||
+      node->proto().total_accelerator_exec_micros() <
+          opts.min_accelerator_micros ||
+      node->proto().total_cpu_exec_micros() < opts.min_cpu_micros ||
       node->proto().parameters() < opts.min_params ||
       node->proto().float_ops() < opts.min_float_ops ||
       node->proto().run_count() < opts.min_occurrence ||
@@ -128,6 +134,17 @@ bool TFShow::ReAccount(ShowNode* node, const Options& opts) {
   return false;
 }
 
+// Builds the "self/total" text of one memory column for `node`.
+// The self part is FormatMemory(bytes) when node->account is set and "--"
+// when it is not; the total part is always FormatMemory(total_bytes),
+// which is computed first.
+string TFShow::FormatNodeMemory(ShowNode* node, int64 bytes,
+                                int64 total_bytes) const {
+  const string total_text = FormatMemory(total_bytes);
+  const string self_text = node->account ? FormatMemory(bytes) : "--";
+  return self_text + "/" + total_text;
+}
+
 string TFShow::FormatNode(ShowNode* node, const Options& opts) const {
   std::vector<string> info;
   if (opts.select.find(kShown[2]) != opts.select.end()) {
@@ -152,15 +169,22 @@ string TFShow::FormatNode(ShowNode* node, const Options& opts) const {
     }
     info.push_back(fops);
   }
+  std::vector<string> attrs;
   if (opts.select.find(kShown[0]) != opts.select.end()) {
-    string memory = FormatMemory(node->proto().total_requested_bytes());
-    if (node->account) {
-      memory = FormatMemory(node->proto().requested_bytes()) + "/" + memory;
-
-    } else {
-      memory = "--/" + memory;
-    }
-    info.push_back(memory);
+    info.push_back(FormatNodeMemory(node, node->proto().requested_bytes(),
+                                    node->proto().total_requested_bytes()));
+  }
+  if (opts.select.find(kShown[11]) != opts.select.end()) {
+    info.push_back(FormatNodeMemory(node, node->proto().peak_bytes(),
+                                    node->proto().total_peak_bytes()));
+  }
+  if (opts.select.find(kShown[12]) != opts.select.end()) {
+    info.push_back(FormatNodeMemory(node, node->proto().residual_bytes(),
+                                    node->proto().total_residual_bytes()));
+  }
+  if (opts.select.find(kShown[13]) != opts.select.end()) {
+    info.push_back(FormatNodeMemory(node, node->proto().output_bytes(),
+                                    node->proto().total_output_bytes()));
   }
   if (opts.select.find(kShown[1]) != opts.select.end()) {
     info.push_back(FormatTotalExecTime(node, opts));
@@ -225,6 +249,15 @@ string TFShow::FormatLegend(const Options& opts) const {
     legends.push_back("# float_ops");
   }
   if (opts.select.find(kShown[0]) != opts.select.end()) {
+    legends.push_back("requested bytes");
+  }
+  if (opts.select.find(kShown[11]) != opts.select.end()) {
+    legends.push_back("peak bytes");
+  }
+  if (opts.select.find(kShown[12]) != opts.select.end()) {
+    legends.push_back("residual bytes");
+  }
+  if (opts.select.find(kShown[13]) != opts.select.end()) {
     legends.push_back("output bytes");
   }
   if (opts.select.find(kShown[1]) != opts.select.end()) {
diff --git a/tensorflow/core/profiler/internal/tfprof_show.h b/tensorflow/core/profiler/internal/tfprof_show.h
index 2f7e0e62119..08c231bad7f 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.h
+++ b/tensorflow/core/profiler/internal/tfprof_show.h
@@ -67,6 +67,7 @@ class TFShow {
   bool ReAccount(ShowNode* node, const Options& opts);
 
   string FormatNode(ShowNode* node, const Options& opts) const;
+  string FormatNodeMemory(ShowNode* node, int64 bytes, int64 total_bytes) const;
 
   string FormatLegend(const Options& opts) const;
 
@@ -87,17 +88,25 @@ class TFShow {
         return n1->proto().total_requested_bytes() >
                n2->proto().total_requested_bytes();
       } else if (opts.order_by == kOrderBy[2]) {
+        return n1->proto().total_peak_bytes() > n2->proto().total_peak_bytes();
+      } else if (opts.order_by == kOrderBy[3]) {
+        return n1->proto().total_residual_bytes() >
+               n2->proto().total_residual_bytes();
+      } else if (opts.order_by == kOrderBy[4]) {
+        return n1->proto().total_output_bytes() >
+               n2->proto().total_output_bytes();
+      } else if (opts.order_by == kOrderBy[5]) {
         return n1->proto().total_exec_micros() >
                n2->proto().total_exec_micros();
-      } else if (opts.order_by == kOrderBy[3]) {
+      } else if (opts.order_by == kOrderBy[6]) {
         return n1->proto().total_accelerator_exec_micros() >
                n2->proto().total_accelerator_exec_micros();
-      } else if (opts.order_by == kOrderBy[4]) {
+      } else if (opts.order_by == kOrderBy[7]) {
         return n1->proto().total_cpu_exec_micros() >
                n2->proto().total_cpu_exec_micros();
-      } else if (opts.order_by == kOrderBy[5]) {
+      } else if (opts.order_by == kOrderBy[8]) {
         return n1->proto().total_parameters() > n2->proto().total_parameters();
-      } else if (opts.order_by == kOrderBy[6]) {
+      } else if (opts.order_by == kOrderBy[9]) {
         return n1->proto().total_float_ops() > n2->proto().total_float_ops();
       }
       return name_cmp;
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.cc b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
index 34b3e9e3f07..eb826a71376 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
@@ -65,7 +65,13 @@ bool TFMultiShow::ShouldShow(const ShowMultiNode* node, const Options& opts,
   // want to see the middle code traces (i.e. their own codes.), instead
   // of the TensorFlow internal codes traces.
   if (node->proto().total_requested_bytes() < opts.min_bytes ||
+      node->proto().total_peak_bytes() < opts.min_peak_bytes ||
+      node->proto().total_residual_bytes() < opts.min_residual_bytes ||
+      node->proto().total_output_bytes() < opts.min_output_bytes ||
       node->proto().total_exec_micros() < opts.min_micros ||
+      node->proto().total_accelerator_exec_micros() <
+          opts.min_accelerator_micros ||
+      node->proto().total_cpu_exec_micros() < opts.min_cpu_micros ||
       node->proto().total_parameters() < opts.min_params ||
       node->proto().total_float_ops() < opts.min_float_ops ||
       depth > opts.max_depth || !ShouldShowIfExtra(node, opts, depth)) {
@@ -109,6 +115,15 @@ bool TFMultiShow::ReAccount(ShowMultiNode* node, const Options& opts) {
 string TFMultiShow::FormatLegend(const Options& opts) const {
   std::vector<string> legends;
   if (opts.select.find(kShown[0]) != opts.select.end()) {
+    legends.push_back("requested bytes");
+  }
+  if (opts.select.find(kShown[11]) != opts.select.end()) {
+    legends.push_back("peak bytes");
+  }
+  if (opts.select.find(kShown[12]) != opts.select.end()) {
+    legends.push_back("residual bytes");
+  }
+  if (opts.select.find(kShown[13]) != opts.select.end()) {
     legends.push_back("output bytes");
   }
   if (opts.select.find(kShown[1]) != opts.select.end()) {
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.h b/tensorflow/core/profiler/internal/tfprof_show_multi.h
index f731f6afbb3..a632c669336 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.h
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.h
@@ -90,21 +90,30 @@ class TFMultiShow {
                   return n1->proto().total_requested_bytes() >
                          n2->proto().total_requested_bytes();
                 } else if (opts.order_by == kOrderBy[2]) {
+                  return n1->proto().total_peak_bytes() >
+                         n2->proto().total_peak_bytes();
+                } else if (opts.order_by == kOrderBy[3]) {
+                  return n1->proto().total_residual_bytes() >
+                         n2->proto().total_residual_bytes();
+                } else if (opts.order_by == kOrderBy[4]) {
+                  return n1->proto().total_output_bytes() >
+                         n2->proto().total_output_bytes();
+                } else if (opts.order_by == kOrderBy[5]) {
                   return n1->proto().total_exec_micros() >
                          n2->proto().total_exec_micros();
-                } else if (opts.order_by == kOrderBy[3]) {
+                } else if (opts.order_by == kOrderBy[6]) {
                   return n1->proto().total_accelerator_exec_micros() >
                          n2->proto().total_accelerator_exec_micros();
-                } else if (opts.order_by == kOrderBy[4]) {
+                } else if (opts.order_by == kOrderBy[7]) {
                   return n1->proto().total_cpu_exec_micros() >
                          n2->proto().total_cpu_exec_micros();
-                } else if (opts.order_by == kOrderBy[5]) {
+                } else if (opts.order_by == kOrderBy[8]) {
                   return n1->proto().total_parameters() >
                          n2->proto().total_parameters();
-                } else if (opts.order_by == kOrderBy[6]) {
+                } else if (opts.order_by == kOrderBy[9]) {
                   return n1->proto().total_float_ops() >
                          n2->proto().total_float_ops();
-                } else if (opts.order_by == kOrderBy[7]) {
+                } else if (opts.order_by == kOrderBy[10]) {
                   return n1->node->graph_nodes().size() >
                          n2->node->graph_nodes().size();
                 }
diff --git a/tensorflow/core/profiler/internal/tfprof_show_test.cc b/tensorflow/core/profiler/internal/tfprof_show_test.cc
index e2ba113e9bd..f2c8b662d08 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_test.cc
@@ -22,12 +22,12 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
 #include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -72,91 +72,80 @@ class TFProfShowTest : public ::testing::Test {
 };
 
 TEST_F(TFProfShowTest, DumpScopeMode) {
-  string dump_file = io::JoinPath(testing::TmpDir(), "dump");
-  Options opts(5, 0, 0, 0, 0, 0, -1, "name",
-               {"VariableV2"},  // accout_type_regexes
-               {".*"}, {""}, {".*"}, {""}, false,
-               {"params", "bytes", "micros", "float_ops"}, "file",
-               {{"outfile", dump_file}});
-  tf_stats_->ShowGraphNode("scope", opts);
-
-  string dump_str;
-  TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
-  EXPECT_EQ(
-      "node name | # parameters | # float_ops | output bytes | total execution "
-      "time | accelerator execution time | cpu execution time\n_TFProfRoot "
-      "(--/370 params, --/0 flops, --/1.48KB, --/5us, --/0us, --/5us)\n  "
-      "conv2d (--/140 params, --/0 flops, --/560B, --/2us, --/0us, --/2us)\n   "
-      " conv2d/bias (5, 5/5 params, 0/0 flops, 20B/20B, 1us/1us, 0us/0us, "
-      "1us/1us)\n    conv2d/kernel (3x3x3x5, 135/135 params, 0/0 flops, "
-      "540B/540B, 1us/1us, 0us/0us, 1us/1us)\n  conv2d_1 (--/230 params, --/0 "
-      "flops, --/920B, --/3us, --/0us, --/3us)\n    conv2d_1/bias (5, 5/5 "
-      "params, 0/0 flops, 20B/20B, 1us/1us, 0us/0us, 1us/1us)\n    "
-      "conv2d_1/kernel (3x3x5x5, 225/225 params, 0/0 flops, 900B/900B, "
-      "2us/2us, 0us/0us, 2us/2us)\n",
-      dump_str);
-}
-
-TEST_F(TFProfShowTest, DumpAcceleratorAndCPUMicros) {
   string dump_file = io::JoinPath(testing::TmpDir(), "dump");
   Options opts(
-      5, 0, 0, 0, 0, 0, -1, "cpu_micros", {".*"},  // accout_type_regexes
-      {".*"}, {""}, {".*"}, {""}, false, {"accelerator_micros", "cpu_micros"},
+      5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name",
+      {"VariableV2"},  // account_type_regexes
+      {".*"}, {""}, {".*"}, {""}, false,
+      {"params", "bytes", "peak_bytes", "residual_bytes", "output_bytes",
+       "micros", "accelerator_micros", "cpu_micros", "float_ops"},
       "file", {{"outfile", dump_file}});
   tf_stats_->ShowGraphNode("scope", opts);
 
+  string dump_str;
+  TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
+  EXPECT_EQ(
+      "node name | # parameters | # float_ops | requested bytes | peak bytes | "
+      "residual bytes | output bytes | total execution time | accelerator "
+      "execution time | cpu execution time\n_TFProfRoot (--/451 params, --/0 "
+      "flops, --/0B, --/0B, --/0B, --/2.56KB, --/13us, --/0us, --/13us)\n  DW "
+      "(3x3x3x6, 162/162 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, "
+      "1.28KB/1.28KB, 2us/2us, 0us/0us, 2us/2us)\n  DW2 (2x2x6x12, 288/288 "
+      "params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, 1.28KB/1.28KB, 11us/11us, "
+      "0us/0us, 11us/11us)\n  ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, "
+      "0B/0B, 0B/0B, 0us/0us, 0us/0us, 0us/0us)\n",
+      dump_str);
+}
+
+TEST_F(TFProfShowTest, DumpAcceleratorAndCPUMicros) {
+  string dump_file = io::JoinPath(testing::TmpDir(), "dump");
+  Options opts(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "cpu_micros",
+               {".*"},  // account_type_regexes
+               {".*"}, {""}, {".*"}, {""}, false,
+               {"accelerator_micros", "cpu_micros"}, "file",
+               {{"outfile", dump_file}});
+  tf_stats_->ShowGraphNode("scope", opts);
+
   string dump_str;
   TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
   EXPECT_EQ(
       "node name | accelerator execution time | cpu execution "
-      "time\n_TFProfRoot (--/0us, --/97us)\n  conv2d (0us/0us, 0us/76us)\n    "
-      "conv2d/convolution (0us/0us, 60us/60us)\n      conv2d/convolution/Shape "
-      "(0us/0us, 0us/0us)\n      conv2d/convolution/dilation_rate (0us/0us, "
-      "0us/0us)\n    conv2d/BiasAdd (0us/0us, 12us/12us)\n    conv2d/bias "
-      "(0us/0us, 1us/2us)\n      conv2d/bias/read (0us/0us, 1us/1us)\n      "
-      "conv2d/bias/Assign (0us/0us, 0us/0us)\n      conv2d/bias/Initializer "
-      "(0us/0us, 0us/0us)\n        conv2d/bias/Initializer/Const (0us/0us, "
-      "0us/0us)\n    conv2d/kernel (0us/0us, 1us/2us)\n      "
-      "conv2d/kernel/read (0us/0us, 1us/1us)\n      conv2d/kernel/Assign "
-      "(0us/0us, 0us/0us)\n      conv2d/kernel/Initializer (0us/0us, "
-      "0us/0us)\n        conv2d/kernel/Initializer/random_uniform (0us/0us, "
-      "0us/0us)\n  conv2d_2 (0us/0us, 0us/15us)\n    conv2d_2/convolution "
-      "(0us/0us, 13us/13us)\n      conv2d_2/convolution/Shape (0us/0us, "
-      "0us/0us)\n      conv2d_2/convolution/dilation_rate (0us/0us, 0us/0us)\n "
-      "   conv2d_2/BiasAdd (0us/0us, 2us/2us)\n  conv2d_1 (0us/0us, 0us/5us)\n "
-      "   conv2d_1/kernel (0us/0us, 2us/3us)\n      conv2d_1/kernel/read "
-      "(0us/0us, 1us/1us)\n      conv2d_1/kernel/Assign (0us/0us, 0us/0us)\n   "
-      "   conv2d_1/kernel/Initializer (0us/0us, 0us/0us)\n        "
-      "conv2d_1/kernel/Initializer/random_uniform (0us/0us, 0us/0us)\n    "
-      "conv2d_1/bias (0us/0us, 1us/2us)\n      conv2d_1/bias/read (0us/0us, "
-      "1us/1us)\n      conv2d_1/bias/Assign (0us/0us, 0us/0us)\n      "
-      "conv2d_1/bias/Initializer (0us/0us, 0us/0us)\n        "
-      "conv2d_1/bias/Initializer/Const (0us/0us, 0us/0us)\n  zeros (0us/0us, "
-      "1us/1us)\n  init (0us/0us, 0us/0us)\n  save (0us/0us, 0us/0us)\n    "
-      "save/Assign (0us/0us, 0us/0us)\n    save/Assign_1 (0us/0us, 0us/0us)\n  "
-      "  save/Assign_2 (0us/0us, 0us/0us)\n    save/Assign_3 (0us/0us, "
-      "0us/0us)\n    save/Const (0us/0us, 0us/0us)\n    save/RestoreV2 "
-      "(0us/0us, 0us/0us)\n      save/RestoreV2/shape_and_slices (0us/0us, "
-      "0us/0us)\n      save/RestoreV2/tensor_names (0us/0us, 0us/0us)\n    "
-      "save/RestoreV2_1 (0us/0us, 0us/0us)\n      "
-      "save/RestoreV2_1/shape_and_slices (0us/0us, 0us/0us)\n      "
-      "save/RestoreV2_1/tensor_names (0us/0us, 0us/0us)\n    save/RestoreV2_2 "
-      "(0us/0us, 0us/0us)\n      save/RestoreV2_2/shape_and_slices (0us/0us, "
-      "0us/0us)\n      save/RestoreV2_2/tensor_names (0us/0us, 0us/0us)\n    "
-      "save/RestoreV2_3 (0us/0us, 0us/0us)\n      "
-      "save/RestoreV2_3/shape_and_slices (0us/0us, 0us/0us)\n      "
-      "save/RestoreV2_3/tensor_names (0us/0us, 0us/0us)\n    save/SaveV2 "
-      "(0us/0us, 0us/0us)\n      save/SaveV2/shape_and_slices (0us/0us, "
-      "0us/0us)\n      save/SaveV2/tensor_names (0us/0us, 0us/0us)\n    "
-      "save/control_dependency (0us/0us, 0us/0us)\n    save/restore_all "
-      "(0us/0us, 0us/0us)\n",
+      "time\n_TFProfRoot (--/404us, --/4.50ms)\n  Conv2D (226us/226us, "
+      "4.07ms/4.07ms)\n  Conv2D_1 (178us/178us, 419us/419us)\n  DW2 (0us/0us, "
+      "11us/11us)\n    DW2/Assign (0us/0us, 0us/0us)\n    DW2/Initializer "
+      "(0us/0us, 0us/0us)\n      DW2/Initializer/random_normal (0us/0us, "
+      "0us/0us)\n        DW2/Initializer/random_normal/RandomStandardNormal "
+      "(0us/0us, 0us/0us)\n        DW2/Initializer/random_normal/mean "
+      "(0us/0us, 0us/0us)\n        DW2/Initializer/random_normal/mul (0us/0us, "
+      "0us/0us)\n        DW2/Initializer/random_normal/shape (0us/0us, "
+      "0us/0us)\n        DW2/Initializer/random_normal/stddev (0us/0us, "
+      "0us/0us)\n    DW2/read (0us/0us, 0us/0us)\n  DW (0us/0us, 2us/2us)\n    "
+      "DW/Assign (0us/0us, 0us/0us)\n    DW/Initializer (0us/0us, 0us/0us)\n   "
+      "   DW/Initializer/random_normal (0us/0us, 0us/0us)\n        "
+      "DW/Initializer/random_normal/RandomStandardNormal (0us/0us, 0us/0us)\n  "
+      "      DW/Initializer/random_normal/mean (0us/0us, 0us/0us)\n        "
+      "DW/Initializer/random_normal/mul (0us/0us, 0us/0us)\n        "
+      "DW/Initializer/random_normal/shape (0us/0us, 0us/0us)\n        "
+      "DW/Initializer/random_normal/stddev (0us/0us, 0us/0us)\n    DW/read "
+      "(0us/0us, 0us/0us)\n  zeros (0us/0us, 2us/2us)\n  ScalarW (0us/0us, "
+      "0us/0us)\n    ScalarW/Assign (0us/0us, 0us/0us)\n    "
+      "ScalarW/Initializer (0us/0us, 0us/0us)\n      "
+      "ScalarW/Initializer/random_normal (0us/0us, 0us/0us)\n        "
+      "ScalarW/Initializer/random_normal/RandomStandardNormal (0us/0us, "
+      "0us/0us)\n        ScalarW/Initializer/random_normal/mean (0us/0us, "
+      "0us/0us)\n        ScalarW/Initializer/random_normal/mul (0us/0us, "
+      "0us/0us)\n        ScalarW/Initializer/random_normal/shape (0us/0us, "
+      "0us/0us)\n        ScalarW/Initializer/random_normal/stddev (0us/0us, "
+      "0us/0us)\n    ScalarW/read (0us/0us, 0us/0us)\n  init (0us/0us, "
+      "0us/0us)\n",
       dump_str);
 }
 
 TEST_F(TFProfShowTest, DumpOpMode) {
   string dump_file = io::JoinPath(testing::TmpDir(), "dump");
   Options opts(
-      5, 0, 0, 0, 0, 4, -1, "params", {".*"},  // accout_type_regexes
+      5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, "params",
+      {".*"},  // account_type_regexes
       {".*"}, {""}, {".*"}, {""}, false,
       {"params", "bytes", "micros", "float_ops", "occurrence", "input_shapes"},
       "file", {{"outfile", dump_file}});
@@ -165,17 +154,32 @@ TEST_F(TFProfShowTest, DumpOpMode) {
   string dump_str;
   TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
   EXPECT_EQ(
-      "nodename|outputbytes|totalexecutiontime|acceleratorexecutiontime|"
+      "nodename|requestedbytes|totalexecutiontime|acceleratorexecutiontime|"
       "cpuexecutiontime|#parameters|#float_ops|opoccurrence(run|defined)|"
-      "inputshapes\nVariableV21.48KB(100.00%,17.10%),5us(100.00%,5.15%),0us(0."
-      "00%,0.00%),5us(100.00%,5.15%),370params(100.00%,100.00%),0float_ops(100."
-      "00%,0.00%),4|4\n\ninput_type:\t(run*4|defined*4)\texec_time:"
-      "5us\n\nAssign0B(0.00%,0.00%),0us(94.85%,0.00%),0us(0.00%,0.00%),0us(94."
-      "85%,0.00%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|8\n\ninput_"
-      "type:0:unknown,\t1:unknown\t(run*0|defined*8)\texec_time:0us\n\nConst1."
-      "54KB(58.87%,17.74%),1us(80.41%,1.03%),0us(0.00%,0.00%),1us(80.41%,1.03%)"
-      ",0params(0.00%,0.00%),0float_ops(98.49%,0.00%),1|24\n\ninput_type:\t("
-      "run*1|defined*24)\texec_time:1us\n\n",
+      "inputshapes\nVariableV20B(0.00%,0.00%),13us(100.00%,0.27%),0us(100.00%,"
+      "0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_ops(100.00%"
+      ",0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:13us\n\nAdd0B("
+      "0.00%,0.00%),0us(99.73%,0.00%),0us(100.00%,0.00%),0us(99.71%,0.00%),"
+      "0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_type:0:1,"
+      "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12,\t1:1\t("
+      "run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t(run*0|"
+      "defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.73%,0.00%),"
+      "0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_ops("
+      "100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_"
+      "time:0us\ninput_type:0:2x2x6x12,\t1:2x2x6x12\t(run*0|defined*1)\texec_"
+      "time:0us\ninput_type:0:3x3x3x6,\t1:3x3x3x6\t(run*0|defined*1)\texec_"
+      "time:0us\n\nConst0B(0.00%,0.00%),2us(99.73%,0.04%),0us(100.00%,0.00%),"
+      "2us(99.71%,0.04%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),1|"
+      "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D14.59KB("
+      "100.00%,100.00%),4.89ms(99.69%,99.69%),404us(100.00%,100.00%),4.49ms(99."
+      "67%,99.67%),0params(0.00%,0.00%),10.44kfloat_ops(100.00%,100.00%),2|"
+      "2\n\ninput_type:0:2x3x3x6,\t1:2x2x6x12\t(run*1|defined*1)\texec_time:"
+      "597us\ninput_type:0:2x6x6x3,\t1:3x3x3x6\t(run*1|defined*1)\texec_time:4."
+      "29ms\n\nIdentity0B(0.00%,0.00%),0us(0.00%,0.00%),0us(0.00%,0.00%),0us(0."
+      "00%,0.00%),0params(0.00%,0.00%),0float_ops(0.00%,0.00%),0|3\n\ninput_"
+      "type:0:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12\t(run*"
+      "0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6\t(run*0|defined*1)"
+      "\texec_time:0us\n\n",
       StringReplace(dump_str, " ", ""));
 }
 }  // namespace tfprof
diff --git a/tensorflow/core/profiler/internal/tfprof_stats_test.cc b/tensorflow/core/profiler/internal/tfprof_stats_test.cc
index 8744f5be285..e67c1585214 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats_test.cc
@@ -23,12 +23,12 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
 #include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -73,7 +73,7 @@ class TFProfStatsTest : public ::testing::Test {
 };
 
 TEST_F(TFProfStatsTest, CustomOpType) {
-  Options opts(3, 0, 0, 0, 0, 0, -1, "name",
+  Options opts(3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name",
                {kTrainableVarType},  // accout_type_regexes
                {".*"}, {""}, {".*"}, {""}, false,
                {"params", "bytes", "micros", "float_ops"}, "", {});
@@ -81,62 +81,27 @@ TEST_F(TFProfStatsTest, CustomOpType) {
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 5\ntotal_requested_bytes: 1480\ntotal_parameters: "
-      "370\nchildren {\n  name: \"conv2d\"\n  exec_micros: 0\n  "
-      "requested_bytes: 0\n  total_exec_micros: 2\n  total_requested_bytes: "
-      "560\n  total_parameters: 140\n  children {\n    name: \"conv2d/bias\"\n "
-      "   exec_micros: 1\n    requested_bytes: 20\n    parameters: 5\n    "
-      "total_exec_micros: 1\n    total_requested_bytes: 20\n    "
-      "total_parameters: 5\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 1\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 1\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  children {\n    name: "
-      "\"conv2d/kernel\"\n    exec_micros: 1\n    requested_bytes: 540\n    "
-      "parameters: 135\n    total_exec_micros: 1\n    total_requested_bytes: "
-      "540\n    total_parameters: 135\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 1\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 1\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  float_ops: 0\n  total_float_ops: 0\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 0\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 2\n  "
-      "run_count: 0\n  total_run_count: 2\n  total_definition_count: "
-      "3\n}\nchildren {\n  name: \"conv2d_1\"\n  exec_micros: 0\n  "
-      "requested_bytes: 0\n  total_exec_micros: 3\n  total_requested_bytes: "
-      "920\n  total_parameters: 230\n  children {\n    name: "
-      "\"conv2d_1/bias\"\n    exec_micros: 1\n    requested_bytes: 20\n    "
-      "parameters: 5\n    total_exec_micros: 1\n    total_requested_bytes: "
-      "20\n    total_parameters: 5\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 1\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 1\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  children {\n    name: "
-      "\"conv2d_1/kernel\"\n    exec_micros: 2\n    requested_bytes: 900\n    "
-      "parameters: 225\n    total_exec_micros: 2\n    total_requested_bytes: "
-      "900\n    total_parameters: 225\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 2\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 2\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  float_ops: 0\n  total_float_ops: 0\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 0\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 3\n  "
-      "run_count: 0\n  total_run_count: 2\n  total_definition_count: "
-      "3\n}\nfloat_ops: 0\ntotal_float_ops: 0\naccelerator_exec_micros: "
-      "0\ncpu_exec_micros: 0\ntotal_accelerator_exec_micros: "
-      "0\ntotal_cpu_exec_micros: 5\nrun_count: 0\ntotal_run_count: "
-      "4\ntotal_definition_count: 6\n",
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: "
+      "451\nchildren {\n  name: \"DW\"\n  exec_micros: 2\n  parameters: 162\n  "
+      "total_exec_micros: 2\n  total_parameters: 162\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 2\n  "
+      "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
+      "1280\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 11\n  parameters: "
+      "288\n  total_exec_micros: 11\n  total_parameters: 288\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 11\n  "
+      "total_cpu_exec_micros: 11\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
+      "1280\n}\nchildren {\n  name: \"ScalarW\"\n  parameters: 1\n  "
+      "total_parameters: 1\n  total_definition_count: "
+      "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: "
+      "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
 
 TEST_F(TFProfStatsTest, CheckPointOpType) {
-  Options opts(3, 0, 0, 0, 0, 0, -1, "name",
+  Options opts(3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name",
                {kCkptVarType},  // accout_type_regexes
                {".*"}, {""}, {".*"}, {""}, false,
                {"params", "bytes", "micros", "float_ops"}, "", {});
@@ -144,169 +109,235 @@ TEST_F(TFProfStatsTest, CheckPointOpType) {
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 5\ntotal_requested_bytes: 1480\ntotal_parameters: "
-      "370\nchildren {\n  name: \"conv2d\"\n  exec_micros: 0\n  "
-      "requested_bytes: 0\n  total_exec_micros: 2\n  total_requested_bytes: "
-      "560\n  total_parameters: 140\n  children {\n    name: \"conv2d/bias\"\n "
-      "   exec_micros: 1\n    requested_bytes: 20\n    parameters: 5\n    "
-      "total_exec_micros: 1\n    total_requested_bytes: 20\n    "
-      "total_parameters: 5\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 1\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 1\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  children {\n    name: "
-      "\"conv2d/kernel\"\n    exec_micros: 1\n    requested_bytes: 540\n    "
-      "parameters: 135\n    total_exec_micros: 1\n    total_requested_bytes: "
-      "540\n    total_parameters: 135\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 1\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 1\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  float_ops: 0\n  total_float_ops: 0\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 0\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 2\n  "
-      "run_count: 0\n  total_run_count: 2\n  total_definition_count: "
-      "3\n}\nchildren {\n  name: \"conv2d_1\"\n  exec_micros: 0\n  "
-      "requested_bytes: 0\n  total_exec_micros: 3\n  total_requested_bytes: "
-      "920\n  total_parameters: 230\n  children {\n    name: "
-      "\"conv2d_1/bias\"\n    exec_micros: 1\n    requested_bytes: 20\n    "
-      "parameters: 5\n    total_exec_micros: 1\n    total_requested_bytes: "
-      "20\n    total_parameters: 5\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 1\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 1\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  children {\n    name: "
-      "\"conv2d_1/kernel\"\n    exec_micros: 2\n    requested_bytes: 900\n    "
-      "parameters: 225\n    total_exec_micros: 2\n    total_requested_bytes: "
-      "900\n    total_parameters: 225\n    devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n    float_ops: 0\n    "
-      "total_float_ops: 0\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 2\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 2\n    run_count: 1\n    total_run_count: 1\n    "
-      "total_definition_count: 1\n  }\n  float_ops: 0\n  total_float_ops: 0\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 0\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 3\n  "
-      "run_count: 0\n  total_run_count: 2\n  total_definition_count: "
-      "3\n}\nfloat_ops: 0\ntotal_float_ops: 0\naccelerator_exec_micros: "
-      "0\ncpu_exec_micros: 0\ntotal_accelerator_exec_micros: "
-      "0\ntotal_cpu_exec_micros: 5\nrun_count: 0\ntotal_run_count: "
-      "4\ntotal_definition_count: 6\n",
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: "
+      "451\nchildren {\n  name: \"DW\"\n  exec_micros: 2\n  parameters: 162\n  "
+      "total_exec_micros: 2\n  total_parameters: 162\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 2\n  "
+      "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
+      "1280\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 11\n  parameters: "
+      "288\n  total_exec_micros: 11\n  total_parameters: 288\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 11\n  "
+      "total_cpu_exec_micros: 11\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
+      "1280\n}\nchildren {\n  name: \"ScalarW\"\n  parameters: 1\n  "
+      "total_parameters: 1\n  total_definition_count: "
+      "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: "
+      "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
 
 TEST_F(TFProfStatsTest, TestGraph) {
-  Options opts(100, 0, 10000, 0, 0, 0, -1, "name", {".*"},
-               {"cost.*"},  // start_name_regexes
+  Options opts(100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name", {".*"},
+               {"DW/Initializer/random_normal/mul"},  // start_name_regexes; the sole child of root below
                {""}, {".*"}, {""}, false,
                {"params", "bytes", "micros", "float_ops"}, "", {});
   const GraphNodeProto& root = tf_stats_->ShowGraphNode("graph", opts);
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 97\ntotal_requested_bytes: "
-      "8656\ntotal_parameters: 370\nfloat_ops: 0\ntotal_float_ops: "
-      "34360\naccelerator_exec_micros: 0\ncpu_exec_micros: "
-      "0\ntotal_accelerator_exec_micros: 0\ntotal_cpu_exec_micros: "
-      "97\nrun_count: 0\ntotal_run_count: 13\ntotal_definition_count: 60\n",
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 4904\ntotal_requested_bytes: "
+      "14592\ntotal_parameters: 451\nchildren {\n  name: "
+      "\"DW/Initializer/random_normal/mul\"\n  children {\n    name: "
+      "\"DW/Initializer/random_normal/RandomStandardNormal\"\n    children {\n "
+      "     name: \"DW/Initializer/random_normal/shape\"\n      "
+      "total_definition_count: 1\n    }\n    input_shapes {\n      key: 0\n    "
+      "  value {\n        dim {\n          size: 4\n        }\n      }\n    "
+      "}\n    total_definition_count: 2\n  }\n  children {\n    name: "
+      "\"DW/Initializer/random_normal/stddev\"\n    total_definition_count: "
+      "1\n  }\n  input_shapes {\n    key: 0\n    value {\n      dim {\n        "
+      "size: 3\n      }\n      dim {\n        size: 3\n      }\n      dim {\n  "
+      "      size: 3\n      }\n      dim {\n        size: 6\n      }\n    }\n  "
+      "}\n  input_shapes {\n    key: 1\n    value {\n      dim {\n        "
+      "size: 1\n      }\n    }\n  }\n  total_definition_count: "
+      "4\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: "
+      "404\ntotal_cpu_exec_micros: 4500\ntotal_run_count: "
+      "5\ntotal_definition_count: 31\ntotal_peak_bytes: "
+      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
 
 TEST_F(TFProfStatsTest, TestFloatOps) {
-  Options opts(10, 0, 0, 0, 1, 0, -1, "name", {".*"}, {".*"}, {""}, {".*"},
-               {""}, false, {"float_ops"}, "", {});
+  Options opts(10, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, -1, "name", {".*"}, {".*"},
+               {""}, {".*"}, {""}, false, {"float_ops"}, "", {});  // lone 1 above: presumably min_float_ops (TODO confirm)
   const GraphNodeProto& root = tf_stats_->ShowGraphNode("scope", opts);
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 97\ntotal_requested_bytes: "
-      "8656\ntotal_parameters: 370\nchildren {\n  name: \"conv2d/BiasAdd\"\n  "
-      "exec_micros: 12\n  requested_bytes: 1440\n  total_exec_micros: 12\n  "
-      "total_requested_bytes: 1440\n  total_parameters: 0\n  devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 360\n  "
-      "total_float_ops: 360\n  input_shapes {\n    key: 0\n    value {\n      "
-      "unknown_rank: true\n    }\n  }\n  input_shapes {\n    key: 1\n    value "
-      "{\n      unknown_rank: true\n    }\n  }\n  accelerator_exec_micros: 0\n "
-      " cpu_exec_micros: 12\n  total_accelerator_exec_micros: 0\n  "
-      "total_cpu_exec_micros: 12\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n}\nchildren {\n  name: "
-      "\"conv2d/convolution\"\n  exec_micros: 60\n  requested_bytes: 1440\n  "
-      "total_exec_micros: 60\n  total_requested_bytes: 1440\n  "
-      "total_parameters: 0\n  devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 19440\n  "
-      "total_float_ops: 19440\n  input_shapes {\n    key: 0\n    value {\n     "
-      " unknown_rank: true\n    }\n  }\n  input_shapes {\n    key: 1\n    "
-      "value {\n      unknown_rank: true\n    }\n  }\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 60\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 60\n  "
-      "run_count: 1\n  total_run_count: 1\n  total_definition_count: "
-      "3\n}\nchildren {\n  name: \"conv2d_2/BiasAdd\"\n  exec_micros: 2\n  "
-      "requested_bytes: 640\n  total_exec_micros: 2\n  total_requested_bytes: "
-      "640\n  total_parameters: 0\n  devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 160\n  "
-      "total_float_ops: 160\n  input_shapes {\n    key: 0\n    value {\n      "
-      "unknown_rank: true\n    }\n  }\n  input_shapes {\n    key: 1\n    value "
-      "{\n      unknown_rank: true\n    }\n  }\n  accelerator_exec_micros: 0\n "
-      " cpu_exec_micros: 2\n  total_accelerator_exec_micros: 0\n  "
-      "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n}\nchildren {\n  name: "
-      "\"conv2d_2/convolution\"\n  exec_micros: 13\n  requested_bytes: 640\n  "
-      "total_exec_micros: 13\n  total_requested_bytes: 640\n  "
-      "total_parameters: 0\n  devices: "
-      "\"/job:localhost/replica:0/task:0/cpu:0\"\n  float_ops: 14400\n  "
-      "total_float_ops: 14400\n  input_shapes {\n    key: 0\n    value {\n     "
-      " unknown_rank: true\n    }\n  }\n  input_shapes {\n    key: 1\n    "
-      "value {\n      unknown_rank: true\n    }\n  }\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 13\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 13\n  "
-      "run_count: 1\n  total_run_count: 1\n  total_definition_count: "
-      "3\n}\nfloat_ops: 0\ntotal_float_ops: 34360\naccelerator_exec_micros: "
-      "0\ncpu_exec_micros: 0\ntotal_accelerator_exec_micros: "
-      "0\ntotal_cpu_exec_micros: 97\nrun_count: 0\ntotal_run_count: "
-      "13\ntotal_definition_count: 68\n",
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 4904\ntotal_requested_bytes: "
+      "14592\ntotal_parameters: 451\nchildren {\n  name: \"Conv2D\"\n  "
+      "exec_micros: 4292\n  requested_bytes: 9472\n  total_exec_micros: 4292\n "
+      " total_requested_bytes: 9472\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 5832\n  "
+      "total_float_ops: 5832\n  input_shapes {\n    key: 0\n    value {\n      "
+      "dim {\n        size: 2\n      }\n      dim {\n        size: 6\n      "
+      "}\n      dim {\n        size: 6\n      }\n      dim {\n        size: "
+      "3\n      }\n    }\n  }\n  input_shapes {\n    key: 1\n    value {\n     "
+      " dim {\n        size: 3\n      }\n      dim {\n        size: 3\n      "
+      "}\n      dim {\n        size: 3\n      }\n      dim {\n        size: "
+      "6\n      }\n    }\n  }\n  accelerator_exec_micros: 226\n  "
+      "cpu_exec_micros: 4066\n  total_accelerator_exec_micros: 226\n  "
+      "total_cpu_exec_micros: 4066\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  peak_bytes: 5888\n  residual_bytes: 768\n  "
+      "output_bytes: 768\n  total_peak_bytes: 5888\n  total_residual_bytes: "
+      "768\n  total_output_bytes: 768\n}\nchildren {\n  name: \"Conv2D_1\"\n  "
+      "exec_micros: 597\n  requested_bytes: 5120\n  total_exec_micros: 597\n  "
+      "total_requested_bytes: 5120\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 4608\n  "
+      "total_float_ops: 4608\n  input_shapes {\n    key: 0\n    value {\n      "
+      "dim {\n        size: 2\n      }\n      dim {\n        size: 3\n      "
+      "}\n      dim {\n        size: 3\n      }\n      dim {\n        size: "
+      "6\n      }\n    }\n  }\n  input_shapes {\n    key: 1\n    value {\n     "
+      " dim {\n        size: 2\n      }\n      dim {\n        size: 2\n      "
+      "}\n      dim {\n        size: 6\n      }\n      dim {\n        size: "
+      "12\n      }\n    }\n  }\n  accelerator_exec_micros: 178\n  "
+      "cpu_exec_micros: 419\n  total_accelerator_exec_micros: 178\n  "
+      "total_cpu_exec_micros: 419\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  peak_bytes: 4096\n  residual_bytes: 512\n  "
+      "output_bytes: 512\n  total_peak_bytes: 4096\n  total_residual_bytes: "
+      "512\n  total_output_bytes: 512\n}\ntotal_float_ops: "
+      "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: "
+      "4500\ntotal_run_count: 5\ntotal_definition_count: 34\ntotal_peak_bytes: "
+      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
 
 TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
-  Options opts(100, 0, 0, 0, 0, 0, -1, "name", {".*"}, {".*"}, {""},
-               {"unit_2_1.*DW"},  // show_name_regexes.
-               {""}, true,        // account_displayed_op_only.
+  Options opts(100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name", {".*"}, {".*"},
+               {""}, {"Conv2D_1"},  // show_name_regexes.
+               {""}, true,          // account_displayed_op_only: root micros/bytes totals equal Conv2D_1's.
                {"params"}, "", {});
   const GraphNodeProto& root = tf_stats_->ShowGraphNode("scope", opts);
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
-      "0\nfloat_ops: 0\ntotal_float_ops: 0\naccelerator_exec_micros: "
-      "0\ncpu_exec_micros: 0\ntotal_accelerator_exec_micros: "
-      "0\ntotal_cpu_exec_micros: 0\nrun_count: 0\ntotal_run_count: "
-      "0\ntotal_definition_count: 1\n",
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 597\ntotal_requested_bytes: "
+      "5120\nchildren {\n  name: \"Conv2D_1\"\n  exec_micros: 597\n  "
+      "requested_bytes: 5120\n  total_exec_micros: 597\n  "
+      "total_requested_bytes: 5120\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 4608\n  "
+      "total_float_ops: 4608\n  input_shapes {\n    key: 0\n    value {\n      "
+      "dim {\n        size: 2\n      }\n      dim {\n        size: 3\n      "
+      "}\n      dim {\n        size: 3\n      }\n      dim {\n        size: "
+      "6\n      }\n    }\n  }\n  input_shapes {\n    key: 1\n    value {\n     "
+      " dim {\n        size: 2\n      }\n      dim {\n        size: 2\n      "
+      "}\n      dim {\n        size: 6\n      }\n      dim {\n        size: "
+      "12\n      }\n    }\n  }\n  accelerator_exec_micros: 178\n  "
+      "cpu_exec_micros: 419\n  total_accelerator_exec_micros: 178\n  "
+      "total_cpu_exec_micros: 419\n  run_count: 1\n  total_run_count: 1\n  "
+      "total_definition_count: 1\n  peak_bytes: 4096\n  residual_bytes: 512\n  "
+      "output_bytes: 512\n  total_peak_bytes: 4096\n  total_residual_bytes: "
+      "512\n  total_output_bytes: 512\n}\ntotal_float_ops: "
+      "4608\ntotal_accelerator_exec_micros: 178\ntotal_cpu_exec_micros: "
+      "419\ntotal_run_count: 1\ntotal_definition_count: 2\ntotal_peak_bytes: "
+      "4096\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
 
 TEST_F(TFProfStatsTest, TestShowTensorValue) {
-  Options opts(10, 0, 0, 0, 0, 0, -1, "name", {".*"}, {".*"}, {""},
-               {"unit_1_0.*gamma"}, {""}, false,
+  Options opts(10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name", {".*"}, {".*"},
+               {""}, {"DW"}, {""}, false,  // {"DW"} is show_name_regexes.
                {"tensor_value"},  // Show tensor value from checkpoint.
                "", {});
   const GraphNodeProto& root = tf_stats_->ShowGraphNode("scope", opts);
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 97\ntotal_requested_bytes: "
-      "8656\ntotal_parameters: 370\nfloat_ops: 0\ntotal_float_ops: "
-      "34360\naccelerator_exec_micros: 0\ncpu_exec_micros: "
-      "0\ntotal_accelerator_exec_micros: 0\ntotal_cpu_exec_micros: "
-      "97\nrun_count: 0\ntotal_run_count: 13\ntotal_definition_count: 68\n",
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 4904\ntotal_requested_bytes: "
+      "14592\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  "
+      "exec_micros: 2\n  parameters: 162\n  total_exec_micros: 2\n  "
+      "total_parameters: 162\n  devices: "
+      "\"/job:localhost/replica:0/task:0/gpu:0\"\n  tensor_value {\n    dtype: "
+      "DT_FLOAT\n    value_double: -0.000534315\n    value_double: "
+      "-0.00089602\n    value_double: -0.000417239\n    value_double: "
+      "0.00041444\n    value_double: 0.000780691\n    value_double: "
+      "-0.000559057\n    value_double: -0.000234623\n    value_double: "
+      "0.00013393\n    value_double: -0.00187574\n    value_double: "
+      "0.000785666\n    value_double: 0.000673294\n    value_double: "
+      "0.000653368\n    value_double: 0.000924489\n    value_double: "
+      "-0.000318373\n    value_double: -0.000385202\n    value_double: "
+      "-7.92661e-05\n    value_double: 2.70287e-05\n    value_double: "
+      "0.00152302\n    value_double: 8.04435e-05\n    value_double: "
+      "-0.00058102\n    value_double: 0.000244291\n    value_double: "
+      "-0.000438045\n    value_double: -0.000110199\n    value_double: "
+      "0.000731663\n    value_double: -0.0012326\n    value_double: "
+      "0.00064065\n    value_double: -0.00135203\n    value_double: "
+      "-6.42784e-05\n    value_double: -0.0011857\n    value_double: "
+      "-0.000487383\n    value_double: 3.41493e-05\n    value_double: "
+      "-0.00158447\n    value_double: 0.00168448\n    value_double: "
+      "0.00160946\n    value_double: -0.000600483\n    value_double: "
+      "0.000650259\n    value_double: -0.00109938\n    value_double: "
+      "-0.000842166\n    value_double: -0.0022673\n    value_double: "
+      "-0.00101941\n    value_double: -0.0011169\n    value_double: "
+      "-0.0013557\n    value_double: -1.46354e-05\n    value_double: "
+      "-1.05487e-05\n    value_double: -0.00092014\n    value_double: "
+      "0.00272874\n    value_double: 5.13942e-05\n    value_double: "
+      "-0.00223472\n    value_double: -0.000250875\n    value_double: "
+      "-0.00180747\n    value_double: -0.00234714\n    value_double: "
+      "-0.00113523\n    value_double: -0.00112635\n    value_double: "
+      "-0.000843118\n    value_double: -6.84256e-05\n    value_double: "
+      "0.000243336\n    value_double: 0.00119151\n    value_double: "
+      "0.00131022\n    value_double: 0.000768038\n    value_double: "
+      "-8.90095e-05\n    value_double: -0.000626427\n    value_double: "
+      "-7.0617e-05\n    value_double: -0.0021988\n    value_double: "
+      "-0.00221544\n    value_double: -0.000393118\n    value_double: "
+      "0.000159464\n    value_double: -0.000874746\n    value_double: "
+      "-0.00131239\n    value_double: -0.00135747\n    value_double: "
+      "-0.00179753\n    value_double: -0.00101005\n    value_double: "
+      "-0.000107518\n    value_double: -0.000616882\n    value_double: "
+      "-0.000360923\n    value_double: -0.00026896\n    value_double: "
+      "-0.000142548\n    value_double: 0.000577227\n    value_double: "
+      "0.000536027\n    value_double: 0.00126907\n    value_double: "
+      "-0.00122712\n    value_double: -3.60499e-05\n    value_double: "
+      "0.000151026\n    value_double: 0.00107658\n    value_double: "
+      "0.00116475\n    value_double: -0.00145312\n    value_double: "
+      "0.000233326\n    value_double: -0.00020198\n    value_double: "
+      "0.00179029\n    value_double: 0.00150048\n    value_double: "
+      "-0.000884775\n    value_double: 0.000409188\n    value_double: "
+      "2.97176e-05\n    value_double: -0.000506118\n    value_double: "
+      "-2.33992e-05\n    value_double: -0.00037212\n    value_double: "
+      "0.000862773\n    value_double: 0.00174046\n    value_double: "
+      "-0.000240207\n    value_double: 0.000663976\n    value_double: "
+      "-0.00134747\n    value_double: 0.00115585\n    value_double: "
+      "0.000555869\n    value_double: 0.00176722\n    value_double: "
+      "-0.000518409\n    value_double: 0.00101051\n    value_double: "
+      "0.000129399\n    value_double: -0.000916389\n    value_double: "
+      "-0.00137693\n    value_double: -0.00152412\n    value_double: "
+      "7.32515e-05\n    value_double: -0.000190811\n    value_double: "
+      "-0.000158692\n    value_double: -5.7791e-05\n    value_double: "
+      "0.000671785\n    value_double: -0.00152924\n    value_double: "
+      "0.00117314\n    value_double: -0.000384202\n    value_double: "
+      "0.00176709\n    value_double: -0.000181703\n    value_double: "
+      "-0.000460994\n    value_double: 0.000643716\n    value_double: "
+      "4.76719e-05\n    value_double: -0.00101037\n    value_double: "
+      "0.00159621\n    value_double: 0.00186758\n    value_double: "
+      "0.00100001\n    value_double: -0.00121831\n    value_double: "
+      "0.00132231\n    value_double: 0.0013511\n    value_double: 0.00106659\n "
+      "   value_double: 0.00018091\n    value_double: 0.00155925\n    "
+      "value_double: 4.26087e-05\n    value_double: 0.000243264\n    "
+      "value_double: -0.0017202\n    value_double: -0.000218897\n    "
+      "value_double: 0.00118693\n    value_double: 0.00258909\n    "
+      "value_double: 0.000641913\n    value_double: -0.0013211\n    "
+      "value_double: -0.00171943\n    value_double: 0.00089151\n    "
+      "value_double: -0.00114969\n    value_double: -0.000196331\n    "
+      "value_double: 0.00109994\n    value_double: 0.000302616\n    "
+      "value_double: 0.000675812\n    value_double: 0.00112222\n    "
+      "value_double: 0.000516456\n    value_double: 0.00133357\n    "
+      "value_double: 0.000298491\n    value_double: 0.00145934\n    "
+      "value_double: -0.00159102\n    value_double: -0.000819061\n    "
+      "value_double: 0.000120583\n    value_double: 0.0006108\n    "
+      "value_double: 0.00124132\n    value_double: 0.000764859\n    "
+      "value_double: 0.000374641\n    value_double: -0.00149603\n    "
+      "value_double: -0.000317367\n    value_double: -0.000417829\n  }\n  "
+      "cpu_exec_micros: 2\n  total_cpu_exec_micros: 2\n  run_count: 1\n  "
+      "total_run_count: 1\n  total_definition_count: 10\n  output_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\ntotal_float_ops: "
+      "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: "
+      "4500\ntotal_run_count: 5\ntotal_definition_count: 34\ntotal_peak_bytes: "
+      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
diff --git a/tensorflow/core/profiler/internal/tfprof_tensor.h b/tensorflow/core/profiler/internal/tfprof_tensor.h
index d6c4ae13117..9f72e081c91 100644
--- a/tensorflow/core/profiler/internal/tfprof_tensor.h
+++ b/tensorflow/core/profiler/internal/tfprof_tensor.h
@@ -51,6 +51,39 @@ class TFProfTensor {
 
   void Build();
 
+  // Appends one element to `dim` and its text form to formatted_str_.
+  // `value` is streamed to a string and parsed back, then stored in
+  // value_double, value_int64 or value_str according to its type (double,
+  // int64 or string). Any other type is a fatal CHECK failure.
+  // Returns true once the value has been recorded.
+  template <typename T>
+  bool AddValue(const T& value, TFProfTensorProto* dim) {
+    std::ostringstream sstream;
+    sstream << value;
+    if (typeid(value) == typeid(double)) {
+      double double_val;
+      CHECK(strings::safe_strtod(sstream.str().c_str(), &double_val));
+      dim->add_value_double(double_val);
+      formatted_str_ += strings::Printf(
+          "%.2f ", dim->value_double(dim->value_double_size() - 1));
+    } else if (typeid(value) == typeid(int64)) {
+      int64 int64_val;
+      CHECK(strings::safe_strto64(sstream.str().c_str(), &int64_val));
+      dim->add_value_int64(int64_val);
+      formatted_str_ += strings::Printf(
+          "%lld ",
+          static_cast<int64>(dim->value_int64(dim->value_int64_size() - 1)));
+    } else if (typeid(value) == typeid(string)) {
+      dim->add_value_str(sstream.str());
+      formatted_str_ =
+          strings::StrCat(formatted_str_, "'",
+                          dim->value_str(dim->value_str_size() - 1) + "' ");
+    } else {
+      CHECK(false) << "Unsupported type: " << typeid(value).name();
+    }
+    return true;
+  }
+
   // It assumes the flatten values are stored in row-major, which is mentioned
   // indirectly at various places:
   // TODO(xpan): Further verifying it.
@@ -59,37 +92,19 @@ class TFProfTensor {
                     TFProfTensorProto* dim) {
     formatted_str_ += "[";
     int64 nstart = start;
-    for (int i = 0; i < tensor_->dim_size(depth); i++) {
-      // Last dimension, pull the values.
-      if (depth == tensor_->dims() - 1) {
-        std::ostringstream sstream;
-        sstream << values[nstart];
-
-        if (typeid(values[nstart]) == typeid(double)) {
-          double double_val;
-          CHECK(strings::safe_strtod(sstream.str().c_str(), &double_val));
-          dim->add_value_double(double_val);
-          formatted_str_ += strings::Printf(
-              "%.2f ", dim->value_double(dim->value_double_size() - 1));
-        } else if (typeid(values[nstart]) == typeid(int64)) {
-          int64 int64_val;
-          CHECK(strings::safe_strto64(sstream.str().c_str(), &int64_val));
-          dim->add_value_int64(int64_val);
-          formatted_str_ += strings::Printf(
-              "%lld ", static_cast<int64>(
-                           dim->value_int64(dim->value_int64_size() - 1)));
-        } else if (typeid(values[nstart]) == typeid(string)) {
-          dim->add_value_str(sstream.str());
-          formatted_str_ =
-              strings::StrCat(formatted_str_, "'",
-                              dim->value_str(dim->value_str_size() - 1) + "' ");
-        } else {
-          CHECK(false) << "Unsupported type: " << typeid(values[nstart]).name();
-        }
-        ++nstart;
-      } else {
-        // Not-last dimension. Drill deeper.
-        nstart = BuildOutput<T>(nstart, depth + 1, values, dim);
-      }
-    }
+    if (tensor_->dims() == 0 && values.size() == 1) {
+      // Scalar tensor: there is no dimension to loop over, emit the one value.
+      AddValue(values[nstart], dim);
+    } else {
+      for (int i = 0; i < tensor_->dim_size(depth); i++) {
+        if (depth == tensor_->dims() - 1) {
+          // Last dimension, pull the values.
+          AddValue(values[nstart], dim);
+          ++nstart;
+        } else {
+          // Not-last dimension. Drill deeper.
+          nstart = BuildOutput<T>(nstart, depth + 1, values, dim);
+        }
+      }
+    }
     if (formatted_str_.length() > kTFProfTenosrMaxDisplayLen) {
diff --git a/tensorflow/core/profiler/internal/tfprof_tensor_test.cc b/tensorflow/core/profiler/internal/tfprof_tensor_test.cc
index 50ef82abc91..c68888e88fc 100644
--- a/tensorflow/core/profiler/internal/tfprof_tensor_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_tensor_test.cc
@@ -18,12 +18,12 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -57,244 +57,19 @@ class TFProfTensorTest : public ::testing::Test {
 };
 
 TEST_F(TFProfTensorTest, Basics) {
-  Options opts(3, 0, 0, 0, 0, 0, -1, "name", {"VariableV2"}, {".*"}, {""},
-               {".*"}, {""}, false, {"tensor_value"},  // show the tensor value.
+  Options opts(3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, "name", {"VariableV2"},
+               {".*"}, {""}, {".*"}, {""}, false,
+               {"tensor_value"},  // show the tensor value.
                "", {});
   const GraphNodeProto& root = tf_stats_->ShowGraphNode("scope", opts);
 
   GraphNodeProto expected;
-  CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
-      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
-      "370\nchildren {\n  name: \"conv2d\"\n  exec_micros: 0\n  "
-      "requested_bytes: 0\n  total_exec_micros: 0\n  total_requested_bytes: "
-      "0\n  total_parameters: 140\n  children {\n    name: \"conv2d/bias\"\n   "
-      " exec_micros: 0\n    requested_bytes: 0\n    parameters: 5\n    "
-      "total_exec_micros: 0\n    total_requested_bytes: 0\n    "
-      "total_parameters: 5\n    float_ops: 0\n    total_float_ops: 0\n    "
-      "tensor_value {\n      dtype: DT_FLOAT\n      value_double: 0\n      "
-      "value_double: 0\n      value_double: 0\n      value_double: 0\n      "
-      "value_double: 0\n    }\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 0\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 0\n    run_count: 0\n    total_run_count: 0\n    "
-      "total_definition_count: 1\n  }\n  children {\n    name: "
-      "\"conv2d/kernel\"\n    exec_micros: 0\n    requested_bytes: 0\n    "
-      "parameters: 135\n    total_exec_micros: 0\n    total_requested_bytes: "
-      "0\n    total_parameters: 135\n    float_ops: 0\n    total_float_ops: "
-      "0\n    tensor_value {\n      dtype: DT_FLOAT\n      value_double: "
-      "-0.113138\n      value_double: 0.261431\n      value_double: 0.215777\n "
-      "     value_double: 0.24135\n      value_double: -0.113195\n      "
-      "value_double: -0.212639\n      value_double: -0.0907301\n      "
-      "value_double: 0.0221634\n      value_double: 0.21821\n      "
-      "value_double: 0.22715\n      value_double: -0.108698\n      "
-      "value_double: 0.240911\n      value_double: -0.138626\n      "
-      "value_double: -0.144752\n      value_double: -0.00962037\n      "
-      "value_double: 0.0971008\n      value_double: 0.00264764\n      "
-      "value_double: -0.272929\n      value_double: 0.0129845\n      "
-      "value_double: 0.0466554\n      value_double: -0.229184\n      "
-      "value_double: 0.153576\n      value_double: -0.169218\n      "
-      "value_double: -0.112991\n      value_double: 0.205739\n      "
-      "value_double: 0.257844\n      value_double: 0.107455\n      "
-      "value_double: -0.207914\n      value_double: 0.15211\n      "
-      "value_double: 0.277932\n      value_double: 0.145986\n      "
-      "value_double: -0.0883989\n      value_double: 0.167506\n      "
-      "value_double: 0.10237\n      value_double: 0.0542143\n      "
-      "value_double: 0.0334378\n      value_double: 0.159489\n      "
-      "value_double: 0.246583\n      value_double: 0.0154283\n      "
-      "value_double: 0.0872411\n      value_double: -0.25732\n      "
-      "value_double: 0.0499355\n      value_double: 0.0266221\n      "
-      "value_double: 0.088801\n      value_double: -0.0794552\n      "
-      "value_double: -0.00383255\n      value_double: -0.165267\n      "
-      "value_double: 0.0271328\n      value_double: 0.0729822\n      "
-      "value_double: 0.200795\n      value_double: 0.100276\n      "
-      "value_double: 0.285254\n      value_double: -0.171945\n      "
-      "value_double: -0.0187411\n      value_double: -0.218729\n      "
-      "value_double: 0.233753\n      value_double: 0.109184\n      "
-      "value_double: 0.247875\n      value_double: -0.224632\n      "
-      "value_double: 0.0940739\n      value_double: 0.00663087\n      "
-      "value_double: -0.075786\n      value_double: -0.179992\n      "
-      "value_double: -0.276016\n      value_double: 0.261207\n      "
-      "value_double: -0.0658191\n      value_double: -0.0747132\n      "
-      "value_double: -0.0839638\n      value_double: -0.0825393\n      "
-      "value_double: 0.0915958\n      value_double: -0.195425\n      "
-      "value_double: -0.255836\n      value_double: -0.08745\n      "
-      "value_double: -0.181623\n      value_double: -0.235936\n      "
-      "value_double: 0.0205423\n      value_double: 0.185447\n      "
-      "value_double: -0.0691599\n      value_double: -0.0451089\n      "
-      "value_double: -0.153922\n      value_double: -0.0279411\n      "
-      "value_double: 0.148915\n      value_double: -0.018026\n      "
-      "value_double: -0.144903\n      value_double: 0.0370046\n      "
-      "value_double: 0.0764987\n      value_double: 0.0586488\n      "
-      "value_double: -0.222919\n      value_double: 0.0238447\n      "
-      "value_double: -0.106012\n      value_double: -0.102202\n      "
-      "value_double: -0.159347\n      value_double: -0.0232876\n      "
-      "value_double: 0.109855\n      value_double: -0.141833\n      "
-      "value_double: 0.1376\n      value_double: -0.12413\n      value_double: "
-      "-0.208968\n      value_double: 0.0758635\n      value_double: "
-      "-0.217672\n      value_double: -0.20153\n      value_double: "
-      "-0.195414\n      value_double: -0.18549\n      value_double: "
-      "0.00298014\n      value_double: -0.279283\n      value_double: "
-      "0.200084\n      value_double: -0.0968328\n      value_double: -0.243\n  "
-      "    value_double: 0.239319\n      value_double: -0.236288\n      "
-      "value_double: 0.169477\n      value_double: 0.126673\n      "
-      "value_double: 0.182215\n      value_double: -0.028243\n      "
-      "value_double: 0.282762\n      value_double: -0.165548\n      "
-      "value_double: -0.0641245\n      value_double: -0.186382\n      "
-      "value_double: 0.0329038\n      value_double: 0.271848\n      "
-      "value_double: 0.084653\n      value_double: -0.108163\n      "
-      "value_double: 0.247094\n      value_double: 0.192687\n      "
-      "value_double: 0.171922\n      value_double: -0.187649\n      "
-      "value_double: 0.251253\n      value_double: 0.272077\n      "
-      "value_double: 0.19068\n      value_double: 0.220352\n      "
-      "value_double: -0.255741\n      value_double: 0.110853\n      "
-      "value_double: 0.146625\n      value_double: 0.167754\n      "
-      "value_double: 0.249554\n    }\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 0\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 0\n    run_count: 0\n    total_run_count: 0\n    "
-      "total_definition_count: 1\n  }\n  float_ops: 0\n  total_float_ops: 0\n  "
-      "accelerator_exec_micros: 0\n  cpu_exec_micros: 0\n  "
-      "total_accelerator_exec_micros: 0\n  total_cpu_exec_micros: 0\n  "
-      "run_count: 0\n  total_run_count: 0\n  total_definition_count: "
-      "3\n}\nchildren {\n  name: \"conv2d_1\"\n  exec_micros: 0\n  "
-      "requested_bytes: 0\n  total_exec_micros: 0\n  total_requested_bytes: "
-      "0\n  total_parameters: 230\n  children {\n    name: \"conv2d_1/bias\"\n "
-      "   exec_micros: 0\n    requested_bytes: 0\n    parameters: 5\n    "
-      "total_exec_micros: 0\n    total_requested_bytes: 0\n    "
-      "total_parameters: 5\n    float_ops: 0\n    total_float_ops: 0\n    "
-      "tensor_value {\n      dtype: DT_FLOAT\n      value_double: 0\n      "
-      "value_double: 0\n      value_double: 0\n      value_double: 0\n      "
-      "value_double: 0\n    }\n    accelerator_exec_micros: 0\n    "
-      "cpu_exec_micros: 0\n    total_accelerator_exec_micros: 0\n    "
-      "total_cpu_exec_micros: 0\n    run_count: 0\n    total_run_count: 0\n    "
-      "total_definition_count: 1\n  }\n  children {\n    name: "
-      "\"conv2d_1/kernel\"\n    exec_micros: 0\n    requested_bytes: 0\n    "
-      "parameters: 225\n    total_exec_micros: 0\n    total_requested_bytes: "
-      "0\n    total_parameters: 225\n    float_ops: 0\n    total_float_ops: "
-      "0\n    tensor_value {\n      dtype: DT_FLOAT\n      value_double: "
-      "-0.00170514\n      value_double: 0.138601\n      value_double: "
-      "-0.224822\n      value_double: -0.0848449\n      value_double: "
-      "0.170551\n      value_double: 0.147666\n      value_double: "
-      "-0.0570606\n      value_double: -0.132805\n      value_double: "
-      "-0.172013\n      value_double: 0.249707\n      value_double: 0.149734\n "
-      "     value_double: 0.0365986\n      value_double: -0.0923146\n      "
-      "value_double: -0.17745\n      value_double: -0.169978\n      "
-      "value_double: -0.173298\n      value_double: -0.110407\n      "
-      "value_double: 0.1469\n      value_double: 0.0419576\n      "
-      "value_double: 0.0391093\n      value_double: -0.137381\n      "
-      "value_double: 0.212642\n      value_double: -0.067034\n      "
-      "value_double: -0.0727709\n      value_double: -0.0276531\n      "
-      "value_double: 0.218212\n      value_double: 0.0596479\n      "
-      "value_double: -0.0468102\n      value_double: -0.0250467\n      "
-      "value_double: -0.20391\n      value_double: -0.233801\n      "
-      "value_double: 0.135615\n      value_double: -0.182124\n      "
-      "value_double: 0.254205\n      value_double: 0.0819146\n      "
-      "value_double: -0.146696\n      value_double: -0.20095\n      "
-      "value_double: -0.250555\n      value_double: -0.226406\n      "
-      "value_double: 0.0421331\n      value_double: 0.0361264\n      "
-      "value_double: -0.188558\n      value_double: -0.0222711\n      "
-      "value_double: -0.128226\n      value_double: -0.148305\n      "
-      "value_double: -0.137598\n      value_double: -0.041647\n      "
-      "value_double: -0.0574933\n      value_double: 0.122506\n      "
-      "value_double: 0.0415936\n      value_double: 0.244957\n      "
-      "value_double: 0.00372121\n      value_double: -0.139939\n      "
-      "value_double: 0.250411\n      value_double: -0.23848\n      "
-      "value_double: -0.0717569\n      value_double: -0.00884159\n      "
-      "value_double: 0.135616\n      value_double: -0.0493895\n      "
-      "value_double: 0.254308\n      value_double: -0.181419\n      "
-      "value_double: -0.114829\n      value_double: -0.172638\n      "
-      "value_double: 0.06984\n      value_double: -0.086704\n      "
-      "value_double: 0.168515\n      value_double: -0.152275\n      "
-      "value_double: -0.230775\n      value_double: -0.254366\n      "
-      "value_double: -0.115397\n      value_double: 0.0418207\n      "
-      "value_double: -0.199607\n      value_double: -0.167001\n      "
-      "value_double: -0.187238\n      value_double: 0.0196097\n      "
-      "value_double: 0.201653\n      value_double: -0.143758\n      "
-      "value_double: 0.167187\n      value_double: -0.129141\n      "
-      "value_double: 0.230154\n      value_double: -0.119968\n      "
-      "value_double: -0.121843\n      value_double: -0.0118565\n      "
-      "value_double: 0.0285747\n      value_double: -0.0593699\n      "
-      "value_double: -0.175214\n      value_double: -0.211524\n      "
-      "value_double: 0.167042\n      value_double: -0.216357\n      "
-      "value_double: -0.0218886\n      value_double: -0.244211\n      "
-      "value_double: 0.175301\n      value_double: 0.0654932\n      "
-      "value_double: -0.0419763\n      value_double: -0.103275\n      "
-      "value_double: -0.0848433\n      value_double: -0.0845421\n      "
-      "value_double: -0.00269318\n      value_double: -0.145978\n      "
-      "value_double: -0.217061\n      value_double: -0.0937043\n      "
-      "value_double: 0.235796\n      value_double: -0.0893372\n      "
-      "value_double: 0.000827968\n      value_double: 0.0172743\n      "
-      "value_double: -0.234205\n      value_double: -0.0867703\n      "
-      "value_double: 0.131704\n      value_double: 0.134143\n      "
-      "value_double: -0.162257\n      value_double: -0.129706\n      "
-      "value_double: 0.0763288\n      value_double: 0.156988\n      "
-      "value_double: 0.220033\n      value_double: -0.179884\n      "
-      "value_double: 0.066697\n      value_double: 0.212322\n      "
-      "value_double: -0.0961226\n      value_double: -0.11223\n      "
-      "value_double: 0.249944\n      value_double: 0.115673\n      "
-      "value_double: -0.100203\n      value_double: 0.125645\n      "
-      "value_double: -0.256104\n      value_double: 0.0996534\n      "
-      "value_double: 0.167306\n      value_double: -0.00700775\n      "
-      "value_double: 0.242145\n      value_double: 0.088406\n      "
-      "value_double: 0.0975334\n      value_double: -0.0309525\n      "
-      "value_double: -0.0422794\n      value_double: 0.20739\n      "
-      "value_double: 0.113992\n      value_double: 0.253818\n      "
-      "value_double: -0.0857835\n      value_double: 0.223902\n      "
-      "value_double: 0.10291\n      value_double: 0.103091\n      "
-      "value_double: -0.177502\n      value_double: -0.0258242\n      "
-      "value_double: -0.130567\n      value_double: -0.15999\n      "
-      "value_double: -0.101484\n      value_double: 0.0188813\n      "
-      "value_double: 0.160626\n      value_double: 0.0467491\n      "
-      "value_double: 0.193634\n      value_double: -0.0910993\n      "
-      "value_double: 0.0440249\n      value_double: -0.255389\n      "
-      "value_double: -0.240244\n      value_double: -0.213171\n      "
-      "value_double: 0.175978\n      value_double: -0.0251202\n      "
-      "value_double: 0.0943941\n      value_double: -0.196194\n      "
-      "value_double: 0.163395\n      value_double: -0.010777\n      "
-      "value_double: -0.0626751\n      value_double: -0.246234\n      "
-      "value_double: 0.0662063\n      value_double: 0.120589\n      "
-      "value_double: 0.237322\n      value_double: 0.0849243\n      "
-      "value_double: -0.066591\n      value_double: 0.0512236\n      "
-      "value_double: -0.144309\n      value_double: -0.235415\n      "
-      "value_double: -0.0565311\n      value_double: 0.0882529\n      "
-      "value_double: -0.215923\n      value_double: -0.0873292\n      "
-      "value_double: -0.0691103\n      value_double: -0.00238678\n      "
-      "value_double: 0.147789\n      value_double: -0.124451\n      "
-      "value_double: 0.205044\n      value_double: -0.0596834\n      "
-      "value_double: 0.0268479\n      value_double: 0.0857448\n      "
-      "value_double: -0.0923855\n      value_double: -0.0960547\n      "
-      "value_double: 0.169869\n      value_double: 0.16988\n      "
-      "value_double: -0.032271\n      value_double: -0.120731\n      "
-      "value_double: -0.199086\n      value_double: 0.181199\n      "
-      "value_double: 0.00897732\n      value_double: -0.257469\n      "
-      "value_double: -0.135556\n      value_double: -0.149663\n      "
-      "value_double: -0.00990398\n      value_double: 0.221165\n      "
-      "value_double: 0.0327134\n      value_double: -0.0392821\n      "
-      "value_double: -0.0614503\n      value_double: 0.246602\n      "
-      "value_double: -0.171692\n      value_double: -0.150835\n      "
-      "value_double: -0.13854\n      value_double: -0.244668\n      "
-      "value_double: 0.0790781\n      value_double: 0.212678\n      "
-      "value_double: 0.0782059\n      value_double: -0.177888\n      "
-      "value_double: -0.165914\n      value_double: -0.164251\n      "
-      "value_double: 0.165007\n      value_double: 0.239615\n      "
-      "value_double: -0.217642\n      value_double: -0.219843\n      "
-      "value_double: 0.0828398\n      value_double: 0.00272235\n      "
-      "value_double: -0.0323662\n      value_double: -0.255953\n      "
-      "value_double: 0.237298\n      value_double: -0.0896481\n      "
-      "value_double: -0.0605349\n      value_double: 0.231679\n      "
-      "value_double: -0.123842\n      value_double: 0.0858642\n      "
-      "value_double: 0.23111\n      value_double: 0.0491742\n    }\n    "
-      "accelerator_exec_micros: 0\n    cpu_exec_micros: 0\n    "
-      "total_accelerator_exec_micros: 0\n    total_cpu_exec_micros: 0\n    "
-      "run_count: 0\n    total_run_count: 0\n    total_definition_count: 1\n  "
-      "}\n  float_ops: 0\n  total_float_ops: 0\n  accelerator_exec_micros: 0\n "
-      " cpu_exec_micros: 0\n  total_accelerator_exec_micros: 0\n  "
-      "total_cpu_exec_micros: 0\n  run_count: 0\n  total_run_count: 0\n  "
-      "total_definition_count: 3\n}\nfloat_ops: 0\ntotal_float_ops: "
-      "0\naccelerator_exec_micros: 0\ncpu_exec_micros: "
-      "0\ntotal_accelerator_exec_micros: 0\ntotal_cpu_exec_micros: "
-      "0\nrun_count: 0\ntotal_run_count: 0\ntotal_definition_count: 6\n",
-      &expected));
-  EXPECT_EQ(expected.DebugString(), root.DebugString());
+  EXPECT_EQ(root.children(0).name(), "DW");
+  EXPECT_GT(root.children(0).tensor_value().value_double_size(), 10);
+  EXPECT_EQ(root.children(1).name(), "DW2");
+  EXPECT_GT(root.children(1).tensor_value().value_double_size(), 10);
+  EXPECT_EQ(root.children(2).name(), "ScalarW");
+  EXPECT_EQ(root.children(2).tensor_value().value_double_size(), 1);
 }
 
 }  // namespace tfprof
diff --git a/tensorflow/core/profiler/internal/tfprof_timeline.cc b/tensorflow/core/profiler/internal/tfprof_timeline.cc
index cfd80b875a5..f3934860d9a 100644
--- a/tensorflow/core/profiler/internal/tfprof_timeline.cc
+++ b/tensorflow/core/profiler/internal/tfprof_timeline.cc
@@ -147,8 +147,8 @@ void MemoryTracker::TrackNodeConnection(int64 step, const GraphNode* node,
   if (output_idx == node->node->src_output_idx().end()) {
     return;
   }
-  const auto& output = src->node->output_bytes(step).find(output_idx->second);
-  if (output == src->node->output_bytes(step).end()) {
+  const auto& output = src->node->output_memory(step).find(output_idx->second);
+  if (output == src->node->output_memory(step).end()) {
     return;
   }
   int64 output_bytes = output->second.first;
diff --git a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
index 6842f262c63..2fe3653ec2e 100644
--- a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
@@ -62,7 +62,8 @@ class TFProfTimelineTest : public ::testing::Test {
 // manually check it's correct
 TEST_F(TFProfTimelineTest, GraphView) {
   string dump_file = io::JoinPath(testing::TmpDir(), "dump");
-  Options opts(10000, 0, 0, 0, 0, 0, 0, "name", {".*"},  // accout_type_regexes
+  Options opts(10000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "name",
+               {".*"},  // account_type_regexes
                {".*"}, {""}, {".*"}, {""}, false,
                {"params", "bytes", "micros", "float_ops"}, "timeline",
                {{"outfile", dump_file}});
@@ -70,12 +71,13 @@ TEST_F(TFProfTimelineTest, GraphView) {
 
   string dump_str;
   TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
-  EXPECT_EQ(5576767607271035974ull, Hash64(dump_str));
+  EXPECT_EQ(16947107375505024864ull, Hash64(dump_str));
 }
 
 TEST_F(TFProfTimelineTest, ScopeView) {
   string dump_file = io::JoinPath(testing::TmpDir(), "dump");
-  Options opts(5, 0, 0, 0, 0, 0, 0, "name", {".*"},  // accout_type_regexes
+  Options opts(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "name",
+               {".*"},  // account_type_regexes
                {".*"}, {""}, {".*"}, {""}, false,
                {"params", "bytes", "micros", "float_ops"}, "timeline",
                {{"outfile", dump_file}});
@@ -83,7 +85,7 @@ TEST_F(TFProfTimelineTest, ScopeView) {
 
   string dump_str;
   TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
-  EXPECT_EQ(10135186027625211652ull, Hash64(dump_str));
+  EXPECT_EQ(2710044785377031280ull, Hash64(dump_str));
 }
 
 // TODO(xpan): tfprof_log is too large to include in testdata when adding
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.cc b/tensorflow/core/profiler/internal/tfprof_utils.cc
index 464a13f7dfc..383c4725b7b 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.cc
+++ b/tensorflow/core/profiler/internal/tfprof_utils.cc
@@ -140,35 +140,66 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd,
       ++i;
     } else if (pieces[i] == tensorflow::tfprof::kOptions[2]) {
       if (pieces.size() <= i + 1 ||
-          !strings::safe_strto64(pieces[i + 1], &opts->min_micros)) {
+          !strings::safe_strto64(pieces[i + 1], &opts->min_peak_bytes)) {
         return ReturnError(pieces, i);
       }
       ++i;
     } else if (pieces[i] == tensorflow::tfprof::kOptions[3]) {
       if (pieces.size() <= i + 1 ||
-          !strings::safe_strto64(pieces[i + 1], &opts->min_params)) {
+          !strings::safe_strto64(pieces[i + 1], &opts->min_residual_bytes)) {
         return ReturnError(pieces, i);
       }
       ++i;
     } else if (pieces[i] == tensorflow::tfprof::kOptions[4]) {
       if (pieces.size() <= i + 1 ||
-          !strings::safe_strto64(pieces[i + 1], &opts->min_float_ops)) {
+          !strings::safe_strto64(pieces[i + 1], &opts->min_output_bytes)) {
         return ReturnError(pieces, i);
       }
       ++i;
     } else if (pieces[i] == tensorflow::tfprof::kOptions[5]) {
       if (pieces.size() <= i + 1 ||
-          !strings::safe_strto64(pieces[i + 1], &opts->min_occurrence)) {
+          !strings::safe_strto64(pieces[i + 1], &opts->min_micros)) {
         return ReturnError(pieces, i);
       }
       ++i;
     } else if (pieces[i] == tensorflow::tfprof::kOptions[6]) {
       if (pieces.size() <= i + 1 ||
-          !strings::safe_strto64(pieces[i + 1], &opts->step)) {
+          !strings::safe_strto64(pieces[i + 1],
+                                 &opts->min_accelerator_micros)) {
         return ReturnError(pieces, i);
       }
       ++i;
     } else if (pieces[i] == tensorflow::tfprof::kOptions[7]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_cpu_micros)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[8]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_params)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[9]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_float_ops)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[10]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->min_occurrence)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[11]) {
+      if (pieces.size() <= i + 1 ||
+          !strings::safe_strto64(pieces[i + 1], &opts->step)) {
+        return ReturnError(pieces, i);
+      }
+      ++i;
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[12]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
@@ -180,42 +211,42 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd,
       }
       opts->order_by = *order_by;
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[8]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[13]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
       opts->account_type_regexes = str_util::Split(StripQuote(pieces[i + 1]),
                                                    ',', str_util::SkipEmpty());
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[9]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[14]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
       opts->start_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
                                                  str_util::SkipEmpty());
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[10]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[15]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
       opts->trim_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
                                                 str_util::SkipEmpty());
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[11]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[16]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
       opts->show_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
                                                 str_util::SkipEmpty());
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[12]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[17]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
       opts->hide_name_regexes = str_util::Split(StripQuote(pieces[i + 1]), ',',
                                                 str_util::SkipEmpty());
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[13]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[18]) {
       if ((pieces.size() > i + 1 && pieces[i + 1].find("-") == 0) ||
           pieces.size() == i + 1) {
         opts->account_displayed_op_only = true;
@@ -225,7 +256,7 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd,
       } else {
         ++i;
       }
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[14]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[19]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
@@ -242,7 +273,7 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd,
       }
       opts->select = requested_set;
       ++i;
-    } else if (pieces[i] == tensorflow::tfprof::kOptions[15]) {
+    } else if (pieces[i] == tensorflow::tfprof::kOptions[20]) {
       if (pieces.size() <= i + 1) {
         return ReturnError(pieces, i);
       }
diff --git a/tensorflow/core/profiler/profiler.cc b/tensorflow/core/profiler/profiler.cc
index ade478367e9..6acf4ea3773 100644
--- a/tensorflow/core/profiler/profiler.cc
+++ b/tensorflow/core/profiler/profiler.cc
@@ -72,7 +72,12 @@ int Run(int argc, char** argv) {
   string FLAGS_checkpoint_path = "";
   int32 FLAGS_max_depth = 10;
   int64 FLAGS_min_bytes = 0;
+  int64 FLAGS_min_peak_bytes = 0;
+  int64 FLAGS_min_residual_bytes = 0;
+  int64 FLAGS_min_output_bytes = 0;
   int64 FLAGS_min_micros = 0;
+  int64 FLAGS_min_accelerator_micros = 0;
+  int64 FLAGS_min_cpu_micros = 0;
   int64 FLAGS_min_params = 0;
   int64 FLAGS_min_float_ops = 0;
   int64 FLAGS_min_occurrence = 0;
@@ -101,7 +106,14 @@ int Run(int argc, char** argv) {
            "TensorFlow Checkpoint file name"),
       Flag("max_depth", &FLAGS_max_depth, "max depth"),
       Flag("min_bytes", &FLAGS_min_bytes, "min_bytes"),
+      Flag("min_peak_bytes", &FLAGS_min_peak_bytes, "min_peak_bytes"),
+      Flag("min_residual_bytes", &FLAGS_min_residual_bytes,
+           "min_residual_bytes"),
+      Flag("min_output_bytes", &FLAGS_min_output_bytes, "min_output_bytes"),
       Flag("min_micros", &FLAGS_min_micros, "min micros"),
+      Flag("min_accelerator_micros", &FLAGS_min_accelerator_micros,
+           "min accelerator_micros"),
+      Flag("min_cpu_micros", &FLAGS_min_cpu_micros, "min_cpu_micros"),
       Flag("min_params", &FLAGS_min_params, "min params"),
       Flag("min_float_ops", &FLAGS_min_float_ops, "min float ops"),
       Flag("min_occurrence", &FLAGS_min_occurrence, "min occurrence"),
@@ -214,12 +226,14 @@ int Run(int argc, char** argv) {
     return 0;
   }
 
-  Options opts(FLAGS_max_depth, FLAGS_min_bytes, FLAGS_min_micros,
-               FLAGS_min_params, FLAGS_min_float_ops, FLAGS_min_occurrence,
-               FLAGS_step, FLAGS_order_by, account_type_regexes,
-               start_name_regexes, trim_name_regexes, show_name_regexes,
-               hide_name_regexes, FLAGS_account_displayed_op_only, select,
-               output_type, output_options);
+  Options opts(
+      FLAGS_max_depth, FLAGS_min_bytes, FLAGS_min_peak_bytes,
+      FLAGS_min_residual_bytes, FLAGS_min_output_bytes, FLAGS_min_micros,
+      FLAGS_min_accelerator_micros, FLAGS_min_cpu_micros, FLAGS_min_params,
+      FLAGS_min_float_ops, FLAGS_min_occurrence, FLAGS_step, FLAGS_order_by,
+      account_type_regexes, start_name_regexes, trim_name_regexes,
+      show_name_regexes, hide_name_regexes, FLAGS_account_displayed_op_only,
+      select, output_type, output_options);
 
   if (cmd == kCmds[2] || cmd == kCmds[3]) {
     tf_stat.BuildView(cmd);
diff --git a/tensorflow/core/profiler/tfprof_options.proto b/tensorflow/core/profiler/tfprof_options.proto
index 58828330398..b53288d351f 100644
--- a/tensorflow/core/profiler/tfprof_options.proto
+++ b/tensorflow/core/profiler/tfprof_options.proto
@@ -7,7 +7,12 @@ package tensorflow.tfprof;
 message OptionsProto {
   int64 max_depth = 1;
   int64 min_bytes = 2;
+  int64 min_peak_bytes = 19;
+  int64 min_residual_bytes = 20;
+  int64 min_output_bytes = 21;
   int64 min_micros = 3;
+  int64 min_accelerator_micros = 22;
+  int64 min_cpu_micros = 23;
   int64 min_params = 4;
   int64 min_float_ops = 5;
   int64 min_occurrence = 17;
diff --git a/tensorflow/core/profiler/tfprof_output.proto b/tensorflow/core/profiler/tfprof_output.proto
index 5c9f132243a..4a6068da407 100644
--- a/tensorflow/core/profiler/tfprof_output.proto
+++ b/tensorflow/core/profiler/tfprof_output.proto
@@ -28,8 +28,15 @@ message GraphNodeProto {
   int64 accelerator_exec_micros = 17;
   int64 cpu_exec_micros = 18;
 
-  // Total requested bytes by the op.
+  // Total bytes requested by the op.
   int64 requested_bytes = 3;
+  // Max bytes allocated and being used by the op at any point in time.
+  int64 peak_bytes = 24;
+  // Total bytes requested by the op and not released before end.
+  int64 residual_bytes = 25;
+  // Total bytes output by the op (not necessarily allocated by the op).
+  int64 output_bytes = 26;
+
   // Number of parameters if available.
   int64 parameters = 4;
   // Number of float operations.
@@ -49,6 +56,10 @@ message GraphNodeProto {
   int64 total_cpu_exec_micros = 20;
 
   int64 total_requested_bytes = 7;
+  int64 total_peak_bytes = 27;
+  int64 total_residual_bytes = 28;
+  int64 total_output_bytes = 29;
+
   int64 total_parameters = 8;
   int64 total_float_ops = 14;
 
@@ -81,6 +92,13 @@ message MultiGraphNodeProto {
 
   // Total requested bytes by the code.
   int64 requested_bytes = 3;
+  // Max bytes allocated and being used by the op at any point in time.
+  int64 peak_bytes = 16;
+  // Total bytes requested by the op and not released before end.
+  int64 residual_bytes = 17;
+  // Total bytes output by the op (not necessarily allocated by the op).
+  int64 output_bytes = 18;
+
   // Number of parameters if available.
   int64 parameters = 4;
   // Number of float operations.
@@ -93,6 +111,10 @@ message MultiGraphNodeProto {
   int64 total_cpu_exec_micros = 15;
 
   int64 total_requested_bytes = 7;
+  int64 total_peak_bytes = 19;
+  int64 total_residual_bytes = 20;
+  int64 total_output_bytes = 21;
+
   int64 total_parameters = 8;
   int64 total_float_ops = 9;
 
diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py
index 2268472fdda..eb95af6a282 100644
--- a/tensorflow/python/profiler/model_analyzer.py
+++ b/tensorflow/python/profiler/model_analyzer.py
@@ -53,7 +53,12 @@ def _build_options(options):
   opts = tfprof_options_pb2.OptionsProto()
   opts.max_depth = options.get('max_depth', 10)
   opts.min_bytes = options.get('min_bytes', 0)
+  opts.min_peak_bytes = options.get('min_peak_bytes', 0)
+  opts.min_residual_bytes = options.get('min_residual_bytes', 0)
+  opts.min_output_bytes = options.get('min_output_bytes', 0)
   opts.min_micros = options.get('min_micros', 0)
+  opts.min_accelerator_micros = options.get('min_accelerator_micros', 0)
+  opts.min_cpu_micros = options.get('min_cpu_micros', 0)
   opts.min_params = options.get('min_params', 0)
   opts.min_float_ops = options.get('min_float_ops', 0)
   opts.min_occurrence = options.get('min_occurrence', 0)
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 7c55f9eaf32..21d26b87828 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 import gzip
 import io
 import os
+import random
 
 from tensorflow.core.profiler import profile_pb2
 from tensorflow.core.protobuf import config_pb2
@@ -118,7 +119,7 @@ class PrintModelAnalysisTest(test.TestCase):
       with gfile.Open(outfile, 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            'node name | output bytes | # parameters | # float_ops | assigned devices | input',
+            'node name | requested bytes | # parameters | # float_ops | assigned devices | in',
             f.read()[0:80])
         # pylint: enable=line-too-long
 
@@ -243,7 +244,9 @@ class PrintModelAnalysisTest(test.TestCase):
             .with_accounted_types(['.*'])
             .with_min_occurrence(10)
             .order_by('occurrence')
-            .select(['params', 'micros', 'occurrence', 'input_shapes']).build())
+            .select(['params', 'micros', 'bytes',
+                     'peak_bytes', 'residual_bytes',
+                     'output_bytes', 'occurrence', 'input_shapes']).build())
 
     with session.Session() as sess:
       x = lib.BuildFullModel()
@@ -261,8 +264,8 @@ class PrintModelAnalysisTest(test.TestCase):
       with gfile.Open(outfile, 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            'nodename|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\n',
-            f.read().replace('\t', '').replace(' ', '')[0:120])
+            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\nConst0B(0',
+            f.read().replace('\t', '').replace(' ', '')[0:180])
         # pylint: enable=line-too-long
 
       total_children = 0
@@ -370,6 +373,123 @@ class PrintModelAnalysisTest(test.TestCase):
     for attr in ['op_types', 'device', 'input_shapes']:
       self.pprof_test_helper(attr, True)
 
+  def testMinOption(self):
+    ops.reset_default_graph()
+    # check_min recursively asserts each threshold that was passed as > 0.
+    def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
+      for n in nodes:
+        if mm > 0:
+          self.assertGreaterEqual(n.exec_micros, mm)
+        if mam > 0:
+          self.assertGreaterEqual(n.accelerator_exec_micros, mam)
+        if mcm > 0:
+          self.assertGreaterEqual(n.cpu_exec_micros, mcm)
+        if mb > 0:
+          self.assertGreaterEqual(n.requested_bytes, mb)
+        if mpb > 0:
+          self.assertGreaterEqual(n.peak_bytes, mpb)
+        if mrb > 0:
+          self.assertGreaterEqual(n.residual_bytes, mrb)
+        if mob > 0:
+          self.assertGreaterEqual(n.output_bytes, mob)
+        check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)
+
+    with session.Session() as sess:
+      x = lib.BuildSmallModel()
+      sess.run(variables.global_variables_initializer())
+      run_meta = config_pb2.RunMetadata()
+      _ = sess.run(x,
+                   options=config_pb2.RunOptions(
+                       trace_level=config_pb2.RunOptions.FULL_TRACE),
+                   run_metadata=run_meta)
+      # Start at 1: with min_val == 0 every assertion in check_min is skipped.
+      min_val = random.randint(1, 10000)
+
+      opts = builder(builder.time_and_memory(min_micros=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mm=min_val)
+
+      opts = builder(builder.time_and_memory(min_accelerator_micros=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mam=min_val)
+
+      opts = builder(builder.time_and_memory(min_cpu_micros=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mcm=min_val)
+
+      opts = builder(builder.time_and_memory(min_bytes=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mb=min_val)
+
+      opts = builder(builder.time_and_memory(min_peak_bytes=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mpb=min_val)
+
+      opts = builder(builder.time_and_memory(min_residual_bytes=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mrb=min_val)
+
+      opts = builder(builder.time_and_memory(min_output_bytes=min_val)
+                    ).with_empty_output().build()
+      tfprof_node = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_min(tfprof_node.children, mob=min_val)
+
+  def testSelectOption(self):
+    ops.reset_default_graph()
+    outfile = os.path.join(test.get_temp_dir(), 'dump')
+    # check_selection verifies which column legends appear in the dump file.
+    def check_selection(selected, not_selected):
+      with gfile.Open(outfile, 'r') as f:
+        s = f.read()
+        for attr in selected:
+          self.assertIn(attr, s)  # Also matches a legend at offset 0.
+        for attr in not_selected:
+          self.assertNotIn(attr, s)
+
+    with session.Session() as sess:
+      x = lib.BuildSmallModel()
+      sess.run(variables.global_variables_initializer())
+      run_meta = config_pb2.RunMetadata()
+      _ = sess.run(x,
+                   options=config_pb2.RunOptions(
+                       trace_level=config_pb2.RunOptions.FULL_TRACE),
+                   run_metadata=run_meta)
+
+      opts = builder(builder.time_and_memory()
+                    ).with_file_output(outfile).select(['micros']).build()
+      _ = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_selection(['total execution time', 'accelerator execution time'],
+                      ['bytes'])
+
+      opts = builder(builder.time_and_memory()
+                    ).with_file_output(outfile).select(['bytes']).build()
+      _ = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_selection(['requested bytes'],
+                      ['peak bytes', 'residual bytes', 'output bytes'])
+
+      opts = builder(builder.time_and_memory()).with_file_output(
+          outfile).select(
+              ['peak_bytes', 'residual_bytes', 'output_bytes']).build()
+      _ = model_analyzer.profile(
+          sess.graph, run_meta=run_meta, options=opts)
+      check_selection(['peak bytes', 'residual bytes', 'output bytes'],
+                      ['requested bytes'])
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/profiler/option_builder.py b/tensorflow/python/profiler/option_builder.py
index 1a91060da8a..e2e022425dd 100644
--- a/tensorflow/python/profiler/option_builder.py
+++ b/tensorflow/python/profiler/option_builder.py
@@ -139,18 +139,41 @@ class ProfileOptionBuilder(object):
             'output': 'stdout'}
 
   @staticmethod
-  def time_and_memory(min_micros=1, min_bytes=1):
+  def time_and_memory(min_micros=1, min_bytes=1, min_accelerator_micros=0,
+                      min_cpu_micros=0, min_peak_bytes=0, min_residual_bytes=0,
+                      min_output_bytes=0):
     """Show operation time and memory consumptions.
 
     Args:
-      min_micros: Only show profiler nodes with more execution time than this.
-      min_bytes: Only show profiler nodes consuming more memory than this.
+      min_micros: Only show profiler nodes with execution time
+          no less than this. It sums accelerator and cpu times.
+      min_bytes: Only show profiler nodes that requested to allocate at
+          least this many bytes.
+      min_accelerator_micros: Only show profiler nodes that spend no less
+          than this time on accelerator (e.g. GPU).
+      min_cpu_micros: Only show profiler nodes that spend no less than
+          this time on cpu.
+      min_peak_bytes: Only show profiler nodes using at least this many bytes
+          at peak (high watermark). For profiler nodes consisting of multiple
+          graph nodes, it sums the graph nodes' peak_bytes.
+      min_residual_bytes: Only show profiler nodes with at least this many
+          bytes not de-allocated after Compute() ends. For profiler nodes
+          consisting of multiple graph nodes, it sums the graph nodes'
+          residual_bytes.
+      min_output_bytes: Only show profiler nodes that output at least this
+          many bytes. The output is not necessarily allocated by this
+          profiler node.
     Returns:
       A dict of profiling options.
     """
     return {'max_depth': 10000,
             'min_bytes': min_bytes,
+            'min_peak_bytes': min_peak_bytes,
+            'min_residual_bytes': min_residual_bytes,
+            'min_output_bytes': min_output_bytes,
             'min_micros': min_micros,
+            'min_accelerator_micros': min_accelerator_micros,
+            'min_cpu_micros': min_cpu_micros,
             'min_params': 0,
             'min_float_ops': 0,
             'min_occurrence': 0,
@@ -188,28 +211,54 @@ class ProfileOptionBuilder(object):
     self._options['max_depth'] = max_depth
     return self
 
-  def with_min_memory(self, min_bytes):
+  def with_min_memory(self,
+                      min_bytes=0,
+                      min_peak_bytes=0,
+                      min_residual_bytes=0,
+                      min_output_bytes=0):
     """Only show profiler nodes consuming no less than 'min_bytes'.
 
     Args:
-      min_bytes: Only show profiler nodes with memory consumption
-          no less than this.
+      min_bytes: Only show profiler nodes that requested to allocate at
+          least this many bytes.
+      min_peak_bytes: Only show profiler nodes using at least this many bytes
+          at peak (high watermark). For profiler nodes consisting of multiple
+          graph nodes, it sums the graph nodes' peak_bytes.
+      min_residual_bytes: Only show profiler nodes with at least this many
+          bytes not de-allocated after Compute() ends. For profiler nodes
+          consisting of multiple graph nodes, it sums the graph nodes'
+          residual_bytes.
+      min_output_bytes: Only show profiler nodes that output at least this
+          many bytes. The output is not necessarily allocated by this
+          profiler node.
     Returns:
       self
     """
     self._options['min_bytes'] = min_bytes
+    self._options['min_peak_bytes'] = min_peak_bytes
+    self._options['min_residual_bytes'] = min_residual_bytes
+    self._options['min_output_bytes'] = min_output_bytes
     return self
 
-  def with_min_execution_time(self, min_micros):
+  def with_min_execution_time(self,
+                              min_micros=0,
+                              min_accelerator_micros=0,
+                              min_cpu_micros=0):
     """Only show profiler nodes consuming no less than 'min_micros'.
 
     Args:
       min_micros: Only show profiler nodes with execution time
-          no less than this.
+          no less than this. It sums accelerator and cpu times.
+      min_accelerator_micros: Only show profiler nodes that spend no less
+          than this time on accelerator (e.g. GPU).
+      min_cpu_micros: Only show profiler nodes that spend no less than
+          this time on cpu.
     Returns:
       self
     """
     self._options['min_micros'] = min_micros
+    self._options['min_accelerator_micros'] = min_accelerator_micros
+    self._options['min_cpu_micros'] = min_cpu_micros
     return self
 
   def with_min_parameters(self, min_params):
diff --git a/tensorflow/python/profiler/profiler_test.py b/tensorflow/python/profiler/profiler_test.py
index 7d30c29264f..2170e1bdeae 100644
--- a/tensorflow/python/profiler/profiler_test.py
+++ b/tensorflow/python/profiler/profiler_test.py
@@ -118,7 +118,7 @@ class ProfilerTest(test.TestCase):
 
   def testMultiStepProfile(self):
     ops.reset_default_graph()
-    opts = builder.time_and_memory()
+    opts = builder.time_and_memory(min_bytes=0)
 
     with session.Session() as sess:
       r1, r2, r3 = lib.BuildSplitableModel()
diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt
index f3bb71354e5..b80896a8a0f 100644
--- a/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt
@@ -46,14 +46,26 @@ tf_class {
     name: "NAME_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "OUTPUT_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "PARAMETERS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "PEAK_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "REQUESTED_BYTES_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "RESIDUAL_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "RUN_COUNT_FIELD_NUMBER"
     mtype: "<type \'int\'>"
@@ -86,14 +98,26 @@ tf_class {
     name: "TOTAL_FLOAT_OPS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "TOTAL_OUTPUT_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "TOTAL_PARAMETERS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "TOTAL_PEAK_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "TOTAL_REQUESTED_BYTES_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "TOTAL_RESIDUAL_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "TOTAL_RUN_COUNT_FIELD_NUMBER"
     mtype: "<type \'int\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt
index 9b88a11b2c3..33deff64979 100644
--- a/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt
@@ -38,14 +38,26 @@ tf_class {
     name: "NAME_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "OUTPUT_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "PARAMETERS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "PEAK_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "REQUESTED_BYTES_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "RESIDUAL_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "TOTAL_ACCELERATOR_EXEC_MICROS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
@@ -62,14 +74,26 @@ tf_class {
     name: "TOTAL_FLOAT_OPS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "TOTAL_OUTPUT_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "TOTAL_PARAMETERS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "TOTAL_PEAK_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "TOTAL_REQUESTED_BYTES_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "TOTAL_RESIDUAL_BYTES_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member_method {
     name: "ByteSize"
   }
diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt
index 862b2aa7cc0..19ff38a3900 100644
--- a/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt
@@ -28,7 +28,7 @@ tf_class {
   }
   member_method {
     name: "time_and_memory"
-    argspec: "args=[\'min_micros\', \'min_bytes\'], varargs=None, keywords=None, defaults=[\'1\', \'1\'], "
+    argspec: "args=[\'min_micros\', \'min_bytes\', \'min_accelerator_micros\', \'min_cpu_micros\', \'min_peak_bytes\', \'min_residual_bytes\', \'min_output_bytes\'], varargs=None, keywords=None, defaults=[\'1\', \'1\', \'0\', \'0\', \'0\', \'0\', \'0\'], "
   }
   member_method {
     name: "trainable_variables_parameter"
@@ -52,7 +52,7 @@ tf_class {
   }
   member_method {
     name: "with_min_execution_time"
-    argspec: "args=[\'self\', \'min_micros\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'min_micros\', \'min_accelerator_micros\', \'min_cpu_micros\'], varargs=None, keywords=None, defaults=[\'0\', \'0\', \'0\'], "
   }
   member_method {
     name: "with_min_float_operations"
@@ -60,7 +60,7 @@ tf_class {
   }
   member_method {
     name: "with_min_memory"
-    argspec: "args=[\'self\', \'min_bytes\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'min_bytes\', \'min_peak_bytes\', \'min_residual_bytes\', \'min_output_bytes\'], varargs=None, keywords=None, defaults=[\'0\', \'0\', \'0\', \'0\'], "
   }
   member_method {
     name: "with_min_occurrence"