diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD
index 81bc222e119..80acf39aaed 100644
--- a/tensorflow/core/profiler/protobuf/BUILD
+++ b/tensorflow/core/profiler/protobuf/BUILD
@@ -148,3 +148,10 @@ tf_proto_library(
     cc_api_version = 2,
     visibility = [":friends"],
 )
+
+tf_proto_library(
+    name = "tf_data_stats_proto",
+    srcs = ["tf_data_stats.proto"],
+    cc_api_version = 2,
+    visibility = [":friends"],
+)
diff --git a/tensorflow/core/profiler/protobuf/tf_data_stats.proto b/tensorflow/core/profiler/protobuf/tf_data_stats.proto
new file mode 100644
index 00000000000..f4edc4144d4
--- /dev/null
+++ b/tensorflow/core/profiler/protobuf/tf_data_stats.proto
@@ -0,0 +1,89 @@
+// This proto describes the format of the output profile file from
+// the tf.data stats tool.
+syntax = "proto3";
+
+package tensorflow.profiler;
+
+// Stat for iterator.
+message IteratorStat {
+  // Id of the iterator.
+  int64 id = 1;
+  // Start time of the iterator's GetNext in ps.
+  int64 start_time_ps = 2;
+  // Duration of the iterator's GetNext in ps.
+  int64 duration_ps = 3;
+  // Self time of the iterator's GetNext in ps. It takes account into async
+  // iterators. It is calculated by subtracting the time overlapped with its
+  // child iterator's duration from the iterator's duration.
+  int64 self_time_ps = 4;
+  // Whether it is blocking the root iterator. An async iterator's child
+  // iterator may not block its parent iterator if it is executed in advance and
+  // does not overlap with the parent iterator.
+  bool is_blocking = 5;
+  // The number of times this iterator is called. For example, a batch
+  // iterator's child iterator may be called multiple times.
+  int64 num_calls = 6;
+}
+
+// Metadata for iterator.
+message IteratorMetadata {
+  // Id of the iterator.
+  int64 id = 1;
+  // Id of the parent iterator.
+  int64 parent_id = 2;
+  // Name of the iterator.
+  string name = 3;
+  // Long name of the iterator.
+  string long_name = 6;
+  // Whether it is an async iterator.
+  bool is_async = 4;
+  // Parameters of the iterator (e.g., num_parallel_calls).
+  map<string, string> params = 5;
+}
+
+// Stat and metadata for input pipeline.
+message InputPipelineStat {
+  // Id of the blocking iterator with the longest self time.
+  int64 bottleneck_iterator_id = 2;
+  // Stats per iterator.
+  map<int64, IteratorStat> iterator_stats = 1;
+}
+
+// Metadata for input pipeline.
+message InputPipelineMetadata {
+  // The distribution strategy creates one "host" input pipeline which actually
+  // runs tf.data user code. Also, it creates a "device" input pipeline per
+  // device (e.g., TensorCore) which takes an element from the host input
+  // pipeline and transfers it to the device.
+  enum InputPipelineType {
+    HOST = 0;
+    DEVICE = 1;
+  }
+  // Id of the input pipeline which is set to the id of its root iterator.
+  int64 id = 1;
+  InputPipelineType type = 2;
+  string name = 4;
+  reserved 3;
+}
+
+// Collection of metadata and stats of input pipeline.
+message InputPipelineStats {
+  // Metadata of the input pipeline.
+  InputPipelineMetadata metadata = 1;
+  // Average latency (i.e., the root iterator's latency) of the input pipeline.
+  int64 avg_latency_ps = 3;
+  // Minimum latency of the input pipeline.
+  int64 min_latency_ps = 4;
+  // Maximum latency of the input pipeline.
+  int64 max_latency_ps = 5;
+  // Stats per call sorted by the root iterator's duration.
+  repeated InputPipelineStat stats = 2;
+}
+
+// Collection of stats of tf.data input pipelines within a host.
+message TfDataStats {
+  // Metadata per iterator.
+  map<int64, IteratorMetadata> iterator_metadata = 2;
+  // Stats per input pipeline.
+  map<int64, InputPipelineStats> input_pipelines = 1;
+}