Export public symbols for on demand profiling APIs.
PiperOrigin-RevId: 296064710
Change-Id: I3a3b549fb59fe9ecbfc6c07ba809b0c1732932e4
parent 07827aafe7 · commit 439595440b
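The hunks below wire the profiler client into the public `tf.profiler.experimental.client` namespace and expand its docstrings. As orientation, here is a minimal end-to-end sketch of the on-demand profiling workflow assembled from the docstring examples in this diff; the port, addresses, and log directories are the placeholder values used in those docstrings, not fixed requirements.

```python
import tensorflow as tf

# In the job you want to profile: start a profiler server so it can accept
# on-demand requests (port 6009 is a placeholder from the docstring examples).
tf.profiler.experimental.server.start(6009)
# ... run your model ...

# From any machine that can reach that job: collect a 2-second trace and write
# it to a TensorBoard logdir, then view it under the TensorBoard Profile plugin.
tf.profiler.experimental.client.trace('grpc://localhost:6009', '/tmp/tb_log', 2000)

# Optionally poll a lightweight performance summary (Cloud TPU only).
print(tf.profiler.experimental.client.monitor('grpc://10.0.0.2:8466', 1000))
```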
@@ -204,6 +204,7 @@ py_library(
        "//tensorflow/python/ops/ragged",
        "//tensorflow/python/ops/signal",
        "//tensorflow/python/profiler",
+       "//tensorflow/python/profiler:profiler_client",
        "//tensorflow/python/profiler:profiler_v2",
        "//tensorflow/python/saved_model",
        "//tensorflow/python/tools:module_util",
@@ -111,6 +111,7 @@ from tensorflow.python.ops.linalg.sparse import sparse
from tensorflow.python.ops.losses import losses
from tensorflow.python.ops.signal import signal
from tensorflow.python.profiler import profiler
+from tensorflow.python.profiler import profiler_client
from tensorflow.python.profiler import profiler_v2
from tensorflow.python.saved_model import saved_model
from tensorflow.python.summary import summary
@@ -26,6 +26,7 @@ py_library(
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python:c_api_util",
        "//tensorflow/python:util",
        "//tensorflow/python/profiler/internal:_pywrap_profiler",
    ],
)
@@ -20,7 +20,12 @@ from __future__ import print_function

from tensorflow.python.profiler.internal import _pywrap_profiler

+from tensorflow.python.util.tf_export import tf_export

+_GRPC_PREFIX = 'grpc://'


+@tf_export('profiler.experimental.client.trace', v1=[])
def trace(service_addr,
          logdir,
          duration_ms,
@@ -28,10 +33,15 @@ def trace(service_addr,
          num_tracing_attempts=3):
  """Sends grpc requests to profiler server to perform on-demand profiling.

-  This method will block caller thread until receives tracing result.
+  This method will block caller thread until it receives tracing result. This
+  method supports CPU, GPU, and Cloud TPU. This method supports profiling a
+  single host for CPU, GPU, TPU, as well as multiple TPU workers.
+  The profiled results will be saved to your specified TensorBoard log
+  directory (e.g. the directory you save your model checkpoints). Use the
+  TensorBoard profile plugin to view the visualization and analysis results.

  Args:
-    service_addr: Address of profiler service e.g. localhost:6009.
+    service_addr: gRPC address of profiler service e.g. grpc://localhost:6009.
    logdir: Path of TensorBoard log directory e.g. /tmp/tb_log.
    duration_ms: Duration of tracing or monitoring in ms.
    worker_list: Optional. The list of workers that we are about to profile in
@@ -41,23 +51,75 @@ def trace(service_addr,

  Raises:
    UnavailableError: If no trace event is collected.

+  Example usage (CPU/GPU):
+  # Start a profiler server before your model runs.
+  ```python
+  tf.profiler.experimental.server.start(6009)
+  # your model code.
+  # Send gRPC request to the profiler server to collect a trace of your model.
+  ```python
+  tf.profiler.experimental.client.trace('grpc://localhost:6009',
+                                        '/tmp/tb_log', 2000)
+
+  Example usage (TPU):
+  # Send gRPC request to a TPU worker to collect a trace of your model. A
+  # profiler service has been started in the TPU worker at port 8466.
+  ```python
+  # E.g. your TPU IP address is 10.0.0.2 and you want to profile for 2 seconds.
+  tf.profiler.experimental.client.trace('grpc://10.0.0.2:8466',
+                                        'gs://your_tb_dir', 2000)
+
+  Example usage (Multiple TPUs):
+  # Send gRPC request to a TPU pod to collect a trace of your model on multiple
+  # TPUs. A profiler service has been started in all the TPU workers at the
+  # port 8466.
+  ```python
+  # E.g. your TPU IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you want to
+  # profile for 2 seconds.
+  tf.profiler.experimental.client.trace('grpc://10.0.0.2:8466',
+                                        'gs://your_tb_dir',
+                                        2000, '10.0.0.3,10.0.0.4')
+
+  Launch TensorBoard and point it to the same logdir you provided to this API.
+  $ tensorboard --logdir=/tmp/tb_log (or gs://your_tb_dir in the above examples)
+  Open your browser and go to localhost:6006/#profile to view profiling results.
+
  """
-  _pywrap_profiler.trace(service_addr, logdir, worker_list, True, duration_ms,
-                         num_tracing_attempts)
+  _pywrap_profiler.trace(
+      _strip_prefix(service_addr, _GRPC_PREFIX), logdir, worker_list, True,
+      duration_ms, num_tracing_attempts)


+@tf_export('profiler.experimental.client.monitor', v1=[])
def monitor(service_addr, duration_ms, level=1):
  """Sends grpc requests to profiler server to perform on-demand monitoring.

-  This method will block caller thread until receives monitoring result.
+  The monitoring result is a light weight performance summary of your model
+  execution. This method will block the caller thread until it receives the
+  monitoring result. This method currently supports Cloud TPU only.

  Args:
-    service_addr: Address of profiler service e.g. localhost:6009.
+    service_addr: gRPC address of profiler service e.g. grpc://10.0.0.2:8466.
    duration_ms: Duration of monitoring in ms.
    level: Choose a monitoring level between 1 and 2 to monitor your job. Level
      2 is more verbose than level 1 and shows more metrics.

  Returns:
    A string of monitoring output.
+
+  Example usage:
+  # Continuously send gRPC requests to the Cloud TPU to monitor the model
+  # execution.
+  ```python
+  for query in range(0, 100):
+    print(tf.profiler.experimental.client.monitor('grpc://10.0.0.2:8466', 1000))
+
  """
-  return _pywrap_profiler.monitor(service_addr, duration_ms, level, True)
+  return _pywrap_profiler.monitor(
+      _strip_prefix(service_addr, _GRPC_PREFIX), duration_ms, level, True)
+
+
+def _strip_prefix(s, prefix):
+  return s[len(prefix):] if s.startswith(prefix) else s
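The new `_strip_prefix` helper above is why both spellings of the address work: a `grpc://` URL (as the updated docstrings recommend) or a bare `host:port` are reduced to the bare address before being handed to `_pywrap_profiler`. A small standalone illustration (the addresses are just examples):

```python
_GRPC_PREFIX = 'grpc://'

def _strip_prefix(s, prefix):
  return s[len(prefix):] if s.startswith(prefix) else s

# Both spellings reach the pywrap bindings as 'localhost:6009'.
assert _strip_prefix('grpc://localhost:6009', _GRPC_PREFIX) == 'localhost:6009'
assert _strip_prefix('localhost:6009', _GRPC_PREFIX) == 'localhost:6009'
```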
@@ -50,6 +50,7 @@ TENSORFLOW_API_INIT_FILES = [
    "nn/__init__.py",
    "profiler/__init__.py",
    "profiler/experimental/__init__.py",
+   "profiler/experimental/client/__init__.py",
    "profiler/experimental/server/__init__.py",
    "quantization/__init__.py",
    "ragged/__init__.py",
@@ -0,0 +1,11 @@
+path: "tensorflow.profiler.experimental.client"
+tf_module {
+  member_method {
+    name: "monitor"
+    argspec: "args=[\'service_addr\', \'duration_ms\', \'level\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "trace"
+    argspec: "args=[\'service_addr\', \'logdir\', \'duration_ms\', \'worker_list\', \'num_tracing_attempts\'], varargs=None, keywords=None, defaults=[\'\', \'3\'], "
+  }
+}
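The argspecs in this new golden file pin down the exported signatures: `monitor` defaults `level` to 1, while `trace` defaults `worker_list` to an empty string and `num_tracing_attempts` to 3. For illustration, spelling those defaults out explicitly (addresses and logdir are placeholders) gives calls equivalent to the docstring examples above:

```python
import tensorflow as tf

# Equivalent to trace('grpc://10.0.0.2:8466', 'gs://your_tb_dir', 2000).
tf.profiler.experimental.client.trace('grpc://10.0.0.2:8466', 'gs://your_tb_dir',
                                      2000, worker_list='', num_tracing_attempts=3)

# Equivalent to monitor('grpc://10.0.0.2:8466', 1000).
print(tf.profiler.experimental.client.monitor('grpc://10.0.0.2:8466', 1000, level=1))
```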
@@ -4,6 +4,10 @@ tf_module {
    name: "Profile"
    mtype: "<type \'type\'>"
  }
+  member {
+    name: "client"
+    mtype: "<type \'module\'>"
+  }
  member {
    name: "server"
    mtype: "<type \'module\'>"