From 439595440b378c2b87c4a0159e86e5ba694687c9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 19 Feb 2020 14:57:57 -0800 Subject: [PATCH] Export public symbols for on demand profiling APIs. PiperOrigin-RevId: 296064710 Change-Id: I3a3b549fb59fe9ecbfc6c07ba809b0c1732932e4 --- tensorflow/python/BUILD | 1 + tensorflow/python/__init__.py | 1 + tensorflow/python/profiler/BUILD | 1 + tensorflow/python/profiler/profiler_client.py | 76 +++++++++++++++++-- .../tools/api/generator/api_init_files.bzl | 1 + ...sorflow.profiler.experimental.client.pbtxt | 11 +++ .../v2/tensorflow.profiler.experimental.pbtxt | 4 + 7 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.client.pbtxt diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 583d16e7b26..15d21d34bc5 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -204,6 +204,7 @@ py_library( "//tensorflow/python/ops/ragged", "//tensorflow/python/ops/signal", "//tensorflow/python/profiler", + "//tensorflow/python/profiler:profiler_client", "//tensorflow/python/profiler:profiler_v2", "//tensorflow/python/saved_model", "//tensorflow/python/tools:module_util", diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 7a9eac7931e..6d88cb566ae 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -111,6 +111,7 @@ from tensorflow.python.ops.linalg.sparse import sparse from tensorflow.python.ops.losses import losses from tensorflow.python.ops.signal import signal from tensorflow.python.profiler import profiler +from tensorflow.python.profiler import profiler_client from tensorflow.python.profiler import profiler_v2 from tensorflow.python.saved_model import saved_model from tensorflow.python.summary import summary diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD index 6c2abbd1f4b..2566b8b48c6 100644 --- a/tensorflow/python/profiler/BUILD +++ b/tensorflow/python/profiler/BUILD @@ -26,6 +26,7 @@ py_library( srcs_version = "PY2AND3", deps = [ "//tensorflow/python:c_api_util", + "//tensorflow/python:util", "//tensorflow/python/profiler/internal:_pywrap_profiler", ], ) diff --git a/tensorflow/python/profiler/profiler_client.py b/tensorflow/python/profiler/profiler_client.py index d67c275aebf..d8856c48c53 100644 --- a/tensorflow/python/profiler/profiler_client.py +++ b/tensorflow/python/profiler/profiler_client.py @@ -20,7 +20,12 @@ from __future__ import print_function from tensorflow.python.profiler.internal import _pywrap_profiler +from tensorflow.python.util.tf_export import tf_export +_GRPC_PREFIX = 'grpc://' + + +@tf_export('profiler.experimental.client.trace', v1=[]) def trace(service_addr, logdir, duration_ms, @@ -28,10 +33,15 @@ def trace(service_addr, num_tracing_attempts=3): """Sends grpc requests to profiler server to perform on-demand profiling. - This method will block caller thread until receives tracing result. + This method will block caller thread until it receives tracing result. This + method supports CPU, GPU, and Cloud TPU. This method supports profiling a + single host for CPU, GPU, TPU, as well as multiple TPU workers. + The profiled results will be saved to your specified TensorBoard log + directory (e.g. the directory you save your model checkpoints). Use the + TensorBoard profile plugin to view the visualization and analysis results. Args: - service_addr: Address of profiler service e.g. localhost:6009. + service_addr: gRPC address of profiler service e.g. grpc://localhost:6009. logdir: Path of TensorBoard log directory e.g. /tmp/tb_log. duration_ms: Duration of tracing or monitoring in ms. worker_list: Optional. The list of workers that we are about to profile in @@ -41,23 +51,75 @@ def trace(service_addr, Raises: UnavailableError: If no trace event is collected. + + Example usage (CPU/GPU): + # Start a profiler server before your model runs. + ```python + tf.profiler.experimental.server.start(6009) + # your model code. + # Send gRPC request to the profiler server to collect a trace of your model. + ```python + tf.profiler.experimental.client.trace('grpc://localhost:6009', + '/tmp/tb_log', 2000) + + Example usage (TPU): + # Send gRPC request to a TPU worker to collect a trace of your model. A + # profiler service has been started in the TPU worker at port 8466. + ```python + # E.g. your TPU IP address is 10.0.0.2 and you want to profile for 2 seconds. + tf.profiler.experimental.client.trace('grpc://10.0.0.2:8466', + 'gs://your_tb_dir', 2000) + + Example usage (Multiple TPUs): + # Send gRPC request to a TPU pod to collect a trace of your model on multiple + # TPUs. A profiler service has been started in all the TPU workers at the + # port 8466. + ```python + # E.g. your TPU IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you want to + # profile for 2 seconds. + tf.profiler.experimental.client.trace('grpc://10.0.0.2:8466', + 'gs://your_tb_dir', + 2000, '10.0.0.3,10.0.0.4') + + Launch TensorBoard and point it to the same logdir you provided to this API. + $ tensorboard --logdir=/tmp/tb_log (or gs://your_tb_dir in the above examples) + Open your browser and go to localhost:6006/#profile to view profiling results. + """ - _pywrap_profiler.trace(service_addr, logdir, worker_list, True, duration_ms, - num_tracing_attempts) + _pywrap_profiler.trace( + _strip_prefix(service_addr, _GRPC_PREFIX), logdir, worker_list, True, + duration_ms, num_tracing_attempts) +@tf_export('profiler.experimental.client.monitor', v1=[]) def monitor(service_addr, duration_ms, level=1): """Sends grpc requests to profiler server to perform on-demand monitoring. - This method will block caller thread until receives monitoring result. + The monitoring result is a light weight performance summary of your model + execution. This method will block the caller thread until it receives the + monitoring result. This method currently supports Cloud TPU only. Args: - service_addr: Address of profiler service e.g. localhost:6009. + service_addr: gRPC address of profiler service e.g. grpc://10.0.0.2:8466. duration_ms: Duration of monitoring in ms. level: Choose a monitoring level between 1 and 2 to monitor your job. Level 2 is more verbose than level 1 and shows more metrics. Returns: A string of monitoring output. + + Example usage: + # Continuously send gRPC requests to the Cloud TPU to monitor the model + # execution. + ```python + for query in range(0, 100): + print(tf.profiler.experimental.client.monitor('grpc://10.0.0.2:8466', 1000)) + + """ - return _pywrap_profiler.monitor(service_addr, duration_ms, level, True) + return _pywrap_profiler.monitor( + _strip_prefix(service_addr, _GRPC_PREFIX), duration_ms, level, True) + + +def _strip_prefix(s, prefix): + return s[len(prefix):] if s.startswith(prefix) else s diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl index 8542c745bb4..3aab59e50aa 100644 --- a/tensorflow/python/tools/api/generator/api_init_files.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files.bzl @@ -50,6 +50,7 @@ TENSORFLOW_API_INIT_FILES = [ "nn/__init__.py", "profiler/__init__.py", "profiler/experimental/__init__.py", + "profiler/experimental/client/__init__.py", "profiler/experimental/server/__init__.py", "quantization/__init__.py", "ragged/__init__.py", diff --git a/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.client.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.client.pbtxt new file mode 100644 index 00000000000..4b44f126be8 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.client.pbtxt @@ -0,0 +1,11 @@ +path: "tensorflow.profiler.experimental.client" +tf_module { + member_method { + name: "monitor" + argspec: "args=[\'service_addr\', \'duration_ms\', \'level\'], varargs=None, keywords=None, defaults=[\'1\'], " + } + member_method { + name: "trace" + argspec: "args=[\'service_addr\', \'logdir\', \'duration_ms\', \'worker_list\', \'num_tracing_attempts\'], varargs=None, keywords=None, defaults=[\'\', \'3\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.pbtxt index 9c503abf268..2823f422b85 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.profiler.experimental.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "Profile" mtype: "" } + member { + name: "client" + mtype: "" + } member { name: "server" mtype: ""