From 8ee3640e160ab58b146693267ee8c9e36e6152f8 Mon Sep 17 00:00:00 2001
From: Yi Situ
Date: Wed, 16 Sep 2020 21:15:27 -0700
Subject: [PATCH] Add a LocalProfiler class for target side profiling. Part 1/2

Part 1 (this) adds the class.
Part 2 will introduce a delayed start.

### LocalProfiler
* Encapsulates multiple profiler backends that each implement
  ProfilerInterface.
* Coordinates Start/Stop/Serialize operations on multiple profilers.
* Owns a profiler lock such that only one instantiation is permitted.

PiperOrigin-RevId: 332153466
Change-Id: I86e2e4c1b6729f517bd452cb8f7cf6c0a8ae8612
---
 tensorflow/core/profiler/lib/BUILD            |  57 +++++
 .../core/profiler/lib/local_profiler.cc       | 185 ++++++++++++++++++
 tensorflow/core/profiler/lib/local_profiler.h | 101 ++++++++++
 3 files changed, 343 insertions(+)
 create mode 100644 tensorflow/core/profiler/lib/local_profiler.cc
 create mode 100644 tensorflow/core/profiler/lib/local_profiler.h

diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD
index 827143c0bd9..67eb9923986 100644
--- a/tensorflow/core/profiler/lib/BUILD
+++ b/tensorflow/core/profiler/lib/BUILD
@@ -64,6 +64,63 @@ cc_library(
     alwayslink = True,
 )
 
+# Wrapper around :local_profiler, visible to the profiler rpc package and
+# the python/profiler/internal package.
+tf_pybind_cc_library_wrapper(
+    name = "local_profiler_headers",
+    visibility = [
+        "//tensorflow/core/profiler/rpc:__pkg__",
+        "//tensorflow/python/profiler/internal:__pkg__",
+    ],
+    deps = [":local_profiler"],
+)
+
+# Exports local_profiler.h. The implementation target is appended to deps
+# only through if_static().
+cc_library(
+    name = "local_profiler",
+    hdrs = ["local_profiler.h"],
+    visibility = ["//tensorflow/core/profiler:internal"],
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/platform",
+        "//tensorflow/core/profiler:profiler_options_proto_cc",
+        "//tensorflow/core/profiler/internal:profiler_interface",
+        "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
+        "@com_google_absl//absl/memory",
+    ] + if_static([
+        ":local_profiler_impl",
+    ]),
+)
+
+# Compiles local_profiler.cc together with the profiler lock, the profiler
+# factory and the XPlane post-processing libraries it calls into.
+cc_library(
+    name = "local_profiler_impl",
+    srcs = ["local_profiler.cc"],
+    hdrs = ["local_profiler.h"],
+    visibility = ["//tensorflow/core/profiler:internal"],
+    deps = [
+        ":profiler_lock",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/platform",
+        "//tensorflow/core/profiler:profiler_options_proto_cc",
+        "//tensorflow/core/profiler/convert:post_process_single_host_xplane",
+        "//tensorflow/core/profiler/internal:profiler_factory",
+        "//tensorflow/core/profiler/internal:profiler_interface",
+        "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
+        "//tensorflow/core/profiler/utils:derived_timeline",
+        "//tensorflow/core/profiler/utils:group_events",
+        "//tensorflow/core/profiler/utils:xplane_schema",
+        "//tensorflow/core/profiler/utils:xplane_utils",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/time",
+    ],
+    alwayslink = True,
+)
+
 tf_cuda_library(
     name = "profiler_backends",
     cuda_deps = [
diff --git a/tensorflow/core/profiler/lib/local_profiler.cc b/tensorflow/core/profiler/lib/local_profiler.cc
new file mode 100644
index 00000000000..b3a8cedefb0
--- /dev/null
+++ b/tensorflow/core/profiler/lib/local_profiler.cc
@@ -0,0 +1,185 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/profiler/lib/local_profiler.h"
+
+#include <typeinfo>
+#include <utility>
+
+#include "absl/memory/memory.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tensorflow/core/platform/env_time.h"
+#include "tensorflow/core/platform/errors.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/platform.h"
+#include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/profiler/convert/post_process_single_host_xplane.h"
+#include "tensorflow/core/profiler/internal/profiler_factory.h"
+#include "tensorflow/core/profiler/internal/profiler_interface.h"
+#include "tensorflow/core/profiler/lib/profiler_lock.h"
+#include "tensorflow/core/profiler/profiler_options.pb.h"
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
+#include "tensorflow/core/profiler/utils/derived_timeline.h"
+#include "tensorflow/core/profiler/utils/group_events.h"
+#include "tensorflow/core/profiler/utils/xplane_schema.h"
+#include "tensorflow/core/profiler/utils/xplane_utils.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/protobuf/error_codes.pb.h"
+
+namespace tensorflow {
+namespace profiler {
+
+/*static*/ std::unique_ptr<LocalProfiler> LocalProfiler::Create(
+    const ProfileOptions& options, Status* out_status) {
+  // The constructor is private, so absl::make_unique cannot be used here.
+  auto profiler = absl::WrapUnique(new LocalProfiler(options));
+  Status status = profiler->Init();
+  // out_status is optional; a null pointer is simply not written to.
+  if (out_status) *out_status = status;
+  if (!status.ok()) {
+    LOG(ERROR) << status;
+    return nullptr;
+  }
+  return profiler;
+}
+
+LocalProfiler::LocalProfiler(ProfileOptions options)
+    : options_(std::move(options)) {}
+
+LocalProfiler::~LocalProfiler() {
+  mutex_lock lock(mutex_);
+  // Best effort: a destructor has no way to report backend errors.
+  for (auto& profiler : profilers_) {
+    profiler->Stop().IgnoreError();
+  }
+
+  if (active_) {
+    // Allow another LocalProfiler to be instantiated.
+    ReleaseProfilerLock();
+    active_ = false;
+  }
+}
+
+Status LocalProfiler::Init() {
+  mutex_lock lock(mutex_);
+  VLOG(1) << "Creating a LocalProfiler.";
+
+  // Set the member; a shadowing local would leave active_ false forever.
+  active_ = AcquireProfilerLock();
+  if (!active_) {
+    return errors::Unavailable("Another LocalProfiler is active.");
+  }
+
+  CreateProfilers(options_, &profilers_);
+  VLOG(1) << "LocalProfiler initialized with " << profilers_.size()
+          << " profilers.";
+  return Status::OK();
+}
+
+Status LocalProfiler::Start() {
+  mutex_lock lock(mutex_);
+  VLOG(1) << "Starting all profilers.";
+
+  if (!active_) {
+    return errors::FailedPrecondition("LocalProfiler is inactive.");
+  }
+  // A non-zero start time doubles as the "already started" marker.
+  if (start_time_ns_ != 0) {
+    return errors::FailedPrecondition("LocalProfiler is not restartable.");
+  }
+
+  start_time_ns_ = EnvTime::NowNanos();
+  // Keep going after a failure; every error is folded into `status`.
+  Status status;
+  for (auto& profiler : profilers_) {
+    Status start_status = profiler->Start();
+    if (!start_status.ok()) {
+      LOG(WARNING) << "Encountered error while starting profiler: "
+                   << start_status.ToString();
+    }
+    status.Update(start_status);
+  }
+
+  VLOG(1) << "Started all profilers.";
+  return status;
+}
+
+Status LocalProfiler::Stop() {
+  mutex_lock lock(mutex_);
+  VLOG(1) << "Stopping all profilers.";
+
+  if (!active_) {
+    return errors::FailedPrecondition("LocalProfiler is inactive.");
+  }
+
+  if (start_time_ns_ == 0) {
+    return errors::FailedPrecondition(
+        "LocalProfiler needs to Start() before it can stop producing data.");
+  }
+
+  Status status;
+  for (auto& profiler : profilers_) {
+    status.Update(profiler->Stop());
+  }
+
+  // Allow another LocalProfiler to be instantiated. active_ is known to be
+  // true here: it was checked above under the same lock.
+  ReleaseProfilerLock();
+  active_ = false;
+
+  VLOG(1) << "Stopped all profilers.";
+  return status;
+}
+
+Status LocalProfiler::CollectData(XSpace* space) {
+  Status status;
+  uint64 data_start_time_ns;
+  // The braces limit the lock's scope; post-processing runs unlocked.
+  {
+    mutex_lock lock(mutex_);
+    VLOG(1) << "Collecting data from " << profilers_.size() << " profilers.";
+
+    // Valid only after Stop(): start_time_ns_ is set and active_ is cleared.
+    if (start_time_ns_ == 0) {
+      return errors::FailedPrecondition("LocalProfiler was never started.");
+    }
+    if (active_) {
+      return errors::FailedPrecondition(
+          "LocalProfiler needs to Stop() before collecting data.");
+    }
+
+    for (auto& profiler : profilers_) {
+      VLOG(3) << "Collecting data from " << typeid(*profiler).name();
+      status.Update(profiler->CollectData(space));
+    }
+    // Backends are discarded once their data has been collected.
+    profilers_.clear();
+
+    data_start_time_ns = start_time_ns_;
+  }
+
+  PostProcessSingleHostXSpace(space, data_start_time_ns);
+  return status;
+}
+
+Status LocalProfiler::CollectData(RunMetadata* run_metadata) {
+  return errors::Unimplemented(
+      "Collecting profiler data into RunMetaData is unsupported.");
+}
+
+}  // namespace profiler
+}  // namespace tensorflow
diff --git a/tensorflow/core/profiler/lib/local_profiler.h b/tensorflow/core/profiler/lib/local_profiler.h
new file mode 100644
index 00000000000..1de71d13676
--- /dev/null
+++ b/tensorflow/core/profiler/lib/local_profiler.h
@@ -0,0 +1,101 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PROFILER_LIB_LOCAL_PROFILER_H_
+#define TENSORFLOW_CORE_PROFILER_LIB_LOCAL_PROFILER_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/profiler/internal/profiler_interface.h"
+#include "tensorflow/core/profiler/profiler_options.pb.h"
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace profiler {
+
+// LocalProfiler coordinates multiple profiler backends, each implementing
+// ProfilerInterface, and owns the profiler lock while it is active.
+// Thread-safety: LocalProfiler is thread-safe.
+class LocalProfiler : public ProfilerInterface {
+ public:
+  // Instantiates a LocalProfiler if there is not one already active. Returns
+  // null on error. `status` may be null; otherwise it receives the outcome.
+  static std::unique_ptr<LocalProfiler> Create(const ProfileOptions& options,
+                                               Status* status);
+
+  // Default options: host tracer level 2, device tracer level 1, python 0.
+  static ProfileOptions DefaultOptions() {
+    ProfileOptions options;
+    options.set_version(1);
+    options.set_device_tracer_level(1);
+    options.set_host_tracer_level(2);
+    options.set_device_type(ProfileOptions::UNSPECIFIED);
+    options.set_python_tracer_level(0);
+    options.set_enable_hlo_proto(false);
+    options.set_include_dataset_ops(true);
+    return options;
+  }
+
+  // Starts all profilers. Fails if inactive or already started (no restart).
+  Status Start() override TF_LOCKS_EXCLUDED(mutex_);
+
+  // Stops all profilers and releases the profiler lock. Requires Start().
+  Status Stop() override TF_LOCKS_EXCLUDED(mutex_);
+
+  // Collects data from all profilers into XSpace and post-processes it (e.g.,
+  // groups trace events per step). Intended for use after Stop(). Best effort:
+  // XSpace may contain data collected before any errors occurred.
+  Status CollectData(XSpace* space) override TF_LOCKS_EXCLUDED(mutex_);
+
+  // Unimplemented, do not use (always fails). Will be deprecated in future.
+  Status CollectData(RunMetadata* run_metadata) override;
+
+  // Stops all profilers and releases the profiler lock if it is still held.
+  ~LocalProfiler() override;
+
+ private:
+  // Only stores the options; Init() acquires the lock and creates profilers.
+  explicit LocalProfiler(ProfileOptions options);
+
+  // Neither copyable nor movable.
+  LocalProfiler(const LocalProfiler&) = delete;
+  LocalProfiler& operator=(const LocalProfiler&) = delete;
+
+  // Acquires the profiler lock and creates all profilers.
+  Status Init();
+
+  mutex mutex_;
+  std::vector<std::unique_ptr<ProfilerInterface>> profilers_
+      TF_GUARDED_BY(mutex_);
+
+  // True if the LocalProfiler is active, i.e. it owns the profiler lock.
+  bool active_ TF_GUARDED_BY(mutex_) = false;
+
+  // Time when Start() was called; 0 means Start() has not been called.
+  uint64 start_time_ns_ TF_GUARDED_BY(mutex_) = 0;
+
+  ProfileOptions options_;
+};
+
+}  // namespace profiler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_PROFILER_LIB_LOCAL_PROFILER_H_