diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index 10e5df84345..25fdc35368a 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -55,6 +55,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", "@com_google_absl//absl/memory", + "@com_google_absl//absl/time", "//tensorflow/core:protos_all_cc", ] + if_not_android([ ":profiler_lock", diff --git a/tensorflow/core/profiler/lib/profiler_session.cc b/tensorflow/core/profiler/lib/profiler_session.cc index f37cb12ebab..c1b871025d6 100644 --- a/tensorflow/core/profiler/lib/profiler_session.cc +++ b/tensorflow/core/profiler/lib/profiler_session.cc @@ -18,7 +18,10 @@ limitations under the License. #include #include "absl/memory/memory.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/platform.h" @@ -65,6 +68,7 @@ tensorflow::Status ProfilerSession::Status() { Status ProfilerSession::CollectData(profiler::XSpace* space) { mutex_lock l(mutex_); if (!status_.ok()) return status_; + LOG(INFO) << "Profiler session collecting data."; for (auto& profiler : profilers_) { profiler->Stop().IgnoreError(); } @@ -116,7 +120,6 @@ ProfilerSession::ProfilerSession(ProfileOptions options) #else : active_(false), #endif - start_time_ns_(EnvTime::NowNanos()), options_(std::move(options)) { if (!active_) { #if !defined(IS_MOBILE_PLATFORM) @@ -130,6 +133,25 @@ ProfilerSession::ProfilerSession(ProfileOptions options) return; } + LOG(INFO) << "Profiler session initializing."; + // Sleep until it is time to start profiling. + const bool delayed_start = options_.start_timestamp_ns() > 0; + if (delayed_start) { + absl::Time scheduled_start = + absl::FromUnixNanos(options_.start_timestamp_ns()); + auto now = absl::Now(); + if (scheduled_start < now) { + LOG(WARNING) << "Profiling is late (" << now + << ") for the scheduled start (" << scheduled_start + << ") and will start immediately."; + } else { + absl::Duration sleep_duration = scheduled_start - now; + LOG(INFO) << "Delaying start of profiler session by " << sleep_duration; + absl::SleepFor(sleep_duration); + } + } + + start_time_ns_ = EnvTime::NowNanos(); LOG(INFO) << "Profiler session started."; #if !defined(IS_MOBILE_PLATFORM) @@ -147,7 +169,7 @@ ProfilerSession::ProfilerSession(ProfileOptions options) } ProfilerSession::~ProfilerSession() { - VLOG(1) << "Profiler session stopping."; + LOG(INFO) << "Profiler session tear down."; for (auto& profiler : profilers_) { profiler->Stop().IgnoreError(); } diff --git a/tensorflow/core/profiler/lib/profiler_session.h b/tensorflow/core/profiler/lib/profiler_session.h index 976ebcfc884..2a852c92267 100644 --- a/tensorflow/core/profiler/lib/profiler_session.h +++ b/tensorflow/core/profiler/lib/profiler_session.h @@ -79,7 +79,7 @@ class ProfilerSession { bool active_ TF_GUARDED_BY(mutex_); tensorflow::Status status_ TF_GUARDED_BY(mutex_); - const uint64 start_time_ns_; + uint64 start_time_ns_; mutex mutex_; ProfileOptions options_; }; diff --git a/tensorflow/core/profiler/profiler_options.proto b/tensorflow/core/profiler/profiler_options.proto index 7858f08c8ec..899bdb909b0 100644 --- a/tensorflow/core/profiler/profiler_options.proto +++ b/tensorflow/core/profiler/profiler_options.proto @@ -63,7 +63,7 @@ message ProfileOptions { } // Options for remote profiler session manager. -// Next ID: 5 +// Next ID: 6 message RemoteProfilerSessionManagerOptions { // Options for each local profiler. ProfileOptions profiler_options = 1; @@ -79,4 +79,7 @@ message RemoteProfilerSessionManagerOptions { // continues until interrupted. Otherwise, value must be greater than // profiler_options.duration_ms. uint64 max_session_duration_ms = 4; + + // Start of profiling is delayed by this much (in milliseconds). + uint64 delay_ms = 5; } diff --git a/tensorflow/core/profiler/rpc/client/BUILD b/tensorflow/core/profiler/rpc/client/BUILD index ca1ff506f2a..ea37a8852a0 100644 --- a/tensorflow/core/profiler/rpc/client/BUILD +++ b/tensorflow/core/profiler/rpc/client/BUILD @@ -37,6 +37,7 @@ cc_library( "//tensorflow/core/profiler/convert:xplane_to_profile_response", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", ], ) diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc index 78e6fdbddd7..7d3f0c1a7b5 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.cc +++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc @@ -21,6 +21,8 @@ limitations under the License. #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/host_info.h" #include "tensorflow/core/platform/status.h" @@ -176,7 +178,7 @@ Status NewSession(absl::string_view repository_root, } // namespace Status Trace(const std::string& logdir, int num_tracing_attempts, - const RemoteProfilerSessionManagerOptions& opts, + RemoteProfilerSessionManagerOptions& opts, bool is_cloud_tpu_session) { DCHECK_GT(opts.profiler_options().duration_ms(), 0); DCHECK(!opts.service_addresses().empty()); @@ -190,6 +192,14 @@ Status Trace(const std::string& logdir, int num_tracing_attempts, Status status; int remaining_attempts = num_tracing_attempts; while (true) { + auto start_timestamp = absl::Now() + absl::Milliseconds(opts.delay_ms()); + opts.mutable_profiler_options()->set_start_timestamp_ns( + absl::ToUnixNanos(start_timestamp)); + LOG(INFO) << "Profiler delay_ms was " << opts.delay_ms() + << ", start_timestamp_ns set to " + << opts.profiler_options().start_timestamp_ns() << " [" + << start_timestamp << "]"; + std::cout << "Starting to trace for " << duration_ms << " ms. " << "Remaining attempt(s): " << --remaining_attempts << std::endl; diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.h b/tensorflow/core/profiler/rpc/client/capture_profile.h index cb9183e28a7..96f3dafa06a 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.h +++ b/tensorflow/core/profiler/rpc/client/capture_profile.h @@ -40,7 +40,7 @@ Status Monitor(const std::string& service_addr, int duration_ms, // in the given logdir. If no trace was collected, retries tracing for // num_tracing_attempts. Assumes that options have been validated. Status Trace(const std::string& logdir, int num_tracing_attempts, - const RemoteProfilerSessionManagerOptions& opts, + RemoteProfilerSessionManagerOptions& opts, bool is_cloud_tpu_session); } // namespace profiler diff --git a/tensorflow/python/profiler/integration_test/profiler_api_test.py b/tensorflow/python/profiler/integration_test/profiler_api_test.py index 4e2a9dfd4e3..1d79b660ba5 100644 --- a/tensorflow/python/profiler/integration_test/profiler_api_test.py +++ b/tensorflow/python/profiler/integration_test/profiler_api_test.py @@ -116,8 +116,8 @@ class ProfilerApiTest(test_util.TensorFlowTestCase): thread_worker.join(120) self._check_xspace_pb_exist(logdir) - def test_single_worker_sampling_mode_delayed(self): - """Test single worker sampling mode with delay.""" + def test_single_worker_sampling_mode_long_delay(self): + """Test single worker sampling mode with a long delay.""" self.test_single_worker_sampling_mode(delay_ms=1000) diff --git a/tensorflow/python/profiler/internal/profiler_wrapper.cc b/tensorflow/python/profiler/internal/profiler_wrapper.cc index 2b513547612..6827e4e32a5 100644 --- a/tensorflow/python/profiler/internal/profiler_wrapper.cc +++ b/tensorflow/python/profiler/internal/profiler_wrapper.cc @@ -146,7 +146,6 @@ RemoteProfilerSessionManagerOptions GetOptionsLocked(absl::string_view logdir, VLOG(2) << "repository_path set to " << options.profiler_options().repository_path(); - int delay_ms = 0; for (const auto& kw : opts) { std::string key = py::cast(kw.first); if (key == "host_tracer_level") { @@ -163,26 +162,15 @@ RemoteProfilerSessionManagerOptions GetOptionsLocked(absl::string_view logdir, VLOG(1) << "python_tracer_level set to " << value; } else if (key == "delay_ms") { if (!kw.second.is_none()) { - delay_ms = py::cast(kw.second); + auto value = py::cast(kw.second); + options.set_delay_ms(value); + VLOG(1) << "delay_ms was set to " << value; } } else { LOG(WARNING) << "Unrecognised key: " << key; } } - if (delay_ms) { - absl::Time start_timestamp = now + absl::Milliseconds(delay_ms); - tensorflow::int64 start_timestamp_ns = absl::ToUnixNanos(start_timestamp); - options.mutable_profiler_options()->set_start_timestamp_ns( - start_timestamp_ns); - LOG(INFO) << "delay_ms was " << delay_ms << ", start_timestamp_ns set to " - << start_timestamp_ns << " [" << start_timestamp << "]"; - } else { - DCHECK_EQ(options.mutable_profiler_options()->start_timestamp_ns(), 0); - LOG(INFO) << "Profiling will start immediately because delay_ms was unset " - "or zero."; - } - return options; }