Add SleepFor and SpinFor to time_utils.h
Use them in ProfilerSession and tests. PiperOrigin-RevId: 345802969 Change-Id: I25b8a89d00b6472d5b614d41effebcf12d3d75cc
This commit is contained in:
parent
cfe2cd1e1d
commit
c4be3d9838
tensorflow/core/profiler
@ -24,7 +24,6 @@ limitations under the License.
|
||||
#include "absl/strings/numbers.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/env_time.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/notification.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
@ -36,15 +35,14 @@ namespace tensorflow {
|
||||
namespace profiler {
|
||||
namespace {
|
||||
|
||||
using ::tensorflow::profiler::SpinForNanos;
|
||||
using ::testing::ElementsAre;
|
||||
|
||||
MATCHER_P(Named, name, "") { return arg.name == name; }
|
||||
|
||||
constexpr static uint64 kNanosInSec = 1000000000;
|
||||
|
||||
TEST(RecorderTest, SingleThreaded) {
|
||||
int64 start_time = GetCurrentTimeNanos();
|
||||
int64 end_time = start_time + kNanosInSec;
|
||||
int64 end_time = start_time + SecondsToNanos(1);
|
||||
|
||||
TraceMeRecorder::Record({"before", start_time, end_time});
|
||||
TraceMeRecorder::Start(/*level=*/1);
|
||||
@ -58,12 +56,6 @@ TEST(RecorderTest, SingleThreaded) {
|
||||
ElementsAre(Named("during1"), Named("during2")));
|
||||
}
|
||||
|
||||
void SpinNanos(int nanos) {
|
||||
uint64 deadline = Env::Default()->NowNanos() + nanos;
|
||||
while (Env::Default()->NowNanos() < deadline) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the functional behavior of the recorder, when used from several
|
||||
// unsynchronized threads.
|
||||
//
|
||||
@ -90,7 +82,7 @@ TEST(RecorderTest, Multithreaded) {
|
||||
bool was_active = false;
|
||||
auto record_event = [&j]() {
|
||||
int64 start_time = GetCurrentTimeNanos();
|
||||
int64 end_time = start_time + kNanosInSec;
|
||||
int64 end_time = start_time + SecondsToNanos(1);
|
||||
TraceMeRecorder::Record(
|
||||
{/*name=*/absl::StrCat(j++), start_time, end_time});
|
||||
};
|
||||
@ -116,7 +108,7 @@ TEST(RecorderTest, Multithreaded) {
|
||||
// This snowballs into OOM in some configurations, causing flakiness.
|
||||
// Keep this big enough to prevent OOM and small enough such that
|
||||
// each thread records at least one event.
|
||||
SpinNanos(10);
|
||||
SpinForNanos(10);
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -141,7 +133,7 @@ TEST(RecorderTest, Multithreaded) {
|
||||
// Wait while all the threads are spun up.
|
||||
while (thread_count.load(std::memory_order_relaxed) < kNumThreads) {
|
||||
LOG(INFO) << "Waiting for all threads to spin up...";
|
||||
Env::Default()->SleepForMicroseconds(1 * EnvTime::kMillisToMicros);
|
||||
SleepForMillis(1);
|
||||
}
|
||||
|
||||
// We will probably be done after two iterations (with each thread getting
|
||||
@ -152,7 +144,7 @@ TEST(RecorderTest, Multithreaded) {
|
||||
for (int iters = 0; iters < kMaxIters && !done(); ++iters) {
|
||||
LOG(INFO) << "Looping until convergence, iteration: " << iters;
|
||||
TraceMeRecorder::Start(/*level=*/1);
|
||||
Env::Default()->SleepForMicroseconds(100 * EnvTime::kMillisToMicros);
|
||||
SleepForMillis(100);
|
||||
auto results = TraceMeRecorder::Stop();
|
||||
for (const auto& thread : results) {
|
||||
if (thread.events.empty()) continue;
|
||||
@ -180,7 +172,7 @@ TEST(RecorderTest, Multithreaded) {
|
||||
}
|
||||
}
|
||||
}
|
||||
Env::Default()->SleepForMicroseconds(1 * EnvTime::kMillisToMicros);
|
||||
SleepForMillis(1);
|
||||
}
|
||||
stop.Notify();
|
||||
|
||||
|
@ -55,7 +55,6 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler:profiler_options_proto_cc",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/time",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
] + if_not_android([
|
||||
":profiler_factory",
|
||||
|
@ -18,8 +18,6 @@ limitations under the License.
|
||||
#include <memory>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/time/clock.h"
|
||||
#include "absl/time/time.h"
|
||||
#include "tensorflow/core/platform/errors.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/mutex.h"
|
||||
@ -63,7 +61,8 @@ tensorflow::Status ProfilerSession::Status() {
|
||||
|
||||
Status ProfilerSession::CollectData(profiler::XSpace* space) {
|
||||
mutex_lock l(mutex_);
|
||||
if (!status_.ok()) return status_;
|
||||
TF_RETURN_IF_ERROR(status_);
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
LOG(INFO) << "Profiler session collecting data.";
|
||||
for (auto& profiler : profilers_) {
|
||||
profiler->Stop().IgnoreError();
|
||||
@ -75,13 +74,10 @@ Status ProfilerSession::CollectData(profiler::XSpace* space) {
|
||||
|
||||
if (active_) {
|
||||
// Allow another session to start.
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
profiler::ReleaseProfilerLock();
|
||||
#endif
|
||||
active_ = false;
|
||||
}
|
||||
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
PostProcessSingleHostXSpace(space, start_time_ns_);
|
||||
#endif
|
||||
|
||||
@ -90,7 +86,8 @@ Status ProfilerSession::CollectData(profiler::XSpace* space) {
|
||||
|
||||
Status ProfilerSession::CollectData(RunMetadata* run_metadata) {
|
||||
mutex_lock l(mutex_);
|
||||
if (!status_.ok()) return status_;
|
||||
TF_RETURN_IF_ERROR(status_);
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
for (auto& profiler : profilers_) {
|
||||
profiler->Stop().IgnoreError();
|
||||
}
|
||||
@ -101,57 +98,49 @@ Status ProfilerSession::CollectData(RunMetadata* run_metadata) {
|
||||
|
||||
if (active_) {
|
||||
// Allow another session to start.
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
profiler::ReleaseProfilerLock();
|
||||
#endif
|
||||
active_ = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
ProfilerSession::ProfilerSession(ProfileOptions options)
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
: active_(profiler::AcquireProfilerLock()),
|
||||
#else
|
||||
#if defined(IS_MOBILE_PLATFORM)
|
||||
: active_(false),
|
||||
status_(tensorflow::Status(
|
||||
error::UNIMPLEMENTED,
|
||||
"Profiler is unimplemented for mobile platforms.")),
|
||||
#else
|
||||
: active_(profiler::AcquireProfilerLock()),
|
||||
#endif
|
||||
options_(std::move(options)) {
|
||||
if (!active_) {
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
if (!active_) {
|
||||
status_ = tensorflow::Status(error::UNAVAILABLE,
|
||||
"Another profiler session is active.");
|
||||
#else
|
||||
status_ =
|
||||
tensorflow::Status(error::UNIMPLEMENTED,
|
||||
"Profiler is unimplemented for mobile platforms.");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
LOG(INFO) << "Profiler session initializing.";
|
||||
// Sleep until it is time to start profiling.
|
||||
const bool delayed_start = options_.start_timestamp_ns() > 0;
|
||||
if (delayed_start) {
|
||||
absl::Time scheduled_start =
|
||||
absl::FromUnixNanos(options_.start_timestamp_ns());
|
||||
auto now = absl::Now();
|
||||
if (scheduled_start < now) {
|
||||
LOG(WARNING) << "Profiling is late (" << now
|
||||
<< ") for the scheduled start (" << scheduled_start
|
||||
<< ") and will start immediately.";
|
||||
if (options_.start_timestamp_ns() > 0) {
|
||||
int64 sleep_duration_ns =
|
||||
options_.start_timestamp_ns() - profiler::GetCurrentTimeNanos();
|
||||
if (sleep_duration_ns < 0) {
|
||||
LOG(WARNING) << "Profiling is late by " << -sleep_duration_ns
|
||||
<< " nanoseconds and will start immediately.";
|
||||
} else {
|
||||
absl::Duration sleep_duration = scheduled_start - now;
|
||||
LOG(INFO) << "Delaying start of profiler session by " << sleep_duration;
|
||||
absl::SleepFor(sleep_duration);
|
||||
LOG(INFO) << "Delaying start of profiler session by "
|
||||
<< sleep_duration_ns;
|
||||
profiler::SleepForNanos(sleep_duration_ns);
|
||||
}
|
||||
}
|
||||
|
||||
LOG(INFO) << "Profiler session started.";
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
start_time_ns_ = profiler::GetCurrentTimeNanos();
|
||||
CreateProfilers(options_, &profilers_);
|
||||
#endif
|
||||
status_ = Status::OK();
|
||||
|
||||
for (auto& profiler : profilers_) {
|
||||
@ -161,9 +150,11 @@ ProfilerSession::ProfilerSession(ProfileOptions options)
|
||||
<< start_status.ToString();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
ProfilerSession::~ProfilerSession() {
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
LOG(INFO) << "Profiler session tear down.";
|
||||
for (auto& profiler : profilers_) {
|
||||
profiler->Stop().IgnoreError();
|
||||
@ -171,9 +162,8 @@ ProfilerSession::~ProfilerSession() {
|
||||
|
||||
if (active_) {
|
||||
// Allow another session to start.
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
profiler::ReleaseProfilerLock();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} // namespace tensorflow
|
||||
|
@ -16,6 +16,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||
|
||||
#include "absl/time/clock.h"
|
||||
#include "absl/time/time.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
@ -27,5 +28,14 @@ int64 GetCurrentTimeNanos() {
|
||||
return absl::GetCurrentTimeNanos();
|
||||
}
|
||||
|
||||
void SleepForNanos(int64 ns) { absl::SleepFor(absl::Nanoseconds(ns)); }
|
||||
|
||||
void SpinForNanos(int64 ns) {
|
||||
if (ns <= 0) return;
|
||||
int64 deadline = GetCurrentTimeNanos() + ns;
|
||||
while (GetCurrentTimeNanos() < deadline) {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -40,6 +40,17 @@ inline uint64 SecondsToNanos(double s) { return s * 1E9; }
|
||||
// Returns the current CPU wallclock time in nanoseconds.
|
||||
int64 GetCurrentTimeNanos();
|
||||
|
||||
// Sleeps for the specified duration.
|
||||
void SleepForNanos(int64 ns);
|
||||
inline void SleepForMicros(int64 us) { SleepForNanos(us * 1000); }
|
||||
inline void SleepForMillis(int64 ms) { SleepForNanos(ms * 1000000); }
|
||||
inline void SleepForSeconds(int64 s) { SleepForNanos(s * 1000000000); }
|
||||
|
||||
// Spins to simulate doing some work instead of sleeping, because sleep
|
||||
// precision is poor. For testing only.
|
||||
void SpinForNanos(int64 ns);
|
||||
inline void SpinForMicros(int64 us) { SpinForNanos(us * 1000); }
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user