Added timestamp flag in TensorFlow TPU profiler for monitoring.
PiperOrigin-RevId: 243116546
This commit is contained in:
parent
ceeea9c916
commit
9a4141c306
@ -31,6 +31,7 @@ int main(int argc, char** argv) {
|
||||
int FLAGS_num_tracing_attempts = 3;
|
||||
bool FLAGS_include_dataset_ops = true;
|
||||
int FLAGS_monitoring_level = 0;
|
||||
bool FLAGS_timestamp = false;
|
||||
int FLAGS_num_queries = 100;
|
||||
std::vector<tensorflow::Flag> flag_list = {
|
||||
tensorflow::Flag("service_addr", &FLAGS_service_addr,
|
||||
@ -54,6 +55,9 @@ int main(int argc, char** argv) {
|
||||
"Choose a monitoring level between 1 and 2 to monitor "
|
||||
"your TPU job continuously. Level 2 is more verbose "
|
||||
"than level 1 and shows more metrics."),
|
||||
tensorflow::Flag("timestamp", &FLAGS_timestamp,
|
||||
"Set to true to display timestamp in monitoring "
|
||||
"results."),
|
||||
tensorflow::Flag("num_queries", &FLAGS_num_queries,
|
||||
"This script will run monitoring for num_queries before "
|
||||
"it stops.")};
|
||||
@ -102,7 +106,8 @@ int main(int argc, char** argv) {
|
||||
<< "ms and show metrics for " << num_queries << " time(s)."
|
||||
<< std::endl;
|
||||
tensorflow::profiler::client::StartMonitoring(
|
||||
FLAGS_service_addr, duration_ms, FLAGS_monitoring_level, num_queries);
|
||||
FLAGS_service_addr, duration_ms, FLAGS_monitoring_level,
|
||||
FLAGS_timestamp, num_queries);
|
||||
} else {
|
||||
status = tensorflow::profiler::client::StartTracing(
|
||||
FLAGS_service_addr, FLAGS_logdir, FLAGS_workers_list,
|
||||
|
@ -243,11 +243,10 @@ MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level,
|
||||
}
|
||||
|
||||
void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
|
||||
int monitoring_level, int num_queries) {
|
||||
int monitoring_level, bool timestamp, int num_queries) {
|
||||
for (int query = 0; query < num_queries; ++query) {
|
||||
MonitorRequest request =
|
||||
PopulateMonitorRequest(duration_ms, monitoring_level,
|
||||
/*timestamp=*/false);
|
||||
PopulateMonitorRequest(duration_ms, monitoring_level, timestamp);
|
||||
|
||||
::grpc::ClientContext context;
|
||||
::grpc::ChannelArguments channel_args;
|
||||
|
@ -26,9 +26,10 @@ namespace client {
|
||||
Status ValidateHostPortPair(const string& host_port);
|
||||
|
||||
// Repeatedly collects profiles and shows user-friendly metrics for
|
||||
// 'num_queries' time(s).
|
||||
// 'num_queries' time(s). If 'timestamp' is true, the timestamp will be
|
||||
// displayed in "%H:%M:%S" format.
|
||||
void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
|
||||
int monitoring_level, int num_queries);
|
||||
int monitoring_level, bool timestamp, int num_queries);
|
||||
|
||||
// Starts tracing on a single or multiple hosts and saves the result in the
|
||||
// given logdir. If no trace was collected, retries tracing for
|
||||
|
Loading…
Reference in New Issue
Block a user