[tf.data] Fix documentation and variables names for Iterator GetNext monitoring.

PiperOrigin-RevId: 319260104
Change-Id: Ic37ac540ea25e01f328eb705f82905d084298283
This commit is contained in:
Michael Kuchnik 2020-07-01 11:12:30 -07:00 committed by TensorFlower Gardener
parent b5f51cde5b
commit 82adecb5a5

View File

@ -83,23 +83,24 @@ auto* tf_data_elements_counter = monitoring::Counter<1>::New(
auto* tf_data_fingerprint_counter = monitoring::Counter<1>::New(
"/tensorflow/data/fingerprint", "tf.data fingerprint", "name");
auto* tf_data_getnext_duration_counter = monitoring::Sampler<0>::New(
auto* tf_data_getnext_duration_usecs_histogram = monitoring::Sampler<0>::New(
{"/tensorflow/data/getnext_duration",
"Microseconds spent fetching an element from tf.data Dataset iterator."},
// Power of 2 with bucket count 10 (1024 ms)
// Power of 2 with bucket count 10 (1024 microseconds)
{monitoring::Buckets::Exponential(1, 2, 10)});
auto* tf_data_getnext_time_between_ms_histogram = monitoring::Sampler<0>::New(
{"/tensorflow/data/getnext_time_between",
"Milliseconds spent in between calls to tf.data Dataset TF iterator."},
// A typical training step is in the 200ms to 1 second range.
// Elapsed time less than 25ms are likely due to multiple devices calling
// the iterator's getNext() during the same step.
// Bucket density is highest for small time intervals to more accurately
// measure fast ingest rates. Step sizes are as follows:
{monitoring::Buckets::Explicit({25., 50., 75., 100., 125., 150., 175., 200.,
225., 250., 300., 350., 400., 450., 500.,
1000., 10000.})});
auto* tf_data_getnext_time_between_msecs_histogram =
monitoring::Sampler<0>::New(
{"/tensorflow/data/getnext_time_between",
"Milliseconds spent in between calls to tf.data Dataset iterator."},
// A typical training step is in the 200ms to 1 second range.
// Elapsed time less than 25ms are likely due to multiple devices
// calling the iterator's getNext() during the same step. Bucket density
// is highest for small time intervals to more accurately measure fast
// ingest rates. Buckets from 25ms to 10 seconds.
{monitoring::Buckets::Explicit({25., 50., 75., 100., 125., 150., 175.,
200., 225., 250., 300., 350., 400.,
450., 500., 1000., 10000.})});
auto* tf_data_optimization_counter = monitoring::Counter<1>::New(
"/tensorflow/data/optimization", "tf.data optimization", "name");
@ -178,13 +179,13 @@ void RecordTFDataFingerprint(const string& name) {
void RecordTFDataGetNextDuration(uint64 duration_us) {
static auto* tfdata_getnext_duration_cell =
tf_data_getnext_duration_counter->GetCell();
tf_data_getnext_duration_usecs_histogram->GetCell();
tfdata_getnext_duration_cell->Add(duration_us);
}
void RecordTFDataGetNextTimeBetween(uint64 duration_us) {
static auto* tfdata_getnext_time_between_cell =
tf_data_getnext_time_between_ms_histogram->GetCell();
tf_data_getnext_time_between_msecs_histogram->GetCell();
// Convert to milliseconds for histogram
const auto duration_ms = duration_us / 1000;
tfdata_getnext_time_between_cell->Add(duration_ms);