Split memory bandwidth reporting into its own function, so that callers who only need memory usage information do not incur the additional calls made for bandwidth reporting.
PiperOrigin-RevId: 357122076
Change-Id: Ia23cbb2e06c36cf23a8b0f3f31ffb9d62d780b56
This commit is contained in:
parent
0f270839dc
commit
2ad0a33259
@ -463,6 +463,7 @@ tf_kernel_library(
|
||||
"//tensorflow/core:session_options",
|
||||
"//tensorflow/core/kernels:ops_util",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/lib:traceme_encode",
|
||||
"@com_google_absl//absl/memory",
|
||||
],
|
||||
)
|
||||
|
@ -47,10 +47,12 @@ limitations under the License.
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/casts.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/mem.h"
|
||||
#include "tensorflow/core/platform/mutex.h"
|
||||
#include "tensorflow/core/platform/refcount.h"
|
||||
#include "tensorflow/core/platform/resource.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme_encode.h"
|
||||
#include "tensorflow/core/public/session_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -965,9 +967,18 @@ void RecordElementSize(const std::vector<Tensor> element,
|
||||
Status IteratorGetNextOp::DoCompute(OpKernelContext* ctx) {
|
||||
profiler::TraceMe traceme(
|
||||
[&] {
|
||||
return strings::StrCat(
|
||||
"IteratorGetNextOp::DoCompute#id=", ctx->step_id(),
|
||||
",iter_num=", ctx->frame_iter().iter_id, "#");
|
||||
int64 mem_bw = port::GetMemoryBandwidthInfo().bw_used;
|
||||
|
||||
if (mem_bw != INT64_MAX) {
|
||||
return profiler::TraceMeEncode(
|
||||
"IteratorGetNextOp::DoCompute",
|
||||
{{"id", ctx->step_id()},
|
||||
{"iter_num", ctx->frame_iter().iter_id},
|
||||
{"mem_bw_used_megabytes_per_sec", mem_bw}});
|
||||
}
|
||||
return profiler::TraceMeEncode(
|
||||
"IteratorGetNextOp::DoCompute",
|
||||
{{"id", ctx->step_id()}, {"iter_num", ctx->frame_iter().iter_id}});
|
||||
},
|
||||
profiler::kInfo);
|
||||
tensorflow::ResourceTagger tag(kTFDataResourceTag,
|
||||
|
@ -369,5 +369,10 @@ MemoryInfo GetMemoryInfo() {
|
||||
return mem_info;
|
||||
}
|
||||
|
||||
// Always reports the host memory bandwidth as unavailable: the single field
// of the returned structure is set to the INT64_MAX sentinel.
MemoryBandwidthInfo GetMemoryBandwidthInfo() {
  MemoryBandwidthInfo unavailable;
  unavailable.bw_used = INT64_MAX;
  return unavailable;
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
|
@ -64,11 +64,19 @@ struct MemoryInfo {
|
||||
int64 free = 0;
|
||||
};
|
||||
|
||||
struct MemoryBandwidthInfo {
|
||||
int64 bw_used = 0; // memory bandwidth used across all CPU (in MBs/second)
|
||||
};
|
||||
|
||||
// Retrieves the host memory information. If any of the fields in the returned
|
||||
// MemoryInfo structure is INT64_MAX, it means such information is not
|
||||
// available.
|
||||
MemoryInfo GetMemoryInfo();
|
||||
|
||||
// Retrieves the host memory bandwidth information. If any field in the returned
|
||||
// structure is INT64_MAX, it means such information is not available.
|
||||
MemoryBandwidthInfo GetMemoryBandwidthInfo();
|
||||
|
||||
// Returns the amount of RAM available in bytes, or INT64_MAX if unknown.
|
||||
static inline int64 AvailableRam() { return GetMemoryInfo().free; }
|
||||
|
||||
|
@ -202,6 +202,11 @@ MemoryInfo GetMemoryInfo() {
|
||||
return mem_info;
|
||||
}
|
||||
|
||||
// Always returns a structure whose bw_used field is INT64_MAX, the sentinel
// meaning that memory bandwidth information is not available.
MemoryBandwidthInfo GetMemoryBandwidthInfo() {
  return MemoryBandwidthInfo{INT64_MAX};
}
|
||||
|
||||
int NumHyperthreadsPerCore() {
|
||||
static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
|
||||
return (ht_per_core > 0) ? ht_per_core : 1;
|
||||
|
Loading…
Reference in New Issue
Block a user