Fix ODR violations in profiler instrumentation
PiperOrigin-RevId: 282408764 Change-Id: I135af07d8855be957fa5d8cf437f1f15a06574f7
This commit is contained in:
parent
96bcf5e059
commit
b1e67d9456
@ -2687,6 +2687,8 @@ tf_cuda_library(
|
||||
"//tensorflow/core/kernels:bounds_check",
|
||||
"//tensorflow/core/platform/default/build_config:platformlib",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/internal:traceme_recorder_impl",
|
||||
"//tensorflow/core/profiler/internal:annotation_stack_impl",
|
||||
] + if_static(
|
||||
extra_deps = ["@com_google_protobuf//:protobuf"],
|
||||
otherwise = ["@com_google_protobuf//:protobuf_headers"],
|
||||
|
@ -1,4 +1,5 @@
|
||||
load("//tensorflow:tensorflow.bzl", "if_not_windows", "tf_cc_test")
|
||||
load("//tensorflow/core/platform:build_config_root.bzl", "if_static")
|
||||
|
||||
package(
|
||||
default_visibility = ["//tensorflow:internal"],
|
||||
@ -366,7 +367,6 @@ cc_library(
|
||||
|
||||
cc_library(
|
||||
name = "traceme_recorder",
|
||||
srcs = ["traceme_recorder.cc"],
|
||||
hdrs = ["traceme_recorder.h"],
|
||||
visibility = [
|
||||
"//perftools/accelerators/xprof/xprofilez:__subpackages__",
|
||||
@ -374,8 +374,23 @@ cc_library(
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
] + if_static([
|
||||
":traceme_recorder_impl",
|
||||
]),
|
||||
)
|
||||
|
||||
# Linked into libtensorflow_framework.so via :framework_internal_impl.
|
||||
cc_library(
|
||||
name = "traceme_recorder_impl",
|
||||
srcs = [
|
||||
"traceme_recorder.cc",
|
||||
"traceme_recorder.h",
|
||||
],
|
||||
alwayslink = 1,
|
||||
visibility = ["//tensorflow/core:__pkg__"],
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
],
|
||||
alwayslink = True,
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
@ -414,16 +429,32 @@ filegroup(
|
||||
|
||||
cc_library(
|
||||
name = "annotation_stack",
|
||||
srcs = ["annotation_stack.cc"],
|
||||
hdrs = ["annotation_stack.h"],
|
||||
visibility = [
|
||||
"//perftools/accelerators/xprof/xprofilez:__subpackages__",
|
||||
"//tensorflow/core/profiler:__subpackages__",
|
||||
],
|
||||
deps = [
|
||||
"@com_google_absl//absl/strings",
|
||||
"//tensorflow/core:lib",
|
||||
] + if_static([
|
||||
":annotation_stack_impl",
|
||||
]),
|
||||
)
|
||||
|
||||
# Linked into libtensorflow_framework.so via :framework_internal_impl.
|
||||
cc_library(
|
||||
name = "annotation_stack_impl",
|
||||
srcs = [
|
||||
"annotation_stack.cc",
|
||||
"annotation_stack.h",
|
||||
],
|
||||
visibility = ["//tensorflow/core:__pkg__"],
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
alwayslink = True,
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
|
@ -246,5 +246,17 @@ TraceMeRecorder::Events TraceMeRecorder::StopRecording() {
|
||||
return events;
|
||||
}
|
||||
|
||||
/*static*/ uint64 TraceMeRecorder::NewActivityId() {
|
||||
// Activity IDs: To avoid contention over a counter, the top 32 bits identify
|
||||
// the originating thread, the bottom 32 bits name the event within a thread.
|
||||
// IDs may be reused after 4 billion events on one thread, or 4 billion
|
||||
// threads.
|
||||
static std::atomic<uint32> thread_counter(1); // avoid kUntracedActivity
|
||||
const thread_local static uint32 thread_id =
|
||||
thread_counter.fetch_add(1, std::memory_order_relaxed);
|
||||
thread_local static uint32 per_thread_activity_id = 0;
|
||||
return static_cast<uint64>(thread_id) << 32 | per_thread_activity_id++;
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -88,6 +88,9 @@ class TraceMeRecorder {
|
||||
// Records an event. Non-blocking.
|
||||
static void Record(Event event);
|
||||
|
||||
// Returns an activity_id for TraceMe::ActivityStart.
|
||||
static uint64 NewActivityId();
|
||||
|
||||
private:
|
||||
class ThreadLocalRecorder;
|
||||
|
||||
|
@ -52,7 +52,6 @@ tf_cuda_library(
|
||||
|
||||
cc_library(
|
||||
name = "traceme",
|
||||
srcs = ["traceme.cc"],
|
||||
hdrs = ["traceme.h"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
@ -89,7 +88,6 @@ filegroup(
|
||||
"profiler_utils.cc",
|
||||
"profiler_utils.h",
|
||||
"scoped_annotation.h",
|
||||
"traceme.cc",
|
||||
"traceme.h",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
|
@ -1,46 +0,0 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// Activity IDs: To avoid contention over a counter, the top 32 bits identify
|
||||
// the originating thread, the bottom 32 bits name the event within a thread.
|
||||
// IDs may be reused after 4 billion events on one thread, or 4 billion threads.
|
||||
static std::atomic<uint32> thread_counter(1); // avoid kUntracedActivity
|
||||
uint64 NewActivityId() {
|
||||
const thread_local static uint32 thread_id = thread_counter.fetch_add(1);
|
||||
thread_local static uint32 per_thread_activity_id = 0;
|
||||
return static_cast<uint64>(thread_id) << 32 | per_thread_activity_id++;
|
||||
}
|
||||
|
||||
/* static */ uint64 TraceMe::ActivityStartImpl(
|
||||
absl::string_view activity_name) {
|
||||
uint64 activity_id = NewActivityId();
|
||||
TraceMeRecorder::Record({activity_id, string(activity_name),
|
||||
/*start_time=*/EnvTime::NowNanos(),
|
||||
/*end_time=*/0});
|
||||
return activity_id;
|
||||
}
|
||||
|
||||
/* static */ void TraceMe::ActivityEndImpl(uint64 activity_id) {
|
||||
TraceMeRecorder::Record({activity_id, /*name=*/"", /*start_time=*/0,
|
||||
/*end_time=*/EnvTime::NowNanos()});
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
@ -162,8 +162,14 @@ class TraceMe {
|
||||
// Record the start time of an activity.
|
||||
// Returns the activity ID, which is used to stop the activity.
|
||||
static uint64 ActivityStart(absl::string_view name, int level = 1) {
|
||||
return TraceMeRecorder::Active(level) ? ActivityStartImpl(name)
|
||||
: kUntracedActivity;
|
||||
if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
|
||||
uint64 activity_id = TraceMeRecorder::NewActivityId();
|
||||
TraceMeRecorder::Record({activity_id, string(name),
|
||||
/*start_time=*/EnvTime::NowNanos(),
|
||||
/*end_time=*/0});
|
||||
return activity_id;
|
||||
}
|
||||
return kUntracedActivity;
|
||||
}
|
||||
|
||||
// Record the end time of an activity started by ActivityStart().
|
||||
@ -171,7 +177,8 @@ class TraceMe {
|
||||
// We don't check the level again (see ~TraceMe()).
|
||||
if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) {
|
||||
if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
|
||||
ActivityEndImpl(activity_id);
|
||||
TraceMeRecorder::Record({activity_id, /*name=*/"", /*start_time=*/0,
|
||||
/*end_time=*/EnvTime::NowNanos()});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -186,9 +193,6 @@ class TraceMe {
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(TraceMe);
|
||||
|
||||
static uint64 ActivityStartImpl(absl::string_view activity_name);
|
||||
static void ActivityEndImpl(uint64 activity_id);
|
||||
|
||||
// Wrap the name into a union so that we can avoid the cost of string
|
||||
// initialization when tracing is disabled.
|
||||
union NoInit {
|
||||
|
@ -1,6 +1,6 @@
|
||||
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "pybind_extension")
|
||||
load("//tensorflow:tensorflow.bzl", "py_test", "tf_binary_additional_srcs")
|
||||
load("//tensorflow:tensorflow.bzl", "py_test", "tf_binary_additional_data_deps", "tf_binary_additional_srcs")
|
||||
|
||||
package(
|
||||
default_visibility = ["//tensorflow/python/profiler:__subpackages__"],
|
||||
@ -78,6 +78,7 @@ cuda_py_test(
|
||||
pybind_extension(
|
||||
name = "_pywrap_traceme",
|
||||
srcs = ["traceme_wrapper.cc"] + tf_binary_additional_srcs(),
|
||||
data = tf_binary_additional_data_deps(),
|
||||
module_name = "_pywrap_traceme",
|
||||
visibility = [
|
||||
"//perftools/accelerators/xprof/xprofilez/integration_tests:__pkg__",
|
||||
@ -94,6 +95,7 @@ pybind_extension(
|
||||
pybind_extension(
|
||||
name = "_pywrap_scoped_annotation",
|
||||
srcs = ["scoped_annotation_wrapper.cc"] + tf_binary_additional_srcs(),
|
||||
data = tf_binary_additional_data_deps(),
|
||||
module_name = "_pywrap_scoped_annotation",
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
|
Loading…
x
Reference in New Issue
Block a user