diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 918113e9eed..12da6010979 100755 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -550,6 +550,7 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:scoped_annotation", "//tensorflow/stream_executor", "//tensorflow/stream_executor:blas", "//tensorflow/stream_executor:device_memory", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index d40909606e9..99bc0f7fee0 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -36,7 +36,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/stream_executor/platform.h" @@ -44,7 +44,7 @@ namespace xla { namespace gpu { namespace { -using tensorflow::tracing::ScopedAnnotation; +using tensorflow::profiler::ScopedAnnotation; } // namespace diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index e54d4609427..263be4405bf 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2188,12 +2188,12 @@ cc_library( ], }), deps = tf_additional_lib_deps() + [ - "//tensorflow/core/platform:annotation", "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/strings", "//third_party/eigen3", "@com_google_absl//absl/base:core_headers", "//tensorflow/core/platform:regexp", + "//tensorflow/core/platform:tracing", "//tensorflow/core/platform/default/build_config:platformlib", ] + if_static([":lib_internal_impl"]), 
) @@ -2291,7 +2291,6 @@ cc_library( "//tensorflow/core/lib/strings:strcat", "//tensorflow/core/lib/strings:stringprintf", "//tensorflow/core/platform:abi", - "//tensorflow/core/platform:annotation", "//tensorflow/core/platform:context", "//tensorflow/core/platform:cord", "//tensorflow/core/platform:cpu_feature_guard", @@ -3069,8 +3068,8 @@ tf_cuda_library( "@com_google_absl//absl/types:optional", "//third_party/eigen3", "//tensorflow/core/grappler/utils:functions", + "//tensorflow/core/profiler/lib:scoped_annotation", "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/profiler/internal:traceme_recorder", ] + mkl_deps(), alwayslink = 1, ) @@ -3302,6 +3301,7 @@ tf_cuda_library( ":lib_internal", ":protos_all_cc", ":stream_executor", + "//tensorflow/core/profiler/lib:scoped_annotation", "//third_party/eigen3", ], alwayslink = 1, diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc index 33b253609a3..2a071e44a5c 100644 --- a/tensorflow/core/common_runtime/copy_tensor.cc +++ b/tensorflow/core/common_runtime/copy_tensor.cc @@ -18,12 +18,13 @@ limitations under the License. 
#include #include #include + #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/util/reffed_status_callback.h" namespace tensorflow { @@ -203,7 +204,7 @@ void CopyTensor::ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context, const Tensor* input, Tensor* output, int dev_to_dev_stream_index, StatusCallback done, bool sync_dst_compute) { - tracing::ScopedAnnotation annotation(edge_name); + profiler::ScopedAnnotation annotation(edge_name); VLOG(1) << "Copy " << edge_name; const DeviceType src_device_type( diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 30e08a59532..47df665a81b 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -217,6 +217,7 @@ KERNEL_AND_DEVICE_DEPS = [ "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:scoped_annotation", "//tensorflow/core/grappler/optimizers:meta_optimizer", ] diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 3afc9ad9a62..228a236edc2 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -36,7 +36,7 @@ limitations under the License. 
#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" @@ -308,7 +308,7 @@ Status KernelAndDeviceOp::Run( [&] { return absl::StrCat(op_name, ":", kernel_->type_string()); }, profiler::TraceMeLevel::kInfo); // 'ScopedAnnotation' will trace the OpKernel execution time on device. - tracing::ScopedAnnotation annotation( + profiler::ScopedAnnotation annotation( [&]() { return absl::StrCat(op_name, ":", kernel_->type_string()); }); device_->Compute(kernel_.get(), &context); } diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 12b8ef9d957..228ce68b442 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -65,7 +65,7 @@ limitations under the License. #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/profiler/internal/traceme_recorder.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" @@ -1637,7 +1637,7 @@ bool MightTrace(const NodeItem& item, const tracing::EventCollector* event_collector) { // Tracing will only be enabled if either `event_collector` is non null, // or `trace_collector` is non-null and enabled for this particular kernel. 
- // Although `profiler::TraceMe`, `tracing::ScopedAnnotation`, and + // Although `profiler::TraceMe`, `profiler::ScopedAnnotation`, and // `tracing::ScopedRegion` check subsets of these properties internally in // their constructors, the cost of passing the necessary arguments to them can // be significant, so we avoid constructing them in the common case (when we @@ -1646,9 +1646,9 @@ bool MightTrace(const NodeItem& item, return true; } - if (tracing::ScopedAnnotation::IsEnabled()) return true; + if (profiler::ScopedAnnotation::IsEnabled()) return true; - return profiler::TraceMeRecorder::Active( + return profiler::TraceMe::Active( profiler::GetTFTraceMeLevel(item.kernel->IsExpensive())); } @@ -1874,7 +1874,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { absl::string_view(kernel_label), profiler::GetTFTraceMeLevel(op_kernel->IsExpensive())); // 'ScopedAnnotation' will trace the OpKernel execution time. - tracing::ScopedAnnotation annotation(kernel_label); + profiler::ScopedAnnotation annotation(kernel_label); device->Compute(op_kernel, &ctx); } else { // In the common case, avoid creating any tracing objects. diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index f334311492f..0e230a5d2bd 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -68,8 +68,8 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" -#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/env_var.h" @@ -618,7 +618,7 @@ Status BaseGPUDevice::MaybeCopyTensorToGPU( done(s); }; - tracing::ScopedAnnotation annotation("MakeTensorFromProto"); + profiler::ScopedAnnotation annotation("MakeTensorFromProto"); device_context_->CopyCPUTensorToDevice( &from, this, copy, std::move(wrapped_done), !timestamped_allocator_ /*sync_dst_compute*/); diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc index 70cf1999c87..c6cee1866ba 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_util.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc @@ -33,7 +33,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/tensor_coding.h" -#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/util/util.h" // IMPLEMENTATION NOTE: @@ -147,7 +147,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev, char* buf = nullptr; const int64 total_bytes = is_dead ? 
0 : tensor.TotalBytes(); if (total_bytes > 0) { - tracing::ScopedAnnotation annotation("SetProtoFromGPU"); + profiler::ScopedAnnotation annotation("SetProtoFromGPU"); alloc = GPUProcessState::singleton()->GetGpuHostAllocator(0); buf = static_cast( alloc->AllocateRaw(Allocator::kAllocatorAlignment, total_bytes)); diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index d29d3a9c947..562e9acf8cb 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -83,21 +83,6 @@ cc_library( deps = [":types"], ) -cc_library( - name = "annotation", - srcs = ["annotation.cc"], - hdrs = ["annotation.h"], - visibility = [ - "//perftools/accelerators/xprof:__subpackages__", - "//tensorflow:__subpackages__", - ], - deps = [ - ":macros", - ":types", - "@com_google_absl//absl/strings", - ], -) - cc_library( name = "byte_order", hdrs = ["byte_order.h"], @@ -606,7 +591,6 @@ cc_binary( visibility = ["//visibility:private"], deps = [ ":abi", - ":annotation", ":byte_order", ":cord", ":cpu_feature_guard", @@ -752,7 +736,6 @@ filegroup( "profile_utils/**/*.h", ], exclude = [ - "annotation.h", "dynamic_annotations.h", "denormal.h", "gif.h", @@ -791,7 +774,6 @@ filegroup( "**/human_readable_json.cc", "**/rocm_rocdl_path.cc", "abi.cc", - "annotation.cc", "cpu_info.cc", "cpu_feature_guard.cc", "denormal.cc", diff --git a/tensorflow/core/platform/build_test.cc b/tensorflow/core/platform/build_test.cc index 520766718a4..2a07ba958c4 100644 --- a/tensorflow/core/platform/build_test.cc +++ b/tensorflow/core/platform/build_test.cc @@ -14,7 +14,6 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/platform/abi.h" -#include "tensorflow/core/platform/annotation.h" #include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cord.h" #include "tensorflow/core/platform/cpu_feature_guard.h" diff --git a/tensorflow/core/platform/default/build_refactor.bzl b/tensorflow/core/platform/default/build_refactor.bzl index bf57ea0d83a..4a1296b4264 100644 --- a/tensorflow/core/platform/default/build_refactor.bzl +++ b/tensorflow/core/platform/default/build_refactor.bzl @@ -340,14 +340,11 @@ TF_DEFAULT_PLATFORM_LIBRARIES = { "//tensorflow/core/platform:tracing.cc", ], "deps": [ - "@com_google_absl//absl/memory", "//tensorflow/core/lib/core:errors", "//tensorflow/core/lib/hash", "//tensorflow/core/platform", - "//tensorflow/core/platform:annotation", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:macros", - "//tensorflow/core/platform:mutex", "//tensorflow/core/platform:strcat", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringpiece", diff --git a/tensorflow/core/platform/tracing.cc b/tensorflow/core/platform/tracing.cc index 79a49bd45db..30aa664ae01 100644 --- a/tensorflow/core/platform/tracing.cc +++ b/tensorflow/core/platform/tracing.cc @@ -17,20 +17,13 @@ limitations under the License. 
#include #include -#include -#include -#include #include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/str_util.h" -#include "tensorflow/core/platform/strcat.h" namespace tensorflow { namespace tracing { namespace { std::atomic unique_arg{1}; -std::atomic enable_annotation; } // namespace const char* GetEventCategoryName(EventCategory category) { @@ -62,13 +55,5 @@ uint64 GetArgForName(StringPiece name) { return Hash64(name.data(), name.size()); } -void ScopedAnnotation::Enable(bool enable) { - return enable_annotation.store(enable, std::memory_order_release); -} - -const bool ScopedAnnotation::IsEnabled() { - return enable_annotation.load(std::memory_order_acquire); -} - } // namespace tracing } // namespace tensorflow diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h index 3091f4cb981..3908d40b695 100644 --- a/tensorflow/core/platform/tracing.h +++ b/tensorflow/core/platform/tracing.h @@ -19,16 +19,9 @@ limitations under the License. // Tracing interface #include -#include -#include -#include -#include "absl/memory/memory.h" -#include "tensorflow/core/platform/annotation.h" #include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/platform.h" -#include "tensorflow/core/platform/strcat.h" #include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/platform/types.h" @@ -96,9 +89,6 @@ inline void RecordEvent(EventCategory category, uint64 arg) { // Records an event for the duration of the instance lifetime through the // currently registered EventCollector. class ScopedRegion { - ScopedRegion(ScopedRegion&) = delete; // Not copy-constructible. - ScopedRegion& operator=(ScopedRegion&) = delete; // Not assignable. - public: ScopedRegion(ScopedRegion&& other) noexcept // Move-constructible. 
: collector_(other.collector_) { @@ -114,7 +104,7 @@ class ScopedRegion { // Same as ScopedRegion(category, GetUniqueArg()), but faster if // EventCollector::IsEnaled() returns false. - ScopedRegion(EventCategory category) + explicit ScopedRegion(EventCategory category) : collector_(GetEventCollector(category)) { if (collector_) { collector_->StartRegion(GetUniqueArg()); @@ -139,6 +129,8 @@ class ScopedRegion { bool IsEnabled() const { return collector_ != nullptr; } private: + TF_DISALLOW_COPY_AND_ASSIGN(ScopedRegion); + const EventCollector* collector_; }; diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD index 2cf64873931..68f431cb24c 100644 --- a/tensorflow/core/profiler/internal/BUILD +++ b/tensorflow/core/profiler/internal/BUILD @@ -369,11 +369,8 @@ cc_library( srcs = ["traceme_recorder.cc"], hdrs = ["traceme_recorder.h"], visibility = [ - "//perftools/accelerators/xprof/xprofilez/cpu:__pkg__", # host_tracer - "//perftools/accelerators/xprof/xprofilez/integration_tests:__pkg__", # traceme_test - "//tensorflow/core:__pkg__", # executor.cc - "//tensorflow/core/profiler/internal/cpu:__pkg__", # host_tracer - "//tensorflow/core/profiler/lib:__pkg__", # traceme + "//perftools/accelerators/xprof/xprofilez:__subpackages__", + "//tensorflow/core/profiler:__subpackages__", ], deps = [ "//tensorflow/core:lib", @@ -407,6 +404,8 @@ cc_library( filegroup( name = "mobile_srcs", srcs = [ + "annotation_stack.cc", + "annotation_stack.h", "profiler_interface.cc", "profiler_interface.h", "traceme_recorder.cc", @@ -415,11 +414,26 @@ filegroup( visibility = ["//visibility:public"], ) +cc_library( + name = "annotation_stack", + srcs = ["annotation_stack.cc"], + hdrs = ["annotation_stack.h"], + visibility = [ + "//perftools/accelerators/xprof/xprofilez:__subpackages__", + "//tensorflow/core/profiler:__subpackages__", + ], + deps = [ + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) + tf_cc_test( name = 
"scoped_annotation_test", size = "small", srcs = ["scoped_annotation_test.cc"], deps = [ + ":annotation_stack", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", @@ -429,7 +443,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - "//tensorflow/core/platform:annotation", + "//tensorflow/core/profiler/lib:scoped_annotation", "@com_google_absl//absl/strings", ], ) @@ -439,7 +453,6 @@ cc_library( hdrs = ["python_traceme.h"], visibility = ["//tensorflow/python/profiler/internal:__pkg__"], deps = [ - ":traceme_recorder", "//tensorflow/core/profiler/lib:traceme", "@com_google_absl//absl/types:optional", ], @@ -450,7 +463,7 @@ cc_library( hdrs = ["python_scoped_annotation.h"], visibility = ["//tensorflow/python/profiler/internal:__pkg__"], deps = [ - "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:scoped_annotation", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], diff --git a/tensorflow/core/platform/annotation.cc b/tensorflow/core/profiler/internal/annotation_stack.cc similarity index 68% rename from tensorflow/core/platform/annotation.cc rename to tensorflow/core/profiler/internal/annotation_stack.cc index f80d29113a3..9a38f4266f4 100644 --- a/tensorflow/core/platform/annotation.cc +++ b/tensorflow/core/profiler/internal/annotation_stack.cc @@ -13,11 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/platform/annotation.h" +#include "tensorflow/core/profiler/internal/annotation_stack.h" namespace tensorflow { -/*static*/ std::string* Annotation::ThreadAnnotation() { - static thread_local std::string annotation; - return &annotation; +namespace profiler { +namespace internal { + +std::atomic g_annotation_enabled; + +} // namespace internal + +/*static*/ string* AnnotationStack::ThreadAnnotationStack() { + static thread_local string annotation_stack; + return &annotation_stack; } + +} // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/internal/annotation_stack.h b/tensorflow/core/profiler/internal/annotation_stack.h new file mode 100644 index 00000000000..7a4cca25053 --- /dev/null +++ b/tensorflow/core/profiler/internal/annotation_stack.h @@ -0,0 +1,95 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_ANNOTATION_STACK_H_ +#define TENSORFLOW_CORE_PROFILER_INTERNAL_ANNOTATION_STACK_H_ + +#include + +#include + +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace profiler { +namespace internal { + +// Whether annotations are enabled. +// Static atomic so AnnotationStack::IsEnabled can be fast and non-blocking. +extern std::atomic g_annotation_enabled; + +} // namespace internal + +// Backend for ScopedAnnotation. +class AnnotationStack { + public: + // Appends name to the annotation for the current thread and returns the + // original length of the annotation. + // A non-empty annotation and the appended name are separated by "::". + // The choice of separator "::" is based on characters not used by + // TensorFlow for its TensorOps. + static size_t PushAnnotation(absl::string_view name) { + string* annotation_stack = ThreadAnnotationStack(); + size_t old_length = annotation_stack->size(); + if (old_length != 0) { + absl::StrAppend(annotation_stack, "::", name); + } else { + *annotation_stack = string(name); + } + return old_length; + } + + static size_t PushAnnotation(string&& name) { + string* annotation_stack = ThreadAnnotationStack(); + size_t old_length = annotation_stack->size(); + if (old_length != 0) { + absl::StrAppend(annotation_stack, "::", name); + } else { + *annotation_stack = std::move(name); + } + return old_length; + } + + // Returns the annotation stack for the current thread. + static const string& Get() { return *ThreadAnnotationStack(); } + + // Resizes the annotation stack for the current thread to its old length. 
+ static void PopAnnotation(size_t old_length) { + ThreadAnnotationStack()->resize(old_length); + } + + static void Enable(bool enable) { + internal::g_annotation_enabled.store(enable, std::memory_order_release); + } + + static bool IsEnabled() { + return internal::g_annotation_enabled.load(std::memory_order_acquire); + } + + private: + AnnotationStack() = default; + + TF_DISALLOW_COPY_AND_ASSIGN(AnnotationStack); + + // Returns a pointer to the annotation stack for the current thread. + static string* ThreadAnnotationStack(); +}; + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_INTERNAL_ANNOTATION_STACK_H_ diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index f5c83c78b47..14a307cd7b2 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -34,6 +34,7 @@ tf_cuda_library( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/profiler/internal:annotation_stack", "//tensorflow/core/profiler/internal:parse_annotation", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/lib:traceme", @@ -106,7 +107,7 @@ tf_cuda_library( ":cupti_interface", ":cupti_utils", "//tensorflow/core:lib", - "//tensorflow/core/platform:annotation", + "//tensorflow/core/profiler/internal:annotation_stack", "@com_google_absl//absl/container:fixed_array", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:node_hash_map", diff --git a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc index 340919133d5..1eab7fc18af 100644 --- a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc @@ -20,10 +20,10 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/platform/annotation.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mem.h" +#include "tensorflow/core/profiler/internal/annotation_stack.h" namespace tensorflow { namespace profiler { @@ -1030,7 +1030,7 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook { std::vector record_indices; record_indices.reserve(params->numDevices); *cbdata->correlationData = -1; // Invalid value. - auto &annotation = tensorflow::Annotation::CurrentAnnotation(); + const auto &annotation = AnnotationStack::Get(); for (int i = 0; i < params->numDevices; ++i) { CUstream stream = params->launchParamsList[i].hStream; ScopedCudaContext scoped_cuda_context(stream); @@ -1531,7 +1531,7 @@ Status CuptiTracer::HandleCallback(CUpti_CallbackDomain domain, device_id, domain, cbid, cbdata)); } else if (cbdata->callbackSite == CUPTI_API_EXIT) { // Set up the map from correlation id to annotation string. - const std::string &annotation = tensorflow::Annotation::CurrentAnnotation(); + const auto &annotation = AnnotationStack::Get(); if (!annotation.empty()) { annotation_map_->Add(device_id, cbdata->correlationId, annotation); } diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer.cc b/tensorflow/core/profiler/internal/gpu/device_tracer.cc index c1163ade2a5..4f953fb55cd 100644 --- a/tensorflow/core/profiler/internal/gpu/device_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/device_tracer.cc @@ -25,9 +25,9 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/abi.h" -#include "tensorflow/core/platform/annotation.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stringprintf.h" +#include "tensorflow/core/profiler/internal/annotation_stack.h" #include "tensorflow/core/profiler/internal/gpu/cupti_tracer.h" #include "tensorflow/core/profiler/internal/gpu/cupti_wrapper.h" #include "tensorflow/core/profiler/internal/parse_annotation.h" @@ -224,8 +224,7 @@ class StepStatsCuptiTracerAdaptor : public CuptiTraceCollector { class GpuTracer : public profiler::ProfilerInterface { public: GpuTracer(CuptiTracer* cupti_tracer, CuptiInterface* cupti_interface) - : cupti_tracer_(cupti_tracer), - trace_collector_(&step_stats_) { + : cupti_tracer_(cupti_tracer), trace_collector_(&step_stats_) { VLOG(1) << "GpuTracer created."; } ~GpuTracer() override {} @@ -321,7 +320,7 @@ Status GpuTracer::DoStart() { collector_options, "", num_gpus, start_walltime_ns, start_gputime_ns, &trace_collector_); - tensorflow::tracing::ScopedAnnotation::Enable(true); + AnnotationStack::Enable(true); cupti_tracer_->Enable(options_, step_stats_cupti_adaptor_.get()); return Status::OK(); } @@ -339,7 +338,7 @@ Status GpuTracer::Start() { Status GpuTracer::DoStop() { cupti_tracer_->Disable(); - tensorflow::tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(false); return Status::OK(); } diff --git a/tensorflow/core/profiler/internal/python_scoped_annotation.h b/tensorflow/core/profiler/internal/python_scoped_annotation.h index bcabad983e4..9b2090e15d4 100644 --- a/tensorflow/core/profiler/internal/python_scoped_annotation.h +++ b/tensorflow/core/profiler/internal/python_scoped_annotation.h @@ -21,7 +21,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "absl/types/optional.h" -#include "tensorflow/core/platform/annotation.h" +#include "tensorflow/core/profiler/lib/scoped_annotation.h" namespace tensorflow { namespace profiler { @@ -36,11 +36,11 @@ class PythonScopedAnnotation { void Enter() { current_.emplace(std::move(name_)); } void Exit() { current_.reset(); } - static bool IsEnabled() { return tracing::ScopedAnnotation::IsEnabled(); } + static bool IsEnabled() { return ScopedAnnotation::IsEnabled(); } private: std::string name_; - absl::optional current_; + absl::optional current_; }; } // namespace profiler diff --git a/tensorflow/core/profiler/internal/python_traceme.h b/tensorflow/core/profiler/internal/python_traceme.h index 0824b0a5411..e9bd78301ce 100644 --- a/tensorflow/core/profiler/internal/python_traceme.h +++ b/tensorflow/core/profiler/internal/python_traceme.h @@ -20,7 +20,6 @@ limitations under the License. #include #include "absl/types/optional.h" -#include "tensorflow/core/profiler/internal/traceme_recorder.h" #include "tensorflow/core/profiler/lib/traceme.h" namespace tensorflow { @@ -34,7 +33,7 @@ class PythonTraceMe { void Enter() { current_.emplace(std::move(activity_name_)); } void Exit() { current_.reset(); } - static bool IsEnabled() { return TraceMeRecorder::Active(); } + static bool IsEnabled() { return TraceMe::Active(); } private: std::string activity_name_; diff --git a/tensorflow/core/profiler/internal/scoped_annotation_test.cc b/tensorflow/core/profiler/internal/scoped_annotation_test.cc index 56a5e974107..70a627fd640 100644 --- a/tensorflow/core/profiler/internal/scoped_annotation_test.cc +++ b/tensorflow/core/profiler/internal/scoped_annotation_test.cc @@ -13,48 +13,49 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/core/profiler/lib/scoped_annotation.h" + #include "absl/strings/str_cat.h" -#include "tensorflow/core/platform/annotation.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" -#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/internal/annotation_stack.h" namespace tensorflow { +namespace profiler { namespace { TEST(ScopedAnnotation, Simple) { { - tracing::ScopedAnnotation trace("blah"); - EXPECT_EQ(Annotation::CurrentAnnotation(), ""); // not enabled + ScopedAnnotation trace("blah"); + EXPECT_EQ(AnnotationStack::Get(), ""); // not enabled } { - tracing::ScopedAnnotation::Enable(true); - tracing::ScopedAnnotation trace("blah"); - EXPECT_EQ(Annotation::CurrentAnnotation(), "blah"); // enabled - tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(true); + ScopedAnnotation trace("blah"); + EXPECT_EQ(AnnotationStack::Get(), "blah"); // enabled + AnnotationStack::Enable(false); } { - tracing::ScopedAnnotation::Enable(true); - tracing::ScopedAnnotation outer("foo"); - tracing::ScopedAnnotation inner("bar"); - EXPECT_EQ(Annotation::CurrentAnnotation(), "foo::bar"); // enabled - tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(true); + ScopedAnnotation outer("foo"); + ScopedAnnotation inner("bar"); + EXPECT_EQ(AnnotationStack::Get(), "foo::bar"); // enabled + AnnotationStack::Enable(false); } - EXPECT_EQ(Annotation::CurrentAnnotation(), ""); // not enabled + EXPECT_EQ(AnnotationStack::Get(), ""); // not enabled } -std::string GenerateRandomString(int length) { - return std::string(length, 'a'); -} +string GenerateRandomString(int length) { return string(length, 'a'); } void BM_ScopedAnnotationDisabled(int iters, int annotation_size) { testing::StopTiming(); - std::string annotation = 
GenerateRandomString(annotation_size); + string annotation = GenerateRandomString(annotation_size); testing::StartTiming(); for (int i = 0; i < iters; i++) { - tracing::ScopedAnnotation trace(annotation); + ScopedAnnotation trace(annotation); } testing::StopTiming(); } @@ -63,66 +64,65 @@ BENCHMARK(BM_ScopedAnnotationDisabled)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationEnabled(int iters, int annotation_size) { testing::StopTiming(); - std::string annotation = GenerateRandomString(annotation_size); - tracing::ScopedAnnotation::Enable(true); + string annotation = GenerateRandomString(annotation_size); + AnnotationStack::Enable(true); testing::StartTiming(); for (int i = 0; i < iters; i++) { - tracing::ScopedAnnotation trace(annotation); + ScopedAnnotation trace(annotation); } testing::StopTiming(); - tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(false); } BENCHMARK(BM_ScopedAnnotationEnabled)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationEnabled_Nested(int iters, int annotation_size) { testing::StopTiming(); - std::string annotation = GenerateRandomString(annotation_size); - tracing::ScopedAnnotation::Enable(true); + string annotation = GenerateRandomString(annotation_size); + AnnotationStack::Enable(true); testing::StartTiming(); for (int i = 0; i < iters; i++) { - tracing::ScopedAnnotation trace(annotation); - { tracing::ScopedAnnotation trace(annotation); } + ScopedAnnotation trace(annotation); + { ScopedAnnotation trace(annotation); } } testing::StopTiming(); - tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(false); } BENCHMARK(BM_ScopedAnnotationEnabled_Nested)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationEnabled_Adhoc(int iters, int annotation_size) { testing::StopTiming(); - tracing::ScopedAnnotation::Enable(true); + AnnotationStack::Enable(true); testing::StartTiming(); for (int i = 0; i < iters; i++) { // generate the annotation on the fly. 
- tracing::ScopedAnnotation trace(absl::StrCat(i, "-", i * i)); + ScopedAnnotation trace(absl::StrCat(i, "-", i * i)); } testing::StopTiming(); - tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(false); } BENCHMARK(BM_ScopedAnnotationEnabled_Adhoc)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationDisabled_Lambda(int iters, int annotation_size) { for (int i = 0; i < iters; i++) { - tracing::ScopedAnnotation trace( - [&]() { return absl::StrCat(i, "-", i * i); }); + ScopedAnnotation trace([&]() { return absl::StrCat(i, "-", i * i); }); } } BENCHMARK(BM_ScopedAnnotationDisabled_Lambda)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationEnabled_Adhoc_Lambda(int iters, int annotation_size) { - tracing::ScopedAnnotation::Enable(true); + AnnotationStack::Enable(true); for (int i = 0; i < iters; i++) { - tracing::ScopedAnnotation trace( - [&]() { return absl::StrCat(i, "-", i * i); }); + ScopedAnnotation trace([&]() { return absl::StrCat(i, "-", i * i); }); } - tracing::ScopedAnnotation::Enable(false); + AnnotationStack::Enable(false); } BENCHMARK(BM_ScopedAnnotationEnabled_Adhoc_Lambda)->Arg(8)->Arg(32)->Arg(128); } // namespace +} // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/internal/traceme_recorder.h b/tensorflow/core/profiler/internal/traceme_recorder.h index 003095de294..62f3e0336b5 100644 --- a/tensorflow/core/profiler/internal/traceme_recorder.h +++ b/tensorflow/core/profiler/internal/traceme_recorder.h @@ -28,12 +28,13 @@ limitations under the License. namespace tensorflow { namespace profiler { - namespace internal { + // Current trace level. // Static atomic so TraceMeRecorder::Active can be fast and non-blocking. // Modified by TraceMeRecorder singleton when tracing starts/stops. extern std::atomic g_trace_level; + } // namespace internal // TraceMeRecorder is a singleton repository of TraceMe events. @@ -78,8 +79,7 @@ class TraceMeRecorder { // Returns whether we're currently recording. 
Racy, but cheap! static inline bool Active(int level = 1) { - return TF_PREDICT_FALSE( - internal::g_trace_level.load(std::memory_order_acquire) >= level); + return internal::g_trace_level.load(std::memory_order_acquire) >= level; } // Default value for trace_level_ when tracing is disabled @@ -96,9 +96,7 @@ class TraceMeRecorder { TraceMeRecorder() = default; - // No copy and assignment - TraceMeRecorder(const TraceMeRecorder&) = delete; - TraceMeRecorder& operator=(const TraceMeRecorder&) = delete; + TF_DISALLOW_COPY_AND_ASSIGN(TraceMeRecorder); void RegisterThread(int32 tid, ThreadLocalRecorder* thread); void UnregisterThread(ThreadEvents&& events); diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index ab4bb3e3c55..d4817ca4133 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -63,6 +63,17 @@ cc_library( ], ) +cc_library( + name = "scoped_annotation", + hdrs = ["scoped_annotation.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core/profiler/internal:annotation_stack", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "profiler_utils", srcs = ["profiler_utils.cc"], @@ -78,6 +89,7 @@ filegroup( "profiler_session.h", "profiler_utils.cc", "profiler_utils.h", + "scoped_annotation.h", "traceme.cc", "traceme.h", ], diff --git a/tensorflow/core/platform/annotation.h b/tensorflow/core/profiler/lib/scoped_annotation.h similarity index 50% rename from tensorflow/core/platform/annotation.h rename to tensorflow/core/profiler/lib/scoped_annotation.h index f0d066ccfb5..1ba6982b8f1 100644 --- a/tensorflow/core/platform/annotation.h +++ b/tensorflow/core/profiler/lib/scoped_annotation.h @@ -12,66 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_PLATFORM_ANNOTATION_H_ -#define TENSORFLOW_CORE_PLATFORM_ANNOTATION_H_ +#ifndef TENSORFLOW_CORE_PROFILER_LIB_SCOPED_ANNOTATION_H_ +#define TENSORFLOW_CORE_PROFILER_LIB_SCOPED_ANNOTATION_H_ #include #include -#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/internal/annotation_stack.h" namespace tensorflow { +namespace profiler { -// Backend for ScopedAnnotation. -class Annotation { - public: - // Appends name to the annotation for the current thread and returns the - // original length of the annotation. - // Append name to the current annotation, separated by "::". - // The choice of separator "::" is based on characters not used by - // TensorFlow for its TensorOps. - static size_t PushAnnotation(absl::string_view name) { - string* annotation = ThreadAnnotation(); - size_t old_length = annotation->size(); - if (old_length != 0) { - absl::StrAppend(annotation, "::", name); - } else { - *annotation = string(name); - } - return old_length; - } - - static size_t PushAnnotation(string&& name) { - string* annotation = ThreadAnnotation(); - size_t old_length = annotation->size(); - if (old_length != 0) { - absl::StrAppend(annotation, "::", name); - } else { - *annotation = std::move(name); - } - return old_length; - } - - // Returns the annotation for the current thread. - static const string& CurrentAnnotation() { return *ThreadAnnotation(); } - - // Resizes the annotation for the current thread to its old length. - static void PopAnnotation(size_t old_length) { - ThreadAnnotation()->resize(old_length); - } - - private: - Annotation(const Annotation&) = delete; // Unconstructible. - - // Returns a reference to the annotation for the current thread. 
- static string* ThreadAnnotation(); -}; - -namespace tracing { // Adds an annotation to all activities for the duration of the instance // lifetime through the currently registered TraceCollector. // @@ -84,8 +39,8 @@ namespace tracing { class ScopedAnnotation { public: explicit ScopedAnnotation(absl::string_view name) { - if (TF_PREDICT_FALSE(IsEnabled())) { - old_length_ = Annotation::PushAnnotation(name); + if (TF_PREDICT_FALSE(AnnotationStack::IsEnabled())) { + old_length_ = AnnotationStack::PushAnnotation(name); } } @@ -93,21 +48,21 @@ class ScopedAnnotation { : ScopedAnnotation(absl::string_view(name)) {} explicit ScopedAnnotation(const string& name) { - if (TF_PREDICT_FALSE(IsEnabled())) { - old_length_ = Annotation::PushAnnotation(name); + if (TF_PREDICT_FALSE(AnnotationStack::IsEnabled())) { + old_length_ = AnnotationStack::PushAnnotation(name); } } explicit ScopedAnnotation(string&& name) { - if (TF_PREDICT_FALSE(IsEnabled())) { - old_length_ = Annotation::PushAnnotation(std::move(name)); + if (TF_PREDICT_FALSE(AnnotationStack::IsEnabled())) { + old_length_ = AnnotationStack::PushAnnotation(std::move(name)); } } template explicit ScopedAnnotation(NameGeneratorT name_generator) { - if (TF_PREDICT_FALSE(IsEnabled())) { - old_length_ = Annotation::PushAnnotation(name_generator()); + if (TF_PREDICT_FALSE(AnnotationStack::IsEnabled())) { + old_length_ = AnnotationStack::PushAnnotation(name_generator()); } } @@ -117,20 +72,22 @@ class ScopedAnnotation { // fail probably due to compiler in that presubmit config. std::atomic_thread_fence(std::memory_order_acquire); if (TF_PREDICT_FALSE(old_length_ != kInvalidLength)) { - Annotation::PopAnnotation(old_length_); + AnnotationStack::PopAnnotation(old_length_); } } - static void Enable(bool enable); - static const bool IsEnabled(); + static bool IsEnabled() { return AnnotationStack::IsEnabled(); } private: // signals that annotation is disabled at the constructor. 
static constexpr size_t kInvalidLength = static_cast(-1); + + TF_DISALLOW_COPY_AND_ASSIGN(ScopedAnnotation); + size_t old_length_ = kInvalidLength; }; -} // namespace tracing +} // namespace profiler } // namespace tensorflow -#endif // TENSORFLOW_CORE_PLATFORM_ANNOTATION_H_ +#endif // TENSORFLOW_CORE_PROFILER_LIB_SCOPED_ANNOTATION_H_ diff --git a/tensorflow/core/profiler/lib/traceme.h b/tensorflow/core/profiler/lib/traceme.h index b8e4acf6ab8..47217959d8e 100644 --- a/tensorflow/core/profiler/lib/traceme.h +++ b/tensorflow/core/profiler/lib/traceme.h @@ -20,6 +20,7 @@ limitations under the License. #include "absl/strings/string_view.h" #include "tensorflow/core/platform/env_time.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/traceme_recorder.h" @@ -79,7 +80,7 @@ class TraceMe { // out their host traces based on verbosity. explicit TraceMe(absl::string_view activity_name, int level = 1) { DCHECK_GE(level, 1); - if (TraceMeRecorder::Active(level)) { + if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) { new (&no_init_.name) string(activity_name); start_time_ = EnvTime::Default()->NowNanos(); } else { @@ -94,7 +95,7 @@ class TraceMe { // constructor so we avoid copying them when tracing is disabled. 
explicit TraceMe(string &&activity_name, int level = 1) { DCHECK_GE(level, 1); - if (TraceMeRecorder::Active(level)) { + if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) { new (&no_init_.name) string(std::move(activity_name)); start_time_ = EnvTime::Default()->NowNanos(); } else { @@ -124,7 +125,7 @@ class TraceMe { template explicit TraceMe(NameGeneratorT name_generator, int level = 1) { DCHECK_GE(level, 1); - if (TraceMeRecorder::Active(level)) { + if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) { new (&no_init_.name) string(name_generator()); start_time_ = EnvTime::Default()->NowNanos(); } else { @@ -144,8 +145,8 @@ class TraceMe { // spuriously record the event. This is extremely rare, and acceptable as // event will be discarded when its start timestamp fall outside of the // start/stop session timestamp. - if (start_time_ != kUntracedActivity) { - if (TraceMeRecorder::Active()) { + if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) { + if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) { TraceMeRecorder::Record({kCompleteActivity, std::move(no_init_.name), start_time_, EnvTime::Default()->NowNanos()}); } @@ -156,10 +157,6 @@ class TraceMe { ~TraceMe() { Stop(); } - // TraceMe is not movable or copyable. - TraceMe(const TraceMe &) = delete; - TraceMe &operator=(const TraceMe &) = delete; - // Static API, for use when scoped objects are inconvenient. // Record the start time of an activity. @@ -172,19 +169,23 @@ class TraceMe { // Record the end time of an activity started by ActivityStart(). static void ActivityEnd(uint64 activity_id) { // We don't check the level again (see ~TraceMe()). 
- if (activity_id != kUntracedActivity) { - if (TraceMeRecorder::Active()) { + if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) { + if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) { ActivityEndImpl(activity_id); } } } + static bool Active(int level = 1) { return TraceMeRecorder::Active(level); } + private: // Activity ID or start time used when tracing is disabled. constexpr static uint64 kUntracedActivity = 0; // Activity ID used as a placeholder when both start and end are present. constexpr static uint64 kCompleteActivity = 1; + TF_DISALLOW_COPY_AND_ASSIGN(TraceMe); + static uint64 ActivityStartImpl(absl::string_view activity_name); static void ActivityEndImpl(uint64 activity_id);