diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD index 73401999df2..57cacf3e0e9 100644 --- a/tensorflow/compiler/tf2tensorrt/BUILD +++ b/tensorflow/compiler/tf2tensorrt/BUILD @@ -433,6 +433,7 @@ tf_cuda_library( "//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler/costs:graph_properties", "//tensorflow/core/grappler/optimizers:meta_optimizer", + "//tensorflow/core/profiler/lib:annotated_traceme", "//tensorflow/stream_executor/lib", "//tensorflow/tools/graph_transforms:transform_utils", ] + if_tensorrt([":tensorrt_lib"]) + tf_custom_op_library_additional_deps(), diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index eed37cdff53..1f5456494ce 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -56,6 +56,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/strided_slice_op.h" @@ -1409,6 +1410,13 @@ Status Converter::BuildCudaEngine( TrtUniquePtrType* engine, int max_batch_size, size_t max_workspace_size_bytes, nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator, TrtShapeOptimizationProfile* profiles) { + tensorflow::profiler::AnnotatedTraceMe activity( + [&]() { + return tensorflow::profiler::TraceMeOpOverride("TRTEngineOp", + "BuildEngine"); + }, + tensorflow::profiler::TraceMeLevel::kInfo); + VLOG(1) << "Configuring TensorRT builder"; trt_builder_->setMaxBatchSize(max_batch_size); trt_builder_->setGpuAllocator(allocator); diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index 2d56209a068..a1d5cfa7685 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -45,6 +45,7 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/util/env_var.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -433,6 +434,9 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) void TRTEngineOp::ExecuteNativeSegment(OpKernelContext* ctx, AsyncHelper* helper) { + tensorflow::profiler::TraceMe activity( + "TRTEngineOp::ExecuteNativeSegment", + tensorflow::profiler::TraceMeLevel::kInfo); std::vector inputs; std::vector* outputs = new std::vector(); if (native_execution_func_handle_ == kInvalidHandle) { @@ -469,6 +473,9 @@ void TRTEngineOp::ExecuteNativeSegment(OpKernelContext* ctx, void TRTEngineOp::ExecuteCalibration(OpKernelContext* ctx, TRTEngineCacheResource* cache_res, AsyncHelper* helper) { + tensorflow::profiler::TraceMe activity( + "TRTEngineOp::ExecuteCalibration", + tensorflow::profiler::TraceMeLevel::kInfo); VLOG(1) << "Executing TRT calibration: " << name(); helper->Ref(); core::ScopedUnref sc(helper); @@ -594,6 +601,8 @@ static bool AllowEngineNativeSegmentExecution() { void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, AsyncOpKernel::DoneCallback done) { + tensorflow::profiler::TraceMe activity( + "TRTEngineOp::ComputeAsync", tensorflow::profiler::TraceMeLevel::kInfo); auto helper = new AsyncHelper(done); core::ScopedUnref sc(helper); @@ -718,6 +727,9 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, Status TRTEngineOp::ExecuteTrtEngine(OpKernelContext* ctx, EngineContext* engine_context, int trt_context_idx) { + tensorflow::profiler::TraceMe activity( + "TRTEngineOp::ExecuteTrtEngine", + tensorflow::profiler::TraceMeLevel::kInfo); VLOG(1) << "Executing TRT engine: " << name(); auto& cuda_engine = engine_context->cuda_engine; diff --git a/tensorflow/core/profiler/lib/traceme_encode.h b/tensorflow/core/profiler/lib/traceme_encode.h index 1a97d8b0e19..de1046cc726 100644 --- a/tensorflow/core/profiler/lib/traceme_encode.h +++ b/tensorflow/core/profiler/lib/traceme_encode.h @@ -145,6 +145,17 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp( return op_name; } +// Concatenates op_name and op_type. +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOpOverride( + absl::string_view op_name, absl::string_view op_type) { + return absl::StrCat("#tf_op=", op_name, ":", op_type, "#"); +} + +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOpOverride( + const char* op_name, const char* op_type) { + return absl::StrCat("#tf_op=", op_name, ":", op_type, "#"); +} + } // namespace profiler } // namespace tensorflow