[TF-TRT] Add xprof trace to the TRTEngineOp runtime.
Mark the execution of BuildCudaEngine with AnnotatedTraceMe to collect the GPU execution time for the routine. Mark the execution of ExecuteNativeSegment, ExecuteCalibration, ComputeAsync and ExecuteTrtEngine with TraceMe to collect the CPU execution time for the routines. We may want to fine-tune this later. PiperOrigin-RevId: 347490797 Change-Id: Id6906ea0d433133c5ec5d2b2d234525645bb3c9d
This commit is contained in:
parent
af9ad9d0f4
commit
ce0cb26681
tensorflow
compiler/tf2tensorrt
core/profiler/lib
@ -433,6 +433,7 @@ tf_cuda_library(
|
||||
"//tensorflow/core/grappler/clusters:virtual_cluster",
|
||||
"//tensorflow/core/grappler/costs:graph_properties",
|
||||
"//tensorflow/core/grappler/optimizers:meta_optimizer",
|
||||
"//tensorflow/core/profiler/lib:annotated_traceme",
|
||||
"//tensorflow/stream_executor/lib",
|
||||
"//tensorflow/tools/graph_transforms:transform_utils",
|
||||
] + if_tensorrt([":tensorrt_lib"]) + tf_custom_op_library_additional_deps(),
|
||||
|
@ -56,6 +56,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/tensor_coding.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
|
||||
#include "tensorflow/core/public/version.h"
|
||||
#include "tensorflow/core/util/env_var.h"
|
||||
#include "tensorflow/core/util/strided_slice_op.h"
|
||||
@ -1409,6 +1410,13 @@ Status Converter::BuildCudaEngine(
|
||||
TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, int max_batch_size,
|
||||
size_t max_workspace_size_bytes, nvinfer1::IGpuAllocator* allocator,
|
||||
TRTInt8Calibrator* calibrator, TrtShapeOptimizationProfile* profiles) {
|
||||
tensorflow::profiler::AnnotatedTraceMe activity(
|
||||
[&]() {
|
||||
return tensorflow::profiler::TraceMeOpOverride("TRTEngineOp",
|
||||
"BuildEngine");
|
||||
},
|
||||
tensorflow::profiler::TraceMeLevel::kInfo);
|
||||
|
||||
VLOG(1) << "Configuring TensorRT builder";
|
||||
trt_builder_->setMaxBatchSize(max_batch_size);
|
||||
trt_builder_->setGpuAllocator(allocator);
|
||||
|
@ -45,6 +45,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/stream_executor.h"
|
||||
#include "tensorflow/core/platform/thread_annotations.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/util/env_var.h"
|
||||
#include "tensorflow/stream_executor/lib/statusor.h"
|
||||
|
||||
@ -433,6 +434,9 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context)
|
||||
|
||||
void TRTEngineOp::ExecuteNativeSegment(OpKernelContext* ctx,
|
||||
AsyncHelper* helper) {
|
||||
tensorflow::profiler::TraceMe activity(
|
||||
"TRTEngineOp::ExecuteNativeSegment",
|
||||
tensorflow::profiler::TraceMeLevel::kInfo);
|
||||
std::vector<Tensor> inputs;
|
||||
std::vector<Tensor>* outputs = new std::vector<Tensor>();
|
||||
if (native_execution_func_handle_ == kInvalidHandle) {
|
||||
@ -469,6 +473,9 @@ void TRTEngineOp::ExecuteNativeSegment(OpKernelContext* ctx,
|
||||
void TRTEngineOp::ExecuteCalibration(OpKernelContext* ctx,
|
||||
TRTEngineCacheResource* cache_res,
|
||||
AsyncHelper* helper) {
|
||||
tensorflow::profiler::TraceMe activity(
|
||||
"TRTEngineOp::ExecuteCalibration",
|
||||
tensorflow::profiler::TraceMeLevel::kInfo);
|
||||
VLOG(1) << "Executing TRT calibration: " << name();
|
||||
helper->Ref();
|
||||
core::ScopedUnref sc(helper);
|
||||
@ -594,6 +601,8 @@ static bool AllowEngineNativeSegmentExecution() {
|
||||
|
||||
void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
|
||||
AsyncOpKernel::DoneCallback done) {
|
||||
tensorflow::profiler::TraceMe activity(
|
||||
"TRTEngineOp::ComputeAsync", tensorflow::profiler::TraceMeLevel::kInfo);
|
||||
auto helper = new AsyncHelper(done);
|
||||
core::ScopedUnref sc(helper);
|
||||
|
||||
@ -718,6 +727,9 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
|
||||
Status TRTEngineOp::ExecuteTrtEngine(OpKernelContext* ctx,
|
||||
EngineContext* engine_context,
|
||||
int trt_context_idx) {
|
||||
tensorflow::profiler::TraceMe activity(
|
||||
"TRTEngineOp::ExecuteTrtEngine",
|
||||
tensorflow::profiler::TraceMeLevel::kInfo);
|
||||
VLOG(1) << "Executing TRT engine: " << name();
|
||||
auto& cuda_engine = engine_context->cuda_engine;
|
||||
|
||||
|
@ -145,6 +145,17 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp(
|
||||
return op_name;
|
||||
}
|
||||
|
||||
// Builds a trace annotation string from op_name and op_type.
// The result has the form "#tf_op=<op_name>:<op_type>#", i.e. the two
// arguments joined by ':' and wrapped in the "#tf_op=" / "#" markers.
// Returns a newly built std::string; the arguments are only read.
|
||||
TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOpOverride(
|
||||
absl::string_view op_name, absl::string_view op_type) {
|
||||
return absl::StrCat("#tf_op=", op_name, ":", op_type, "#");
|
||||
}
|
||||
|
||||
// Overload of TraceMeOpOverride taking C strings.
// Produces the same "#tf_op=<op_name>:<op_type>#" string as the
// absl::string_view overload.
// NOTE(review): both pointers are presumably expected to be non-null,
// NUL-terminated strings -- confirm against callers.
TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOpOverride(
|
||||
const char* op_name, const char* op_type) {
|
||||
return absl::StrCat("#tf_op=", op_name, ":", op_type, "#");
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user