From fe020fccbb7a08f386d351877937dc9cdb4554d5 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Mon, 11 Mar 2019 14:50:02 -0700 Subject: [PATCH 1/2] Store an empty engine in the case that dynamic engine creation fails. The next time the same input shape appears, we will retrieve the empty engine which tells us to not rebuild the same failing engine again. --- tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index 0f800d7cf26..e612d311bce 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -633,6 +633,9 @@ EngineContext* TRTEngineOp::GetEngine( } LOG(WARNING) << "Engine creation for batch size " << batch_size << " failed " << status; + // Store an empty engine here so we don't try to build the same engine + // again. + cache.emplace(engine_input_shapes, empty_context); return &empty_context; } VLOG(1) << "Conversion is done"; From 3764ac1d6c6d037548ed608ce1e61b98f881d257 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Tue, 12 Mar 2019 09:32:34 -0700 Subject: [PATCH 2/2] Improve message when engine creation fails. 
Remove spammy warning --- .../tf2tensorrt/kernels/trt_engine_op.cc | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index e612d311bce..30f29902d73 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -376,9 +376,9 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, } EngineContext* engine_context = GetEngine(input_shapes, ctx); if (!engine_context->cuda_engine) { - LOG(WARNING) << "Engine retrieval for input shapes: " - << TensorShapeUtils::ShapeListString(input_shapes) - << " failed. Running native segment for " << name(); + VLOG(1) << "Engine retrieval for input shapes: " + << TensorShapeUtils::ShapeListString(input_shapes) + << " failed. Running native segment for " << name(); ExecuteNativeSegment(ctx, helper); return; } @@ -625,17 +625,12 @@ EngineContext* TRTEngineOp::GetEngine( partial_shapes, &logger, allocator, calibrator_.get(), &engine, use_calibration_, &convert_successfully); if (!status.ok()) { - if (convert_successfully) { - // This means it fail to build the engine even when the network is built - // successfully, probably due to internal issues. In this case we don't - // retry in the future. - cache.emplace(engine_input_shapes, absl::make_unique<EngineContext>()); - } - LOG(WARNING) << "Engine creation for batch size " << batch_size - << " failed " << status; - // Store an empty engine here so we don't try to build the same engine - // again. - cache.emplace(engine_input_shapes, empty_context); + LOG(WARNING) << "Engine creation for " << name() << " failed. " + << "The native segment will be used instead. " + << "Reason: " << status; + // Store an empty engine in the cache for these input shapes so we don't + // try to build the same failing engine again. 
+ cache.emplace(engine_input_shapes, absl::make_unique<EngineContext>()); return &empty_context; } VLOG(1) << "Conversion is done";