Merge pull request #26618 from trevor-m:tmorris_tftrt_dont_rebuild_failed_engines

PiperOrigin-RevId: 238143344
This commit is contained in:
TensorFlower Gardener 2019-03-12 18:49:50 -07:00
commit f3954bf900

View File

@@ -376,9 +376,9 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
} }
EngineContext* engine_context = GetEngine(input_shapes, ctx); EngineContext* engine_context = GetEngine(input_shapes, ctx);
if (!engine_context->cuda_engine) { if (!engine_context->cuda_engine) {
LOG(WARNING) << "Engine retrieval for input shapes: " VLOG(1) << "Engine retrieval for input shapes: "
<< TensorShapeUtils::ShapeListString(input_shapes) << TensorShapeUtils::ShapeListString(input_shapes)
<< " failed. Running native segment for " << name(); << " failed. Running native segment for " << name();
ExecuteNativeSegment(ctx, helper); ExecuteNativeSegment(ctx, helper);
return; return;
} }
@@ -625,14 +625,12 @@ EngineContext* TRTEngineOp::GetEngine(
partial_shapes, &logger, allocator, calibrator_.get(), &engine, partial_shapes, &logger, allocator, calibrator_.get(), &engine,
use_calibration_, &convert_successfully); use_calibration_, &convert_successfully);
if (!status.ok()) { if (!status.ok()) {
if (convert_successfully) { LOG(WARNING) << "Engine creation for " << name() << " failed. "
// This means it fail to build the engine even when the network is built << "The native segment will be used instead. "
// successfully, probably due to internal issues. In this case we don't << "Reason: " << status;
// retry in the future. // Store an empty engine in the cache for these input shapes so we don't
cache.emplace(engine_input_shapes, absl::make_unique<EngineContext>()); // try to build the same failing engine again.
} cache.emplace(engine_input_shapes, absl::make_unique<EngineContext>());
LOG(WARNING) << "Engine creation for batch size " << batch_size
<< " failed " << status;
return &empty_context; return &empty_context;
} }
VLOG(1) << "Conversion is done"; VLOG(1) << "Conversion is done";