The auto-mixed-precision pass is actually run before TF-TRT while we want it to run after TF-TRT, see b/170160323.

*** Original change description ***

PR #42974: [TF-TRT] Adding AMP support for non converted OPs & native segments

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/42974

This PR adds support for TF-TRT AMP on the Tensorflow OPs that are not converted by TF-TRT and the fallback native segments.

**Changes in the API:**
- `TrtConversionParams` obtains a new parameter: `allow_mixed_precision_on_unconverted_ops`, True by default. Can be turned off manually.
- This behavior is **only active** if `Trt...

***

PiperOrigin-RevId: 335658381
Change-Id: Ib3d56c8d22cbc8fad505a92bd3d9b7efea76d79d
This commit is contained in:
Bixia Zheng 2020-10-06 09:39:02 -07:00 committed by TensorFlower Gardener
parent ede1385636
commit 6b59f0662e
4 changed files with 10 additions and 49 deletions

View File

@ -296,10 +296,6 @@ # Release 2.4.0
* `tf.debugging.assert_shapes()` now works on `SparseTensor`s (#36268).
* `TensorRT`
* Add parameter allow_mixed_precision_on_unconverted_ops to
TrtConversionParams.
* `tf.print`:
* Bug fix in `tf.print()` with `OrderedDict` where if an `OrderedDict`

View File

@ -24,10 +24,6 @@ namespace grappler {
enum class AutoMixedPrecisionMode { CUDA, MKL };
// Note: This is primarily used by the tf.experimental.tensorrt.Converter class
// to use mixed precision on ops not converted by TensorRT. It is also used for
// the soon-to-be-deprecated enable_mixed_precision_graph_rewrite API.
//
// Convert data types to float16 or bfloat16 where appropriate to improve
// performance on GPUs or CPUs.
class AutoMixedPrecision : public GraphOptimizer {

View File

@ -119,7 +119,7 @@ class TrtConversionParams(
"rewriter_config_template", "max_workspace_size_bytes",
"precision_mode", "minimum_segment_size", "is_dynamic_op",
"maximum_cached_engines", "use_calibration", "max_batch_size",
"allow_build_at_runtime", "allow_mixed_precision_on_unconverted_ops"
"allow_build_at_runtime"
])):
"""Parameters that are used for TF-TRT conversion.
@ -160,10 +160,6 @@ class TrtConversionParams(
inputs during runtime, then a new TensorRT engine is built at runtime if
allow_build_at_runtime=True, and otherwise native TF is used. This
argument is only effective if is_dynamic_op=True.
allow_mixed_precision_on_unconverted_ops: whether to allow TensorFlow to
use mixed precision on the operations which are not converted to inside
a TensorRT engine. This argument has a default value of True, and is
only effective if the requested `precision_mode` is lower than FP32.
"""
def __new__(cls,
@ -175,13 +171,13 @@ class TrtConversionParams(
maximum_cached_engines=1,
use_calibration=True,
max_batch_size=1,
allow_build_at_runtime=True,
allow_mixed_precision_on_unconverted_ops=True):
return super(TrtConversionParams, cls).__new__(
cls, rewriter_config_template, max_workspace_size_bytes, precision_mode,
minimum_segment_size, is_dynamic_op, maximum_cached_engines,
use_calibration, max_batch_size, allow_build_at_runtime,
allow_mixed_precision_on_unconverted_ops)
allow_build_at_runtime=True):
return super(TrtConversionParams,
cls).__new__(cls, rewriter_config_template,
max_workspace_size_bytes, precision_mode,
minimum_segment_size, is_dynamic_op,
maximum_cached_engines, use_calibration,
max_batch_size, allow_build_at_runtime)
DEFAULT_TRT_CONVERSION_PARAMS = TrtConversionParams()
@ -252,12 +248,6 @@ def _check_conversion_params(conversion_params, is_v2=False):
"allow_build_at_runtime=False. If building TensorRT engines "
"at runtime is desired, set is_dynamic_op=True."))
if not conversion_params.allow_mixed_precision_on_unconverted_ops:
tf_logging.warn("Mixed precision on OPs not converted by TF-TRT has been "
"deactivated. We recommend setting: "
"`allow_mixed_precision_on_unconverted_ops=True` for "
"performance reasons.")
def _check_trt_version_compatibility():
"""Check compatibility of TensorRT version.
@ -357,14 +347,6 @@ def get_tensorrt_rewriter_config(conversion_params,
rewriter_config_with_trt.CopyFrom(
conversion_params.rewriter_config_template)
if (conversion_params.allow_mixed_precision_on_unconverted_ops and
conversion_params.precision_mode != TrtPrecisionMode.FP32):
rewriter_config_with_trt.auto_mixed_precision = \
rewriter_config_pb2.RewriterConfig.ON
else:
rewriter_config_with_trt.auto_mixed_precision = \
rewriter_config_pb2.RewriterConfig.OFF
# Disabling optimizers should happen after CopyFrom the template
# otherwise the template can overwrite the disablement.
if disable_non_trt_optimizers:
@ -384,7 +366,6 @@ def get_tensorrt_rewriter_config(conversion_params,
rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)
rewriter_config_with_trt.pin_to_host_optimization = off
rewriter_config_with_trt.auto_parallel.enable = False
rewriter_config_with_trt.auto_mixed_precision = off
return rewriter_config_with_trt
@ -458,8 +439,7 @@ class TrtGraphConverter(object):
minimum_segment_size=3,
is_dynamic_op=False,
maximum_cached_engines=1,
use_calibration=True,
allow_mixed_precision_on_unconverted_ops=True):
use_calibration=True):
"""Initialize the converter.
Args:
@ -498,10 +478,6 @@ class TrtGraphConverter(object):
will occur. Please note that accuracy may be negatively affected if
there is a mismatch between which tensors TRT quantizes and which
tensors were trained with fake quantization.
allow_mixed_precision_on_unconverted_ops: whether to allow TensorFlow to
use mixed precision on the operations which are not converted to inside
a TensorRT engine. This argument has a default value of True, and is
only effective if the requested `precision_mode` is lower than FP32.
Raises:
ValueError: if the combination of the parameters is invalid.
@ -561,10 +537,7 @@ class TrtGraphConverter(object):
maximum_cached_engines=maximum_cached_engines,
use_calibration=use_calibration,
max_batch_size=max_batch_size,
allow_build_at_runtime=True,
allow_mixed_precision_on_unconverted_ops=
allow_mixed_precision_on_unconverted_ops
)
allow_build_at_runtime=True)
_check_conversion_params(self._conversion_params)
def _run_conversion(self):

View File

@ -7,10 +7,6 @@ tf_class {
name: "allow_build_at_runtime"
mtype: "<type \'property\'>"
}
member {
name: "allow_mixed_precision_on_unconverted_ops"
mtype: "<type \'property\'>"
}
member {
name: "is_dynamic_op"
mtype: "<type \'property\'>"