The auto-mixed-precision pass is actually run before TF-TRT while we want it to run after TF-TRT, see b/170160323.
*** Original change description *** PR #42974: [TF-TRT] Adding AMP support for non converted OPs & native segments Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/42974 This PR adds support for TF-TRT AMP on the Tensorflow OPs that are not converted by TF-TRT and the fallback native segments. **Changes in the API:** - `TrtConversionParams` obtains a new parameter: `allow_mixed_precision_on_unconverted_ops`, True by default. Can be turned off manually. - This behavior is **only active** if `Trt... *** PiperOrigin-RevId: 335658381 Change-Id: Ib3d56c8d22cbc8fad505a92bd3d9b7efea76d79d
This commit is contained in:
parent
ede1385636
commit
6b59f0662e
@@ -296,10 +296,6 @@ # Release 2.4.0
|
||||
|
||||
* `tf.debugging.assert_shapes()` now works on `SparseTensor`s (#36268).
|
||||
|
||||
* `TensorRT`
|
||||
* Add parameter allow_mixed_precision_on_unconverted_ops to
|
||||
TrtConversionParams.
|
||||
|
||||
* `tf.print`:
|
||||
|
||||
* Bug fix in `tf.print()` with `OrderedDict` where if an `OrderedDict`
|
||||
|
@@ -24,10 +24,6 @@ namespace grappler {
|
||||
|
||||
enum class AutoMixedPrecisionMode { CUDA, MKL };
|
||||
|
||||
// Note: This is primarily used by the tf.experimental.tensorrt.Converter class
|
||||
// to use mixed precision on ops not converted by TensorRT. It is also used for
|
||||
// the soon-to-be-deprecated enable_mixed_precision_graph_rewrite API.
|
||||
//
|
||||
// Convert data types to float16 or bfloat16 where appropriate to improve
|
||||
// performance on GPUs or CPUs.
|
||||
class AutoMixedPrecision : public GraphOptimizer {
|
||||
|
@@ -119,7 +119,7 @@ class TrtConversionParams(
|
||||
"rewriter_config_template", "max_workspace_size_bytes",
|
||||
"precision_mode", "minimum_segment_size", "is_dynamic_op",
|
||||
"maximum_cached_engines", "use_calibration", "max_batch_size",
|
||||
"allow_build_at_runtime", "allow_mixed_precision_on_unconverted_ops"
|
||||
"allow_build_at_runtime"
|
||||
])):
|
||||
"""Parameters that are used for TF-TRT conversion.
|
||||
|
||||
@@ -160,10 +160,6 @@ class TrtConversionParams(
|
||||
inputs during runtime, then a new TensorRT engine is built at runtime if
|
||||
allow_build_at_runtime=True, and otherwise native TF is used. This
|
||||
argument is only effective if is_dynamic_op=True.
|
||||
allow_mixed_precision_on_unconverted_ops: whether to allow TensorFlow to
|
||||
use mixed precision on the operations which are not converted to inside
|
||||
a TensorRT engine. This argument has a default value of True, and is
|
||||
only effective if the requested `precision_mode` is lower than FP32.
|
||||
"""
|
||||
|
||||
def __new__(cls,
|
||||
@@ -175,13 +171,13 @@ class TrtConversionParams(
|
||||
maximum_cached_engines=1,
|
||||
use_calibration=True,
|
||||
max_batch_size=1,
|
||||
allow_build_at_runtime=True,
|
||||
allow_mixed_precision_on_unconverted_ops=True):
|
||||
return super(TrtConversionParams, cls).__new__(
|
||||
cls, rewriter_config_template, max_workspace_size_bytes, precision_mode,
|
||||
minimum_segment_size, is_dynamic_op, maximum_cached_engines,
|
||||
use_calibration, max_batch_size, allow_build_at_runtime,
|
||||
allow_mixed_precision_on_unconverted_ops)
|
||||
allow_build_at_runtime=True):
|
||||
return super(TrtConversionParams,
|
||||
cls).__new__(cls, rewriter_config_template,
|
||||
max_workspace_size_bytes, precision_mode,
|
||||
minimum_segment_size, is_dynamic_op,
|
||||
maximum_cached_engines, use_calibration,
|
||||
max_batch_size, allow_build_at_runtime)
|
||||
|
||||
|
||||
DEFAULT_TRT_CONVERSION_PARAMS = TrtConversionParams()
|
||||
@@ -252,12 +248,6 @@ def _check_conversion_params(conversion_params, is_v2=False):
|
||||
"allow_build_at_runtime=False. If building TensorRT engines "
|
||||
"at runtime is desired, set is_dynamic_op=True."))
|
||||
|
||||
if not conversion_params.allow_mixed_precision_on_unconverted_ops:
|
||||
tf_logging.warn("Mixed precision on OPs not converted by TF-TRT has been "
|
||||
"deactivated. We recommend setting: "
|
||||
"`allow_mixed_precision_on_unconverted_ops=True` for "
|
||||
"performance reasons.")
|
||||
|
||||
|
||||
def _check_trt_version_compatibility():
|
||||
"""Check compatibility of TensorRT version.
|
||||
@@ -357,14 +347,6 @@ def get_tensorrt_rewriter_config(conversion_params,
|
||||
rewriter_config_with_trt.CopyFrom(
|
||||
conversion_params.rewriter_config_template)
|
||||
|
||||
if (conversion_params.allow_mixed_precision_on_unconverted_ops and
|
||||
conversion_params.precision_mode != TrtPrecisionMode.FP32):
|
||||
rewriter_config_with_trt.auto_mixed_precision = \
|
||||
rewriter_config_pb2.RewriterConfig.ON
|
||||
else:
|
||||
rewriter_config_with_trt.auto_mixed_precision = \
|
||||
rewriter_config_pb2.RewriterConfig.OFF
|
||||
|
||||
# Disabling optimizers should happen after CopyFrom the template
|
||||
# otherwise the template can overwrite the disablement.
|
||||
if disable_non_trt_optimizers:
|
||||
@@ -384,7 +366,6 @@ def get_tensorrt_rewriter_config(conversion_params,
|
||||
rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)
|
||||
rewriter_config_with_trt.pin_to_host_optimization = off
|
||||
rewriter_config_with_trt.auto_parallel.enable = False
|
||||
rewriter_config_with_trt.auto_mixed_precision = off
|
||||
|
||||
return rewriter_config_with_trt
|
||||
|
||||
@@ -458,8 +439,7 @@ class TrtGraphConverter(object):
|
||||
minimum_segment_size=3,
|
||||
is_dynamic_op=False,
|
||||
maximum_cached_engines=1,
|
||||
use_calibration=True,
|
||||
allow_mixed_precision_on_unconverted_ops=True):
|
||||
use_calibration=True):
|
||||
"""Initialize the converter.
|
||||
|
||||
Args:
|
||||
@@ -498,10 +478,6 @@ class TrtGraphConverter(object):
|
||||
will occur. Please note that accuracy may be negatively affected if
|
||||
there is a mismatch between which tensors TRT quantizes and which
|
||||
tensors were trained with fake quantization.
|
||||
allow_mixed_precision_on_unconverted_ops: whether to allow TensorFlow to
|
||||
use mixed precision on the operations which are not converted to inside
|
||||
a TensorRT engine. This argument has a default value of True, and is
|
||||
only effective if the requested `precision_mode` is lower than FP32.
|
||||
|
||||
Raises:
|
||||
ValueError: if the combination of the parameters is invalid.
|
||||
@@ -561,10 +537,7 @@ class TrtGraphConverter(object):
|
||||
maximum_cached_engines=maximum_cached_engines,
|
||||
use_calibration=use_calibration,
|
||||
max_batch_size=max_batch_size,
|
||||
allow_build_at_runtime=True,
|
||||
allow_mixed_precision_on_unconverted_ops=
|
||||
allow_mixed_precision_on_unconverted_ops
|
||||
)
|
||||
allow_build_at_runtime=True)
|
||||
_check_conversion_params(self._conversion_params)
|
||||
|
||||
def _run_conversion(self):
|
||||
|
@@ -7,10 +7,6 @@ tf_class {
|
||||
name: "allow_build_at_runtime"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "allow_mixed_precision_on_unconverted_ops"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "is_dynamic_op"
|
||||
mtype: "<type \'property\'>"
|
||||
|
Loading…
Reference in New Issue
Block a user