diff --git a/docsrc/tutorials/getting_started_with_fx_path.rst b/docsrc/tutorials/getting_started_with_fx_path.rst index d54f3d91af..eb39fc0eef 100644 --- a/docsrc/tutorials/getting_started_with_fx_path.rst +++ b/docsrc/tutorials/getting_started_with_fx_path.rst @@ -34,7 +34,7 @@ Torch-TensorRT (FX Path) is in ``Beta`` phase and always recommended to work wit Converting a PyTorch Model to TensorRT Engine --------------------------------------------- -In general, users are welcome to use the ``lower_to_trt()`` to finish the conversion from a model to tensorRT engine. It is a wrapper API that consists of the major steps needed to finish this converison. Please refer to ``lower_example.py`` file in ``examples/fx``. +In general, users are welcome to use ``compile()`` to finish the conversion from a model to a TensorRT engine. It is a wrapper API that consists of the major steps needed to finish this conversion. Please refer to the ``lower_example.py`` file in ``examples/fx``. In this section, we will go through an example to illustrate the major steps that FX path uses. Users can refer to ``fx2trt_example.py`` file in ``examples/fx``. @@ -60,9 +60,9 @@ symbolically traced variables cannot be used as inputs to control flow This means the model contains dynamic control flow. Please refer to section “Dynamic Control Flow” in `FX guide `_. * **Step 2: Build TensorRT engine** -There are `two different modes `_ for how TensorRT handles batch dimension, explicit batch dimension and implicit batch dimension. This mode was used by early versions of TensorRT, and is now deprecated but continues to be supported for backwards compatibility. In explicit batch mode, all dimensions are explicit and can be dynamic, that is their length can change at execution time. Many new features, such as dynamic shapes and loops, are available only in this mode. User can still choose to use implicit batch mode when they set ``explicit_batch_dimension=False`` in ``lower_to_trt()``. 
We do not recommend to use it since it will lack of support in future TensorRT versions. +There are `two different modes `_ for how TensorRT handles the batch dimension: explicit batch dimension and implicit batch dimension. Implicit batch mode was used by early versions of TensorRT, and is now deprecated but continues to be supported for backwards compatibility. In explicit batch mode, all dimensions are explicit and can be dynamic, that is, their length can change at execution time. Many new features, such as dynamic shapes and loops, are available only in this mode. Users can still choose implicit batch mode by setting ``explicit_batch_dimension=False`` in ``compile()``. We do not recommend using it, since it will lack support in future TensorRT versions. -Explicit batch is the default mode and it must be set for dynamic shape. For most of vision task, user can choose to enable ``dynamic_batch`` in ``lower_to_trt()`` if they want to get the similar effects as implicit mode where only batch dimension changes. It has some requirements: +Explicit batch is the default mode and it must be set for dynamic shapes. For most vision tasks, users can choose to enable ``dynamic_batch`` in ``compile()`` if they want to get effects similar to implicit mode, where only the batch dimension changes. It has some requirements: 1. Shapes of inputs, outputs and activations are fixed except batch dimension. 2. Inputs, outputs and activations have batch dimension as the major dimension. 3. All the operators in the model do not modify batch dimension (permute, transpose, split, etc.) or compute over batch dimension (sum, softmax, etc.). 
diff --git a/examples/fx/lower_example.py b/examples/fx/lower_example.py index 71f15a2f88..7f3b374f44 100644 --- a/examples/fx/lower_example.py +++ b/examples/fx/lower_example.py @@ -4,7 +4,7 @@ import torch import torchvision -from torch_tensorrt.fx.lower import lower_to_trt +from torch_tensorrt.fx.lower import compile from torch_tensorrt.fx.utils import LowerPrecision @@ -183,7 +183,7 @@ def run_configuration_benchmark( time = benchmark_torch_function(conf.batch_iter, lambda: module(*input)) elif not conf.jit: # Run lowering eager mode benchmark - lowered_module = lower_to_trt( + lowered_module = compile( module, input, max_batch_size=conf.batch_size, diff --git a/examples/fx/torchdynamo_example.py b/examples/fx/torchdynamo_example.py index 6bb93f6d6e..a2e7627800 100644 --- a/examples/fx/torchdynamo_example.py +++ b/examples/fx/torchdynamo_example.py @@ -5,7 +5,7 @@ import torch import torchdynamo import torchvision -from torch_tensorrt.fx.lower import lower_to_trt +from torch_tensorrt.fx.lower import compile from torch_tensorrt.fx.utils import LowerPrecision from torchdynamo.optimizations import backends @@ -197,7 +197,7 @@ def run_configuration_benchmark( if conf.trt: # Run lowering eager mode benchmark - lowered_module = lower_to_trt( + lowered_module = compile( module, input, max_batch_size=conf.batch_size, diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index f6487a4402..8b5f235531 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -7,7 +7,7 @@ from enum import Enum import torch_tensorrt.fx -from torch_tensorrt.fx.lower import lower_to_trt +import torch_tensorrt.fx.lower from torch_tensorrt.fx.utils import LowerPrecision @@ -140,7 +140,7 @@ def compile( else: raise ValueError(f"Precision {enabled_precisions} not supported on FX") - return lower_to_trt( + return torch_tensorrt.fx.lower.compile( module, inputs, lower_precision=lower_precision, diff --git a/py/torch_tensorrt/fx/__init__.py 
b/py/torch_tensorrt/fx/__init__.py index aeae62d86d..c1c42c446f 100644 --- a/py/torch_tensorrt/fx/__init__.py +++ b/py/torch_tensorrt/fx/__init__.py @@ -1,4 +1,6 @@ from .converters import * # noqa: F403 F401 +import logging + from .converter_registry import ( # noqa CONVERTERS, NO_EXPLICIT_BATCH_DIM_SUPPORT, @@ -9,3 +11,5 @@ from .input_tensor_spec import generate_input_specs, InputTensorSpec # noqa from .lower_setting import LowerSetting # noqa from .trt_module import TRTModule # noqa + +logging.basicConfig(level=logging.INFO) diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py index 387b4db841..deeee14178 100644 --- a/py/torch_tensorrt/fx/lower.py +++ b/py/torch_tensorrt/fx/lower.py @@ -25,7 +25,7 @@ Input = Sequence[Any] -def lower_to_trt( +def compile( module: nn.Module, input, max_batch_size: int = 2048, @@ -216,28 +216,32 @@ def create( ) ) - @decorate_method(validate_inference(atol=1e-1, rtol=1e-1)) def __call__( self, module: nn.Module, inputs: Input, additional_inputs: Optional[Input] = None, ) -> nn.Module: - module.eval() - - if ( - self.lower_pass_manager_builder.lower_setting.lower_precision - == LowerPrecision.FP16 - ): - module.half() - inputs = tuple( - x.half() if x is not None and x.dtype == torch.float32 else x - for x in inputs + lower_setting = self.lower_pass_manager_builder.lower_setting + atol = lower_setting.correctness_atol + rtol = lower_setting.correctness_rtol + + @validate_inference(atol=atol, rtol=rtol) + def do_lower(module: nn.Module, inputs: Input) -> nn.Module: + module.eval() + if ( + self.lower_pass_manager_builder.lower_setting.lower_precision + == LowerPrecision.FP16 + ): + module.half() + inputs = tuple( + x.half() if x is not None and x.dtype == torch.float32 else x + for x in inputs + ) + pm = self.lower_pass_manager_builder.build_trt_lower_pipeline( + inputs, additional_inputs ) - pm = self.lower_pass_manager_builder.build_trt_lower_pipeline( - inputs, additional_inputs - ) - - lower_result = 
pm(module) + lower_result = pm(module) + return lower_result - return lower_result + return do_lower(module, inputs) diff --git a/py/torch_tensorrt/fx/lower_setting.py b/py/torch_tensorrt/fx/lower_setting.py index c1d02229e3..b4ad86caee 100644 --- a/py/torch_tensorrt/fx/lower_setting.py +++ b/py/torch_tensorrt/fx/lower_setting.py @@ -70,6 +70,8 @@ class LowerSetting(LowerSettingBasic): dynamic_batch: enable the dynamic shape in TRT with dim=-1 for the 1st dimension. tactic_sources: tactic sources for TensorRT kernel selection. Default to None, meaning all possible tactic sources. + correctness_atol: absolute tolerance for correctness check + correctness_rtol: relative tolerance for correctness check """ input_specs: List[InputTensorSpec] = dc.field(default_factory=list) @@ -90,3 +92,5 @@ class LowerSetting(LowerSettingBasic): opt_profile_replica: int = 1 dynamic_batch: bool = True tactic_sources: Optional[int] = None + correctness_atol: float = 0.1 + correctness_rtol: float = 0.1 diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py index 0bfffd210f..1f3a39d836 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py @@ -1,5 +1,6 @@ -import torch import unittest + +import torch import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec diff --git a/py/torch_tensorrt/fx/test/passes/test_graph_opts.py b/py/torch_tensorrt/fx/test/passes/test_graph_opts.py index 9db4183e64..c91c456eb3 100644 --- a/py/torch_tensorrt/fx/test/passes/test_graph_opts.py +++ b/py/torch_tensorrt/fx/test/passes/test_graph_opts.py @@ -8,8 +8,6 @@ import torch_tensorrt.fx.tracer.acc_tracer.acc_tracer as acc_tracer from torch_tensorrt.fx.passes.graph_opts import common_subexpression_elimination 
-_LOGGER: logging.Logger = logging.getLogger(__name__) - _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py index e23ab5dd81..3ce3b7ade8 100644 --- a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py +++ b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py @@ -10,8 +10,6 @@ import torch_tensorrt.fx.diagnostics as diag -_LOGGER: logging.Logger = logging.getLogger(__name__) - _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py b/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py index 59d2f49042..ac0a02ac1d 100644 --- a/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py +++ b/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py @@ -37,6 +37,11 @@ def profile_trt_module( layer_info = json.loads(trt_mod.get_layer_info()) # pyre-ignore[29] shape_map = {} for layer in layer_info["Layers"]: + # if type is str, it means verbose_profile is off in interpreter.run() + # Theoretically, we can print profiling information without shape information + # but we choose not to print profiling information so we can use verbose_profile to control it + if type(layer) is str: + return name = layer["Name"] input_str = ", ".join( [str(x.get("Dimensions", "[]")) for x in layer.get("Inputs", [])]