fix: Add support for truncate_long_and_double in FX

gs-olive · gs-olive · commit 6e538054c2ba · 2023-05-30T12:40:42.000-07:00
- Add support and testing for `double` type inputs
diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py
@@ -41,6 +41,7 @@ def __init__(
         explicit_batch_dimension: bool = False,
         explicit_precision: bool = False,
         logger_level=None,
+        truncate_long_and_double=False,
     ):
         super().__init__(module)
 
@@ -70,6 +71,7 @@ def __init__(
 
         self.optimization_profiles: Optional[List] = None
         self.input_specs = input_specs
+        self.truncate_long_and_double = truncate_long_and_double
         self.input_specs_iter = 0
         self.validate_input_specs()
         self._cur_node_name: Optional[str] = None
@@ -306,7 +308,9 @@ def placeholder(self, target, args, kwargs):
                 self.optimization_profiles[i].set_shape(target, *shape_range)
 
         return self.network.add_input(
-            name=target, shape=tuple(shape), dtype=torch_dtype_to_trt(dtype)
+            name=target,
+            shape=tuple(shape),
+            dtype=torch_dtype_to_trt(dtype, self.truncate_long_and_double),
         )
 
     def call_module(self, target, args, kwargs):
diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py
@@ -43,6 +43,7 @@ def compile(
     use_experimental_fx_rt=False,
     correctness_atol=1e-1,
     correctness_rtol=1e-1,
+    truncate_long_and_double=False,
 ) -> nn.Module:
     """
     Takes in original module, input and lowering setting, run lowering workflow to turn module
@@ -62,6 +63,7 @@ def compile(
         cuda_graph_batch_size: Cuda graph batch size, default to be -1.
         dynamic_batch: batch dimension (dim=0) is dynamic.
         use_experimental_fx_rt: Uses the next generation TRTModule which supports both Python and TorchScript based execution (including in C++).
+        truncate_long_and_double: Whether to automatically truncate long (int64) and double (float64) tensor inputs to int32 and float32 for TRT Engines
     Returns:
         A torch.nn.Module lowered by TensorRT.
     """
@@ -85,6 +87,7 @@ def compile(
         use_experimental_rt=use_experimental_fx_rt,
         correctness_atol=correctness_atol,
         correctness_rtol=correctness_rtol,
+        truncate_long_and_double=truncate_long_and_double,
     )
     lowerer = Lowerer.create(lower_setting=lower_setting)
     return lowerer(module, input)
@@ -129,6 +132,7 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
             logger_level=trt.Logger.VERBOSE
             if self.lower_setting.verbose_log
             else trt.Logger.WARNING,
+            truncate_long_and_double=self.lower_setting.truncate_long_and_double,
         )
 
         interp_result: TRTInterpreterResult = interpreter.run(
diff --git a/py/torch_tensorrt/fx/lower_setting.py b/py/torch_tensorrt/fx/lower_setting.py
@@ -101,3 +101,4 @@ class LowerSetting(LowerSettingBasic):
     correctness_atol: float = 0.1
     correctness_rtol: float = 0.1
     use_experimental_rt: bool = False
+    truncate_long_and_double: bool = False
diff --git a/py/torch_tensorrt/fx/test/core/test_trt_module.py b/py/torch_tensorrt/fx/test/core/test_trt_module.py
@@ -72,6 +72,7 @@ def forward(self, x):
         interp = TRTInterpreter(
             mod,
             input_specs=InputTensorSpec.from_tensors(inputs),
+            truncate_long_and_double=True,
         )
         trt_mod = TRTModule(*interp.run(lower_precision=LowerPrecision.FP32))
         torch.save(trt_mod, "trt.pt")
@@ -99,6 +100,66 @@ def forward(self, x):
         interp = TRTInterpreter(
             mod,
             input_specs=InputTensorSpec.from_tensors(inputs),
+            truncate_long_and_double=True,
+        )
+        trt_mod = TRTModule(*interp.run(lower_precision=LowerPrecision.FP32))
+        st = trt_mod.state_dict()
+
+        new_trt_mod = TRTModule()
+        new_trt_mod.load_state_dict(st)
+
+        torch.testing.assert_close(
+            new_trt_mod(inputs[0].cuda()).cpu(),
+            ref_output,
+            rtol=1e-04,
+            atol=1e-04,
+            check_dtype=False,
+        )
+
+
+class TestTRTModuleFloat64Input(TestCase):
+    def test_save_and_load_trt_module(self):
+        class TestModule(torch.nn.Module):
+            def forward(self, x):
+                return x + x
+
+        inputs = [torch.randn(5, 5).double()]
+        mod = TestModule().eval()
+        ref_output = mod(*inputs)
+
+        mod = acc_tracer.trace(mod, inputs)
+        interp = TRTInterpreter(
+            mod,
+            input_specs=InputTensorSpec.from_tensors(inputs),
+            truncate_long_and_double=True,
+        )
+        trt_mod = TRTModule(*interp.run(lower_precision=LowerPrecision.FP32))
+        torch.save(trt_mod, "trt.pt")
+        reload_trt_mod = torch.load("trt.pt")
+
+        torch.testing.assert_close(
+            reload_trt_mod(inputs[0].cuda()).cpu(),
+            ref_output,
+            rtol=1e-04,
+            atol=1e-04,
+            check_dtype=False,
+        )
+        os.remove(f"{os.getcwd()}/trt.pt")
+
+    def test_save_and_load_state_dict(self):
+        class TestModule(torch.nn.Module):
+            def forward(self, x):
+                return x + x
+
+        inputs = [torch.randn(5, 5).double()]
+        mod = TestModule().eval()
+        ref_output = mod(*inputs)
+
+        mod = acc_tracer.trace(mod, inputs)
+        interp = TRTInterpreter(
+            mod,
+            input_specs=InputTensorSpec.from_tensors(inputs),
+            truncate_long_and_double=True,
         )
         trt_mod = TRTModule(*interp.run(lower_precision=LowerPrecision.FP32))
         st = trt_mod.state_dict()
diff --git a/py/torch_tensorrt/fx/trt_module.py b/py/torch_tensorrt/fx/trt_module.py
@@ -156,6 +156,15 @@ def forward(self, *inputs):
                         inputs = (
                             inputs[:i] + (inputs[i].to(torch.int32),) + inputs[i + 1 :]
                         )
+                    elif (
+                        inputs[i].dtype == torch.float64
+                        and self.input_dtypes[i] == torch.float32
+                    ):
+                        inputs = (
+                            inputs[:i]
+                            + (inputs[i].to(torch.float32),)
+                            + inputs[i + 1 :]
+                        )
 
                     assert (
                         inputs[i].dtype == self.input_dtypes[i]
diff --git a/py/torch_tensorrt/fx/utils.py b/py/torch_tensorrt/fx/utils.py
@@ -39,7 +39,9 @@ def from_str(label: str) -> Optional["LowerPrecision"]:
             return None
 
 
-def torch_dtype_to_trt(dtype: torch.dtype) -> TRTDataType:
+def torch_dtype_to_trt(
+    dtype: torch.dtype, truncate_long_and_double: bool = False
+) -> TRTDataType:
     """
     Convert PyTorch data types to TensorRT data types.
 
@@ -56,14 +58,31 @@ def torch_dtype_to_trt(dtype: torch.dtype) -> TRTDataType:
     elif dtype == torch.int32:
         return trt.int32
     elif dtype == torch.int64:
-        _LOGGER.warn(
-            "Detected Int64 Input, Casting to Int32 for TRT Engine Compatibility"
-        )
-        return trt.int32
+        if truncate_long_and_double:
+            _LOGGER.warn(
+                "Detected Int64 Input, Casting to Int32 for TRT Engine Compatibility"
+            )
+            return trt.int32
+        else:
+            raise TypeError(
+                "Detected Int64 Input which is not supported by tensorrt, enable compilation"
+                + " option truncate_long_and_double=True to cast input to Int32 for TRT Engine"
+            )
     elif dtype == torch.float16:
         return trt.float16
     elif dtype == torch.float32:
         return trt.float32
+    elif dtype == torch.float64:
+        if truncate_long_and_double:
+            _LOGGER.warn(
+                "Detected Float64 Input, Casting to Float32 for TRT Engine Compatibility"
+            )
+            return trt.float32
+        else:
+            raise TypeError(
+                "Detected Float64 Input which is not supported by tensorrt, enable compilation"
+                + " option truncate_long_and_double=True to cast input to Float32 for TRT Engine"
+            )
     else:
         raise TypeError("%s is not supported by tensorrt" % dtype)