fix: Add support for fake tensors

gs-olive · gs-olive · commit 38e80b162fd9 · 2023-05-26T09:35:05.000-07:00
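- Refactor `to_numpy` function to handle non-tensor inputs (Python `int` and
`float` constants), so `create_constant` no longer wraps them in torch
tensors, avoiding the fake tensor issue during compilation of constants
- Remove the `fake_tensor_unsupported` decorator from the Dynamo backends
- Add regression test cases to elicit the behavior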
diff --git a/py/torch_tensorrt/dynamo/backend/backends.py b/py/torch_tensorrt/dynamo/backend/backends.py
@@ -14,16 +14,13 @@
 )
 from torch_tensorrt.dynamo.backend.conversion import convert_module
 
-from torch._dynamo.backends.common import fake_tensor_unsupported
-
 from torch._functorch.aot_autograd import aot_module_simplified, make_boxed_compiler
 
 
 logger = logging.getLogger(__name__)
 
 
 @td.register_backend(name="torch_tensorrt")
-@fake_tensor_unsupported
 def torch_tensorrt_backend(
     gm: torch.fx.GraphModule,
     sample_inputs: Sequence[torch.Tensor],
@@ -35,7 +32,6 @@ def torch_tensorrt_backend(
 
 
 @td.register_backend(name="aot_torch_tensorrt_aten")
-@fake_tensor_unsupported
 def aot_torch_tensorrt_aten_backend(
     gm: torch.fx.GraphModule,
     sample_inputs: Sequence[torch.Tensor],
@@ -55,7 +51,6 @@ def aot_torch_tensorrt_aten_backend(
     )
 
 
-@fake_tensor_unsupported
 def _pretraced_backend(
     gm: torch.fx.GraphModule,
     sample_inputs: Sequence[torch.Tensor],
diff --git a/py/torch_tensorrt/dynamo/backend/test/test_specialized_models.py b/py/torch_tensorrt/dynamo/backend/test/test_specialized_models.py
@@ -0,0 +1,122 @@
+from utils import lower_graph_testing
+from torch.testing._internal.common_utils import run_tests, TestCase
+import torch
+from torch_tensorrt.dynamo import compile
+
+
+class TestFakeTensors(TestCase):
+    """Regression tests for lowering graphs that mix a tensor with a Python scalar.
+
+    Each test lowers a small module, asserts that every expected ATen op was
+    encountered, then compiles the module with Torch-TensorRT and compares its
+    output against the traced Torch module on the same CUDA input.
+    """
+
+    def test_lowering_mul_int(self):
+        """Lower and compile `x * 7`, a tensor multiplied by a Python int."""
+
+        class MulInt(torch.nn.Module):
+            def forward(self, x):
+                return x * 7
+
+        # Operations expected to be included in the traced graph after decompositions
+        expected_ops = {
+            torch.ops.aten.mul.Tensor,
+        }
+
+        inputs = [
+            torch.rand(
+                3,
+                5,
+                7,
+            ).cuda(),
+        ]
+
+        fx_graph = torch.fx.symbolic_trace(MulInt())
+        _, expected_ops_unseen = lower_graph_testing(
+            fx_graph,
+            inputs,
+            expected_ops=expected_ops,
+            min_block_size=1,
+        )
+
+        self.assertEqual(
+            len(expected_ops_unseen),
+            0,
+            f"The following expected ops were not encountered: {expected_ops_unseen}",
+        )
+
+        torch._dynamo.reset()
+
+        # Validate that the results between Torch and Torch-TRT are similar
+        optimized_model = compile(
+            fx_graph, inputs, min_block_size=1, pass_through_build_failures=True
+        )
+        optimized_model_results = optimized_model(*inputs).detach().cpu()
+        torch_model_results = fx_graph(*inputs).detach().cpu()
+
+        max_diff = float(
+            torch.max(torch.abs(optimized_model_results - torch_model_results))
+        )
+        self.assertAlmostEqual(
+            max_diff,
+            0,
+            msg="MulInt TRT outputs don't match with the original model.",
+        )
+
+    def test_lowering_add_float(self):
+        """Lower and compile `x + 84.0`, a tensor added to a Python float."""
+
+        class AddFloat(torch.nn.Module):
+            def forward(self, x):
+                return x + 84.0
+
+        # Operations expected to be included in the traced graph after decompositions
+        expected_ops = {
+            torch.ops.aten.add.Tensor,
+        }
+
+        inputs = [
+            torch.rand(
+                1,
+                5,
+                7,
+                9,
+            ).cuda(),
+        ]
+
+        fx_graph = torch.fx.symbolic_trace(AddFloat())
+        _, expected_ops_unseen = lower_graph_testing(
+            fx_graph,
+            inputs,
+            expected_ops=expected_ops,
+            min_block_size=1,
+        )
+
+        self.assertEqual(
+            len(expected_ops_unseen),
+            0,
+            f"The following expected ops were not encountered: {expected_ops_unseen}",
+        )
+
+        torch._dynamo.reset()
+
+        # Validate that the results between Torch and Torch-TRT are similar
+        optimized_model = compile(
+            fx_graph, inputs, min_block_size=1, pass_through_build_failures=True
+        )
+        optimized_model_results = optimized_model(*inputs).detach().cpu()
+        torch_model_results = fx_graph(*inputs).detach().cpu()
+
+        max_diff = float(
+            torch.max(torch.abs(optimized_model_results - torch_model_results))
+        )
+        self.assertAlmostEqual(
+            max_diff,
+            0,
+            msg="AddFloat TRT outputs don't match with the original model.",
+        )
+
+
+if __name__ == "__main__":
+    run_tests()
diff --git a/py/torch_tensorrt/fx/converters/converter_utils.py b/py/torch_tensorrt/fx/converters/converter_utils.py
@@ -151,28 +151,37 @@ def extend_mod_attr_to_tuple(mod: torch.nn.Module, name: str, size: int):
     return extend_attr_to_tuple(val, size)
 
 
-def to_numpy(tensor: Optional[torch.Tensor]) -> Optional[np.ndarray]:
+def to_numpy(value: Optional[Union[torch.Tensor, int, float]]) -> Optional[np.ndarray]:
     """
     Convert a PyTorch Tensor to a Numpy Array. If the tensor is
     quantized it will be dequantized first.
 
     Args:
-        tensor (Optional[torch.Tensor]): A PyTorch tensor or None.
+        value (Optional[Union[torch.Tensor, int, float]]): A PyTorch tensor, an int or float (converted to a one-element int32/float32 array), or None
 
     Returns:
         A Numpy array.
     """
 
-    if tensor is None:
-        return tensor
+    if value is None:
+        return value
 
-    assert isinstance(
-        tensor, torch.Tensor
-    ), f"to_numpy can only be called on None or a torch.Tensor, got: {tensor}"
-    if tensor.is_quantized:
-        tensor = tensor.dequantize()
+    elif isinstance(value, torch.Tensor):
+        if value.is_quantized:
+            value = value.dequantize()
 
-    return tensor.cpu().detach().contiguous().numpy()
+        return value.cpu().detach().contiguous().numpy()
+
+    elif isinstance(value, int):
+        return np.array([value], dtype=np.int32)
+
+    elif isinstance(value, float):
+        return np.array([value], dtype=np.float32)
+
+    else:
+        raise AssertionError(
+            f"to_numpy can only be called on None, int, float, or torch.Tensor, got: {value}"
+        )
 
 
 def has_dynamic_shape(shape: Shape) -> bool:
@@ -244,11 +253,6 @@ def create_constant(
     Returns:
         A TensorRT ITensor that represents the given value.
     """
-    if isinstance(value, int):
-        value = torch.IntTensor([value])
-
-    if isinstance(value, float):
-        value = torch.Tensor([value])
 
     if dtype:
         value = value.to(dtype)