@@ -196,12 +196,11 @@ def __init__(
         }
         self.arg_dynamic_shapes: Optional[tuple[Any]] = None
         self.kwarg_dynamic_shapes: Optional[dict[Any, Any]] = None
-        self.total_dynamic_shape: Optional[dict[Any, Any]] = None
 
         self.settings = CompilationSettings(**compilation_options)
         self.run_info: Optional[tuple[Any, ...]] = None
         self.state_dict_metadata: dict[str, torch.Size] = {}
-        self.store_state_dict_metadata()
+        self._store_state_dict_metadata()
 
         cls = self.__class__
         self.__class__ = type(
@@ -211,11 +210,31 @@ def __init__(
         )
         self.init_finished = True
 
-    def set_dynamic_shape_hint(
+    def set_expected_dynamic_shape_range(
         self,
         args_dynamic_shape: tuple[dict[Any, Any]],
         kwargs_dynamic_shape: dict[str, Any],
     ) -> None:
+        """
+        Set the dynamic shape range. The shape hint should EXACTLY follow the arg_inputs and kwarg_inputs passed to the forward function
+        and must not omit any entries. If a dynamic shape is not required for an input, an empty dictionary should be given
+        as the shape hint for that input.
+
+        Example:
+            def forward(a, b, c=0, d=0):
+                pass
+
+            seq_len = torch.export.Dim("seq_len", min=1, max=10)
+            args_dynamic_shape = ({0: seq_len}, {})  # b does not have a dynamic shape
+            kwargs_dynamic_shape = {'c': {0: seq_len}, 'd': {}}  # d does not have a dynamic shape
+            # Later, when you call the function
+            forward(*(a, b), **{'c': ..., 'd': ...})
+
+
+        Arguments:
+            args_dynamic_shape (tuple[dict[Any, Any]]): Dynamic shape hint for the arg_inputs
+            kwargs_dynamic_shape (dict[str, Any]): Dynamic shape hint for the kwarg_inputs
+        """
         assert isinstance(
             args_dynamic_shape, tuple
         ), "args dynamic shape has to be a tuple"
@@ -224,19 +243,31 @@ def set_dynamic_shape_hint(
224243 ), "args dynamic shape has to be a dictionary"
225244 self .kwarg_dynamic_shapes = kwargs_dynamic_shape
226245 self .arg_dynamic_shapes = args_dynamic_shape
227- self .total_dynamic_shape = self .kwarg_dynamic_shapes .copy ()
228- signature = list (
229- inspect .signature (self .original_model .forward ).parameters .keys ()
230- )
231- for i , arg in enumerate (self .arg_dynamic_shapes ):
232- self .total_dynamic_shape [signature [i ]] = arg
233- self .refit_state .set_state (RefitFlag .NEEDS_RECOMPILE )
234246
235247 # Clear cached inputs
236248 self .arg_inputs = tuple ()
237249 self .kwarg_inputs = {}
238250
239- def store_state_dict_metadata (self ) -> None :
251+ self .refit_state .set_state (RefitFlag .NEEDS_RECOMPILE )
252+
253+ def _get_total_dynamic_shapes (self ) -> dict [str , Any ] | None :
254+ if not self .arg_dynamic_shapes and not self .kwarg_dynamic_shapes :
255+ return None
256+ total_dynamic_shape = {}
257+ if self .arg_dynamic_shapes :
258+ signature = list (
259+ inspect .signature (self .original_model .forward ).parameters .keys ()
260+ )
261+ for i , arg in enumerate (self .arg_dynamic_shapes ):
262+ total_dynamic_shape [signature [i ]] = arg
263+
264+ if self .kwarg_dynamic_shapes :
265+ for kwargs , kwargs_dynamic_shape in self .kwarg_dynamic_shapes .items ():
266+ total_dynamic_shape [kwargs ] = kwargs_dynamic_shape
267+
268+ return total_dynamic_shape
269+
270+ def _store_state_dict_metadata (self ) -> None :
240271 for k , v in self .original_model .state_dict ().items ():
241272 self .state_dict_metadata [k ] = v .shape
242273
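The merge that `_get_total_dynamic_shapes` performs is easy to see in isolation: positional hints are keyed by the `forward()` parameter names so that arg and kwarg hints end up in one dict. A minimal standalone sketch (`merge_dynamic_shapes` is a hypothetical name, not part of this diff):

```python
# Standalone sketch of the merge above.
import inspect

def merge_dynamic_shapes(forward_fn, arg_hints, kwarg_hints):
    if not arg_hints and not kwarg_hints:
        return None  # no hints -> export statically
    total = {}
    params = list(inspect.signature(forward_fn).parameters.keys())
    for i, hint in enumerate(arg_hints or ()):
        total[params[i]] = hint      # i-th positional hint -> i-th parameter name
    total.update(kwarg_hints or {})  # keyword hints are already keyed by name
    return total

def forward(a, b, c=None):
    return a

print(merge_dynamic_shapes(forward, ({0: "seq_len"}, {}), {"c": {}}))
# -> {'a': {0: 'seq_len'}, 'b': {}, 'c': {}}
```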
@@ -328,7 +359,7 @@ def compile(self) -> None:
             self.original_model,
             self.arg_inputs,
             kwargs=self.kwarg_inputs,
-            dynamic_shapes=self.total_dynamic_shape,
+            dynamic_shapes=self._get_total_dynamic_shapes(),
         )
         self.gm = dynamo_compile(
             self.exp_program,
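For reference, the `dynamic_shapes` value handed to `torch.export.export` here is exactly the per-parameter dict built by `_get_total_dynamic_shapes`. A hedged sketch with a toy model (`Toy` is illustrative; `torch.export.export` and `torch.export.Dim` are the real APIs):

```python
# What the export call above receives, sketched standalone.
import torch

class Toy(torch.nn.Module):
    def forward(self, a, b=None):
        return a + b

seq_len = torch.export.Dim("seq_len", min=1, max=10)
exp_program = torch.export.export(
    Toy(),
    (torch.randn(4, 8),),
    kwargs={"b": torch.randn(4, 8)},
    # Keyed by forward() parameter name, as _get_total_dynamic_shapes builds it.
    dynamic_shapes={"a": {0: seq_len}, "b": {0: seq_len}},
)
print(exp_program.graph_signature)
```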
@@ -340,40 +371,75 @@ def compile(self) -> None:
         torch.cuda.empty_cache()
 
     def _validate_inputs(self, *args: Any, **kwargs: Any) -> None:
+
+        if not self.arg_inputs:
+            logger.info("First time compilation initiated. This may take some time.")
+            self.refit_state.set_state(RefitFlag.NEEDS_RECOMPILE)
+            self._store_inputs(args, kwargs)
+            if self.arg_dynamic_shapes or self.kwarg_dynamic_shapes:
+                if not self._validates_dynamic_hints():
+                    logger.warning(
+                        "Invalid dynamic shape hint. Compiling module for the provided input shapes (static)"
+                    )
+                    self.arg_dynamic_shapes = None
+                    self.kwarg_dynamic_shapes = None
+            return
+
+        # If the input shape changes or falls outside the dynamic shape range, recompile the engine
         try:
-            if (
-                not self.arg_inputs
-                or not MutableTorchTensorRTModule.check_inputs_equal(
-                    self.arg_inputs, args, dynamic_shapes=self.arg_dynamic_shapes
-                )
-                or not MutableTorchTensorRTModule.check_inputs_equal(
-                    self.kwarg_inputs, kwargs, dynamic_shapes=self.kwarg_dynamic_shapes
-                )
+            if not MutableTorchTensorRTModule._check_inputs_shape(
+                self.arg_inputs, args, dynamic_shapes=self.arg_dynamic_shapes
+            ) or not MutableTorchTensorRTModule._check_inputs_shape(
+                self.kwarg_inputs, kwargs, dynamic_shapes=self.kwarg_dynamic_shapes
             ):
                 logger.info("Input change detected.")
                 self.refit_state.set_state(RefitFlag.NEEDS_RECOMPILE)
-                self.store_inputs(args, kwargs)
+                self._store_inputs(args, kwargs)
         except DynamicShapeOutOfRangeException as e:
             logger.info("Input change detected.")
             logger.warning(e)
-            logger.warning("Recompiling the engine with static shape")
+            logger.warning(
+                "Provided inputs are outside the expected shape range; recompiling the module for the provided input shapes (static)"
+            )
             self.arg_dynamic_shapes = None
             self.kwarg_dynamic_shapes = None
-            self.total_dynamic_shape = None
             self.refit_state.set_state(RefitFlag.NEEDS_RECOMPILE)
-            self.store_inputs(args, kwargs)
+            self._store_inputs(args, kwargs)
 
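The net effect at call time, sketched with illustrative shapes. Assume a mutable module whose single input was hinted with `set_expected_dynamic_shape_range(({0: seq_len},), {})` and `seq_len = Dim("seq_len", min=1, max=10)`:

```python
# Call-time behavior sketch for the validation path above (illustrative shapes).
mutable_module(torch.randn(2, 8).cuda())   # first call: initial compilation
mutable_module(torch.randn(6, 8).cuda())   # dim 0 inside [1, 10]: engine is reused
mutable_module(torch.randn(12, 8).cuda())  # dim 0 outside the range: hints dropped,
                                           # module recompiled statically for (12, 8)
```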
-    def store_inputs(self, arg_inputs: Any, kwarg_inputs: Any) -> None:
+    def _validates_dynamic_hints(self) -> bool:
+        if self.arg_dynamic_shapes is None:
+            if self.arg_inputs:
+                logger.warning("arg_dynamic_shape is not provided!")
+        else:
+            if len(self.arg_dynamic_shapes) != len(self.arg_inputs):
+                logger.warning(
+                    f"The length of arg_inputs is {len(self.arg_inputs)} but the length of arg_dynamic_shape is {len(self.arg_dynamic_shapes)}!"
+                )
+                return False
+
+        if self.kwarg_dynamic_shapes is None:
+            if self.kwarg_inputs:
+                logger.warning("kwarg_dynamic_shape is not provided!")
+        else:
+            if self.kwarg_dynamic_shapes.keys() != self.kwarg_inputs.keys():
+                logger.warning(
+                    f"kwarg_inputs has {list(self.kwarg_inputs.keys())} but kwarg_dynamic_shape has {list(self.kwarg_dynamic_shapes.keys())}!"
+                )
+                return False
+
+        return True
+
+    def _store_inputs(self, arg_inputs: Any, kwarg_inputs: Any) -> None:
         self.arg_inputs = arg_inputs
         self.kwarg_inputs = kwarg_inputs
 
     @staticmethod
-    def process_kwarg_inputs(inputs: Any) -> Any:
+    def _process_kwarg_inputs(inputs: Any) -> Any:
         # Process kwarg inputs to be acceptable for Torch-TensorRT
         if isinstance(inputs, dict):
             # None must be excluded. AOT compile does not allow dynamic control flow, so bool is excluded as well.
             return {
-                k: MutableTorchTensorRTModule.process_kwarg_inputs(v)
+                k: MutableTorchTensorRTModule._process_kwarg_inputs(v)
                 for k, v in inputs.items()
                 if (v is not None and not isinstance(v, bool))
             }
@@ -384,7 +450,10 @@ def process_kwarg_inputs(inputs: Any) -> Any:
         elif isinstance(inputs, (list, tuple)):
             if None not in inputs:
                 return type(inputs)(
-                    [MutableTorchTensorRTModule.process_kwarg_inputs(v) for v in inputs]
+                    [
+                        MutableTorchTensorRTModule._process_kwarg_inputs(v)
+                        for v in inputs
+                    ]
                 )
 
         raise ValueError(
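Standalone, the filtering `_process_kwarg_inputs` applies looks like the sketch below. It is simplified (`process` is a hypothetical free function): the real method also type-checks leaf values and raises the `ValueError` shown above for unsupported types.

```python
# Simplified sketch of the kwarg filtering: None values and bools are dropped,
# since AOT compile cannot trace the dynamic control flow they usually drive;
# containers are cleaned recursively.
def process(inputs):
    if isinstance(inputs, dict):
        return {
            k: process(v)
            for k, v in inputs.items()
            if v is not None and not isinstance(v, bool)
        }
    if isinstance(inputs, (list, tuple)) and None not in inputs:
        return type(inputs)([process(v) for v in inputs])
    return inputs  # the real method validates leaves and may raise ValueError

print(process({"mask": None, "use_cache": True, "ids": [1, 2]}))  # {'ids': [1, 2]}
```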
@@ -394,7 +463,7 @@ def process_kwarg_inputs(inputs: Any) -> Any:
 
     def forward(self, *args: Any, **kwargs: Any) -> Any:
         # Step 1: Check whether the input shape has changed
-        kwargs = MutableTorchTensorRTModule.process_kwarg_inputs(kwargs)
+        kwargs = MutableTorchTensorRTModule._process_kwarg_inputs(kwargs)
         self._validate_inputs(*args, **kwargs)
 
         # Step 2: If the flag is unknown, it could be a recompile or refit.
@@ -406,7 +475,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any:
         if self.refit_state.get_state() == RefitFlag.NEEDS_RECOMPILE:
             logger.info("(Re)Compiling the engine...")
             self.compile()
-            self.store_state_dict_metadata()
+            self._store_state_dict_metadata()
             self.refit_state.set_state(RefitFlag.LIVE)
 
         elif self.refit_state.get_state() == RefitFlag.NEEDS_REFIT:
@@ -417,7 +486,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any:
                 logger.error(e)
                 logger.error("Model refit failed. Recompiling the graph module.")
                 self.compile()
-                self.store_state_dict_metadata()
+                self._store_state_dict_metadata()
                 self.refit_state.set_state(RefitFlag.LIVE)
 
         result = self.gm(*args, **kwargs)
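For contrast with the recompile path above, the refit path is what makes the module "mutable": weight changes on the wrapped model flag `NEEDS_REFIT`, and the next forward call refits the existing engine in place, falling back to a recompile only if the refit fails. A hedged usage sketch (the weights file path is illustrative):

```python
# Refit-path sketch: update weights, then call forward; the module refits
# the engine before running instead of rebuilding it from scratch.
mutable_module.load_state_dict(torch.load("new_weights.pth"))  # mutate weights
out = mutable_module(torch.randn(2, 8).cuda())                 # refit, then run
```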
@@ -427,7 +496,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any:
 
     def to(self, device: str) -> None:
         logger.warning("Original PyTorch model is moved. CPU offload may fail.")
-        self.orignial_model.to(device)
+        self.original_model.to(device)
 
     def __deepcopy__(self, memo: Any) -> Any:
         cls = self.__class__
@@ -479,7 +548,7 @@ def __setattr__(self, name: str, value: Any) -> None:
         object.__setattr__(self, name, value)
 
     @staticmethod
-    def check_inputs_equal(
+    def _check_inputs_shape(
         input1: Any,
         input2: Any,
         dynamic_shapes: Any = None,
@@ -495,10 +564,13 @@ def check_inputs_equal(
                 return False
             elif isinstance(a, torch.Tensor) and a.shape != b.shape:
                 if dynamic_shapes is None:
+                    logger.warning(
+                        "Dynamic shape is not properly set but the input shape has changed!"
+                    )
                     return False
                 else:
                     tensor_dynamic_shape = dynamic_shapes[i]
-                    if not MutableTorchTensorRTModule.check_tensor_shapes_with_dynamic_shapes(
+                    if not MutableTorchTensorRTModule._check_tensor_shapes_with_dynamic_shapes(
                         a, b, tensor_dynamic_shape
                     ):
                         return False
@@ -513,28 +585,34 @@ def check_inputs_equal(
                 return False
             elif isinstance(va, torch.Tensor) and va.shape != vb.shape:
                 if dynamic_shapes is None:
+                    logger.warning(
+                        "Dynamic shape is not properly set but the input shape has changed!"
+                    )
                     return False
                 else:
                     tensor_dynamic_shape = dynamic_shapes[ka]
-                    if not MutableTorchTensorRTModule.check_tensor_shapes_with_dynamic_shapes(
+                    if not MutableTorchTensorRTModule._check_tensor_shapes_with_dynamic_shapes(
                         va, vb, tensor_dynamic_shape
                     ):
                         return False
             elif isinstance(
                 va, (list, tuple, dict)
-            ) and not MutableTorchTensorRTModule.check_inputs_equal(
+            ) and not MutableTorchTensorRTModule._check_inputs_shape(
                 va, vb, dynamic_shapes[ka] if dynamic_shapes else None
             ):
                 return False
         return True
 
     @staticmethod
-    def check_tensor_shapes_with_dynamic_shapes(
+    def _check_tensor_shapes_with_dynamic_shapes(
         t1: torch.Tensor, t2: torch.Tensor, dynamic_shape: dict[int, Any]
     ) -> bool:
         for (i, axis_0), axis_1 in zip(enumerate(t1.shape), t2.shape):
             if axis_0 != axis_1:
                 if i not in dynamic_shape:
+                    logger.warning(
+                        "Dynamic shape does not include the axis on which the input changes!"
+                    )
                     return False
                 dyn = dynamic_shape[i]
                 if axis_1 > dyn.max or axis_1 < dyn.min:
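The per-axis check above can be summarized standalone. A minimal sketch (`shapes_compatible` is a hypothetical name; the real method raises `DynamicShapeOutOfRangeException`, caught in `_validate_inputs`, rather than returning `False` when a hinted axis leaves its range):

```python
# Standalone sketch of the dynamic-axis range check.
import torch

def shapes_compatible(old_shape, new_shape, dynamic_shape):
    for i, (axis_0, axis_1) in enumerate(zip(old_shape, new_shape)):
        if axis_0 != axis_1:
            if i not in dynamic_shape:              # changed on an axis with no hint
                return False
            dim = dynamic_shape[i]
            if not (dim.min <= axis_1 <= dim.max):  # outside the declared range
                return False
    return True

seq_len = torch.export.Dim("seq_len", min=1, max=10)
print(shapes_compatible((4, 8), (6, 8), {0: seq_len}))   # True: 6 is in [1, 10]
print(shapes_compatible((4, 8), (12, 8), {0: seq_len}))  # False: 12 > 10
```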