@@ -1,6 +1,6 @@
 """The group quantization config"""
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
 
 import numpy as np
 from tvm import DataType, DataTypeCode
@@ -128,7 +128,7 @@ def _dequantize(
     ):
         tir_bin_mask = tir.const((1 << DataType(self.quantize_dtype).bits) - 1, self.storage_dtype)
         tir_max_int = tir.const(self.max_int_value, self.model_dtype)
-        dequantized_weight = te.compute(
+        return te.compute(
             shape=[weight.shape[0], weight.shape[1] * self.num_elem_per_storage]
             if out_shape is None
             else out_shape,
@@ -149,8 +149,8 @@ def _dequantize(
                 ),
                 scale[i, j // self.group_size],
             ),
+            name="decode",
         )
-        return dequantized_weight
 
     def quantize_weight(self, weight: NDArray) -> List[NDArray]:
         """
@@ -186,8 +186,10 @@ def quantize_weight(self, weight: NDArray) -> List[NDArray]:
             if target is None:
                 target = Target.from_device(dev)
             with target:
-                mod = dl.ApplyDefaultSchedule(  # pylint: disable=not-callable
-                    dl.gpu.Reduction(), dl.gpu.GeneralReduction(), dl.gpu.Fallback()
+                mod = dl.ApplyDefaultSchedule(  # type: ignore # pylint: disable=not-callable
+                    dl.gpu.Reduction(),
+                    dl.gpu.GeneralReduction(),
+                    dl.gpu.Fallback(),
                 )(mod)
         elif device_type == "cpu":
             target = "llvm"
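
The `ApplyDefaultSchedule` call reflowed above is dlight's module pass for scheduling an IRModule: each listed rule is tried in order on every unscheduled PrimFunc, with `Fallback` as the catch-all. A rough sketch of how such a pass is applied, using a toy TVMScript module and an assumed CUDA target on a recent TVM version (none of this is code from the commit):

import tvm
from tvm import dlight as dl
from tvm.script import tir as T
from tvm.target import Target

@tvm.script.ir_module
class ToyModule:
    @T.prim_func
    def add_one(A: T.Buffer((128,), "float32"), B: T.Buffer((128,), "float32")):
        # A trivial elementwise kernel; Fallback will pick it up below.
        for i in range(128):
            with T.block("add"):
                vi = T.axis.spatial(128, i)
                B[vi] = A[vi] + T.float32(1.0)

with Target("cuda"):
    scheduled = dl.ApplyDefaultSchedule(  # rules are tried in order per PrimFunc
        dl.gpu.Reduction(),
        dl.gpu.GeneralReduction(),
        dl.gpu.Fallback(),
    )(ToyModule)
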
@@ -400,7 +402,7 @@ def from_multilinear(
             out_dtype=multi_linear.out_dtype,
         )
 
-    def forward(self, x: nn.Tensor) -> nn.Tensor:  # pylint: disable=invalid-name
+    def forward(self, x: nn.Tensor) -> Sequence[nn.Tensor]:  # pylint: disable=invalid-name
         """
         Forward method for multi linear layer.