Skip to content

Commit 62d0c03

Browse files
authored
Disable Disco for q4f16_ft and q8f16_ft quantization (mlc-ai#1094)
1 parent 9bf5723 commit 62d0c03

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

mlc_llm/core.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -597,6 +597,9 @@ def build_model_from_args(args: argparse.Namespace):
597 597
"`num_shards` should be used together with "
598 598
"`--build-model-only` and `--convert-weight-only`"
599 599
)
600+
use_ft_quant = args.quantization.name in ["q4f16_ft", "q8f16_ft"]
601+
if use_ft_quant:
602+
raise ValueError("Multi-GPU deployments are not available for ft quantization.")
600 603
os.makedirs(args.artifact_path, exist_ok=True)
601 604
if args.debug_dump:
602 605
os.makedirs(os.path.join(args.artifact_path, "debug"), exist_ok=True)
@@ -614,7 +617,6 @@ def build_model_from_args(args: argparse.Namespace):
614 617
config = json.load(i_f)
615 618

616 619
if not use_cache or args.convert_weight_only:
617-
618 620
model_generators = {
619 621
"llama": llama,
620 622
"mistral": llama,

0 commit comments

Comments
 (0)