@@ -66,7 +66,16 @@ def rtn_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
-    """The main entry to apply rtn quantization."""
+    """The main entry to apply rtn quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], RTNConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     from neural_compressor.torch.algorithms.weight_only.rtn import RTNQuantizer
     from neural_compressor.torch.algorithms.weight_only.save_load import save
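For context, the RTN entry above is normally reached through the high-level one-shot API rather than called directly. The following is a minimal usage sketch, assuming the `quantize` helper and `RTNConfig` exported by `neural_compressor.torch.quantization` (names not shown in this diff):

    # Hypothetical usage sketch (not part of this diff): RTN is data-free,
    # so a single one-shot call with a weight-only config is enough.
    import torch
    from neural_compressor.torch.quantization import RTNConfig, quantize

    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
    q_model = quantize(model, quant_config=RTNConfig(bits=4, group_size=32))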
@@ -115,6 +124,16 @@ def gptq_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply gptq quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], GPTQConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     logger.info("Quantize model with the GPTQ algorithm.")
     from neural_compressor.torch.algorithms.weight_only.gptq import GPTQuantizer
     from neural_compressor.torch.algorithms.weight_only.save_load import save
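Unlike RTN, GPTQ needs calibration data. A rough sketch of the calibration-driven flow, assuming the high-level `quantize` helper accepts a `run_fn` callback (an assumption, not confirmed by this diff):

    # Hypothetical sketch: run_fn feeds calibration batches so GPTQ can collect
    # the activation statistics it needs before rounding weights.
    import torch
    from neural_compressor.torch.quantization import GPTQConfig, quantize

    def run_fn(model):
        for _ in range(8):  # replace with real calibration samples
            model(torch.randn(1, 64))

    model = torch.nn.Sequential(torch.nn.Linear(64, 64))
    q_model = quantize(model, quant_config=GPTQConfig(), run_fn=run_fn)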
@@ -169,6 +188,16 @@ def static_quant_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply static quantization, covering both PT2E and IPEX quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], StaticQuantConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     if not is_ipex_imported():
         return pt2e_static_quant_entry(model, configs_mapping, mode, *args, **kwargs)
     logger.info("Quantize model with the static quant algorithm.")
@@ -212,7 +241,23 @@ def static_quant_entry(
 ###################### PT2E Dynamic Quant Algo Entry ##################################
 @register_algo(name=PT2E_DYNAMIC_QUANT)
 @torch.no_grad()
-def pt2e_dynamic_quant_entry(model: torch.nn.Module, configs_mapping, mode: Mode, *args, **kwargs) -> torch.nn.Module:
+def pt2e_dynamic_quant_entry(
+    model: torch.nn.Module,
+    configs_mapping,
+    mode: Mode,
+    *args,
+    **kwargs,
+) -> torch.nn.Module:
+    """The main entry to apply pt2e dynamic quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping: per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     logger.info("Quantize model with the PT2E static quant algorithm.")
     from neural_compressor.torch.algorithms.pt2e_quant.core import W8A8PT2EQuantizer
     from neural_compressor.torch.algorithms.pt2e_quant.save_load import save
@@ -235,7 +280,23 @@ def pt2e_dynamic_quant_entry(model: torch.nn.Module, configs_mapping, mode: Mode
 ###################### PT2E Static Quant Algo Entry ##################################
 @register_algo(name=PT2E_STATIC_QUANT)
 @torch.no_grad()
-def pt2e_static_quant_entry(model: torch.nn.Module, configs_mapping, mode: Mode, *args, **kwargs) -> torch.nn.Module:
+def pt2e_static_quant_entry(
+    model: torch.nn.Module,
+    configs_mapping,
+    mode: Mode,
+    *args,
+    **kwargs,
+) -> torch.nn.Module:
+    """The main entry to apply pt2e static quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping: per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     logger.info("Quantize model with the PT2E static quant algorithm.")
     from neural_compressor.torch.algorithms.pt2e_quant.core import W8A8PT2EQuantizer
     from neural_compressor.torch.algorithms.pt2e_quant.save_load import save
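The Mode argument documented in the two PT2E entries maps onto the user-facing two-step flow. Below is a rough sketch, assuming `prepare`/`convert` from `neural_compressor.torch.quantization` accept an `example_inputs` argument (an assumption); the PT2E entries additionally expect a torch.export-captured model, which this sketch glosses over:

    # Hypothetical sketch of the Mode.PREPARE -> calibration -> Mode.CONVERT flow.
    import torch
    from neural_compressor.torch.quantization import StaticQuantConfig, prepare, convert

    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
    example_inputs = (torch.randn(1, 64),)

    prepared = prepare(model, StaticQuantConfig(), example_inputs=example_inputs)  # Mode.PREPARE
    prepared(*example_inputs)  # calibration pass
    q_model = convert(prepared)  # Mode.CONVERT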
@@ -264,6 +325,16 @@ def smooth_quant_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply smooth quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], SmoothQuantConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     logger.info("Quantize model with the smooth quant algorithm.")
     from neural_compressor.torch.algorithms.smooth_quant import SmoothQuantQuantizer, TorchSmoothQuant
@@ -323,6 +394,16 @@ def awq_quantize_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply AWQ quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], AWQConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     logger.info("Quantize model with the AWQ algorithm.")
     from neural_compressor.torch.algorithms.weight_only.awq import AWQQuantizer
     from neural_compressor.torch.algorithms.weight_only.save_load import save
@@ -391,8 +472,22 @@ def awq_quantize_entry(
 ###################### TEQ Algo Entry ##################################
 @register_algo(name=TEQ)
 def teq_quantize_entry(
-    model: torch.nn.Module, configs_mapping: Dict[Tuple[str, callable], TEQConfig], mode: Mode, *args, **kwargs
+    model: torch.nn.Module,
+    configs_mapping: Dict[Tuple[str, callable], TEQConfig],
+    mode: Mode,
+    *args,
+    **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply TEQ quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], TEQConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     from neural_compressor.torch.algorithms.weight_only.save_load import save
     from neural_compressor.torch.algorithms.weight_only.teq import TEQuantizer
@@ -453,6 +548,16 @@ def autoround_quantize_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply AutoRound quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], AutoRoundConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     from neural_compressor.torch.algorithms.weight_only.autoround import AutoRoundQuantizer
     from neural_compressor.torch.algorithms.weight_only.save_load import save
@@ -530,6 +635,16 @@ def hqq_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply HQQ quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], HQQConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     from neural_compressor.torch.algorithms.weight_only.hqq import HQQuantizer
     from neural_compressor.torch.algorithms.weight_only.save_load import save
@@ -572,6 +687,16 @@ def mx_quant_entry(
     *args,
     **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply MX quantization.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str, callable], MXQuantConfig]): per-op configuration.
+        mode (Mode, optional): select from [PREPARE, CONVERT and QUANTIZE]. Defaults to Mode.QUANTIZE.
+
+    Returns:
+        torch.nn.Module: prepared model or quantized model.
+    """
     logger.info("Quantize model with the mx quant algorithm.")
     from neural_compressor.torch.algorithms.mx_quant.mx import MXQuantizer
@@ -586,8 +711,20 @@ def mx_quant_entry(
 ###################### Mixed Precision Algo Entry ##################################
 @register_algo(MIX_PRECISION)
 def mix_precision_entry(
-    model: torch.nn.Module, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs
+    model: torch.nn.Module,
+    configs_mapping: Dict[Tuple[str], MixPrecisionConfig],
+    *args,
+    **kwargs,
 ) -> torch.nn.Module:
+    """The main entry to apply Mixed Precision.
+
+    Args:
+        model (torch.nn.Module): raw fp32 model or prepared model.
+        configs_mapping (Dict[Tuple[str], MixPrecisionConfig]): per-op configuration.
+
+    Returns:
+        torch.nn.Module: model with fp16/bf16 mixed precision applied.
+    """
     # only support fp16 and bf16 now, more types might be added later
     from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionConverter
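Finally, a minimal sketch of how the mixed precision entry is typically reached, assuming `MixPrecisionConfig` and `convert` are exposed by `neural_compressor.torch.quantization` and that the config takes a `dtype` argument (assumptions, not confirmed by this diff):

    # Hypothetical sketch: cast supported modules to bf16 (or fp16) through the
    # mixed precision config; no calibration step is involved.
    import torch
    from neural_compressor.torch.quantization import MixPrecisionConfig, convert

    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
    half_model = convert(model, MixPrecisionConfig(dtype="bf16"))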