
Commit 0c52e12

Authored by Kaihui-intel, pre-commit-ci[bot], and xin3he
Add docstring for WOQ&LayerWise (#1938)
Signed-off-by: Kaihui-intel <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: xinhe <[email protected]>
1 parent 08914d6 commit 0c52e12

File tree: 14 files changed, +599 / -70 lines changed

.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt

Lines changed: 2 additions & 1 deletion
@@ -20,4 +20,5 @@
 /neural_compressor/torch/algorithms/pt2e_quant
 /neural_compressor/torch/export
 /neural_compressor/common
-/neural_compressor/torch/algorithms/weight_only/hqq
+/neural_compressor/torch/algorithms/weight_only
+/neural_compressor/torch/algorithms/layer_wise

neural_compressor/torch/algorithms/layer_wise/load.py

Lines changed: 3 additions & 2 deletions
@@ -152,8 +152,7 @@ def load(
 # The first line of this docstring overrides the one Sphinx generates for the
 # documentation. We need it so that Sphinx doesn't leak `pickle`s path from
 # the build environment (e.g. `<module 'pickle' from '/leaked/path').
-
-"""Load(f, map_location=None, pickle_module=pickle, *, weights_only=False, **pickle_load_args)
+"""Load(f, map_location=None, pickle_module=pickle, *, weights_only=False, **pickle_load_args).

 Loads an object saved with :func:`torch.save` from a file.
@@ -198,6 +197,8 @@ def load(
 pickle_load_args: (Python 3 only) optional keyword arguments passed over to
 :func:`pickle_module.load` and :func:`pickle_module.Unpickler`, e.g.,
 :attr:`errors=...`.
+prefix (str): the module prefix name.
+tensor_name (str): the tensor name.

 .. warning::
 :func:`torch.load()` unless `weights_only` parameter is set to `True`,
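
The new `prefix` and `tensor_name` arguments document how the layer-wise loader targets one tensor inside a checkpoint. As a rough, hypothetical illustration of that use case only (the real layer-wise `load` goes through the modified pickle below precisely to avoid materializing the whole state_dict), a naive eager version could look like this:

import torch

def load_named_tensor(checkpoint_path, tensor_name, prefix=""):
    # Hypothetical helper for illustration; not part of this module.
    # `prefix` and `tensor_name` mirror the arguments documented above.
    full_name = f"{prefix}.{tensor_name}" if prefix else tensor_name
    state_dict = torch.load(checkpoint_path, map_location="cpu")  # loads everything, unlike the layer-wise path
    return state_dict[full_name]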

neural_compressor/torch/algorithms/layer_wise/modified_pickle.py

Lines changed: 3 additions & 5 deletions
@@ -94,15 +94,13 @@ class PickleError(Exception):


 class PicklingError(PickleError):
-"""This exception is raised when an unpicklable object is passed to the
-dump() method."""
+"""This exception is raised when an unpicklable object is passed to the dump() method."""

 pass


 class UnpicklingError(PickleError):
-"""This exception is raised when there is a problem unpickling an object,
-such as a security violation.
+"""This exception is raised when there is a problem unpickling an object, such as a security violation.

 Note that other exceptions may also be raised during unpickling, including
 (but not necessarily limited to) AttributeError, EOFError, ImportError,
@@ -367,6 +365,7 @@ def whichmodule(obj, name): # pragma: no cover

 def encode_long(x): # pragma: no cover
 r"""Encode a long to a two's complement little-endian binary string.
+
 Note that 0 is a special case, returning an empty string, to save a
 byte in the LONG1 pickling context.

@@ -503,7 +502,6 @@ def dump(self, obj):

 def memoize(self, obj):
 """Store an object in the memo."""
-
 # The Pickler memo is a dictionary mapping object ids to 2-tuples
 # that contain the Unpickler memo key and the object being memoized.
 # The memo key is written to the pickle and will become
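
For reference, the encoding that `encode_long`'s docstring describes can be reproduced with `int.to_bytes`; the sketch below mirrors CPython's own `pickle.encode_long` rather than the code in this file:

def encode_long_sketch(x: int) -> bytes:
    # Two's-complement little-endian bytes; 0 encodes to b"" to save a byte in the LONG1 context.
    if x == 0:
        return b""
    nbytes = (x.bit_length() >> 3) + 1  # enough room for the sign bit
    result = x.to_bytes(nbytes, byteorder="little", signed=True)
    # Drop a redundant trailing 0xFF that to_bytes can add for some negative values.
    if x < 0 and nbytes > 1 and result[-1] == 0xFF and (result[-2] & 0x80) != 0:
        result = result[:-1]
    return result

assert encode_long_sketch(255) == b"\xff\x00"
assert encode_long_sketch(-256) == b"\x00\xff"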

neural_compressor/torch/algorithms/layer_wise/utils.py

Lines changed: 35 additions & 0 deletions
@@ -35,14 +35,18 @@


 class QDQLayer(torch.nn.Module):
+"""Quantized and Dequantized Layer."""
+
 def __init__(self, module, input_scale=None) -> None:
+"""Init the QDQLayer object."""
 super().__init__()
 self.quant = torch.ao.quantization.QuantStub()
 self.module = module
 self.dequant = torch.ao.quantization.DeQuantStub()
 self.input_scale = input_scale

 def forward(self, X):
+"""Forward function."""
 if self.input_scale is not None:
 X = torch.mul(X, self.input_scale)
 X = self.quant(X)
@@ -220,6 +224,16 @@ def _get_path(pretrained_model_name_or_path):


 def load_value(model, param_name, path):
+"""Load the module value.
+
+Args:
+model (torch.nn.module): torch model.
+param_name (str): module name.
+path (str): path to load state_dict per layer.
+
+Returns:
+tensor: the module value.
+"""
 if "lm_head" in param_name and getattr(model.config, "tie_word_embeddings", True):
 input_embeddings = model.get_input_embeddings()
 modules = get_named_children(model)
@@ -235,6 +249,14 @@ def load_value(model, param_name, path):


 def load_module(model, module_name, path, device="cpu"):
+"""Load all named parameters of module.
+
+Args:
+model (torch.nn.module): torch model.
+module_name (str): module name.
+path (str): path to load state_dict per layer.
+device (str, optional): module device. Defaults to "cpu".
+"""
 module = get_module(model, module_name)
 for n, p in module.named_parameters():
 param_name = module_name + "." + n
@@ -243,6 +265,18 @@ def load_module(model, module_name, path, device="cpu"):


 def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
+"""Register weight hooks for model.
+
+Args:
+model (torch.nn.module): torch model.
+path (str): path to load state_dict per layer.
+device (str, optional): module device. Defaults to "cpu".
+clean_weight (bool, optional): to clean model weight. Defaults to True.
+saved_path (str, optional): path to save module weight. Defaults to None.
+
+Returns:
+list: handlers.
+"""
 if saved_path:
 os.makedirs(saved_path, exist_ok=True)

@@ -280,6 +314,7 @@ def hook(module, input, output):


 def clean_module_weight(module):
+"""Clean module weight."""
 if isinstance(module, QDQLayer):
 submodule = module.module
 else:
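
Taken together, the hunks above describe a thin wrapper that optionally pre-scales the input, fake-quantizes it, runs the wrapped module, and dequantizes the output. A minimal sketch of that pattern, following the context lines shown here (the final `module`/`dequant` calls are an assumption, since the hunk cuts off early in `forward`):

import torch

class QDQLayerSketch(torch.nn.Module):
    # Illustration only; a stand-in for the QDQLayer documented above.
    def __init__(self, module, input_scale=None) -> None:
        super().__init__()
        self.quant = torch.ao.quantization.QuantStub()      # observes/quantizes the incoming activation
        self.module = module                                 # wrapped float module
        self.dequant = torch.ao.quantization.DeQuantStub()   # converts back to float at the output
        self.input_scale = input_scale

    def forward(self, X):
        if self.input_scale is not None:
            X = torch.mul(X, self.input_scale)  # optional pre-scaling
        X = self.quant(X)
        X = self.module(X)       # assumed: run the wrapped module between the stubs
        return self.dequant(X)   # assumed: dequantize before returning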

neural_compressor/torch/algorithms/weight_only/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -11,6 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+"""Weight-Only algorithms."""

 from .save_load import save, load

neural_compressor/torch/algorithms/weight_only/autoround.py

Lines changed: 16 additions & 5 deletions
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+"""AutoRound quantization."""
 import copy
 import json
 import time
@@ -28,6 +28,8 @@


 class AutoRoundQuantizer(Quantizer):
+"""AutoRound Quantizer."""
+
 def __init__(
 self,
 quant_config: dict = {},
@@ -94,11 +96,11 @@ def __init__(
 lr_scheduler: The learning rate scheduler to be used.
 dataset (str): The default dataset name (default is "NeelNanda/pile-10k").
 enable_quanted_input (bool): Whether to use the output of the previous quantized block as
-the input for the current block (default is True).
+the input for the current block (default is True).
 enable_minmax_tuning (bool): Whether to enable weight min-max tuning (default is True).
 lr (float): The learning rate (default is None, will be set to 1.0/iters).
 minmax_lr (float): The learning rate for min-max tuning
-(default is None, it will be set to lr automatically).
+(default is None, it will be set to lr automatically).
 low_gpu_mem_usage (bool): Whether to use low GPU memory (default is True).
 iters (int): Number of iterations (default is 200).
 seqlen (int): Data length of the sequence for tuning (default is 2048).
@@ -111,7 +113,7 @@ def __init__(
 dynamic_max_gap (int): The dynamic maximum gap (default is -1).
 data_type (str): The data type to be used (default is "int").
 scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
-have different choices.
+have different choices.
 multimodal(bool): Enable multimodal model quantization, (default is "False").
 act_bits (int): Number of bits for activation quantization. Default is 32.
 act_group_size (int): Group size for activation quantization. Default is None.
@@ -153,6 +155,7 @@ def __init__(

 def prepare(self, model: torch.nn.Module, *args, **kwargs):
 """Prepares a given model for quantization.
+
 Args:
 model (torch.nn.Module): The model to be prepared.

@@ -163,6 +166,14 @@ def prepare(self, model: torch.nn.Module, *args, **kwargs):
 return prepare_model

 def convert(self, model: torch.nn.Module, *args, **kwargs):
+"""Convert the prepared model to a quantized model.
+
+Args:
+model (torch.nn.Module): the prepared model
+
+Returns:
+The quantized model.
+"""
 dataloader = CapturedDataloader(model.args_list, model.kwargs_list)
 model = model.orig_model
 rounder = AutoRound(
@@ -216,7 +227,7 @@ def get_dataloader(tokenizer, seqlen, dataset_name="NeelNanda/pile-10k", seed=42
 split (str, optional): The data split to use. Defaults to None.
 seed (int, optional): The random seed for reproducibility. Defaults to 42.
 bs (int, optional): The batch size. Defaults to 4.
-n_samples (int, optional): The total number of samples to include. Defaults to 512.
+nsamples (int, optional): The total number of samples to include. Defaults to 128.

 Returns:
 DataLoader: The DataLoader for the calibrated dataset.
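
The docstrings added here outline a prepare/calibrate/convert flow. The sketch below is a hypothetical usage inferred only from these docstrings; the model name, the calibration loop, and the meaning of an empty `quant_config` are assumptions, not a verified recipe:

from transformers import AutoModelForCausalLM, AutoTokenizer

from neural_compressor.torch.algorithms.weight_only.autoround import AutoRoundQuantizer, get_dataloader

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")   # placeholder model for illustration
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

quantizer = AutoRoundQuantizer(quant_config={})   # assumption: empty config falls back to defaults
prepared = quantizer.prepare(model)               # wraps the model so calibration data can be captured
dataloader = get_dataloader(tokenizer, seqlen=2048, nsamples=128)   # defaults match the docstring
for batch in dataloader:
    prepared(**batch)                             # assumption: feeding batches records calibration data
quantized = quantizer.convert(prepared)           # returns the quantized model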

neural_compressor/torch/algorithms/weight_only/awq.py

Lines changed: 23 additions & 5 deletions
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+"""AWQ quantization."""
 # Copied from neural_compressor/adaptor/torch_utils/awq.py

 import copy
@@ -40,11 +40,16 @@ def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={}
 """Get absorbed layer per block.

 Args:
-model (torch.nn.Module): input model
-example_inputs: example_inputs
+model (torch.nn.Module): input model.
+example_inputs (tensor/tuple/dict, optional): used to trace torch model.
+folding (bool, optional): whether only allow update scale when it can be fold
+to upper layer. Defaults to False.
+weight_config (dict, optional): the quantization configuration. Defaults to {}.

 Returns:
-block_absorb_dict: dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...}
+block_absorb_dict: The dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...}
+absorb_layer_dict: The layer dict that scale can be absorbed. The dict is the inverse of
+block_absorb_dict for all blocks.
 """
 block_absorb_dict = {} # record absorbed layer per block
 absorb_layer_dict = {} # record absorb layers for absorbed layers
@@ -94,10 +99,12 @@ def _get_absorb_dict(model, absorb_layer_dict):

 Args:
 model (torch.nn.Module): input model
-absorb_layer_dict (dict): The layer dict that scale can be absorbed, default is {}.
+absorb_layer_dict (dict): The layer type dict that scale can be absorbed, default is {}.

 Returns:
 block_absorb_dict: dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...}
+new_absorb_layer_dict: The layer dict that scale can be absorbed. The dict is the inverse of
+block_absorb_dict for all blocks.
 """
 block_absorb_dict = {}
 block_prefix, block_num = get_block_prefix(model)
@@ -121,6 +128,15 @@ def _get_absorb_dict(model, absorb_layer_dict):

 @torch.no_grad()
 def _get_weight_scale(weight, q_group_size=-1):
+"""Get scale for weight.
+
+Args:
+weight (tensor): input weight
+q_group_size (int, optional): how many elements share one scale/zp. Defaults to -1.
+
+Returns:
+scale: the scale of input weight.
+"""
 org_shape = weight.shape
 if q_group_size > 0:
 weight = weight.view(-1, q_group_size)
@@ -526,6 +542,8 @@ def module_inference(self, model, inputs):


 class AWQQuantizer(Quantizer):
+"""AWQ Quantizer."""
+
 def __init__(self, quant_config: OrderedDict = {}, absorb_layer_dict: dict = {}):
 """Init an AWQQuantizer object.

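The new `_get_weight_scale` docstring describes a per-group weight statistic, while the context lines only show the reshaping step. A sketch of one common way to compute such a scale, borrowed from the public AWQ reference recipe rather than from this file:

import torch

@torch.no_grad()
def weight_scale_sketch(weight: torch.Tensor, q_group_size: int = -1) -> torch.Tensor:
    # Per-group relative magnitude of a 2-D weight, averaged per input channel (an assumption
    # about what _get_weight_scale computes; only the reshaping mirrors the hunk above).
    org_shape = weight.shape
    if q_group_size > 0:
        weight = weight.view(-1, q_group_size)                       # elements in a group share one scale/zp
    scale = weight.abs() / weight.abs().amax(dim=1, keepdim=True)    # normalize within each group/row
    scale = scale.view(org_shape)
    return scale.mean(0)                                             # one entry per input channel

# Example: a 4x8 weight with group size 4 yields an 8-element scale vector.
print(weight_scale_sketch(torch.randn(4, 8), q_group_size=4).shape)  # torch.Size([8])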