11
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
-
14
+ """AutoRound quantization."""
15
15
import copy
16
16
import json
17
17
import time
28
28
29
29
30
30
class AutoRoundQuantizer (Quantizer ):
31
+ """AutoRound Quantizer."""
32
+
31
33
def __init__ (
32
34
self ,
33
35
quant_config : dict = {},
@@ -94,11 +96,11 @@ def __init__(
94
96
lr_scheduler: The learning rate scheduler to be used.
95
97
dataset (str): The default dataset name (default is "NeelNanda/pile-10k").
96
98
enable_quanted_input (bool): Whether to use the output of the previous quantized block as
97
- the input for the current block (default is True).
99
+ the input for the current block (default is True).
98
100
enable_minmax_tuning (bool): Whether to enable weight min-max tuning (default is True).
99
101
lr (float): The learning rate (default is None, will be set to 1.0/iters).
100
102
minmax_lr (float): The learning rate for min-max tuning
101
- (default is None, it will be set to lr automatically).
103
+ (default is None, it will be set to lr automatically).
102
104
low_gpu_mem_usage (bool): Whether to use low GPU memory (default is True).
103
105
iters (int): Number of iterations (default is 200).
104
106
seqlen (int): Data length of the sequence for tuning (default is 2048).
@@ -111,7 +113,7 @@ def __init__(
111
113
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
112
114
data_type (str): The data type to be used (default is "int").
113
115
scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
114
- have different choices.
116
+ have different choices.
115
117
multimodal (bool): Whether to enable multimodal model quantization (default is False).
116
118
act_bits (int): Number of bits for activation quantization. Default is 32.
117
119
act_group_size (int): Group size for activation quantization. Default is None.
@@ -153,6 +155,7 @@ def __init__(
153
155
154
156
def prepare (self , model : torch .nn .Module , * args , ** kwargs ):
155
157
"""Prepares a given model for quantization.
158
+
156
159
Args:
157
160
model (torch.nn.Module): The model to be prepared.
158
161
@@ -163,6 +166,14 @@ def prepare(self, model: torch.nn.Module, *args, **kwargs):
163
166
return prepare_model
164
167
165
168
def convert (self , model : torch .nn .Module , * args , ** kwargs ):
169
+ """Convert the prepared model to a quantized model.
170
+
171
+ Args:
172
+ model (torch.nn.Module): The prepared model.
173
+
174
+ Returns:
175
+ The quantized model.
176
+ """
166
177
dataloader = CapturedDataloader (model .args_list , model .kwargs_list )
167
178
model = model .orig_model
168
179
rounder = AutoRound (
@@ -216,7 +227,7 @@ def get_dataloader(tokenizer, seqlen, dataset_name="NeelNanda/pile-10k", seed=42
216
227
split (str, optional): The data split to use. Defaults to None.
217
228
seed (int, optional): The random seed for reproducibility. Defaults to 42.
218
229
bs (int, optional): The batch size. Defaults to 4.
219
- n_samples (int, optional): The total number of samples to include. Defaults to 512 .
230
+ nsamples (int, optional): The total number of samples to include. Defaults to 128.
220
231
221
232
Returns:
222
233
DataLoader: The DataLoader for the calibrated dataset.
0 commit comments