
Commit a92d70a

xinhe3 authored and ulivne committed
[SW-192917] Update all HQT logic files with pre-commit check
Change-Id: I119dc8578cb10932fd1a8a674a8bdbf61f978e42
Signed-off-by: xinhe3 <[email protected]>
(cherry picked from commit 099e984)
Signed-off-by: xinhe3 <[email protected]>
1 parent 56a1a7e commit a92d70a

35 files changed: +494 -197 lines changed
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

neural_compressor/torch/algorithms/fp8_quant/_core/common.py

Lines changed: 26 additions & 5 deletions
@@ -1,9 +1,24 @@
-import os
-import torch
-import json
-import numpy as np
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import functools
 import importlib.util
+import json
+import os
+
+import numpy as np
+import torch

 from .._quant_common.helper_modules import *
 from .._quant_common.quant_config import get_hqt_config
@@ -117,7 +132,13 @@ def save_file(model, d, source_format, fname, mode):
 def module_convert(m, fcn):
     mt = ModuleConfig(
         tuple([fcn(x) for x in m.inputs]),
-        tuple([fcn(m.outputs)],) if type(m.outputs) == np.ndarray else tuple([fcn(y) for y in m.outputs]),
+        (
+            tuple(
+                [fcn(m.outputs)],
+            )
+            if type(m.outputs) == np.ndarray
+            else tuple([fcn(y) for y in m.outputs])
+        ),
         {k: fcn(m.params[k]) for k in m.params},
     )
     return mt
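
For orientation, module_convert maps a conversion function over every tensor-like entry of a ModuleConfig; the branch reformatted above exists because an outputs field holding a single np.ndarray must be wrapped in a one-element tuple rather than iterated elementwise. A minimal self-contained sketch of that behavior, with a stand-in ModuleConfig (the real class is defined elsewhere in this file):

import numpy as np


class ModuleConfig:
    # stand-in for the real ModuleConfig: per-module inputs, outputs, params
    def __init__(self, inputs, outputs, params):
        self.inputs, self.outputs, self.params = inputs, outputs, params


def module_convert(m, fcn):
    # apply fcn everywhere; a lone ndarray output is wrapped once, not iterated
    return ModuleConfig(
        tuple(fcn(x) for x in m.inputs),
        (
            tuple([fcn(m.outputs)])
            if type(m.outputs) == np.ndarray
            else tuple(fcn(y) for y in m.outputs)
        ),
        {k: fcn(m.params[k]) for k in m.params},
    )


cfg = ModuleConfig((np.ones(2),), np.arange(3), {"weight": np.eye(2)})
converted = module_convert(cfg, lambda a: a.tolist())
print(converted.outputs)  # ([0, 1, 2],) -- the single ndarray was wrapped once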

neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py

Lines changed: 21 additions & 6 deletions
@@ -1,6 +1,21 @@
-import torch
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import habana_frameworks.torch.core as htcore
 import habana_frameworks.torch.utils.experimental as htexp
+import torch
+
 from .common import *

 GAUDI2 = htexp.synDeviceType.synDeviceGaudi2
@@ -37,7 +52,7 @@ def get_default_exp_bias(dtype):
 def get_fullscale(dtype, exp_bias=None):
     default_exp_bias = get_default_exp_bias(dtype)
     fullscale = MAX_RANGE[dtype]
-    exp_bias = default_exp_bias if exp_bias == None else exp_bias
+    exp_bias = default_exp_bias if exp_bias is None else exp_bias
     fullscale = fullscale * (2 ** (default_exp_bias - exp_bias))
     return fullscale
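
A worked instance of the bias-shift rule in get_fullscale: every bias step above the default halves the representable range. The constants below assume FP8-E4M3 (default exponent bias 7, full scale 448; both taken from the FP8 format spec, not from this file's MAX_RANGE table):

def fullscale_sketch(max_range, default_exp_bias, exp_bias=None):
    # mirrors the logic above: None falls back to the dtype's default bias
    exp_bias = default_exp_bias if exp_bias is None else exp_bias
    return max_range * (2 ** (default_exp_bias - exp_bias))


print(fullscale_sketch(448.0, 7))      # 448.0 -- default bias leaves the range as-is
print(fullscale_sketch(448.0, 7, 11))  # 28.0  -- four extra bias steps shrink it 16x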

@@ -50,7 +65,7 @@ def get_fp8_hw_alligned_scales(dtype, device):
     exp_bias_set = EXP_BIAS_SETS.get((device, dtype), None)
     return (
         None
-        if exp_bias_set == None
+        if exp_bias_set is None
         else [x / MAX_RANGE[dtype] for x in get_fullscales_by_expbias_set(dtype, exp_bias_set)]
     )

@@ -82,11 +97,11 @@ def scale_to_pow2(scale):
     return scale_pow2


-# Considering range of hw alligned scales: 2^a, 2^a+1,..., 2^b (a<b)
+# Considering range of hw aligned scales: 2^a, 2^a+1,..., 2^b (a<b)
 # we want to choose scale s for maxabs m such that 2^a <= s=2^x <= 2^b (for integer a<=x<=b)
 # and also 2^(x-1) < m <= 2^x
-# if m>=2^b then s=2^b, therefor min(_, 2^b)
-# if m<=2^a then s=2^a, therefor max(_, 2^a) --> 2^a <= min(max(_,2^a),2^b) <=2^b
+# if m>=2^b then s=2^b, therefore min(_, 2^b)
+# if m<=2^a then s=2^a, therefore max(_, 2^a) --> 2^a <= min(max(_,2^a),2^b) <=2^b
 # if s^a<m<2^b then m as a positive number can be written as m=2^y (y=log2(m))
 # if y is integer then y=ciel(y) we choose x=y so s=2^x=2^y=2^ciel(y)=2^ciel(log2(m))
 # else we choose x=ciel(y) and a<=x-1<y<x<=b and s=2^x=2^ciel(y)=2^ciel(log2(m))
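
The comment block above collapses to a single clamped expression, s = min(max(2^ceil(log2(m)), 2^a), 2^b). A standalone sketch, with a and b as assumed range exponents rather than the module's actual hardware constants:

import math


def scale_to_pow2_hw_sketch(maxabs, a=-10, b=10):
    # round maxabs m up to the next power of two, then clamp into [2^a, 2^b]
    s = 2.0 ** math.ceil(math.log2(maxabs))
    return min(max(s, 2.0**a), 2.0**b)


print(scale_to_pow2_hw_sketch(0.3))     # 0.5    -- smallest power of two >= 0.3
print(scale_to_pow2_hw_sketch(5000.0))  # 1024.0 -- clamped at 2^b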

neural_compressor/torch/algorithms/fp8_quant/_core/measure.py

Lines changed: 29 additions & 15 deletions
@@ -1,18 +1,27 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import json
 import os
-import torch
-import numpy as np
+
 import habana_frameworks.torch.core as htcore
+import numpy as np
+import torch

-from .._quant_common.quant_config import (
-    QuantMode,
-    ScaleMethod,
-    MeasureExclude,
-    get_hqt_config,
-    set_hqt_config,
-)
-from .common import *
+from .._quant_common.quant_config import MeasureExclude, QuantMode, ScaleMethod, get_hqt_config, set_hqt_config
 from ..utils.logger import logger
+from .common import *

 imod_dict = {}
 gmod_list = []
@@ -139,7 +148,7 @@ def get_mod_extra_config_dict(model):
     for name, mod in model.named_modules():
         if hasattr(mod, "_mod_extra_config"):
             if is_measure_done(mod._mod_extra_config):
-                name = name.replace("_orig_mod.", "") # remove _orig_mod part added by dynamo mechanism
+                name = name.replace("_orig_mod.", "")  # remove _orig_mod part added by dynamo mechanism
                 mcd[name] = mod._mod_extra_config
             else:
                 logger.debug(
@@ -181,9 +190,7 @@ def measure_control_to_state_dict(mcd):
         sdl[mname]["params"] = dict()
         for param_name in mcd[mname].params:
             if mcd[mname].params[param_name].state is not None:
-                sd[mname]["params"][param_name] = (
-                    mcd[mname].params[param_name].state.detach().cpu().float().numpy()
-                )
+                sd[mname]["params"][param_name] = mcd[mname].params[param_name].state.detach().cpu().float().numpy()
                 sdl[mname]["params"][param_name] = (
                     mcd[mname].params[param_name].state.detach().cpu().float().numpy().tolist()
                 )
@@ -396,6 +403,13 @@ def is_used(self):
 observer_params = {
     "maxabs_per_channel": {
         "linear": ModuleConfig(({"dim": -1},), ({"dim": -1},), {"weight": {"dim": 0}}),
-        "matmul": ModuleConfig(({"dim": -1}, {"dim": -2},), ({"dim": -1},), None),
+        "matmul": ModuleConfig(
+            (
+                {"dim": -1},
+                {"dim": -2},
+            ),
+            ({"dim": -1},),
+            None,
+        ),
     }
 }
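
Reading the observer_params entry above: each {"dim": ...} names the axis along which per-channel statistics are kept, so a linear weight with {"dim": 0} gets one maxabs value per output channel. A rough numpy sketch of that reduction (the axis-keeping interpretation is an assumption; the real observers in this file operate on torch tensors):

import numpy as np


def maxabs_per_channel_sketch(a, dim):
    # keep axis `dim`, reduce |a| with max over every other axis
    reduce_axes = tuple(ax for ax in range(a.ndim) if ax != dim % a.ndim)
    return np.abs(a).max(axis=reduce_axes)


w = np.random.randn(8, 16)                        # assumed (out_features, in_features)
print(maxabs_per_channel_sketch(w, dim=0).shape)  # (8,) -- one stat per output channel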

neural_compressor/torch/algorithms/fp8_quant/_core/quant_dequant.py

Lines changed: 17 additions & 1 deletion
@@ -1,5 +1,21 @@
-import torch.nn as nn
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from abc import abstractmethod
+
+import torch.nn as nn
+
 from .common import *


neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py

Lines changed: 20 additions & 10 deletions
@@ -1,17 +1,27 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import habana_frameworks.torch.core as htcore
 import torch
 import torch.nn as nn
-import habana_frameworks.torch.core as htcore
-from .._quant_common.quant_config import get_hqt_config
+
 from .._quant_common.helper_modules import PatchedUnmeasuredModule
-from .measure import load_measurements
-from .scale import scale_method_mapping, get_config, scaling_methods
-from .common import (
-    mod_default_dict,
-    generate_model_info,
-    parent_child_mod_dict,
-    UNMEASURED_MODELS,
-)
+from .._quant_common.quant_config import get_hqt_config
 from ..utils.logger import logger
+from .common import UNMEASURED_MODELS, generate_model_info, mod_default_dict, parent_child_mod_dict
+from .measure import load_measurements
+from .scale import get_config, scale_method_mapping, scaling_methods


 def patch_module(mod, qconfig, mod_dict, patched_mod=None):

neural_compressor/torch/algorithms/fp8_quant/_core/scale.py

Lines changed: 21 additions & 20 deletions
@@ -1,13 +1,26 @@
-import torch
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
+import torch

 from .._quant_common.quant_config import ScaleMethod, set_hqt_config
-from .scale_methods import *
-from .quant_dequant import *
-
-from .fp_utils import *
-from .common import *
 from ..utils.logger import logger
+from .common import *
+from .fp_utils import *
+from .quant_dequant import *
+from .scale_methods import *


 def matmul_scales_to_mod_config(mod, scales, params):
@@ -110,7 +123,7 @@ def get_config(
     mod_type_str = mod.__class__.__name__
     layer_type = mod_dict[mod_type_str].type
     if mname not in scales:
-        logger.debug("Calcuating scales for layer %s", mname)
+        logger.debug("Calculating scales for layer %s", mname)
         if mname not in measurement:
             qconfig[UNMEASURED_MODELS].append(mname)
             logger.debug(
@@ -119,7 +132,7 @@ def get_config(
             )
             continue
         layer_measure = measurement[mname] # ModuleConfig() of measurements
-        scales[mname] = method[layer_type][0](mod, layer_measure, params) # ModuleConfig() of scales
+        scales[mname] = method[layer_type][0](mod, layer_measure, params)  # ModuleConfig() of scales
         if scales_file is not None:
             scales_obj[mname] = ModuleConfig(
                 **format_functions_rec((torch.Tensor, scales_file_format))(scales[mname].__dict__)
@@ -368,18 +381,6 @@ def get_config(
         ScaleMethod.ACT_MAXABS_POW2_WEIGHTS_PCS_OPT_POW2,
         "maxabs",
     ): "act_maxabs_pts_pow2_weights_opt_pcs_pow2",
-    (
-        ScaleMethod.ACT_MAXABS_POW2_WEIGHTS_PCS_OPT_POW2,
-        "maxabs_per_channel",
-    ): "act_maxabs_pts_pow2_weights_opt_pcs_pow2",
-    (
-        ScaleMethod.WEAKSMOOTHQUANT_WEIGHTS_OUTPUT_CHANNEL_MAXABS_POW2,
-        "maxabs_per_channel",
-    ): "weaksmoothquant_weights_maxabs_pow2",
-    (
-        ScaleMethod.SMOOTHQUANT_WEIGHTS_OUTPUT_CHANNEL_MAXABS_POW2,
-        "maxabs_per_channel",
-    ): "smoothquant_weights_maxabs_pow2",
     (ScaleMethod.SMOOTHQUANT_OPT, "maxabs_per_channel"): "smoothquant_weights_opt_pow2",
 }
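
The twelve lines deleted in the last hunk remove entries whose keys appear to duplicate ones defined earlier in scale_method_mapping, which is what pre-commit duplicate-key checks flag: a repeated key in a Python dict literal never raises, the last occurrence silently wins. A two-line demonstration with hypothetical names:

mapping = {("method_a", "maxabs"): "impl_1", ("method_a", "maxabs"): "impl_2"}
print(mapping)  # {('method_a', 'maxabs'): 'impl_2'} -- only the last duplicate survives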

Lines changed: 14 additions & 0 deletions
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .max_abs import *
 from .unit_scale import *
 from .smooth_quant import *

neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/max_abs.py

Lines changed: 16 additions & 2 deletions
@@ -1,7 +1,21 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import torch

-from ..fp_utils import *
 from ..common import *
+from ..fp_utils import *


 def linear_act_maxabs_pts_weight_maxabs_pts_pow2_hw_scales(mod, measurement, params):
@@ -140,7 +154,7 @@ def fsdpa_act_maxabs_pts_weight_maxabs_pts_pow2_scales(mod, measurement, params)
         for x in measurement.inputs
     ]
     # fsdpa is combined out of - BMM1(Q,K) -> Softmax -> BMM2(AMAX,V)
-    # during measure we recieve the amax value from the cguid and apply it during quant as input
+    # during measure we receive the amax value from the cguid and apply it during quant as input
     input_scale.append(
         calc_maxabs_scale(
             torch.tensor(measurement.outputs[1], dtype=hp_dtype, device=device).max(),
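
For orientation, a maxabs point scale simply maps the measured absolute maximum onto the target dtype's representable range; a minimal sketch of the idea (the real calc_maxabs_scale in fp_utils may differ in signature and backoff handling):

def calc_maxabs_scale_sketch(maxabs, fullscale, backoff=1.0):
    # choose scale so that backoff * maxabs lands on the dtype's full range
    return maxabs * backoff / fullscale


print(calc_maxabs_scale_sketch(12.0, 448.0))  # ~0.0268, assuming an FP8-E4M3 range of 448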

neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/smooth_quant.py

Lines changed: 15 additions & 1 deletion
@@ -1,8 +1,22 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import torch
 from tqdm import tqdm

-from ..fp_utils import *
 from ..common import *
+from ..fp_utils import *


 def linear_smoothquant_weights_opt_pow2_scales(mod, measurement, params):
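
The scale functions in this file are SmoothQuant variants. For orientation, the core SmoothQuant rescaling (the standard formulation from the SmoothQuant paper, not code from this commit) shifts activation outliers into the weights through a per-input-channel factor:

import numpy as np


def smoothquant_factors(x_maxabs, w_maxabs, alpha=0.5):
    # s_j = max|X_j|^alpha / max|W_j|^(1 - alpha); quantize X / s and s * W instead
    return x_maxabs**alpha / w_maxabs ** (1 - alpha)


x_maxabs = np.array([10.0, 0.1])  # assumed per-channel activation maxabs (one outlier)
w_maxabs = np.array([0.4, 0.4])   # assumed per-channel weight maxabs
s = smoothquant_factors(x_maxabs, w_maxabs)
print(s)  # [5.  0.5]; (X / s) @ (s[:, None] * W) == X @ W, and X / s has tamer outliers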
