1 file changed: neural_compressor/torch/algorithms/fp8_quant/_core (+3, -1)

@@ -103,6 +103,7 @@ def get_config(
     )
     scales = convert_scales_to_tensors_dict(scales_obj, scales_file_format, params["hp_dtype"])
     model_dict = dict(model.named_modules())
+    save_file = False
     for mname in mod_list:
         mod = model_dict[mname]
         set_hqt_config(mod, top_level_config)  # set config in the module, as it is consumed by the patched module
@@ -123,6 +124,7 @@ def get_config(
             scales_obj[mname] = ModuleConfig(
                 **format_functions_rec((torch.Tensor, scales_file_format))(scales[mname].__dict__)
             )
+            save_file = True

         logger.debug(
             "Preparing quantization functions for layer %s layer_type=%s",
@@ -138,7 +140,7 @@ def get_config(
             params,
         )
         qconfig[mname] = mod_extra_config
-    if scales_file is not None:
+    if save_file and scales_file is not None:
         save_scales(model, scales_obj, scales_file_format, scales_file + ".npz")
         save_scales(model, scales_obj, scales_file_format, scales_file + ".json")
     return qconfig
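In short, the patch introduces a save_file dirty flag: the .npz/.json scale files are written back to disk only when at least one module's scales were newly computed inside the loop, instead of unconditionally whenever scales_file is set. Below is a minimal, self-contained sketch of the same dirty-flag pattern; get_scales_and_maybe_save, measure-by-literal, and the dict-of-floats scales store are illustrative stand-ins, not the neural_compressor API.

    import json

    def get_scales_and_maybe_save(scales, mod_list, scales_file=None):
        """Dirty-flag sketch: persist scales only if at least one was newly computed."""
        save_file = False
        for mname in mod_list:
            if mname not in scales:
                # Stand-in for the real per-module scale measurement.
                scales[mname] = {"weight": 1.0}
                save_file = True  # something new worth persisting
        # Without the flag, an unchanged scales dict would be rewritten on every call.
        if save_file and scales_file is not None:
            with open(scales_file + ".json", "w") as f:
                json.dump(scales, f)
        return scales

    # Usage: the second call finds everything cached and skips the disk write.
    scales = {}
    get_scales_and_maybe_save(scales, ["fc1", "fc2"], scales_file="scales")
    get_scales_and_maybe_save(scales, ["fc1", "fc2"], scales_file="scales")

The design choice mirrors the diff: the write is gated on both save_file (new data exists) and scales_file is not None (a target path was given), so fully cached runs no longer touch the filesystem.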