Skip to content

Commit 46a8601

Browse files
committed
quantize : assume the neutral prior is equal imatrix weights
1 parent 92383bf commit 46a8601

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

tools/quantize/quantize.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -291,8 +291,15 @@ static int load_imatrix(const std::string & imatrix_file, std::vector<std::strin
291291
for (int64_t j = 0; j < ne1; ++j) {
292292
const float count = ((const float *) counts->data)[j];
293293
if (count > 0.0f) {
294+
float sumw = 0.0f;
294295
for (int64_t i = 0; i < ne0; ++i) {
295-
e[j*ne0 + i] = (((const float *) sums->data)[j*ne0 + i] + prior_weight) / (count + prior_weight);
296+
sumw += ((const float *) sums->data)[j*ne0 + i];
297+
}
298+
// the neutral prior is equal weights, and it should reduce the variance by weighted-averaging with the mean
299+
const float prior_value = sumw / ne0;
300+
301+
for (int64_t i = 0; i < ne0; ++i) {
302+
e[j*ne0 + i] = (((const float *) sums->data)[j*ne0 + i] + prior_value * prior_weight) / (count + prior_weight);
296303
}
297304
} else {
298305
// Partial imatrix data, this tensor never got any input during calibration

0 commit comments

Comments
 (0)