From 8ac51cadab57f3d9b36813324000ac4ffbff3c1e Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Mon, 6 May 2024 10:58:35 +0100 Subject: [PATCH 1/6] Fixed save_imatrix to match old behaviour for MoE This fix is simple and clear, but unnecessarily doubles the memory overhead.. --- examples/imatrix/imatrix.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 71e7a727f1943..079e4ab3d02af 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -19,6 +19,7 @@ struct Stats { std::vector values; + std::vector counts; int ncall = 0; }; @@ -120,13 +121,14 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * auto & e = m_stats[wname]; - ++e.ncall; - // NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger - // using the following line, we can correct for that if needed by replacing the line above with: - //if (idx == t->src[0]->ne[0] - 1) ++e.ncall; + // We select top-k experts, the number of calls for the expert tensors will be k times larger. + // NOTE: This will trigger the "if (e.ncall > m_last_call)" save conditional on the first active expert. + // The commented out "if (idx == t->src[0]->ne[0] - 1) ++e.ncall;" doesn't work. + if (idx == 0) ++e.ncall; if (e.values.empty()) { e.values.resize(src1->ne[0]*n_as, 0); + e.counts.resize(src1->ne[0]*n_as, 0); // +++ } else if (e.values.size() != (size_t)src1->ne[0]*n_as) { fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as); @@ -153,6 +155,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * for (int j = 0; j < (int)src1->ne[0]; ++j) { e.values[e_start + j] += x[j]*x[j]; + e.counts[e_start + j]++; } } } @@ -170,6 +173,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * auto& e = m_stats[wname]; if (e.values.empty()) { e.values.resize(src1->ne[0], 0); + e.counts.resize(src1->ne[0], 0); } else if (e.values.size() != (size_t)src1->ne[0]) { fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]); @@ -183,6 +187,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * const float * x = data + row * src1->ne[0]; for (int j = 0; j < (int)src1->ne[0]; ++j) { e.values[j] += x[j]*x[j]; + e.counts[j]++; } } if (e.ncall > m_last_call) { @@ -222,7 +227,13 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co out.write((const char *) &p.second.ncall, sizeof(p.second.ncall)); int nval = p.second.values.size(); out.write((const char *) &nval, sizeof(nval)); - if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float)); + if (nval > 0) { + std::vector tmp(nval); + for (int i = 0; i < nval; i++) { + tmp[i] = (p.second.values[i] / static_cast(p.second.counts[i])) * static_cast(p.second.ncall); + } + out.write((const char*)tmp.data(), nval*sizeof(float)) + } } // Write the number of call the matrix was computed with From 5e98f44e2f6e8fdd2187a8f3de0bb551b563d26a Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Mon, 6 May 2024 11:45:42 +0100 Subject: [PATCH 2/6] Fixed missing idx variable --- examples/imatrix/imatrix.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 079e4ab3d02af..4b88d88337c1b 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -124,11 +124,11 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * // We select top-k experts, the number of calls for the expert tensors will be k times larger. // NOTE: This will trigger the "if (e.ncall > m_last_call)" save conditional on the first active expert. // The commented out "if (idx == t->src[0]->ne[0] - 1) ++e.ncall;" doesn't work. - if (idx == 0) ++e.ncall; + if (((int32_t *) t->op_params)[0] == 0) ++e.ncall; if (e.values.empty()) { e.values.resize(src1->ne[0]*n_as, 0); - e.counts.resize(src1->ne[0]*n_as, 0); // +++ + e.counts.resize(src1->ne[0]*n_as, 0); } else if (e.values.size() != (size_t)src1->ne[0]*n_as) { fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as); @@ -232,7 +232,7 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co for (int i = 0; i < nval; i++) { tmp[i] = (p.second.values[i] / static_cast(p.second.counts[i])) * static_cast(p.second.ncall); } - out.write((const char*)tmp.data(), nval*sizeof(float)) + out.write((const char*)tmp.data(), nval*sizeof(float)); } } From c6d2bbbb17dba1d4213324384e6fd910c0a6c052 Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Mon, 6 May 2024 14:43:14 +0100 Subject: [PATCH 3/6] Unconditionally increment ncall Co-authored-by: slaren --- examples/imatrix/imatrix.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 4b88d88337c1b..ca0c0bbf6b359 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -121,10 +121,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * auto & e = m_stats[wname]; - // We select top-k experts, the number of calls for the expert tensors will be k times larger. - // NOTE: This will trigger the "if (e.ncall > m_last_call)" save conditional on the first active expert. - // The commented out "if (idx == t->src[0]->ne[0] - 1) ++e.ncall;" doesn't work. - if (((int32_t *) t->op_params)[0] == 0) ++e.ncall; + ++e.ncall; if (e.values.empty()) { e.values.resize(src1->ne[0]*n_as, 0); From 6674e79c71446476366092a00b2bdf69b9da9acf Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Tue, 7 May 2024 18:01:58 +0100 Subject: [PATCH 4/6] Fixed 2 bugs in save_imatrix() - Fixed segfault bug because the counts vector needed to be created. - Fixed pre-existing bug didn't actually add to the counts for "--combine" option. --- examples/imatrix/imatrix.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index ca0c0bbf6b359..9a8236ab51a3a 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -278,13 +278,27 @@ bool IMatrixCollector::load_imatrix(const char * imatrix_file, std::unordered_ma imatrix_data = {}; return false; } - e.values.resize(nval); - in.read((char*)e.values.data(), nval*sizeof(float)); + + // When re-called from load_imatrix() with add set, this will already be created. + if (e.values.empty()) { + e.values.resize(nval, 0); + e.counts.resize(nval, 0); + } + + std::vector tmp(nval); + in.read((char*)tmp.data(), nval*sizeof(float)); if (in.fail()) { printf("%s: failed reading data for entry %d\n",__func__,i); imatrix_data = {}; return false; } + + // Recreate the state as expected by save_imatrix(), and corerct for weighted sum. + for (int i = 0; i < nval; i++) { + e.values[i] += tmp[i]; + e.counts[i] += ncall; + } + e.ncall = ncall; } return true; From cb3d9bad5c054af4f9265bcd856402eefc875cb1 Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Tue, 7 May 2024 18:13:09 +0100 Subject: [PATCH 5/6] ncall needs summing too --- examples/imatrix/imatrix.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 9a8236ab51a3a..5f56a68b77d0e 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -297,9 +297,9 @@ bool IMatrixCollector::load_imatrix(const char * imatrix_file, std::unordered_ma for (int i = 0; i < nval; i++) { e.values[i] += tmp[i]; e.counts[i] += ncall; - } + } + e.ncall += ncall; - e.ncall = ncall; } return true; } From 6e05492fa6814e9fac45311cc26afd7965b4eda1 Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Tue, 7 May 2024 18:45:05 +0100 Subject: [PATCH 6/6] Trailing whitespace --- examples/imatrix/imatrix.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 5f56a68b77d0e..82b19fc4f3bae 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -284,7 +284,7 @@ bool IMatrixCollector::load_imatrix(const char * imatrix_file, std::unordered_ma e.values.resize(nval, 0); e.counts.resize(nval, 0); } - + std::vector tmp(nval); in.read((char*)tmp.data(), nval*sizeof(float)); if (in.fail()) { @@ -297,9 +297,9 @@ bool IMatrixCollector::load_imatrix(const char * imatrix_file, std::unordered_ma for (int i = 0; i < nval; i++) { e.values[i] += tmp[i]; e.counts[i] += ncall; - } + } e.ncall += ncall; - + } return true; }