From b5b73d14b30f5841b16ee75685233e36a57ed8f6 Mon Sep 17 00:00:00 2001 From: Nico Bosshard Date: Sun, 16 Feb 2025 17:03:57 +0100 Subject: [PATCH 1/2] imatrix : Allow partial data in imatrix --- examples/imatrix/imatrix.cpp | 39 ++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 4edc0bfacf1..6828a7acb75 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -32,6 +32,7 @@ struct Stats { std::vector<float> values; std::vector<int> counts; int ncall = 0; + int n_as = 1; }; class IMatrixCollector { @@ -124,11 +125,15 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * if (e.values.empty()) { e.values.resize(src1->ne[0]*n_as, 0); e.counts.resize(src1->ne[0]*n_as, 0); + e.n_as = n_as; } else if (e.values.size() != (size_t)src1->ne[0]*n_as) { LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as); exit(1); //GGML_ABORT("fatal error"); } + else if (e.n_as != n_as) { + LOG_ERR("%s: inconsistent n_as for %s (%d vs %d)\n", __func__, wname.c_str(), e.n_as, n_as); + } LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type); // loop over all possible experts, regardless if they are used or not in the batch for (int ex = 0; ex < n_as; ++ex) { @@ -247,8 +252,38 @@ void IMatrixCollector::save_imatrix(int ncall) const { } if (n_zeros > 0) { - LOG_WRN("%s: entry '%40s' has partial data (%.2f%%) - skipping\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all); - continue; + LOG_WRN("%s: entry '%40s' has partial data (%.2f%%)\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all); + bool store_it = false; + if (kv.second.n_as > 1) { + int n_per_expert = n_all / kv.second.n_as; + std::vector<int> bad_experts; 
bad_experts.reserve(kv.second.n_as); + for (int i = 0; i < kv.second.n_as; ++i) { + auto counts = kv.second.counts.data() + i*n_per_expert; + int nz_i = 0; + for (int j = 0; j < n_per_expert; ++j) { + if (counts[j] == 0) ++nz_i; + } + if (nz_i > 0) bad_experts.push_back(i); + } + LOG_WRN("%s: %d out of %d experts are missing data\n", __func__, int(bad_experts.size()), kv.second.n_as); + if (bad_experts.size() < round(kv.second.n_as * 0.05)) { + LOG_WRN("%s: %d out of %d experts are missing data - storing but be aware\n", __func__, int(bad_experts.size()), kv.second.n_as); + store_it = true; + for (auto i : bad_experts) { + auto counts = (int *)kv.second.counts.data() + i*n_per_expert; + auto values = (float *)kv.second.values.data() + i*n_per_expert; + for (int j = 0; j < n_per_expert; ++j) { + counts[j] = 1; + values[j] = 1; + } + } + } + } + if (!store_it) { + LOG_WRN("%s: Skipping expert with missing data!\n", __func__); + continue; + } } n_entries++; From 9640acc14a7e357b6a81ede11974efab70982986 Mon Sep 17 00:00:00 2001 From: Nico Bosshard Date: Sun, 16 Feb 2025 17:21:14 +0100 Subject: [PATCH 2/2] imatrix : Fixed compile warnings in allow partial data in imatrix code --- examples/imatrix/imatrix.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 6828a7acb75..91da45d2b75 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -271,8 +271,8 @@ void IMatrixCollector::save_imatrix(int ncall) const { LOG_WRN("%s: %d out of %d experts are missing data - storing but be aware\n", __func__, int(bad_experts.size()), kv.second.n_as); store_it = true; for (auto i : bad_experts) { - auto counts = (int *)kv.second.counts.data() + i*n_per_expert; - auto values = (float *)kv.second.values.data() + i*n_per_expert; + auto counts = const_cast<int *>(kv.second.counts.data()) + i * n_per_expert; + auto values = const_cast<float *>(kv.second.values.data()) + i * n_per_expert; for (int j 
= 0; j < n_per_expert; ++j) { counts[j] = 1; values[j] = 1;