imatrix : fix wname for mul_mat_id ops (#6271)

* imatrix : fix wname for mul_mat_id ops

* also filter tensor names in mul_mat_id ops

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
Georgi Gerganov 2024-03-24 16:18:45 +02:00 committed by GitHub
parent 7aed0ffe68
commit a0e584defd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -50,17 +50,11 @@ private:
void keep_imatrix(int ncall) const; void keep_imatrix(int ncall) const;
}; };
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) { // remove any prefix and suffixes from the name
GGML_UNUSED(user_data); // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
static std::string filter_tensor_name(const char * name) {
const struct ggml_tensor * src0 = t->src[0];
const struct ggml_tensor * src1 = t->src[1];
std::string wname; std::string wname;
{ const char * p = strchr(name, '#');
// remove any prefix and suffixes from the name
// CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
const char * p = strchr(src0->name, '#');
if (p != NULL) { if (p != NULL) {
p = p + 1; p = p + 1;
const char * q = strchr(p, '#'); const char * q = strchr(p, '#');
@@ -70,9 +64,17 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
wname = p; wname = p;
} }
} else { } else {
wname = src0->name; wname = name;
}
} }
return wname;
}
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
GGML_UNUSED(user_data);
const struct ggml_tensor * src0 = t->src[0];
const struct ggml_tensor * src1 = t->src[1];
std::string wname = filter_tensor_name(src0->name);
// when ask is true, the scheduler wants to know if we are interested in data from this tensor // when ask is true, the scheduler wants to know if we are interested in data from this tensor
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection // if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
@@ -112,6 +114,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
// this is necessary to guarantee equal number of "ncall" for each tensor // this is necessary to guarantee equal number of "ncall" for each tensor
for (int ex = 0; ex < n_as; ++ex) { for (int ex = 0; ex < n_as; ++ex) {
src0 = t->src[2 + ex]; src0 = t->src[2 + ex];
wname = filter_tensor_name(src0->name);
auto& e = m_stats[wname]; auto& e = m_stats[wname];
if (e.values.empty()) { if (e.values.empty()) {
e.values.resize(src1->ne[0], 0); e.values.resize(src1->ne[0], 0);