@@ -50,29 +50,31 @@ class IMatrixCollector {
50
50
void keep_imatrix (int ncall) const ;
51
51
};
52
52
53
+ // remove any prefix and suffixes from the name
54
+ // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
55
+ static std::string filter_tensor_name (const char * name) {
56
+ std::string wname;
57
+ const char * p = strchr (name, ' #' );
58
+ if (p != NULL ) {
59
+ p = p + 1 ;
60
+ const char * q = strchr (p, ' #' );
61
+ if (q != NULL ) {
62
+ wname = std::string (p, q - p);
63
+ } else {
64
+ wname = p;
65
+ }
66
+ } else {
67
+ wname = name;
68
+ }
69
+ return wname;
70
+ }
71
+
53
72
bool IMatrixCollector::collect_imatrix (struct ggml_tensor * t, bool ask, void * user_data) {
54
73
GGML_UNUSED (user_data);
55
74
56
75
const struct ggml_tensor * src0 = t->src [0 ];
57
76
const struct ggml_tensor * src1 = t->src [1 ];
58
-
59
- std::string wname;
60
- {
61
- // remove any prefix and suffixes from the name
62
- // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
63
- const char * p = strchr (src0->name , ' #' );
64
- if (p != NULL ) {
65
- p = p + 1 ;
66
- const char * q = strchr (p, ' #' );
67
- if (q != NULL ) {
68
- wname = std::string (p, q - p);
69
- } else {
70
- wname = p;
71
- }
72
- } else {
73
- wname = src0->name ;
74
- }
75
- }
77
+ std::string wname = filter_tensor_name (src0->name );
76
78
77
79
// when ask is true, the scheduler wants to know if we are interested in data from this tensor
78
80
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
@@ -112,6 +114,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
112
114
// this is necessary to guarantee equal number of "ncall" for each tensor
113
115
for (int ex = 0 ; ex < n_as; ++ex) {
114
116
src0 = t->src [2 + ex];
117
+ wname = filter_tensor_name (src0->name );
115
118
auto & e = m_stats[wname];
116
119
if (e.values .empty ()) {
117
120
e.values .resize (src1->ne [0 ], 0 );
0 commit comments