@@ -56,13 +56,31 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
5656 const struct ggml_tensor * src0 = t->src [0 ];
5757 const struct ggml_tensor * src1 = t->src [1 ];
5858
59+ std::string wname;
60+ {
61+ // remove any prefix and suffixes from the name
62+ // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
63+ const char * p = strchr (src0->name , ' #' );
64+ if (p != NULL ) {
65+ p = p + 1 ;
66+ const char * q = strchr (p, ' #' );
67+ if (q != NULL ) {
68+ wname = std::string (p, q - p);
69+ } else {
70+ wname = p;
71+ }
72+ } else {
73+ wname = src0->name ;
74+ }
75+ }
76+
5977 // when ask is true, the scheduler wants to know if we are interested in data from this tensor
6078 // if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
6179 if (ask) {
6280 if (t->op == GGML_OP_MUL_MAT_ID) return true ; // collect all indirect matrix multiplications
6381 if (t->op != GGML_OP_MUL_MAT) return false ;
6482 if (src1->ne [1 ] < 16 || src1->type != GGML_TYPE_F32) return false ;
65- if (!(strncmp (src0-> name , " blk. " , 4 ) == 0 || (m_params.collect_output_weight && strcmp (src0-> name , " output.weight" ) == 0 ))) return false ;
83+ if (!(wname. substr ( 0 , 4 ) == " blk. " || (m_params.collect_output_weight && wname == " output.weight" ))) return false ;
6684 return true ;
6785 }
6886
@@ -94,20 +112,20 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
94112 // this is necessary to guarantee equal number of "ncall" for each tensor
95113 for (int ex = 0 ; ex < n_as; ++ex) {
96114 src0 = t->src [2 + ex];
97- auto & e = m_stats[src0-> name ];
115+ auto & e = m_stats[wname ];
98116 if (e.values .empty ()) {
99117 e.values .resize (src1->ne [0 ], 0 );
100118 }
101119 else if (e.values .size () != (size_t )src1->ne [0 ]) {
102- fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , src0-> name , (int )e.values .size (), (int )src1->ne [0 ]);
120+ fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , wname. c_str () , (int )e.values .size (), (int )src1->ne [0 ]);
103121 exit (1 ); // GGML_ASSERT(false);
104122 }
105123 // NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger
106124 // using the following line, we can correct for that if needed
107125 // if (idx == t->src[0]->ne[0] - 1) ++e.ncall;
108126 ++e.ncall ;
109127 if (m_params.verbosity > 1 ) {
110- printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, src0-> name , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
128+ printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, wname. c_str () , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
111129 }
112130 for (int row = 0 ; row < (int )src1->ne [1 ]; ++row) {
113131 const int excur = m_ids[row*n_as + idx];
@@ -129,17 +147,17 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
129147 }
130148 }
131149 } else {
132- auto & e = m_stats[src0-> name ];
150+ auto & e = m_stats[wname ];
133151 if (e.values .empty ()) {
134152 e.values .resize (src1->ne [0 ], 0 );
135153 }
136154 else if (e.values .size () != (size_t )src1->ne [0 ]) {
137- fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , src0-> name , (int )e.values .size (), (int )src1->ne [0 ]);
155+ fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , wname. c_str () , (int )e.values .size (), (int )src1->ne [0 ]);
138156 exit (1 ); // GGML_ASSERT(false);
139157 }
140158 ++e.ncall ;
141159 if (m_params.verbosity > 1 ) {
142- printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, src0-> name , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
160+ printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, wname. c_str () , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
143161 }
144162 for (int row = 0 ; row < (int )src1->ne [1 ]; ++row) {
145163 const float * x = data + row * src1->ne [0 ];
0 commit comments