@@ -352,13 +352,13 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
352
352
std::vector<int32_t > extremes;
353
353
extremes.resize (n_layers);
354
354
std::fill (extremes.begin (), extremes.end (), 0 );
355
- if (anti_mode) {
356
- // No pointing in starting with first/last layer disabled.
357
- skip_types[0 ] = 15 ;
358
- skip_types[n_layers - 1 ] = 15 ;
359
- skips.push_back (0 ); skips.push_back (0 + n_layers);
360
- skips.push_back (n_layers - 1 ); skips.push_back (n_layers - 1 + n_layers);
361
- }
355
+ // if (anti_mode) {
356
+ // // No point in starting with first/last layer disabled.
357
+ // skip_types[0] = 15;
358
+ // skip_types[n_layers - 1] = 15;
359
+ // skips.push_back(0); skips.push_back(0 + n_layers);
360
+ // skips.push_back(n_layers - 1); skips.push_back(n_layers - 1 + n_layers);
361
+ // }
362
362
int32_t curr_best_layer = -1 , curr_best_type = 0 ;
363
363
double curr_best_ppl = -1 , ref_ppl = -1 ;
364
364
const int32_t mask = anti_mode ? 3 : 0 ;
@@ -389,7 +389,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
389
389
}
390
390
if (skip_layer >= n_layers) {
391
391
if (curr_best_layer == -1 ) break ;
392
- if (prune_target > 0 && pass_results.size () >= prune_target * 2 ) {
392
+ if (anti_mode || ( prune_target > 0 && pass_results.size () >= prune_target * 2 ) ) {
393
393
std::sort (pass_results.begin (), pass_results.end (),
394
394
[](const std::tuple<int32_t , int32_t , double > & a, const std::tuple<int32_t , int32_t , double > & b) {
395
395
if (anti_mode) return std::get<2 >(b) > std::get<2 >(a);
@@ -399,24 +399,26 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
399
399
const size_t num_prune = std::min (pass_results.size (), prune_target);
400
400
for (size_t temp = 0 , pruned = 0 ; temp < pass_results.size (); temp++) {
401
401
int32_t lidx = std::get<0 >(pass_results[temp]);
402
- if (lidx == curr_best_layer && std::get<1 >(pass_results[temp]) == curr_best_type) continue ;
403
- extremes[lidx] |= std::get<1 >(pass_results[temp]);
404
- printf (" \n Prune[%zu]: %d (%d) - %.2f\n " , pruned + 1 , lidx,
405
- std::get<1 >(pass_results[temp]), std::get<2 >(pass_results[temp]));
406
402
if (anti_mode) {
407
403
skip_types[lidx] |= std::get<1 >(pass_results[temp]);
408
404
skips.push_back (std::get<1 >(pass_results[temp]) == 1 ? lidx : lidx + n_layers);
409
405
}
406
+ if (lidx == curr_best_layer && std::get<1 >(pass_results[temp]) == curr_best_type) continue ;
407
+ extremes[lidx] |= std::get<1 >(pass_results[temp]);
408
+ printf (" \n Prune[%zu]: %d (%d) - %.2f\n " , pruned + 1 , lidx,
409
+ std::get<1 >(pass_results[temp]), std::get<2 >(pass_results[temp]));
410
410
if (++pruned >= num_prune) break ;
411
411
}
412
412
}
413
413
pass_results.clear ();
414
414
printf (" \n\n ADD %c%3d - ppl vs ref %.4f" ,
415
415
int (label[curr_best_type]), curr_best_layer,
416
416
curr_best_ppl - ref_ppl);
417
- if (!anti_mode && curr_best_ppl > ref_ppl * 1.75 ) break ;
418
- skip_types[curr_best_layer] += curr_best_type;
419
- skips.push_back (curr_best_type == 1 ? curr_best_layer : curr_best_layer + n_layers);
417
+ if (!anti_mode) {
418
+ if (curr_best_ppl > ref_ppl * 1.75 ) break ;
419
+ skip_types[curr_best_layer] += curr_best_type;
420
+ skips.push_back (curr_best_type == 1 ? curr_best_layer : curr_best_layer + n_layers);
421
+ }
420
422
curr_best_layer = -1 ;
421
423
curr_best_ppl = -1 ;
422
424
curr_best_type = 0 ;
0 commit comments