@@ -365,47 +365,6 @@ function gg_run_open_llama_3b_v2 {
365
365
366
366
cat $OUT /${ci} -imatrix.log | grep " Final" >> $OUT /${ci} -imatrix-sum.log
367
367
368
- # lora
369
# Compare two perplexity readings and print a one-line verdict.
#
# Arguments:
#   $1 - label printed at the start of the report line
#   $2 - text containing the first perplexity (callers in this file pass the
#        "[1]" line of the run without --lora)
#   $3 - text containing the second perplexity (callers in this file pass the
#        "[1]" line of the run with --lora)
# Outputs:
#   a single report line on stdout
# Returns:
#   0  when the second value is not greater than the first
#   20 when the second value is greater than the first, or when either value
#      could not be extracted from its input text
function compare_ppl {
    local qnt="$1"
    local ppl1 ppl2

    # The last decimal number found in each text is used as the perplexity.
    # grep exits non-zero when nothing matches; '|| true' keeps that from
    # aborting callers that run with 'set -e' and pipefail.
    ppl1=$(printf '%s\n' "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true
    ppl2=$(printf '%s\n' "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true

    # A missing value must not be reported as a pass.
    if [ -z "$ppl1" ] || [ -z "$ppl2" ]; then
        printf ' - %s (FAIL: perplexity value missing)\n' "$qnt"
        return 20
    fi

    # Floating-point comparison; '+ 0' forces awk to compare numerically.
    if awk -v first="$ppl1" -v second="$ppl2" 'BEGIN { exit !(first + 0 < second + 0) }'; then
        # Report the values in the order that makes the printed inequality true.
        printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl1" "$ppl2" "$ppl1"
        return 20
    fi

    printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
    return 0
}
382
-
383
- path_lora=" ../models-mnt/open-llama/3B-v2/lora"
384
- path_shakespeare=" ../models-mnt/shakespeare"
385
-
386
- shakespeare=" ${path_shakespeare} /shakespeare.txt"
387
- lora_shakespeare=" ${path_lora} /ggml-adapter-model.bin"
388
-
389
- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
390
- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
391
- gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
392
-
393
- python3 ../convert-lora-to-ggml.py ${path_lora}
394
-
395
- # f16
396
- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
397
- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
398
- compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
399
-
400
- # q8_0
401
- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-q8_0.log
402
- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0.log
403
- compare_ppl " q8_0 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
404
-
405
- # q8_0 + f16 lora-base
406
- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log
407
- compare_ppl " q8_0 / f16 base shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
408
-
409
368
set +e
410
369
}
411
370
@@ -416,7 +375,6 @@ function gg_sum_open_llama_3b_v2 {
416
375
gg_printf ' - status: %s\n' " $( cat $OUT /${ci} .exit) "
417
376
gg_printf ' - perplexity:\n%s\n' " $( cat $OUT /${ci} -ppl.log) "
418
377
gg_printf ' - imatrix:\n```\n%s\n```\n' " $( cat $OUT /${ci} -imatrix-sum.log) "
419
- gg_printf ' - lora:\n%s\n' " $( cat $OUT /${ci} -lora-ppl.log) "
420
378
gg_printf ' - f16: \n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-f16.log) "
421
379
gg_printf ' - q8_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q8_0.log) "
422
380
gg_printf ' - q4_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_0.log) "
@@ -429,11 +387,6 @@ function gg_sum_open_llama_3b_v2 {
429
387
gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
430
388
gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
431
389
gg_printf ' - save-load-state: \n```\n%s\n```\n' " $( cat $OUT /${ci} -save-load-state.log) "
432
- gg_printf ' - shakespeare (f16):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-f16.log) "
433
- gg_printf ' - shakespeare (f16 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log) "
434
- gg_printf ' - shakespeare (q8_0):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log) "
435
- gg_printf ' - shakespeare (q8_0 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log) "
436
- gg_printf ' - shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log) "
437
390
}
438
391
439
392
# open_llama_7b_v2
@@ -549,48 +502,6 @@ function gg_run_open_llama_7b_v2 {
549
502
550
503
cat $OUT /${ci} -imatrix.log | grep " Final" >> $OUT /${ci} -imatrix-sum.log
551
504
552
- # lora
553
# Compare two perplexity readings and print a one-line verdict.
#
# Arguments:
#   $1 - label printed at the start of the report line
#   $2 - text containing the first perplexity (callers in this file pass the
#        "[1]" line of the run without --lora)
#   $3 - text containing the second perplexity (callers in this file pass the
#        "[1]" line of the run with --lora)
# Outputs:
#   a single report line on stdout
# Returns:
#   0  when the second value is not greater than the first
#   20 when the second value is greater than the first, or when either value
#      could not be extracted from its input text
function compare_ppl {
    local qnt="$1"
    local ppl1 ppl2

    # The last decimal number found in each text is used as the perplexity.
    # grep exits non-zero when nothing matches; '|| true' keeps that from
    # aborting callers that run with 'set -e' and pipefail.
    ppl1=$(printf '%s\n' "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true
    ppl2=$(printf '%s\n' "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true

    # A missing value must not be reported as a pass.
    if [ -z "$ppl1" ] || [ -z "$ppl2" ]; then
        printf ' - %s (FAIL: perplexity value missing)\n' "$qnt"
        return 20
    fi

    # Floating-point comparison; '+ 0' forces awk to compare numerically.
    if awk -v first="$ppl1" -v second="$ppl2" 'BEGIN { exit !(first + 0 < second + 0) }'; then
        # Report the values in the order that makes the printed inequality true.
        printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl1" "$ppl2" "$ppl1"
        return 20
    fi

    printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
    return 0
}
566
-
567
- path_lora=" ../models-mnt/open-llama/7B-v2/lora"
568
- path_shakespeare=" ../models-mnt/shakespeare"
569
-
570
- shakespeare=" ${path_shakespeare} /shakespeare.txt"
571
- lora_shakespeare=" ${path_lora} /ggml-adapter-model.bin"
572
-
573
- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_config.json
574
- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_model.bin
575
- gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/shakespeare.txt
576
-
577
- python3 ../convert-lora-to-ggml.py ${path_lora}
578
-
579
- # f16
580
- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
581
- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
582
- compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
583
-
584
- # currently not supported by the CUDA backend
585
- # q8_0
586
- # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
587
- # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
588
- # compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
589
-
590
- # q8_0 + f16 lora-base
591
- # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
592
- # compare_ppl "q8_0 / f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
593
-
594
505
set +e
595
506
}
596
507
@@ -601,7 +512,6 @@ function gg_sum_open_llama_7b_v2 {
601
512
gg_printf ' - status: %s\n' " $( cat $OUT /${ci} .exit) "
602
513
gg_printf ' - perplexity:\n%s\n' " $( cat $OUT /${ci} -ppl.log) "
603
514
gg_printf ' - imatrix:\n```\n%s\n```\n' " $( cat $OUT /${ci} -imatrix-sum.log) "
604
- gg_printf ' - lora:\n%s\n' " $( cat $OUT /${ci} -lora-ppl.log) "
605
515
gg_printf ' - f16: \n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-f16.log) "
606
516
gg_printf ' - q8_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q8_0.log) "
607
517
gg_printf ' - q4_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_0.log) "
@@ -614,11 +524,6 @@ function gg_sum_open_llama_7b_v2 {
614
524
gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
615
525
gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
616
526
gg_printf ' - save-load-state: \n```\n%s\n```\n' " $( cat $OUT /${ci} -save-load-state.log) "
617
- gg_printf ' - shakespeare (f16):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-f16.log) "
618
- gg_printf ' - shakespeare (f16 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log) "
619
- # gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)"
620
- # gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)"
621
- # gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
622
527
}
623
528
624
529
# bge-small
0 commit comments