@@ -1,5 +1,6 @@
 #include "ggml.h"
 #include "llama.h"
+
 #include <unordered_map>
 #include <vector>
 #include <cassert>
@@ -502,7 +503,7 @@ bool is_ggml_file(const char *filename) {
         return false;
     }
     uint32_t magic = file.read_u32();
-    return magic == LLAMA_FILE_MAGIC;
+    return magic == GGUF_MAGIC;
 }
 
 void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
@@ -590,75 +591,80 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
     if (file.fp == NULL) {
         return;
     }
-    // write_magic
-    file.write_u32(LLAMA_FILE_MAGIC); // magic
-    file.write_u32(LLAMA_FILE_VERSION); // version
-    // write_hparams
-    file.write_u32(model->hparams.n_vocab);
-    file.write_u32(model->hparams.n_embd);
-    file.write_u32(model->hparams.n_mult);
-    file.write_u32(model->hparams.n_head);
-    file.write_u32(model->hparams.n_layer);
-    file.write_u32(model->hparams.n_rot);
-    file.write_u32(LLAMA_FTYPE_ALL_F32);
-
-    // write_vocab - for now we are just writing the existing BPE voc. assuming karpathy's vocabulary is the same. idk.
-    uint32_t n_vocab = model->hparams.n_vocab;
-    for (uint32_t i = 0; i < n_vocab; i++) {
-        const auto & token_score = vocab->id_to_token.at(i);
-        file.write_u32((uint32_t) token_score.tok.size());
-        file.write_raw(token_score.tok.data(), token_score.tok.size());
-        file.write_raw(&token_score.score, sizeof(token_score.score));
-    }
-
-    // stuff AK weights into GG weights one by one.
-    // w->token_embedding_table -> model->tok_embeddings
-    // float*                   -> struct ggml_tensor
-    stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
-    stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
-
-    stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
-    //print_row(model->norm, 0);
-
-    // for rms-att-weight
-    int row_length = model->hparams.n_embd;
-    const auto & hparams = model->hparams;
-    //int n_ff = model->hparams.n_embd;
-    int n_ff = get_n_ff(&hparams);
 
-    for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
-        auto & layer = model->layers[i];
-        // 1d
-        stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
-        stuff_karpathy_weights_into_gg(layer.ffn_norm      , &w->rms_ffn_weight[i*row_length]);
-
-        // from 3d matrix layer x dim x dim to 2d matrix dim x dim
-        stuff_karpathy_weights_into_gg(layer.wq            , &w->wq[i*row_length*row_length]);
-        stuff_karpathy_weights_into_gg(layer.wk            , &w->wk[i*row_length*row_length]);
-        stuff_karpathy_weights_into_gg(layer.wv            , &w->wv[i*row_length*row_length]);
-        stuff_karpathy_weights_into_gg(layer.wo            , &w->wo[i*row_length*row_length]);
-
-        stuff_karpathy_weights_into_gg(layer.w1            , &w->w1[i*row_length*n_ff]);
-        stuff_karpathy_weights_into_gg(layer.w2            , &w->w2[i*n_ff*row_length]);
-        stuff_karpathy_weights_into_gg(layer.w3            , &w->w3[i*row_length*n_ff]);
-    }
-    // write tensors
-    write_tensor(&file, model->tok_embeddings);
-    write_tensor(&file, model->norm);
-    write_tensor(&file, model->output); // ?
-    for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
-        auto & layer = model->layers[i];
-
-        write_tensor(&file, layer.attention_norm);
-        write_tensor(&file, layer.wq);
-        write_tensor(&file, layer.wk);
-        write_tensor(&file, layer.wv);
-        write_tensor(&file, layer.wo);
-        write_tensor(&file, layer.ffn_norm);
-        write_tensor(&file, layer.w1);
-        write_tensor(&file, layer.w2);
-        write_tensor(&file, layer.w3);
-    }
+#pragma message("TODO: implement file saving using gguf")
+    (void) vocab;
+    (void) model;
+    (void) w;
+//    // write_magic
+//    file.write_u32(LLAMA_FILE_MAGIC); // magic
+//    file.write_u32(LLAMA_FILE_VERSION); // version
+//    // write_hparams
+//    file.write_u32(model->hparams.n_vocab);
+//    file.write_u32(model->hparams.n_embd);
+//    file.write_u32(model->hparams.n_mult);
+//    file.write_u32(model->hparams.n_head);
+//    file.write_u32(model->hparams.n_layer);
+//    file.write_u32(model->hparams.n_rot);
+//    file.write_u32(LLAMA_FTYPE_ALL_F32);
+//
+//    // write_vocab - for now we are just writing the existing BPE voc. assuming karpathy's vocabulary is the same. idk.
+//    uint32_t n_vocab = model->hparams.n_vocab;
+//    for (uint32_t i = 0; i < n_vocab; i++) {
+//        const auto & token_score = vocab->id_to_token.at(i);
+//        file.write_u32((uint32_t) token_score.tok.size());
+//        file.write_raw(token_score.tok.data(), token_score.tok.size());
+//        file.write_raw(&token_score.score, sizeof(token_score.score));
+//    }
+//
+//    // stuff AK weights into GG weights one by one.
+//    // w->token_embedding_table -> model->tok_embeddings
+//    // float*                   -> struct ggml_tensor
+//    stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
+//    stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
+//
+//    stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
+//    //print_row(model->norm, 0);
+//
+//    // for rms-att-weight
+//    int row_length = model->hparams.n_embd;
+//    const auto & hparams = model->hparams;
+//    //int n_ff = model->hparams.n_embd;
+//    int n_ff = get_n_ff(&hparams);
+//
+//    for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
+//        auto & layer = model->layers[i];
+//        // 1d
+//        stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
+//        stuff_karpathy_weights_into_gg(layer.ffn_norm      , &w->rms_ffn_weight[i*row_length]);
+//
+//        // from 3d matrix layer x dim x dim to 2d matrix dim x dim
+//        stuff_karpathy_weights_into_gg(layer.wq            , &w->wq[i*row_length*row_length]);
+//        stuff_karpathy_weights_into_gg(layer.wk            , &w->wk[i*row_length*row_length]);
+//        stuff_karpathy_weights_into_gg(layer.wv            , &w->wv[i*row_length*row_length]);
+//        stuff_karpathy_weights_into_gg(layer.wo            , &w->wo[i*row_length*row_length]);
+//
+//        stuff_karpathy_weights_into_gg(layer.w1            , &w->w1[i*row_length*n_ff]);
+//        stuff_karpathy_weights_into_gg(layer.w2            , &w->w2[i*n_ff*row_length]);
+//        stuff_karpathy_weights_into_gg(layer.w3            , &w->w3[i*row_length*n_ff]);
+//    }
+//    // write tensors
+//    write_tensor(&file, model->tok_embeddings);
+//    write_tensor(&file, model->norm);
+//    write_tensor(&file, model->output); // ?
+//    for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
+//        auto & layer = model->layers[i];
+//
+//        write_tensor(&file, layer.attention_norm);
+//        write_tensor(&file, layer.wq);
+//        write_tensor(&file, layer.wk);
+//        write_tensor(&file, layer.wv);
+//        write_tensor(&file, layer.wo);
+//        write_tensor(&file, layer.ffn_norm);
+//        write_tensor(&file, layer.w1);
+//        write_tensor(&file, layer.w2);
+//        write_tensor(&file, layer.w3);
+//    }
 }
 
 struct train_params get_default_train_params() {
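Note: the hunk above stubs out save_as_llama_model behind a #pragma message("TODO: implement file saving using gguf"). As a rough sketch only (not part of this commit), the writer could later be rebuilt on ggml's gguf C API (gguf_init_empty, gguf_set_val_*, gguf_add_tensor, gguf_write_to_file). The metadata key strings and the helper name save_as_gguf_sketch below are illustrative assumptions, not code from this repository:

// Illustrative sketch only -- assumes ggml's gguf C API and GGUF-style
// metadata keys; the actual implementation may differ.
static void save_as_gguf_sketch(struct my_llama_model * model, const char * filename) {
    struct gguf_context * ctx = gguf_init_empty();

    // hparams become key-value metadata instead of a hand-rolled binary header
    gguf_set_val_str(ctx, "general.architecture",       "llama");
    gguf_set_val_u32(ctx, "llama.embedding_length",      model->hparams.n_embd);
    gguf_set_val_u32(ctx, "llama.block_count",           model->hparams.n_layer);
    gguf_set_val_u32(ctx, "llama.attention.head_count",  model->hparams.n_head);
    gguf_set_val_u32(ctx, "llama.rope.dimension_count",  model->hparams.n_rot);

    // tensors are registered under their ggml names; data is written with the metadata
    gguf_add_tensor(ctx, model->tok_embeddings);
    gguf_add_tensor(ctx, model->norm);
    gguf_add_tensor(ctx, model->output);
    for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
        const auto & layer = model->layers[i];
        gguf_add_tensor(ctx, layer.attention_norm);
        gguf_add_tensor(ctx, layer.wq);
        gguf_add_tensor(ctx, layer.wk);
        gguf_add_tensor(ctx, layer.wv);
        gguf_add_tensor(ctx, layer.wo);
        gguf_add_tensor(ctx, layer.ffn_norm);
        gguf_add_tensor(ctx, layer.w1);
        gguf_add_tensor(ctx, layer.w2);
        gguf_add_tensor(ctx, layer.w3);
    }

    // the vocabulary would go into tokenizer.* keys (omitted in this sketch)

    gguf_write_to_file(ctx, filename, /*only_meta =*/ false);
    gguf_free(ctx);
}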