@@ -49,6 +49,7 @@
 
 ADDED_TOKENS_FILE = 'added_tokens.json'
 FAST_TOKENIZER_FILE = 'tokenizer.json'
+is_llama3_model = False
 
 #
 # data types
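The module-level flag defaults to `False` and is only flipped to `True` in `main()` once the tokenizer conversion succeeds; everything else just reads the global. A checkpoint-based heuristic could back this up. The helper below is a hypothetical sketch, not part of the patch; the field names come from the published LLaMA 3 `params.json`:

```python
# Hypothetical detection helper (not in this patch): the published LLaMA 3
# checkpoints declare a 128256-token vocabulary and rope_theta = 500000.0
# in params.json, values no LLaMA 1/2 config uses.
import json
from pathlib import Path

def params_say_llama3(model_dir: Path) -> bool:
    cfg = json.loads((model_dir / 'params.json').read_text())
    return cfg.get('vocab_size') == 128256 or cfg.get('rope_theta') == 500000.0
```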
@@ -821,6 +822,9 @@ def convert(name: str) -> LazyTensor:
         else:
             # split by rows
             axis = 0
+        global is_llama3_model
+        if name.startswith('tok_embeddings.') and is_llama3_model:
+            axis = 0
         concatenated_shape = list(lazy_tensors[0].shape)
         concatenated_shape[axis] = sum(tensor.shape[axis] for tensor in lazy_tensors)
 
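The override exists because earlier LLaMA releases shard `tok_embeddings.weight` across model-parallel checkpoint files along the embedding dimension (`ParallelEmbedding`, axis 1), while LLaMA 3 appears to shard it along the vocab dimension (`VocabParallelEmbedding`, axis 0), so merging multi-file checkpoints must concatenate on the other axis. A minimal sketch of the difference, using NumPy and a toy 8x4 matrix:

```python
# Toy demonstration: merging two model-parallel shards of a (vocab, dim)
# embedding matrix requires knowing which axis was split.
import numpy as np

full = np.arange(32, dtype=np.float32).reshape(8, 4)  # (vocab=8, dim=4)

# LLaMA 1/2 style: the embedding dim is split, so merge along axis 1.
col_shards = np.split(full, 2, axis=1)
assert np.array_equal(np.concatenate(col_shards, axis=1), full)

# LLaMA 3 style: the vocab dim is split, so merge along axis 0 instead;
# concatenating these shards along axis 1 would scramble the matrix.
row_shards = np.split(full, 2, axis=0)
assert np.array_equal(np.concatenate(row_shards, axis=0), full)
```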
@@ -1194,6 +1198,12 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
         tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab)
 
         # Add extracted token information for model conversion
+        # LLaMA 3 switched from SentencePiece to a GPT-2 style BPE
+        # tokenizer; "llama-bpe" selects the matching pre-tokenizer.
+        global is_llama3_model
+        if is_llama3_model:
+            self.gguf.add_tokenizer_model("gpt2")
+            self.gguf.add_tokenizer_pre("llama-bpe")
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)
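For reference, `add_tokenizer_model` and `add_tokenizer_pre` write the `tokenizer.ggml.model` and `tokenizer.ggml.pre` metadata keys, which llama.cpp reads back at load time to pick the vocabulary type and the pre-tokenizer regex. A standalone sketch with the `gguf` Python package, assuming a version recent enough to have `add_tokenizer_pre` (the output filename is illustrative):

```python
# Writes a KV-only GGUF file carrying just the two tokenizer metadata keys.
import gguf

writer = gguf.GGUFWriter('tokenizer-meta-demo.gguf', arch='llama')
writer.add_tokenizer_model('gpt2')     # -> tokenizer.ggml.model
writer.add_tokenizer_pre('llama-bpe')  # -> tokenizer.ggml.pre
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```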
@@ -1662,10 +1672,14 @@ def main(args_in: list[str] | None = None) -> None:
     }[args.outtype]
 
     logger.info(f"params = {params}")
-
-
-    import convert_llama_weights_to_hf
-    convert_llama_weights_to_hf.write_tokenizer(args.model, os.path.join(args.model, "tokenizer.model"), 3)
+    # TODO: add more bandaids for llama 3 detection
+    try:
+        global is_llama3_model
+        import convert_llama_weights_to_hf
+        convert_llama_weights_to_hf.write_tokenizer(args.model, os.path.join(args.model, "tokenizer.model"), 3)
+        is_llama3_model = True
+    except Exception:
+        pass  # not a LLaMA 3 checkpoint, or the HF conversion script is missing
 
 
     model_parent_path = model_plus.paths[0].parent
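The detection here is a side effect: `transformers`' `convert_llama_weights_to_hf.write_tokenizer` only succeeds on a LLaMA 3 style `tokenizer.model`, so success doubles as the version check. A more direct probe would inspect the file itself. The helper below is a hypothetical sketch, assuming the LLaMA 3 tiktoken format of one `<base64-token> <rank>` pair per line (a SentencePiece model is a binary protobuf and fails the decode):

```python
# Hypothetical probe, not in the patch: classify tokenizer.model by format.
import base64
from pathlib import Path

def tokenizer_model_is_tiktoken(path: Path) -> bool:
    try:
        fields = path.read_text(encoding='utf-8').splitlines()[0].split()
        base64.b64decode(fields[0], validate=True)  # raises on non-base64 data
        return len(fields) == 2 and fields[1].isdigit()
    except (IndexError, UnicodeDecodeError, ValueError, OSError):
        return False
```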