Skip to content

Commit 9e75c49

Browse files
committed
Merge branch 'master' into xsn/private_batch_api
2 parents f0ffd81 + 2cc4a5e commit 9e75c49

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

72 files changed

+2633
-814
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ lcov-report/
4545
tags
4646
.build/
4747
build*
48+
release
49+
debug
4850
!build-info.cmake
4951
!build-info.cpp.in
5052
!build-info.sh

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
4040
_(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
4141
42-
- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
42+
- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code
4343
- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
4444
- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
4545
- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
219219
- [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly
220220
- [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server
221221
- [Kalavai](https://github.com/kalavai-net/kalavai-client) - Crowdsource end to end LLM deployment at any scale
222-
222+
- [llmaz](https://github.com/InftyAI/llmaz) - ☸️ Easy, advanced inference platform for large language models on Kubernetes.
223223
</details>
224224

225225
<details>

convert_hf_to_gguf.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
699699
if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5":
700700
# ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
701701
res = "deepseek-r1-qwen"
702+
if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
703+
# ref: https://huggingface.co/Xenova/gpt-4o
704+
res = "gpt-4o"
702705

703706
if res is None:
704707
logger.warning("\n")
@@ -2512,7 +2515,8 @@ def set_gguf_parameters(self):
25122515
rms_eps = self.find_hparam(["rms_norm_eps"])
25132516
max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
25142517
orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
2515-
rope_dims = n_embd // n_head
2518+
rot_pct = self.hparams.get("partial_rotary_factor", 1.0)
2519+
rope_dims = int(rot_pct * n_embd) // n_head
25162520

25172521
self.gguf_writer.add_context_length(max_pos_embds)
25182522
self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
@@ -2536,7 +2540,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
25362540
n_head = self.find_hparam(["num_attention_heads", "n_head"])
25372541
max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
25382542
orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
2539-
rope_dims = n_embd // n_head
2543+
rot_pct = self.hparams.get("partial_rotary_factor", 1.0)
2544+
rope_dims = int(rot_pct * n_embd) // n_head
25402545

25412546
# write rope scaling for long context (128k) model
25422547
rope_scaling = self.find_hparam(['rope_scaling'], True)
@@ -2565,7 +2570,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
25652570
raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
25662571

25672572
if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
2568-
raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
2573+
raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}. long_factors = {len(long_factors)}, short_factors = {len(short_factors)}.')
25692574

25702575
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
25712576
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))

convert_hf_to_gguf_update.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class TOKENIZER_TYPE(IntEnum):
109109
{"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
110110
{"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
111111
{"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
112+
{"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
112113
]
113114

114115

@@ -131,6 +132,10 @@ def download_model(model):
131132

132133
files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
133134

135+
if name == "gpt-4o":
136+
# Xenova/gpt-4o is tokenizer-only, it does not contain config.json
137+
files = ["tokenizer.json", "tokenizer_config.json"]
138+
134139
if tokt == TOKENIZER_TYPE.SPM:
135140
files.append("tokenizer.model")
136141

0 commit comments

Comments (0)