1 parent 3b8f1ec commit c71bfd7
llama.cpp
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {
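The old estimate reserved one tensor-header slot per expert per layer, which over-allocates once the per-expert FFN weights are merged: with the merged layout each layer adds only three extra tensors (presumably the merged gate/down/up expert tensors, one 3D tensor per projection), hence n_layer*3. Below is a minimal sketch of how such a metadata-only ggml context is sized; make_weight_ctx and its parameters are illustrative, not the actual llama.cpp code:

#include "ggml.h"
#include <stddef.h>

// Sketch only: size a metadata-only ggml context. With no_alloc = true
// the context holds just tensor headers, so the buffer needs one
// ggml_tensor_overhead() per tensor that will be created in it.
static struct ggml_context * make_weight_ctx(size_t n_tensors, int n_layer) {
    size_t ctx_size = ggml_tensor_overhead()*(n_tensors + 1) // +1 for a duplicated tok_embd/output
                    + ggml_tensor_overhead()*n_layer*3;      // 3 merged MoE tensors per layer (assumed:
                                                             // gate/down/up experts merged per projection)

    struct ggml_init_params params = {
        /*.mem_size   =*/ ctx_size,
        /*.mem_buffer =*/ NULL,  // let ggml allocate the header pool itself
        /*.no_alloc   =*/ true,  // headers only; weight data lives in backend buffers
    };
    return ggml_init(params);
}

Because the overhead term no longer scales with hparams.n_expert, the reserved size stays tight for models with many experts while still covering the three merged tensors each layer contributes.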