Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package org.beehive.gpullama3.model.loader;

import org.beehive.gpullama3.LlamaApp;
import org.beehive.gpullama3.Options;
import org.beehive.gpullama3.auxiliary.Timer;
import org.beehive.gpullama3.core.model.GGMLType;
import org.beehive.gpullama3.core.model.GGUF;
import org.beehive.gpullama3.core.model.tensor.ArrayFloatTensor;
Expand All @@ -21,6 +19,7 @@
import org.beehive.gpullama3.tokenizer.impl.Qwen3Tokenizer;
import org.beehive.gpullama3.tokenizer.impl.Tokenizer;
import org.beehive.gpullama3.tokenizer.vocabulary.Vocabulary;
import org.beehive.gpullama3.tornadovm.TornadoVMMasterPlan;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

import java.io.IOException;
Expand All @@ -40,11 +39,9 @@ public Model loadModel() {
Map<String, Object> metadata = gguf.getMetadata();
String basename = (String) metadata.get("general.basename");

String modelName = "DeepSeek-R1-Distill-Qwen".equals(basename)
? "DeepSeek-R1-Distill-Qwen"
: "Qwen2.5";
String modelName = "DeepSeek-R1-Distill-Qwen".equals(basename) ? "DeepSeek-R1-Distill-Qwen" : "Qwen2.5";
Copy link

Copilot AI Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] This line exceeds typical line length conventions (appears to be over 100 characters). Consider breaking the ternary operator across multiple lines for better readability.

Suggested change
String modelName = "DeepSeek-R1-Distill-Qwen".equals(basename) ? "DeepSeek-R1-Distill-Qwen" : "Qwen2.5";
String modelName = "DeepSeek-R1-Distill-Qwen".equals(basename)
? "DeepSeek-R1-Distill-Qwen"
: "Qwen2.5";

Copilot uses AI. Check for mistakes.

try (var ignored = Timer.log("Load " + modelName + " model")) {
try {
// Reuse the Qwen3 vocabulary loader.
Vocabulary vocabulary = loadQwen3Vocabulary(metadata);
boolean isDeepSeekR1DistillQwen = "DeepSeek-R1-Distill-Qwen".equals(metadata.get("general.basename"));
Expand All @@ -55,11 +52,8 @@ public Model loadModel() {
contextLength = modelContextLength;
}

int numberOfKeyValueHeads = metadata.containsKey("qwen2.attention.head_count_kv")
? (int) metadata.get("qwen2.attention.head_count_kv")
: (int) metadata.get("qwen2.attention.head_count");
Qwen2Configuration config = new Qwen2Configuration(
(int) metadata.get("qwen2.embedding_length"), // dim
int numberOfKeyValueHeads = metadata.containsKey("qwen2.attention.head_count_kv") ? (int) metadata.get("qwen2.attention.head_count_kv") : (int) metadata.get("qwen2.attention.head_count");
Copy link

Copilot AI Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] This line is excessively long (over 150 characters) and reduces readability. Consider breaking it into multiple lines or extracting the conditional logic into a separate variable.

Suggested change
int numberOfKeyValueHeads = metadata.containsKey("qwen2.attention.head_count_kv") ? (int) metadata.get("qwen2.attention.head_count_kv") : (int) metadata.get("qwen2.attention.head_count");
int numberOfKeyValueHeads;
if (metadata.containsKey("qwen2.attention.head_count_kv")) {
numberOfKeyValueHeads = (int) metadata.get("qwen2.attention.head_count_kv");
} else {
numberOfKeyValueHeads = (int) metadata.get("qwen2.attention.head_count");
}

Copilot uses AI. Check for mistakes.
Qwen2Configuration config = new Qwen2Configuration((int) metadata.get("qwen2.embedding_length"), // dim
(int) metadata.get("qwen2.feed_forward_length"), // hiddendim
(int) metadata.get("qwen2.block_count"), // numberOfLayers
(int) metadata.get("qwen2.attention.head_count"), // numberOfHeads
Expand All @@ -68,22 +62,17 @@ public Model loadModel() {
numberOfKeyValueHeads, // numberOfHeadsKey
numberOfKeyValueHeads, // numberOfHeadsValue

vocabulary.size(),
modelContextLength, contextLength,
false,
(float) metadata.get("qwen2.attention.layer_norm_rms_epsilon"),
(float) metadata.get("qwen2.rope.freq_base")
);
vocabulary.size(), modelContextLength, contextLength, false, (float) metadata.get("qwen2.attention.layer_norm_rms_epsilon"), (float) metadata.get("qwen2.rope.freq_base"));
Copy link

Copilot AI Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The final line of this constructor call packs six arguments onto a single line, which makes it difficult to read and maintain. Consider placing each argument on its own line for better readability.

Suggested change
vocabulary.size(), modelContextLength, contextLength, false, (float) metadata.get("qwen2.attention.layer_norm_rms_epsilon"), (float) metadata.get("qwen2.rope.freq_base"));
vocabulary.size(),
modelContextLength,
contextLength,
false,
(float) metadata.get("qwen2.attention.layer_norm_rms_epsilon"),
(float) metadata.get("qwen2.rope.freq_base")
);

Copilot uses AI. Check for mistakes.

Weights weights = null;
if (loadWeights) {
Map<String, GGMLTensorEntry> tensorEntries = GGUF.loadTensors(fileChannel, gguf.getTensorDataOffset(), gguf.getTensorInfos());
weights = loadWeights(tensorEntries, config);
}
// Qwen2.5-Coder uses <|endoftext|> as its stop token.
ChatTokens chatTokens = isDeepSeekR1DistillQwen ?
new ChatTokens( "<|begin▁of▁sentence|>", "", "", "<|end▁of▁sentence|>", "") :
new ChatTokens( "<|im_start|>", "<|im_end|>", "", "<|end_of_text|>", "<|endoftext|>");
ChatTokens chatTokens = isDeepSeekR1DistillQwen
? new ChatTokens("<|begin▁of▁sentence|>", "", "", "<|end▁of▁sentence|>", "")
: new ChatTokens("<|im_start|>", "<|im_end|>", "", "<|end_of_text|>", "<|endoftext|>");
return new Qwen2(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens));
} catch (IOException e) {
throw new RuntimeException(e);
Expand All @@ -108,7 +97,9 @@ public Weights loadWeights(Map<String, GGMLTensorEntry> tensorEntries, Configura
GGMLTensorEntry outputWeight = tensorEntries.getOrDefault("output.weight", tokenEmbeddings);

if (Options.getDefaultOptions().useTornadovm()) {
System.out.println("Loading model weights in TornadoVM format (loading " + outputWeight.ggmlType() + " -> " + GGMLType.F16 + ")");
if (TornadoVMMasterPlan.ENABLE_TORNADOVM_INIT_TIME) {
System.out.println("Loading model weights in TornadoVM format (loading " + outputWeight.ggmlType() + " -> " + GGMLType.F16 + ")");
}
return createTornadoVMWeights(tensorEntries, config, ropeFreqs, tokenEmbeddings, outputWeight);
} else {
return createStandardWeights(tensorEntries, config, ropeFreqs, tokenEmbeddings, outputWeight);
Expand Down