Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ llama-tornado --gpu --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "te
@/home/mikepapadim/manchester/TornadoVM/bin/sdk/etc/exportLists/opencl-exports \
--add-modules ALL-SYSTEM,tornado.runtime,tornado.annotation,tornado.drivers.common,tornado.drivers.opencl \
-cp /home/mikepapadim/repos/gpu-llama3.java/target/gpu-llama3-1.0-SNAPSHOT.jar \
com.example.LlamaApp \
org.beehive.gpullama3.LlamaApp \
-m beehive-llama-3.2-1b-instruct-fp16.gguf \
--temperature 0.1 \
--top-p 0.95 \
Expand Down
2 changes: 1 addition & 1 deletion llama-tornado
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class LlamaRunner:
[
"-cp",
f"{self.llama_root}/target/gpu-llama3-1.0-SNAPSHOT.jar",
"com.example.LlamaApp",
"org.beehive.gpullama3.LlamaApp",
]
)
cmd.extend(module_config)
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.example</groupId>
<groupId>org.beehive.gpullama3</groupId>
<artifactId>gpu-llama3</artifactId>
<version>1.0-SNAPSHOT</version>

Expand Down Expand Up @@ -61,7 +61,7 @@
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.example.LlamaApp</mainClass>
<mainClass>org.beehive.gpullama3.LlamaApp</mainClass>
</transformer>
</transformers>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package com.example;
package org.beehive.gpullama3;

import com.example.aot.AOT;
import com.example.core.model.tensor.FloatTensor;
import com.example.inference.sampler.CategoricalSampler;
import com.example.inference.sampler.Sampler;
import com.example.inference.sampler.ToppSampler;
import com.example.model.loader.ModelLoader;
import com.example.model.Model;
import com.example.tornadovm.FloatArrayUtils;
import org.beehive.gpullama3.aot.AOT;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.inference.sampler.CategoricalSampler;
import org.beehive.gpullama3.inference.sampler.Sampler;
import org.beehive.gpullama3.inference.sampler.ToppSampler;
import org.beehive.gpullama3.model.loader.ModelLoader;
import org.beehive.gpullama3.model.Model;
import org.beehive.gpullama3.tornadovm.FloatArrayUtils;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example;
package org.beehive.gpullama3;

import java.io.PrintStream;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package com.example.aot;
package org.beehive.gpullama3.aot;

import com.example.auxiliary.Timer;
import com.example.core.model.GGUF;
import com.example.core.model.tensor.GGMLTensorEntry;
import com.example.model.loader.LlamaModelLoader;
import com.example.model.Model;
import com.example.Options;
import com.example.model.format.LlamaChatFormat;
import com.example.model.llama.Llama;
import com.example.inference.weights.Weights;
import com.example.tokenizer.impl.LlamaTokenizer;
import org.beehive.gpullama3.auxiliary.Timer;
import org.beehive.gpullama3.core.model.GGUF;
import org.beehive.gpullama3.core.model.tensor.GGMLTensorEntry;
import org.beehive.gpullama3.model.loader.LlamaModelLoader;
import org.beehive.gpullama3.model.Model;
import org.beehive.gpullama3.Options;
import org.beehive.gpullama3.model.format.LlamaChatFormat;
import org.beehive.gpullama3.model.llama.Llama;
import org.beehive.gpullama3.inference.weights.Weights;
import org.beehive.gpullama3.tokenizer.impl.LlamaTokenizer;

import java.io.IOException;
import java.nio.channels.FileChannel;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.auxiliary;
package org.beehive.gpullama3.auxiliary;

/**
* Record to store metrics from the last model run.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.auxiliary;
package org.beehive.gpullama3.auxiliary;

import java.util.function.IntConsumer;
import java.util.function.LongConsumer;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.auxiliary;
package org.beehive.gpullama3.auxiliary;

import java.util.concurrent.TimeUnit;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.auxiliary;
package org.beehive.gpullama3.auxiliary;

public class Tuple2<T, U> {
private final T first;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.auxiliary;
package org.beehive.gpullama3.auxiliary;

/** mask of a byte-sequence in UTF-8 encoding */
public record Utf8Mask(int mask, int pattern, int len) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
package com.example.core.model;

import com.example.core.types.Float16;
package org.beehive.gpullama3.core.model;

public enum GGMLType {
// Floating point types
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package com.example.core.model;
package org.beehive.gpullama3.core.model;

import com.example.auxiliary.Timer;
import com.example.core.model.GGUF.GGUFTensorInfo;
import com.example.core.model.tensor.FloatTensor;
import com.example.core.model.tensor.GGMLTensorEntry;
import com.example.core.types.MetadataValueType;
import com.example.core.types.Pair;
import org.beehive.gpullama3.auxiliary.Timer;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.core.model.tensor.GGMLTensorEntry;
import org.beehive.gpullama3.core.types.MetadataValueType;
import org.beehive.gpullama3.core.types.Pair;

import java.io.FileNotFoundException;
import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;

import com.example.core.model.GGMLType;
import org.beehive.gpullama3.core.model.GGMLType;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

import java.lang.foreign.MemorySegment;
import java.util.Arrays;

import static com.example.LlamaApp.USE_VECTOR_API;

public final class ArrayFloatTensor extends FloatTensor {

final float[] values;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;

import com.example.core.model.GGMLType;
import org.beehive.gpullama3.core.model.GGMLType;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.VectorOperators;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;

import com.example.core.model.GGMLType;
import org.beehive.gpullama3.core.model.GGMLType;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;

import com.example.auxiliary.Parallel;
import com.example.core.model.GGMLType;
import org.beehive.gpullama3.auxiliary.Parallel;
import org.beehive.gpullama3.core.model.GGMLType;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import sun.misc.Unsafe;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.lang.reflect.Field;
import java.util.Arrays;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;

import com.example.core.model.GGMLType;
import org.beehive.gpullama3.core.model.GGMLType;

import java.lang.foreign.MemorySegment;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;

import com.example.LlamaApp;
import com.example.core.model.GGMLType;
import com.example.core.types.Float16;
import org.beehive.gpullama3.LlamaApp;
import org.beehive.gpullama3.core.model.GGMLType;
import org.beehive.gpullama3.core.types.Float16;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorOperators;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.example.core.model.tensor;
package org.beehive.gpullama3.core.model.tensor;


import com.example.core.model.GGMLType;
import com.example.core.types.Float16;
import org.beehive.gpullama3.core.model.GGMLType;
import org.beehive.gpullama3.core.types.Float16;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorOperators;
Expand All @@ -12,8 +12,6 @@
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;

import static com.example.LlamaApp.USE_VECTOR_API;

public final class Q8_0FloatTensor extends FloatTensor {
Copy link

Copilot AI Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the static import `import static com.example.LlamaApp.USE_VECTOR_API;` without replacing it with the equivalent import from the new namespace will cause compilation errors if the `USE_VECTOR_API` constant is used in this class.

Copilot uses AI. Check for mistakes.

final int size;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.core.types;
package org.beehive.gpullama3.core.types;

public final class Float16 {
public static final int BYTES = 2;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.core.types;
package org.beehive.gpullama3.core.types;

public enum MetadataValueType {
// The value is a 8-bit unsigned integer.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.example.core.types;
package org.beehive.gpullama3.core.types;

/**
 * Generic two-component record that groups a {@code first} and a {@code second} value.
 *
 * @param <First>  type of the first component
 * @param <Second> type of the second component
 * @param first    the first component
 * @param second   the second component
 */
public record Pair<First, Second>(First first, Second second) {
}
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
package com.example.inference;

import com.example.auxiliary.Parallel;
import com.example.core.model.tensor.FloatTensor;
import com.example.inference.state.Phi3State;
import com.example.inference.state.State;
import com.example.inference.weights.standard.Phi3StandardWeights;
import com.example.inference.weights.standard.Qwen3StandardWeights;
import com.example.inference.weights.standard.StandardWeights;
import com.example.inference.weights.tornado.TornadoWeights;
import com.example.model.Configuration;
import com.example.model.Model;
import com.example.model.phi3.Phi3Configuration;
import com.example.model.qwen3.Qwen3Configuration;
import com.example.tornadovm.TornadoVMMasterPlan;
package org.beehive.gpullama3.inference;

import org.beehive.gpullama3.auxiliary.Parallel;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.inference.state.Phi3State;
import org.beehive.gpullama3.inference.state.State;
import org.beehive.gpullama3.inference.weights.standard.Phi3StandardWeights;
import org.beehive.gpullama3.inference.weights.standard.Qwen3StandardWeights;
import org.beehive.gpullama3.inference.weights.standard.StandardWeights;
import org.beehive.gpullama3.inference.weights.tornado.TornadoWeights;
import org.beehive.gpullama3.model.Configuration;
import org.beehive.gpullama3.model.Model;
import org.beehive.gpullama3.model.phi3.Phi3Configuration;
import org.beehive.gpullama3.model.qwen3.Qwen3Configuration;
import org.beehive.gpullama3.tornadovm.TornadoVMMasterPlan;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

import java.lang.foreign.MemorySegment;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package com.example.inference;

import com.example.auxiliary.LastRunMetrics;
import com.example.inference.sampler.Sampler;
import com.example.inference.state.State;
import com.example.model.Configuration;
import com.example.model.Model;
import com.example.tokenizer.impl.Tokenizer;
import com.example.tornadovm.TornadoVMMasterPlan;
package org.beehive.gpullama3.inference;

import org.beehive.gpullama3.auxiliary.LastRunMetrics;
import org.beehive.gpullama3.inference.sampler.Sampler;
import org.beehive.gpullama3.inference.state.State;
import org.beehive.gpullama3.model.Configuration;
import org.beehive.gpullama3.model.Model;
import org.beehive.gpullama3.tokenizer.impl.Tokenizer;
import org.beehive.gpullama3.tornadovm.TornadoVMMasterPlan;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

import java.io.ByteArrayOutputStream;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.inference.operation;
package org.beehive.gpullama3.inference.operation;

import com.example.core.types.Pair;
import org.beehive.gpullama3.core.types.Pair;

public final class RoPE {
public static Pair<float[], float[]> precomputeFreqsCis(int contextLength, int headSize, double theta,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.inference.sampler;
package org.beehive.gpullama3.inference.sampler;

import com.example.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

import java.util.random.RandomGenerator;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.inference.sampler;
package org.beehive.gpullama3.inference.sampler;

import com.example.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.example.inference.sampler;
package org.beehive.gpullama3.inference.sampler;

import com.example.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;

import java.util.Comparator;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.example.inference.state;
package org.beehive.gpullama3.inference.state;

import com.example.core.model.tensor.ArrayFloatTensor;
import com.example.core.model.tensor.FloatTensor;
import com.example.model.Configuration;
import org.beehive.gpullama3.core.model.tensor.ArrayFloatTensor;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.model.Configuration;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
import uk.ac.manchester.tornado.api.types.arrays.IntArray;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.example.inference.state;
package org.beehive.gpullama3.inference.state;

import com.example.core.model.tensor.ArrayFloatTensor;
import com.example.core.model.tensor.FloatTensor;
import com.example.model.Configuration;
import com.example.model.phi3.Phi3Configuration;
import org.beehive.gpullama3.core.model.tensor.ArrayFloatTensor;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.model.Configuration;
import org.beehive.gpullama3.model.phi3.Phi3Configuration;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
import uk.ac.manchester.tornado.api.types.arrays.IntArray;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.example.inference.state;
package org.beehive.gpullama3.inference.state;

import com.example.core.model.tensor.ArrayFloatTensor;
import com.example.core.model.tensor.FloatTensor;
import com.example.model.Configuration;
import com.example.model.qwen3.Qwen3Configuration;
import org.beehive.gpullama3.core.model.tensor.ArrayFloatTensor;
import org.beehive.gpullama3.core.model.tensor.FloatTensor;
import org.beehive.gpullama3.model.Configuration;
import org.beehive.gpullama3.model.qwen3.Qwen3Configuration;
import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
import uk.ac.manchester.tornado.api.types.arrays.IntArray;

Expand Down
Loading