2 changes: 0 additions & 2 deletions LLama.Unittest/BasicTest.cs
@@ -1,6 +1,4 @@
using System.Text;
using LLama.Common;
using LLama.Native;
using Xunit.Abstractions;

namespace LLama.Unittest
6 changes: 0 additions & 6 deletions LLama.Web/Common/ModelOptions.cs
@@ -47,12 +47,6 @@ public class ModelOptions
/// <inheritdoc />
public string ModelPath { get; set; }

/// <inheritdoc />
public AdapterCollection LoraAdapters { get; set; } = new();

/// <inheritdoc />
public string LoraBase { get; set; } = string.Empty;

/// <inheritdoc />
public uint? Threads { get; set; }

56 changes: 0 additions & 56 deletions LLama/Abstractions/IModelParams.cs
@@ -2,7 +2,6 @@
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
@@ -69,67 +68,12 @@ public interface IModelParams
/// </summary>
bool VocabOnly { get; }

/// <summary>
/// List of LoRA adapters to apply
/// </summary>
AdapterCollection LoraAdapters { get; }

/// <summary>
/// base model path for the lora adapter (lora_base)
/// </summary>
string LoraBase { get; }

/// <summary>
/// Override specific metadata items in the model
/// </summary>
List<MetadataOverride> MetadataOverrides { get; }
}

/// <summary>
/// A LoRA adapter to apply to a model
/// </summary>
/// <param name="Path">Path to the LoRA file</param>
/// <param name="Scale">Strength of this LoRA</param>
public readonly record struct LoraAdapter(string Path, float Scale);

/// <summary>
/// A list of LoraAdapter objects
/// </summary>
public sealed class AdapterCollection
: List<LoraAdapter>, IEquatable<AdapterCollection>
{
/// <inheritdoc />
public bool Equals(AdapterCollection? other)
{
if (other == null)
return false;

return this.SequenceEqual(other);
}

/// <inheritdoc/>
public override bool Equals(object? obj)
{
return Equals(obj as AdapterCollection);
}

/// <inheritdoc/>
public override int GetHashCode()
{
unchecked
{
var hash = 17;
for (var i = 0; i < Count; i++)
{
hash += this[i].GetHashCode();
hash *= 7823;
}
return hash;
}
}
}


/// <summary>
/// A fixed size array to set the tensor splits across multiple GPUs
/// </summary>
6 changes: 0 additions & 6 deletions LLama/Common/ModelParams.cs
@@ -39,12 +39,6 @@ public record ModelParams
/// <inheritdoc />
public string ModelPath { get; set; }

/// <inheritdoc />
public AdapterCollection LoraAdapters { get; set; } = new();

/// <inheritdoc />
public string LoraBase { get; set; } = string.Empty;

/// <inheritdoc />
public uint? Threads { get; set; }

7 changes: 5 additions & 2 deletions LLama/LLamaQuantizer.cs
@@ -62,7 +62,7 @@ public static bool Quantize(string srcFileName, string dstFilename, string ftype
private static bool ValidateFtype(LLamaFtype ftype)
{
// Validation copied from here:
// https://github.com/ggerganov/llama.cpp/blob/f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7/llama.cpp#L13450
// https://github.com/ggerganov/llama.cpp/blob/345c8c0c87a97c1595f9c8b14833d531c8c7d8df/src/llama.cpp#L15624

switch (ftype)
{
@@ -105,9 +105,12 @@ private static bool ValidateFtype(LLamaFtype ftype)

case LLamaFtype.MOSTLY_IQ3_S:
case LLamaFtype.MOSTLY_IQ3_M:

case LLamaFtype.MOSTLY_Q4_0_4_4:
case LLamaFtype.MOSTLY_Q4_0_4_8:
case LLamaFtype.MOSTLY_Q4_0_8_8:
return true;

case LLamaFtype.MOSTLY_Q4_1_SOME_F16:
case LLamaFtype.GUESSED:
default:
return false;
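Review note: with the three Q4_0_4_X formats now whitelisted in ValidateFtype, quantizing to the AArch64-optimised layouts should work end-to-end. A minimal usage sketch; the file paths are hypothetical, and it assumes a Quantize overload that accepts LLamaFtype directly, since only the string overload is visible in this hunk:

```csharp
using System;
using LLama;
using LLama.Native;

// Hypothetical paths; assumes a Quantize overload accepting LLamaFtype
// directly (only the string overload is visible in this hunk).
bool ok = LLamaQuantizer.Quantize(
    "model-f16.gguf",            // source model path (assumption)
    "model-q4_0_4_4.gguf",       // destination path (assumption)
    LLamaFtype.MOSTLY_Q4_0_4_4); // newly accepted by ValidateFtype

if (!ok)
    Console.WriteLine("Quantization was rejected or failed.");
```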
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
@@ -53,7 +53,7 @@
</ItemGroup>

<PropertyGroup>
<BinaryReleaseId>368645698ab648e390dc</BinaryReleaseId>
<BinaryReleaseId>345c8c0c87a97c1595f9c8b</BinaryReleaseId>
</PropertyGroup>

<PropertyGroup>
47 changes: 2 additions & 45 deletions LLama/LLamaWeights.cs
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
@@ -72,17 +72,6 @@ public static LLamaWeights LoadFromFile(IModelParams @params)
{
using var pin = @params.ToLlamaModelParams(out var lparams);
var weights = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);

foreach (var adapter in @params.LoraAdapters)
{
if (string.IsNullOrEmpty(adapter.Path))
continue;
if (adapter.Scale <= 0)
continue;

weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, @params.LoraBase);
}

return new LLamaWeights(weights);
}

@@ -100,14 +89,6 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
// don't touch the @params object inside the task, it might be changed
// externally! Save a copy of everything that we need later.
var modelPath = @params.ModelPath;
var loraBase = @params.LoraBase;
var loraAdapters = @params.LoraAdapters.ToArray();

// Determine the range to report for model loading. llama.cpp reports 0-1, but we'll remap that into a
// slightly smaller range to allow some space for reporting LoRA loading too.
var modelLoadProgressRange = 1f;
if (loraAdapters.Length > 0)
modelLoadProgressRange = 0.9f;

using (@params.ToLlamaModelParams(out var lparams))
{
@@ -119,7 +100,7 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
lparams.progress_callback = (progress, ctx) =>
{
// Update the progress reporter (remapping the value into the smaller range).
progressReporter?.Report(Math.Clamp(progress, 0, 1) * modelLoadProgressRange);
progressReporter?.Report(Math.Clamp(progress, 0, 1));

// If the user set a callback in the model params, call that and see if we should cancel
if (internalCallback != null && !internalCallback(progress, ctx))
@@ -141,30 +122,6 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
// Load the model
var weights = SafeLlamaModelHandle.LoadFromFile(modelPath, lparams);

// Apply the LoRA adapters
for (var i = 0; i < loraAdapters.Length; i++)
{
// Interrupt applying LoRAs if the token is cancelled
if (token.IsCancellationRequested)
{
weights.Dispose();
token.ThrowIfCancellationRequested();
}

// Don't apply invalid adapters
var adapter = loraAdapters[i];
if (string.IsNullOrEmpty(adapter.Path))
continue;
if (adapter.Scale <= 0)
continue;

weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, loraBase);

// Report progress. Model loading reported progress from 0 -> 0.9, use
// the last 0.1 to represent all of the LoRA adapters being applied.
progressReporter?.Report(0.9f + (0.1f / loraAdapters.Length) * (i + 1));
}

// Update progress reporter to indicate completion
progressReporter?.Report(1);

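Review note: since LoRA application has moved out of the weight-loading path, the progress callback now passes llama.cpp's raw 0-1 range straight through with no remapping. A minimal load sketch under that assumption; "model.gguf" is a hypothetical path, and the optional IProgress&lt;float&gt; parameter is assumed to be named progressReporter, as referenced inside the diff:

```csharp
using System;
using LLama;
using LLama.Common;

var parameters = new ModelParams("model.gguf"); // hypothetical path
using var weights = await LLamaWeights.LoadFromFileAsync(
    parameters,
    progressReporter: new Progress<float>(p => Console.WriteLine($"load: {p:P0}")));
```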
4 changes: 4 additions & 0 deletions LLama/Native/LLamaAttentionType.cs
@@ -1,5 +1,9 @@
namespace LLama.Native;

/// <summary>
/// The type of attention to use when evaluating a context
/// </summary>
/// <remarks>llama_attention_type</remarks>
public enum LLamaAttentionType
{
Unspecified = -1,
9 changes: 5 additions & 4 deletions LLama/Native/LLamaFtype.cs
@@ -3,6 +3,7 @@ namespace LLama.Native
/// <summary>
/// Supported model file types
/// </summary>
/// <remarks>C# representation of llama_ftype</remarks>
public enum LLamaFtype
{
/// <summary>
@@ -35,10 +36,10 @@ public enum LLamaFtype
/// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
MOSTLY_Q4_1 = 3,

/// <summary>
/// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
/// </summary>
MOSTLY_Q4_1_SOME_F16 = 4,
///// <summary>
///// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
///// </summary>
//MOSTLY_Q4_1_SOME_F16 = 4,

/// <summary>
/// Mostly 5 bit
2 changes: 1 addition & 1 deletion LLama/Native/LLamaTokenDataArray.cs
@@ -97,7 +97,7 @@ public void ApplyGrammar(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle? gra

using (LLamaTokenDataArrayNative.Create(this, out var st))
{
NativeApi.llama_sample_grammar(ctx, ref st, grammar);
NativeApi.llama_grammar_sample(grammar, ctx, ref st);
Sorted = st.sorted;
}
}
3 changes: 3 additions & 0 deletions LLama/Native/LLamaVocabPreType.cs
@@ -27,4 +27,7 @@ internal enum LLamaVocabPreType
CHATGLM4 = 17,
VIKING = 18,
JAIS = 19,
TEKKEN = 20,
SMOLLM = 21,
CODESHELL = 22,
}
46 changes: 46 additions & 0 deletions LLama/Native/LoraAdapter.cs
@@ -0,0 +1,46 @@
using System;

namespace LLama.Native;

/// <summary>
/// A LoRA adapter which can be applied to a context for a specific model
/// </summary>
public class LoraAdapter
{
/// <summary>
/// The model which this LoRA adapter was loaded with.
/// </summary>
public SafeLlamaModelHandle Model { get; }

/// <summary>
/// The full path of the file this adapter was loaded from
/// </summary>
public string Path { get; }

/// <summary>
/// Native pointer of the loaded adapter, will be automatically freed when the model is unloaded
/// </summary>
internal IntPtr Pointer { get; }

/// <summary>
/// Indicates if this adapter has been unloaded
/// </summary>
internal bool Loaded { get; private set; }

internal LoraAdapter(SafeLlamaModelHandle model, string path, IntPtr nativePtr)
{
Model = model;
Path = path;
Pointer = nativePtr;
Loaded = true;
}

/// <summary>
/// Unload this adapter
/// </summary>
public void Unload()
{
Loaded = false;
NativeApi.llama_lora_adapter_free(Pointer);
}
}
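Review note: the constructor is internal, so callers presumably obtain a LoraAdapter from the model handle rather than constructing one. A hypothetical lifecycle sketch; the loader name LoadLoraFromFile is an assumption, as only the adapter type and llama_lora_adapter_free appear in this diff:

```csharp
using LLama.Native;

static void ApplyAndRelease(SafeLlamaModelHandle model, string loraPath)
{
    // Assumed loader on the model handle (not shown in this diff).
    LoraAdapter adapter = model.LoadLoraFromFile(loraPath);

    // ... attach the adapter to a context here and run inference ...

    // Optional early release; otherwise the native adapter is freed
    // automatically when the model itself is unloaded.
    adapter.Unload();
}
```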
20 changes: 10 additions & 10 deletions LLama/Native/NativeApi.Grammar.cs
@@ -35,16 +35,16 @@ public static partial class NativeApi
/// <param name="ctx"></param>
/// <param name="candidates"></param>
/// <param name="grammar"></param>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_sample_grammar(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, SafeLLamaGrammarHandle grammar);
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_grammar_sample(SafeLLamaGrammarHandle grammar, SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates);

/// <summary>
/// Accepts the sampled token into the grammar
/// </summary>
/// <param name="ctx"></param>
/// <param name="grammar"></param>
/// <param name="token"></param>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_grammar_accept_token(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token);
/// <summary>
/// Accepts the sampled token into the grammar
/// </summary>
/// <param name="ctx"></param>
/// <param name="grammar"></param>
/// <param name="token"></param>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_grammar_accept_token(SafeLLamaGrammarHandle grammar, SafeLLamaContextHandle ctx, LLamaToken token);
}
}
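Review note: both grammar entry points now take the grammar handle first, matching the upstream llama.cpp rename, so call sites need their arguments swapped. A small before/after sketch of a caller, using only the signatures shown in this diff:

```csharp
using LLama.Native;

static void SampleWithGrammar(
    SafeLLamaContextHandle ctx,
    SafeLLamaGrammarHandle grammar,
    ref LLamaTokenDataArrayNative candidates,
    LLamaToken chosen)
{
    // Was: llama_sample_grammar(ctx, ref candidates, grammar)
    NativeApi.llama_grammar_sample(grammar, ctx, ref candidates);

    // Was: llama_grammar_accept_token(ctx, grammar, chosen)
    NativeApi.llama_grammar_accept_token(grammar, ctx, chosen);
}
```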
7 changes: 7 additions & 0 deletions LLama/Native/NativeApi.cs
@@ -445,5 +445,12 @@ public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback)
/// <returns>Returns the split_prefix length.</returns>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern int llama_split_prefix(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count);

/// <summary>
/// Manually free a LoRA adapter. Loaded adapters are freed automatically when the associated model is deleted.
/// </summary>
/// <param name="adapter"></param>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_lora_adapter_free(IntPtr adapter);
}
}