diff --git a/LLama.Unittest/BasicTest.cs b/LLama.Unittest/BasicTest.cs
index 1d54a7e94..ea02280d6 100644
--- a/LLama.Unittest/BasicTest.cs
+++ b/LLama.Unittest/BasicTest.cs
@@ -1,6 +1,4 @@
-using System.Text;
 using LLama.Common;
-using LLama.Native;
 using Xunit.Abstractions;

 namespace LLama.Unittest
diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs
index 3d75a0cbc..c8d23c2ed 100644
--- a/LLama.Web/Common/ModelOptions.cs
+++ b/LLama.Web/Common/ModelOptions.cs
@@ -47,12 +47,6 @@ public class ModelOptions
     /// <inheritdoc />
     public string ModelPath { get; set; }

-    /// <inheritdoc />
-    public AdapterCollection LoraAdapters { get; set; } = new();
-
-    /// <inheritdoc />
-    public string LoraBase { get; set; } = string.Empty;
-
     /// <inheritdoc />
     public uint? Threads { get; set; }
diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index e6a64c930..7dc28f671 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -2,7 +2,6 @@
 using System.Collections;
 using System.Collections.Generic;
 using System.ComponentModel;
-using System.Linq;
 using System.Text;
 using System.Text.Json;
 using System.Text.Json.Serialization;
@@ -69,67 +68,12 @@ public interface IModelParams
     /// </summary>
     bool VocabOnly { get; }

-    /// <summary>
-    /// List of LoRA adapters to apply
-    /// </summary>
-    AdapterCollection LoraAdapters { get; }
-
-    /// <summary>
-    /// base model path for the lora adapter (lora_base)
-    /// </summary>
-    string LoraBase { get; }
-
     /// <summary>
     /// Override specific metadata items in the model
     /// </summary>
     List<MetadataOverride> MetadataOverrides { get; }
 }

-/// <summary>
-/// A LoRA adapter to apply to a model
-/// </summary>
-/// <param name="Path">Path to the LoRA file</param>
-/// <param name="Scale">Strength of this LoRA</param>
-public readonly record struct LoraAdapter(string Path, float Scale);
-
-/// <summary>
-/// A list of LoraAdapter objects
-/// </summary>
-public sealed class AdapterCollection
-    : List<LoraAdapter>, IEquatable<AdapterCollection>
-{
-    /// <inheritdoc />
-    public bool Equals(AdapterCollection? other)
-    {
-        if (other == null)
-            return false;
-
-        return this.SequenceEqual(other);
-    }
-
-    /// <inheritdoc />
-    public override bool Equals(object? obj)
-    {
-        return Equals(obj as AdapterCollection);
-    }
-
-    /// <inheritdoc />
-    public override int GetHashCode()
-    {
-        unchecked
-        {
-            var hash = 17;
-            for (var i = 0; i < Count; i++)
-            {
-                hash += this[i].GetHashCode();
-                hash *= 7823;
-            }
-            return hash;
-        }
-    }
-}
-
 /// <summary>
 /// A fixed size array to set the tensor splits across multiple GPUs
 /// </summary>
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index b93959a49..2f2ab4eb6 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -39,12 +39,6 @@ public record ModelParams
     /// <inheritdoc />
     public string ModelPath { get; set; }

-    /// <inheritdoc />
-    public AdapterCollection LoraAdapters { get; set; } = new();
-
-    /// <inheritdoc />
-    public string LoraBase { get; set; } = string.Empty;
-
     /// <inheritdoc />
     public uint? Threads { get; set; }
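The removals above are a breaking change: code that set `LoraAdapters` or `LoraBase` on `ModelParams` no longer compiles. A minimal before/after sketch with placeholder paths; the replacement APIs (`LoadLoraFromFile` / `AddLoraAdapter`) appear later in this diff and are demonstrated at the end:

```csharp
using LLama.Common;

// Before: LoRA settings travelled with the model parameters.
// var @params = new ModelParams("model.gguf")
// {
//     LoraAdapters = { new LoraAdapter("adapter.gguf", 0.8f) },
//     LoraBase = "base.gguf",
// };

// After: parameters describe only the model itself; adapters are
// loaded against the model and attached per context (see below).
var @params = new ModelParams("model.gguf"); // placeholder path
```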
diff --git a/LLama/LLamaQuantizer.cs b/LLama/LLamaQuantizer.cs
index 2ddbe2273..23f0f8b4e 100644
--- a/LLama/LLamaQuantizer.cs
+++ b/LLama/LLamaQuantizer.cs
@@ -62,7 +62,7 @@ public static bool Quantize(string srcFileName, string dstFilename, string ftype
     private static bool ValidateFtype(LLamaFtype ftype)
     {
         // Validation copies from here:
-        // https://github.com/ggerganov/llama.cpp/blob/f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7/llama.cpp#L13450
+        // https://github.com/ggerganov/llama.cpp/blob/345c8c0c87a97c1595f9c8b14833d531c8c7d8df/src/llama.cpp#L15624

         switch (ftype)
         {
@@ -105,9 +105,12 @@ private static bool ValidateFtype(LLamaFtype ftype)
             case LLamaFtype.MOSTLY_IQ3_S:
             case LLamaFtype.MOSTLY_IQ3_M:
+
+            case LLamaFtype.MOSTLY_Q4_0_4_4:
+            case LLamaFtype.MOSTLY_Q4_0_4_8:
+            case LLamaFtype.MOSTLY_Q4_0_8_8:
                 return true;

-            case LLamaFtype.MOSTLY_Q4_1_SOME_F16:
             case LLamaFtype.GUESSED:
             default:
                 return false;
diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj
index 272c81e37..efc639e72 100644
--- a/LLama/LLamaSharp.csproj
+++ b/LLama/LLamaSharp.csproj
@@ -53,7 +53,7 @@

-    368645698ab648e390dc
+    345c8c0c87a97c1595f9c8b

diff --git a/LLama/LLamaWeights.cs b/LLama/LLamaWeights.cs
index 8646e4d93..a134f7aca 100644
--- a/LLama/LLamaWeights.cs
+++ b/LLama/LLamaWeights.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
 using System.Collections.Generic;
 using System.Text;
 using System.Threading;
@@ -72,17 +72,6 @@ public static LLamaWeights LoadFromFile(IModelParams @params)
     {
         using var pin = @params.ToLlamaModelParams(out var lparams);
         var weights = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);
-
-        foreach (var adapter in @params.LoraAdapters)
-        {
-            if (string.IsNullOrEmpty(adapter.Path))
-                continue;
-            if (adapter.Scale <= 0)
-                continue;
-
-            weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, @params.LoraBase);
-        }
-
         return new LLamaWeights(weights);
     }
@@ -100,14 +89,6 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
         // don't touch the @params object inside the task, it might be changed
         // externally! Save a copy of everything that we need later.
         var modelPath = @params.ModelPath;
-        var loraBase = @params.LoraBase;
-        var loraAdapters = @params.LoraAdapters.ToArray();
-
-        // Determine the range to report for model loading. llama.cpp reports 0-1, but we'll remap that into a
-        // slightly smaller range to allow some space for reporting LoRA loading too.
-        var modelLoadProgressRange = 1f;
-        if (loraAdapters.Length > 0)
-            modelLoadProgressRange = 0.9f;

         using (@params.ToLlamaModelParams(out var lparams))
         {
@@ -119,7 +100,7 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
             lparams.progress_callback = (progress, ctx) =>
             {
-                // Update the progress reporter (remapping the value into the smaller range).
-                progressReporter?.Report(Math.Clamp(progress, 0, 1) * modelLoadProgressRange);
+                // Update the progress reporter.
+                progressReporter?.Report(Math.Clamp(progress, 0, 1));

                 // If the user set a callback in the model params, call that and see if we should cancel
                 if (internalCallback != null && !internalCallback(progress, ctx))
@@ -141,30 +122,6 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
             // Load the model
             var weights = SafeLlamaModelHandle.LoadFromFile(modelPath, lparams);
-
-            // Apply the LoRA adapters
-            for (var i = 0; i < loraAdapters.Length; i++)
-            {
-                // Interrupt applying LoRAs if the token is cancelled
-                if (token.IsCancellationRequested)
-                {
-                    weights.Dispose();
-                    token.ThrowIfCancellationRequested();
-                }
-
-                // Don't apply invalid adapters
-                var adapter = loraAdapters[i];
-                if (string.IsNullOrEmpty(adapter.Path))
-                    continue;
-                if (adapter.Scale <= 0)
-                    continue;
-
-                weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, loraBase);
-
-                // Report progress. Model loading reported progress from 0 -> 0.9, use
-                // the last 0.1 to represent all of the LoRA adapters being applied.
-                progressReporter?.Report(0.9f + (0.1f / loraAdapters.Length) * (i + 1));
-            }
-
             // Update progress reporter to indicate completion
             progressReporter?.Report(1);
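With adapter application removed from `LoadFromFileAsync`, the native progress value now maps straight onto the reporter. A small sketch of loading with progress under these changes, assuming a placeholder model path and the `LoadFromFileAsync` signature shown above:

```csharp
using System;
using LLama;
using LLama.Common;

var @params = new ModelParams("model.gguf"); // placeholder path
var progress = new Progress<float>(p => Console.WriteLine($"loading: {p:P0}"));

// The reporter now receives the full 0 -> 1 range from llama.cpp;
// there is no longer a 0.9 -> 1.0 band reserved for LoRA loading.
using var weights = await LLamaWeights.LoadFromFileAsync(@params, progressReporter: progress);
```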
diff --git a/LLama/Native/LLamaAttentionType.cs b/LLama/Native/LLamaAttentionType.cs
index 543f89b47..f26c73278 100644
--- a/LLama/Native/LLamaAttentionType.cs
+++ b/LLama/Native/LLamaAttentionType.cs
@@ -1,5 +1,9 @@
 namespace LLama.Native;

+/// <summary>
+///
+/// </summary>
+/// <remarks>llama_attention_type</remarks>
 public enum LLamaAttentionType
 {
     Unspecified = -1,
diff --git a/LLama/Native/LLamaFtype.cs b/LLama/Native/LLamaFtype.cs
index 15216b71b..4e05de999 100644
--- a/LLama/Native/LLamaFtype.cs
+++ b/LLama/Native/LLamaFtype.cs
@@ -3,6 +3,7 @@ namespace LLama.Native
     /// <summary>
     /// Supported model file types
     /// </summary>
+    /// <remarks>C# representation of llama_ftype</remarks>
     public enum LLamaFtype
     {
         /// <summary>
@@ -35,10 +36,10 @@ public enum LLamaFtype
         /// Benchmark@7B: 3.90GB, +0.1846 ppl
         /// </summary>
         MOSTLY_Q4_1 = 3,

-        /// <summary>
-        /// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
-        /// </summary>
-        MOSTLY_Q4_1_SOME_F16 = 4,
+        ///// <summary>
+        ///// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
+        ///// </summary>
+        //MOSTLY_Q4_1_SOME_F16 = 4,

         /// <summary>
         /// Mostly 5 bit
diff --git a/LLama/Native/LLamaTokenDataArray.cs b/LLama/Native/LLamaTokenDataArray.cs
index cc2515639..c341cca83 100644
--- a/LLama/Native/LLamaTokenDataArray.cs
+++ b/LLama/Native/LLamaTokenDataArray.cs
@@ -97,7 +97,7 @@ public void ApplyGrammar(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle? gra

         using (LLamaTokenDataArrayNative.Create(this, out var st))
         {
-            NativeApi.llama_sample_grammar(ctx, ref st, grammar);
+            NativeApi.llama_grammar_sample(grammar, ctx, ref st);
             Sorted = st.sorted;
         }
     }
diff --git a/LLama/Native/LLamaVocabPreType.cs b/LLama/Native/LLamaVocabPreType.cs
index 6a3c89f53..3e5bc287c 100644
--- a/LLama/Native/LLamaVocabPreType.cs
+++ b/LLama/Native/LLamaVocabPreType.cs
@@ -27,4 +27,7 @@ internal enum LLamaVocabPreType
     CHATGLM4 = 17,
     VIKING = 18,
     JAIS = 19,
+    TEKKEN = 20,
+    SMOLLM = 21,
+    CODESHELL = 22,
 }
\ No newline at end of file
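The three Q4_0_4_x types added to `ValidateFtype` can now be requested when quantizing, while `MOSTLY_Q4_1_SOME_F16` is rejected. A sketch with placeholder file names, assuming the `LLamaFtype`-based `Quantize` overload:

```csharp
using System;
using LLama;
using LLama.Native;

// MOSTLY_Q4_0_4_4 (and _4_8 / _8_8) now pass validation instead of
// being rejected before the native quantizer is even invoked.
var ok = LLamaQuantizer.Quantize("model-f16.gguf", "model-q4_0_4_4.gguf", LLamaFtype.MOSTLY_Q4_0_4_4);
Console.WriteLine(ok ? "quantized" : "quantize failed");
```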
diff --git a/LLama/Native/LoraAdapter.cs b/LLama/Native/LoraAdapter.cs
new file mode 100644
index 000000000..27d142275
--- /dev/null
+++ b/LLama/Native/LoraAdapter.cs
@@ -0,0 +1,46 @@
+using System;
+
+namespace LLama.Native;
+
+/// <summary>
+/// A LoRA adapter which can be applied to a context for a specific model
+/// </summary>
+public class LoraAdapter
+{
+    /// <summary>
+    /// The model which this LoRA adapter was loaded with.
+    /// </summary>
+    public SafeLlamaModelHandle Model { get; }
+
+    /// <summary>
+    /// The full path of the file this adapter was loaded from
+    /// </summary>
+    public string Path { get; }
+
+    /// <summary>
+    /// Native pointer of the loaded adapter, will be automatically freed when the model is unloaded
+    /// </summary>
+    internal IntPtr Pointer { get; }
+
+    /// <summary>
+    /// Indicates whether this adapter is still loaded (i.e. has not been unloaded)
+    /// </summary>
+    internal bool Loaded { get; private set; }
+
+    internal LoraAdapter(SafeLlamaModelHandle model, string path, IntPtr nativePtr)
+    {
+        Model = model;
+        Path = path;
+        Pointer = nativePtr;
+        Loaded = true;
+    }
+
+    /// <summary>
+    /// Unload this adapter
+    /// </summary>
+    public void Unload()
+    {
+        Loaded = false;
+        NativeApi.llama_lora_adapter_free(Pointer);
+    }
+}
\ No newline at end of file
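A note on the lifetime implied by this class: the native pointer is owned by the model, so `Unload` is an early-free optimisation rather than a requirement. A hypothetical sketch (the handle parameters stand in for objects created elsewhere; `AddLoraAdapter` and `RemoveLoraAdapter` are added to the context handle further down in this diff):

```csharp
using LLama.Native;

static void UseAdapterOnce(SafeLlamaModelHandle model, SafeLLamaContextHandle ctx)
{
    var adapter = model.LoadLoraFromFile("adapter.gguf"); // placeholder path
    ctx.AddLoraAdapter(adapter, scale: 1.0f);

    // ... run inference with the adapter applied ...

    ctx.RemoveLoraAdapter(adapter);

    // Optional: free the native adapter now. Afterwards Loaded is false
    // and AddLoraAdapter will refuse to use it; otherwise the memory is
    // released when the model itself is unloaded.
    adapter.Unload();
}
```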
diff --git a/LLama/Native/NativeApi.Grammar.cs b/LLama/Native/NativeApi.Grammar.cs
index 2e9811371..8e5512e1c 100644
--- a/LLama/Native/NativeApi.Grammar.cs
+++ b/LLama/Native/NativeApi.Grammar.cs
@@ -35,16 +35,16 @@ public static partial class NativeApi
        /// <param name="ctx"></param>
        /// <param name="candidates"></param>
        /// <param name="grammar"></param>
-        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
-        public static extern void llama_sample_grammar(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, SafeLLamaGrammarHandle grammar);
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void llama_grammar_sample(SafeLLamaGrammarHandle grammar, SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates);

-        /// <summary>
-        /// Accepts the sampled token into the grammar
-        /// </summary>
-        /// <param name="ctx"></param>
-        /// <param name="grammar"></param>
-        /// <param name="token"></param>
-        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
-        public static extern void llama_grammar_accept_token(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token);
+        /// <summary>
+        /// Accepts the sampled token into the grammar
+        /// </summary>
+        /// <param name="grammar"></param>
+        /// <param name="ctx"></param>
+        /// <param name="token"></param>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void llama_grammar_accept_token(SafeLLamaGrammarHandle grammar, SafeLLamaContextHandle ctx, LLamaToken token);
    }
}
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index 41d3a130b..f7b97bead 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -445,5 +445,12 @@ public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback)
        /// <returns>Returns the split_prefix length.</returns>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_split_prefix(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count);
+
+        /// <summary>
+        /// Manually free a LoRA adapter. Loaded adapters will be freed automatically when the associated model is deleted.
+        /// </summary>
+        /// <param name="adapter"></param>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void llama_lora_adapter_free(IntPtr adapter);
    }
}
diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs
index c520a61d1..628936352 100644
--- a/LLama/Native/SafeLLamaContextHandle.cs
+++ b/LLama/Native/SafeLLamaContextHandle.cs
@@ -352,6 +352,58 @@ static SafeLLamaContextHandle()
        /// </summary>
        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
        private static extern void llama_synchronize(SafeLLamaContextHandle ctx);
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        private static extern int llama_lora_adapter_set(SafeLLamaContextHandle context, IntPtr adapter, float scale);
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        private static extern int llama_lora_adapter_remove(SafeLLamaContextHandle context, IntPtr adapter);
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        private static extern int llama_lora_adapter_clear(SafeLLamaContextHandle context);
        #endregion
+
+        #region LoRA
+        /// <summary>
+        /// Add a LoRA adapter to this context
+        /// </summary>
+        /// <param name="lora"></param>
+        /// <param name="scale"></param>
+        /// <exception cref="ArgumentException"></exception>
+        public void AddLoraAdapter(LoraAdapter lora, float scale)
+        {
+            if (lora.Model != ModelHandle)
+                throw new ArgumentException("Cannot add LoRA adapter which was loaded for a different model");
+            if (!lora.Loaded)
+                throw new ArgumentException("Cannot add LoRA adapter which has been unloaded");
+
+            var err = llama_lora_adapter_set(this, lora.Pointer, scale);
+            if (err != 0)
+                throw new RuntimeError("Failed to set lora adapter");
+        }
+
+        /// <summary>
+        /// Remove a LoRA adapter from this context
+        /// </summary>
+        /// <param name="lora"></param>
+        /// <returns>Indicates if the LoRA was in this context and was removed</returns>
+        public bool RemoveLoraAdapter(LoraAdapter lora)
+        {
+            if (lora.Model != ModelHandle)
+                return false;
+
+            var err = llama_lora_adapter_remove(this, lora.Pointer);
+            return err == 0;
+        }
+
+        /// <summary>
+        /// Remove all LoRA adapters from this context
+        /// </summary>
+        public void ClearLoraAdapters()
+        {
+            llama_lora_adapter_clear(this);
+        }
+        #endregion

        ///
@@ -734,6 +786,16 @@ public void KvCacheSequenceDivide(LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int
        {
            NativeApi.llama_kv_cache_seq_div(this, seq, p0, p1, divisor);
        }
+
+        /// <summary>
+        /// Returns the largest position present in the KV cache for the specified sequence
+        /// </summary>
+        /// <param name="seq"></param>
+        /// <returns></returns>
+        public LLamaPos KvCacheMaxPosition(LLamaSeqId seq)
+        {
+            return NativeApi.llama_kv_cache_seq_pos_max(this, seq);
+        }
        #endregion
    }
}
diff --git a/LLama/Native/SafeLLamaGrammarHandle.cs b/LLama/Native/SafeLLamaGrammarHandle.cs
index 53c8fda28..0a81afc12 100644
--- a/LLama/Native/SafeLLamaGrammarHandle.cs
+++ b/LLama/Native/SafeLLamaGrammarHandle.cs
@@ -111,7 +111,7 @@ public SafeLLamaGrammarHandle Clone()
        /// </summary>
        public void AcceptToken(SafeLLamaContextHandle ctx, LLamaToken token)
        {
-            NativeApi.llama_grammar_accept_token(ctx, this, token);
+            NativeApi.llama_grammar_accept_token(this, ctx, token);
        }
    }
}
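Because adapters now attach to contexts instead of being baked into the weights, one model can serve different adapter mixes. A sketch of toggling adapters with the new context-level methods, assuming handles obtained as in the earlier sketches:

```csharp
using System;
using LLama.Native;

static void SwapAdapters(SafeLLamaContextHandle ctx, LoraAdapter a, LoraAdapter b)
{
    ctx.AddLoraAdapter(a, scale: 1.0f);

    // RemoveLoraAdapter returns false (rather than throwing) if the
    // adapter belongs to a different model or was never set on this context.
    if (!ctx.RemoveLoraAdapter(a))
        Console.WriteLine("adapter 'a' was not removed");

    ctx.AddLoraAdapter(b, scale: 0.5f);

    // Drop every adapter currently set on the context in one call.
    ctx.ClearLoraAdapters();
}
```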
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index 0e55e8eea..eaf76421a 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -3,6 +3,7 @@
 using System.Diagnostics;
 using System.IO;
 using System.Text;
+using System.Threading;
 using LLama.Exceptions;

 namespace LLama.Native
@@ -111,8 +112,7 @@ protected override bool ReleaseHandle()
        /// <param name="modelPath"></param>
        /// <param name="lparams"></param>
        /// <returns></returns>
-        ///
-        ///
+        ///
        public static SafeLlamaModelHandle LoadFromFile(string modelPath, LLamaModelParams lparams)
        {
            // Try to open the model file, this will check:
@@ -433,39 +433,32 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string k
        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
        [return: MarshalAs(UnmanagedType.U1)]
        private static extern bool llama_model_has_encoder(SafeLlamaModelHandle model);
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        private static extern IntPtr llama_lora_adapter_init(SafeLlamaModelHandle model, string path);
        #endregion

        #region LoRA
        /// <summary>
-        /// Apply a LoRA adapter to a loaded model
+        /// Load a LoRA adapter from file. The adapter will be associated with this model but will not be applied
        /// </summary>
-        /// <param name="lora"></param>
-        /// <param name="scale"></param>
-        /// <param name="modelBase">A path to a higher quality model to use as a base for the layers modified by the
-        /// adapter. Can be NULL to use the current loaded model.</param>
-        /// <param name="threads"></param>
-        /// <exception cref="RuntimeError"></exception>
-        public void ApplyLoraFromFile(string lora, float scale, string? modelBase = null, int? threads = null)
+        /// <param name="path"></param>
+        /// <returns></returns>
+        public LoraAdapter LoadLoraFromFile(string path)
        {
+            path = Path.GetFullPath(path);
+
            // Try to open the model file, this will check:
            // - File exists (automatically throws FileNotFoundException)
            // - File is readable (explicit check)
-            // This provides better error messages that llama.cpp, which would throw an access violation exception in both cases.
-            using (var fs = new FileStream(lora, FileMode.Open))
+            // This provides better error messages than llama.cpp, which would throw an access violation exception in both cases.
+            using (var fs = new FileStream(path, FileMode.Open))
                if (!fs.CanRead)
-                    throw new InvalidOperationException($"LoRA file '{lora}' is not readable");
-
-            var err = llama_model_apply_lora_from_file(
-                this,
-                lora,
-                scale,
-                string.IsNullOrEmpty(modelBase) ? null : modelBase,
-                threads ?? Math.Max(1, Environment.ProcessorCount / 2)
-            );
-
-            if (err != 0)
-                throw new RuntimeError($"Failed to apply lora adapter (err={err}).");
+                    throw new InvalidOperationException($"LoRA file '{path}' is not readable");
+
+            var ptr = llama_lora_adapter_init(this, path);
+            return new LoraAdapter(this, path, ptr);
        }
        #endregion
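End to end, the replacement for the removed `ApplyLoraFromFile` flow looks roughly like this. A sketch with placeholder paths, assuming the surrounding LLamaSharp API (`LLamaWeights.NativeHandle`, `CreateContext`) is unchanged by this PR; note that `LoadLoraFromFile` above does not yet check `llama_lora_adapter_init` for a null pointer, so a bad adapter file would only surface later:

```csharp
using LLama;
using LLama.Common;

var @params = new ModelParams("model.gguf");                      // placeholder path
using var weights = LLamaWeights.LoadFromFile(@params);

// Load the adapter once against the model, then apply it to any
// context created from those weights.
var lora = weights.NativeHandle.LoadLoraFromFile("adapter.gguf"); // placeholder path

using var context = weights.CreateContext(@params);
context.NativeHandle.AddLoraAdapter(lora, scale: 0.8f);
```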