
Commit ea12ff4

Merge pull request #502 from vikramvee/Examples
Updated Examples
2 parents: 859160d + ebd853f

11 files changed: +985, -19 lines

docs/Examples/BatchDecoding.md

Lines changed: 170 additions & 0 deletions
@@ -0,0 +1,170 @@
# Batch decoding

```cs
using System.Diagnostics;
using System.Text;
using LLama;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;

public class BatchedDecoding
{
    private const int n_parallel = 8;
    private const int n_len = 32;

    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        Console.WriteLine("Prompt (leave blank to select automatically):");
        var prompt = Console.ReadLine();
        if (string.IsNullOrWhiteSpace(prompt))
            prompt = "Not many people know that";

        // Load model
        var parameters = new ModelParams(modelPath);

        using var model = LLamaWeights.LoadFromFile(parameters);

        // Tokenize prompt
        var prompt_tokens = model.Tokenize(prompt, true, false, Encoding.UTF8);
        var n_kv_req = prompt_tokens.Length + (n_len - prompt_tokens.Length) * n_parallel;

        // Create a context
        parameters.ContextSize = (uint)model.ContextSize;
        parameters.BatchSize = (uint)Math.Max(n_len, n_parallel);
        using var context = model.CreateContext(parameters);

        var n_ctx = context.ContextSize;

        // make sure the KV cache is big enough to hold all the prompt and generated tokens
        if (n_kv_req > n_ctx)
        {
            await Console.Error.WriteLineAsync($"error: n_kv_req ({n_kv_req}) > n_ctx, the required KV cache size is not big enough\n");
            await Console.Error.WriteLineAsync("       either reduce n_parallel or increase n_ctx\n");
            return;
        }

        var batch = new LLamaBatch();

        // evaluate the initial prompt
        batch.AddRange(prompt_tokens, 0, LLamaSeqId.Zero, true);

        if (await context.DecodeAsync(batch) != DecodeResult.Ok)
        {
            await Console.Error.WriteLineAsync("llama_decode failed");
            return;
        }

        // assign the system KV cache to all parallel sequences
        // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
        for (var i = 1; i < n_parallel; ++i)
        {
            context.NativeHandle.KvCacheSequenceCopy((LLamaSeqId)0, (LLamaSeqId)i, 0, batch.TokenCount);
        }

        if (n_parallel > 1)
        {
            Console.WriteLine();
            Console.WriteLine($"generating {n_parallel} sequences...");
        }

        // remember the batch index of the last token for each parallel sequence
        // we need this to determine which logits to sample from
        List<int> i_batch = new();
        for (var i = 0; i < n_parallel; i++)
            i_batch.Add(batch.TokenCount - 1);

        // Create per-stream decoder and sampler
        var decoders = new StreamingTokenDecoder[n_parallel];
        var samplers = new ISamplingPipeline[n_parallel];
        for (var i = 0; i < n_parallel; i++)
        {
            decoders[i] = new StreamingTokenDecoder(context);
            samplers[i] = new DefaultSamplingPipeline
            {
                Temperature = 0.1f + (float)i / n_parallel,
                MinP = 0.25f,
            };
        }

        var n_cur = batch.TokenCount;
        var n_decode = 0;

        var timer = new Stopwatch();
        timer.Start();
        while (n_cur <= n_len)
        {
            batch.Clear();

            for (var i = 0; i < n_parallel; i++)
            {
                // Skip completed streams
                if (i_batch[i] < 0)
                    continue;

                // Use the sampling pipeline to select a token
                var new_token_id = samplers[i].Sample(
                    context.NativeHandle,
                    context.NativeHandle.GetLogitsIth(i_batch[i]),
                    Array.Empty<LLamaToken>()
                );

                // Finish this stream early if necessary
                if (new_token_id == model.EndOfSentenceToken || new_token_id == model.NewlineToken)
                {
                    i_batch[i] = -1;
                    Console.WriteLine($"Completed Stream {i} early");
                    continue;
                }

                // Add this token to the decoder, so it will be turned into text
                decoders[i].Add(new_token_id);

                i_batch[i] = batch.TokenCount;

                // push this new token for next evaluation
                batch.Add(new_token_id, n_cur, (LLamaSeqId)i, true);

                n_decode++;
            }

            // Check if all streams are finished
            if (batch.TokenCount == 0)
            {
                break;
            }

            n_cur++;

            // evaluate the current batch with the transformer model
            if (await context.DecodeAsync(batch) != DecodeResult.Ok)
            {
                await Console.Error.WriteLineAsync("failed to eval");
                return;
            }
        }

        timer.Stop();
        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine();
        Console.WriteLine($"Decoded {n_decode} tokens in {timer.ElapsedMilliseconds}ms");
        Console.WriteLine($"Rate: {n_decode / timer.Elapsed.TotalSeconds:##.000} tokens/second");

        var index = 0;
        foreach (var stream in decoders)
        {
            var text = stream.Read();

            Console.ForegroundColor = ConsoleColor.Green;
            Console.Write($"{index++}. {prompt}");
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(text);
        }

        Console.WriteLine("Press any key to exit demo");
        Console.ReadKey(true);
    }
}
```
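
The KV-cache sizing check near the top of this example is worth spelling out: the prompt cells are stored once and shared by all sequences (that is what `KvCacheSequenceCopy` arranges), so only the generated tail multiplies with `n_parallel`. A minimal standalone sketch of that arithmetic, using a hypothetical 5-token prompt purely for illustration:

```cs
// Standalone sketch of the n_kv_req sizing used above.
// The 5-token prompt length is a made-up value, not taken from the example.
const int n_parallel = 8;
const int n_len = 32;
const int promptLength = 5; // hypothetical

// Prompt cells are shared across sequences; each sequence then
// generates up to (n_len - promptLength) tokens of its own.
var n_kv_req = promptLength + (n_len - promptLength) * n_parallel;

Console.WriteLine($"n_kv_req = {n_kv_req}"); // 5 + 27 * 8 = 221
```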

docs/Examples/ChatChineseGB2312.md

Lines changed: 125 additions & 0 deletions
@@ -0,0 +1,125 @@
# Chat Chinese

```cs
using System.Text;
using LLama;
using LLama.Common;

public class ChatChineseGB2312
{
    private static string ConvertEncoding(string input, Encoding original, Encoding target)
    {
        byte[] bytes = original.GetBytes(input);
        var convertedBytes = Encoding.Convert(original, target, bytes);
        return target.GetString(convertedBytes);
    }

    public static async Task Run()
    {
        // Register provider for GB2312 encoding
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common on Windows. It's recommended" +
            " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
        Console.ForegroundColor = ConsoleColor.White;

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5,
            Encoding = Encoding.UTF8
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-kunkun-chinese");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session.WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "用户:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.White;
        Console.Write("用户:");
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            // Convert the encoding from gb2312 to utf8 for the language model
            // and for later saving to the history JSON file.
            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-kunkun-chinese");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (var text in session.RegenerateAssistantMessageAsync(inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;

                    // Convert the encoding from utf8 to gb2312 for the console output.
                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }
            }
            else
            {
                await foreach (var text in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";

            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
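
The `ConvertEncoding` helper above is doing the real work: console I/O on a Chinese Windows machine often arrives as GB2312, while the model and the saved history JSON expect UTF-8. A minimal round-trip sketch of the same conversion, assuming the `System.Text.Encoding.CodePages` package is referenced:

```cs
using System.Text;

// GB2312 is not built into .NET, so the code-pages provider must be
// registered before Encoding.GetEncoding("gb2312") will succeed
// (this mirrors the RegisterProvider call in the example above).
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

var gb2312 = Encoding.GetEncoding("gb2312");
var original = "你好，世界";

// UTF-8 string -> GB2312 bytes -> back to a UTF-8 string.
var gbBytes = Encoding.Convert(Encoding.UTF8, gb2312, Encoding.UTF8.GetBytes(original));
var roundTripped = Encoding.UTF8.GetString(Encoding.Convert(gb2312, Encoding.UTF8, gbBytes));

Console.WriteLine(roundTripped == original); // True
```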

docs/Examples/ChatSessionStripRoleName.md

Lines changed: 39 additions & 9 deletions
@@ -13,24 +13,54 @@ public class ChatSessionStripRoleName
-    public static void Run()
+    public static async Task Run()
     {
         Console.Write("Please input your model path: ");
-        string modelPath = Console.ReadLine();
-        var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
-        InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
-        ChatSession session = new ChatSession(ex).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
+        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+        ChatSession session = new(executor, chatHistory);
+        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
+            new string[] { "User:", "Assistant:" },
+            redundancyLength: 8));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "User:" }
+        };
 
         Console.ForegroundColor = ConsoleColor.Yellow;
-        Console.WriteLine("The chat session has started. The role names won't be printed.");
-        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine("The chat session has started.");
 
-        while (true)
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
         {
-            foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+            await foreach (var text in session.ChatAsync(
+                new ChatHistory.Message(AuthorRole.User, userInput),
+                inferenceParams))
             {
+                Console.ForegroundColor = ConsoleColor.White;
                 Console.Write(text);
             }
 
             Console.ForegroundColor = ConsoleColor.Green;
-            prompt = Console.ReadLine();
+            userInput = Console.ReadLine() ?? "";
+
             Console.ForegroundColor = ConsoleColor.White;
         }
     }
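
For reference, the two session transforms used across these examples can also be chained fluently; the pre-change code above already used that style. A minimal sketch of the setup on its own, assuming the same LLamaSharp types and namespaces as in the diff (the `executor` and `history` arguments are created exactly as shown there):

```cs
using LLama;
using LLama.Common;

public static class StripRoleNameSetup
{
    // A sketch only: WithOutputTransform returns the session, so the
    // transform can be attached in the same expression that builds it.
    public static ChatSession Create(InteractiveExecutor executor, ChatHistory history)
    {
        return new ChatSession(executor, history)
            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
                new[] { "User:", "Assistant:" }, // role-name keywords to strip from output
                redundancyLength: 8));
    }
}
```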
