Commit bb03290

bachittle and ggerganov authored
examples : iOS example with swift ui (#4159)
* copy to llama.cpp as subdir
* attempt enabling metal, fails
* ggml metal compiles!
* Update README.md
* initial conversion to new format, utf8 errors?
* bug fixes, but now has an invalid memory access :(
* added O3, now has insufficient memory access
* begin sync with master
* update to match latest code, new errors
* fixed it!
* fix for loop conditionals, increase result size
* fix current workflow errors
* attempt a llama.swiftui workflow
* Update .github/workflows/build.yml

Co-authored-by: Georgi Gerganov <[email protected]>

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent f3b2698 commit bb03290

16 files changed, +829 -0 lines changed


.github/workflows/build.yml

Lines changed: 11 additions & 0 deletions
@@ -498,6 +498,17 @@ jobs:
           path: |
             cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
 
+  ios-xcode-build:
+    runs-on: macos-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Build Xcode project
+        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+
+
 # freeBSD-latest:
 #   runs-on: macos-12
 #   steps:

examples/llama.swiftui/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+xcuserdata

examples/llama.swiftui/README.md

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+# llama.swiftui
+
+Local inference of llama.cpp on an iPhone.
+So far I have only tested with a StarCoder 1B model, but it can most likely handle 7B models as well.
+
+https://github.com/bachittle/llama.cpp/assets/39804642/e290827a-4edb-4093-9642-2a5e399ec545
+

Lines changed: 176 additions & 0 deletions
@@ -0,0 +1,176 @@
+import Foundation
+
+// import llama
+
+enum LlamaError: Error {
+    case couldNotInitializeContext
+}
+
+actor LlamaContext {
+    private var model: OpaquePointer
+    private var context: OpaquePointer
+    private var batch: llama_batch
+    private var tokens_list: [llama_token]
+
+    var n_len: Int32 = 512
+    var n_cur: Int32 = 0
+    var n_decode: Int32 = 0
+
+    init(model: OpaquePointer, context: OpaquePointer) {
+        self.model = model
+        self.context = context
+        self.tokens_list = []
+        self.batch = llama_batch_init(512, 0, 1)
+    }
+
+    deinit {
+        llama_free(context)
+        llama_free_model(model)
+        llama_backend_free()
+    }
+
+    static func createContext(path: String) throws -> LlamaContext {
+        llama_backend_init(false)
+        let model_params = llama_model_default_params()
+
+        let model = llama_load_model_from_file(path, model_params)
+        guard let model else {
+            print("Could not load model at \(path)")
+            throw LlamaError.couldNotInitializeContext
+        }
+        var ctx_params = llama_context_default_params()
+        ctx_params.seed = 1234
+        ctx_params.n_ctx = 2048
+        ctx_params.n_threads = 8
+        ctx_params.n_threads_batch = 8
+
+        let context = llama_new_context_with_model(model, ctx_params)
+        guard let context else {
+            print("Could not load context!")
+            throw LlamaError.couldNotInitializeContext
+        }
+
+        return LlamaContext(model: model, context: context)
+    }
+
+    func get_n_tokens() -> Int32 {
+        return batch.n_tokens;
+    }
+
+    func completion_init(text: String) {
+        print("attempting to complete \"\(text)\"")
+
+        tokens_list = tokenize(text: text, add_bos: true)
+
+        let n_ctx = llama_n_ctx(context)
+        let n_kv_req = tokens_list.count + (Int(n_len) - tokens_list.count)
+
+        print("\n n_len = \(n_len), n_ctx = \(n_ctx), n_kv_req = \(n_kv_req)")
+
+        if n_kv_req > n_ctx {
+            print("error: n_kv_req > n_ctx, the required KV cache size is not big enough")
+        }
+
+        for id in tokens_list {
+            print(token_to_piece(token: id))
+        }
+
+        // batch = llama_batch_init(512, 0) // done in init()
+        batch.n_tokens = Int32(tokens_list.count)
+
+        for i1 in 0..<batch.n_tokens {
+            let i = Int(i1)
+            batch.token[i] = tokens_list[i]
+            batch.pos[i] = i1
+            batch.n_seq_id[Int(i)] = 1
+            batch.seq_id[Int(i)]![0] = 0
+            batch.logits[i] = 0
+        }
+        batch.logits[Int(batch.n_tokens) - 1] = 1 // true
+
+        if llama_decode(context, batch) != 0 {
+            print("llama_decode() failed")
+        }
+
+        n_cur = batch.n_tokens
+    }
+
+    func completion_loop() -> String {
+        var new_token_id: llama_token = 0
+
+        let n_vocab = llama_n_vocab(model)
+        let logits = llama_get_logits_ith(context, batch.n_tokens - 1)
+
+        var candidates = Array<llama_token_data>()
+        candidates.reserveCapacity(Int(n_vocab))
+
+        for token_id in 0..<n_vocab {
+            candidates.append(llama_token_data(id: token_id, logit: logits![Int(token_id)], p: 0.0))
+        }
+        candidates.withUnsafeMutableBufferPointer() { buffer in
+            var candidates_p = llama_token_data_array(data: buffer.baseAddress, size: buffer.count, sorted: false)
+
+            new_token_id = llama_sample_token_greedy(context, &candidates_p)
+        }
+
+        if new_token_id == llama_token_eos(context) || n_cur == n_len {
+            print("\n")
+            return ""
+        }
+
+        let new_token_str = token_to_piece(token: new_token_id)
+        print(new_token_str)
+        // tokens_list.append(new_token_id)
+
+        batch.n_tokens = 0
+
+        batch.token[Int(batch.n_tokens)] = new_token_id
+        batch.pos[Int(batch.n_tokens)] = n_cur
+        batch.n_seq_id[Int(batch.n_tokens)] = 1
+        batch.seq_id[Int(batch.n_tokens)]![0] = 0
+        batch.logits[Int(batch.n_tokens)] = 1 // true
+        batch.n_tokens += 1
+
+        n_decode += 1
+
+        n_cur += 1
+
+        if llama_decode(context, batch) != 0 {
+            print("failed to evaluate llama!")
+        }
+
+        return new_token_str
+    }
+
+    func clear() {
+        tokens_list.removeAll()
+    }
+
+    private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
+        let n_tokens = text.count + (add_bos ? 1 : 0)
+        let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
+        let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, false)
+
+        var swiftTokens: [llama_token] = []
+        for i in 0..<tokenCount {
+            swiftTokens.append(tokens[Int(i)])
+        }
+
+        tokens.deallocate()
+
+        return swiftTokens
+    }
+
+    private func token_to_piece(token: llama_token) -> String {
+        let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
+        result.initialize(repeating: Int8(0), count: 8)
+
+        let _ = llama_token_to_piece(model, token, result, 8)
+
+        let resultStr = String(cString: result)
+
+        result.deallocate()
+
+        return resultStr
+    }
+}
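
For orientation, here is a minimal usage sketch of the LlamaContext actor added above, showing how it might be driven from async application code. This is only a sketch: the runCompletion() wrapper, the bundled model path, and the prompt are illustrative assumptions rather than code from this commit (the commit's remaining SwiftUI files, not shown in this excerpt, presumably drive the actor in a comparable way).

import Foundation

// Hypothetical driver (assumption, not from the diff): load a model,
// feed a prompt, and stream pieces until completion_loop() returns "".
func runCompletion() async throws {
    // Placeholder model path: bundle your own model file (e.g. a GGUF).
    let modelPath = Bundle.main.path(forResource: "model", ofType: "gguf") ?? "model.gguf"

    // createContext is a static (non-isolated) throwing factory, so no await is needed here.
    let llamaContext = try LlamaContext.createContext(path: modelPath)

    // Tokenize the prompt and submit the initial batch.
    await llamaContext.completion_init(text: "Hello, my name is")

    var output = ""
    while true {
        // completion_loop() returns one decoded piece per call and an empty
        // string once EOS is sampled or n_len tokens have been generated.
        let piece = await llamaContext.completion_loop()
        if piece.isEmpty { break }
        output += piece
    }
    print(output)

    await llamaContext.clear()
}
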
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+//
+// Use this file to import your target's public headers that you would like to expose to Swift.
+//
+
+#import "llama.h"
