Commit 3bfa3b4

Fix convert script, warnings alpaca instructions, default params

1 parent 715d292

4 files changed: +23 -17

README.md

Lines changed: 5 additions & 5 deletions
@@ -193,15 +193,15 @@ First, download the `ggml` Alpaca model into the `./models` folder:
 ```
 # use one of these
 # TODO: add a script to simplify the download
-curl -o ggml2-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
-curl -o ggml2-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
-curl -o ggml2-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
 ```

 Now run the `main` tool like this:

 ```
-./main -m ./models/ggml2-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
 ```

 Sample run:
@@ -218,7 +218,7 @@ Sample run:
 There 26 letters in the English Alphabet
 > What is the most common way of transportation in Amsterdam?
 The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis
-> List 5 words that start with "ca".
+> List 5 words that start with "ca".
 cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach.
 >
 ```

alpaca.sh

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 # Temporary script - will be removed in the future
 #

-./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.96 --repeat_penalty 1 -t 7
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
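Why 0.2 instead of 0.96: temperature divides the logits before the softmax, so values below 1 concentrate probability on the highest-scoring tokens, which suits instruction-following better than near-1 sampling. A minimal sketch of the effect, using made-up logits (this is not llama.cpp's actual sampler):

```python
# Illustrative only: shows how temperature reshapes a softmax distribution.
# The logits below are hypothetical, not taken from the model.
import math

def softmax_with_temp(logits, temp):
    # Dividing by the temperature sharpens (temp < 1) or flattens (temp > 1)
    # the resulting distribution.
    scaled = [x / temp for x in logits]
    m = max(scaled)
    exps = [math.exp(x - m) for x in scaled]
    total = sum(exps)
    return [e / total for e in exps]

logits = [2.0, 1.0, 0.5]                 # hypothetical token logits
print(softmax_with_temp(logits, 0.96))   # fairly spread out
print(softmax_with_temp(logits, 0.2))    # mass concentrates on the top token
```

With `--top_k 10000` leaving nearly the whole vocabulary in play, the temperature ends up doing most of the work of keeping the Alpaca instruction mode focused.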

convert-pth-to-ggml.py

Lines changed: 5 additions & 3 deletions
@@ -27,9 +27,9 @@
 def parse_args():

     parser = argparse.ArgumentParser(description='Convert a LLaMA model checkpoint to a ggml compatible file')
-    parser.add_argument('dir_model', help='directory containing the model checkpoint')
-    parser.add_argument('ftype', type=int, choices=[0, 1], default=1, help='file type (0: float32, 1: float16)')
-    parser.add_argument('vocab_only', type=bool, default=False, help='only write vocab to file')
+    parser.add_argument('dir_model', help='directory containing the model checkpoint')
+    parser.add_argument('ftype', help='file type (0: float32, 1: float16)', type=int, choices=[0, 1], default=1)
+    parser.add_argument('vocab_only', help='only write vocab to file', type=int, default=0, nargs='?')
     return parser.parse_args()

 def get_n_parts(dim):
@@ -135,6 +135,8 @@ def main():

     hparams, tokenizer = load_hparams_and_tokenizer(dir_model)

+    print(args)
+
     # if only writing vocab to file
     if args.vocab_only:

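The `vocab_only` argument previously used `type=bool`, a known argparse pitfall: the parser calls `bool()` on the raw command-line string, and every non-empty string, including `'0'` and `'False'`, is truthy. Switching to `type=int` with `nargs='?'` and `default=0` makes the positional optional and gives it usable truthiness. A small self-contained demonstration of the difference:

```python
# Demonstrates why type=bool is broken for argparse positionals, and how
# the type=int / nargs='?' replacement behaves.
import argparse

old = argparse.ArgumentParser()
old.add_argument('vocab_only', type=bool, default=False)
print(old.parse_args(['0']).vocab_only)    # True -- bool('0') is truthy!

new = argparse.ArgumentParser()
new.add_argument('vocab_only', type=int, default=0, nargs='?')
print(new.parse_args([]).vocab_only)       # 0 -- positional is now optional
print(new.parse_args(['1']).vocab_only)    # 1 -- truthy only when requested
```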

main.cpp

Lines changed: 12 additions & 8 deletions
@@ -165,23 +165,27 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
     // load vocab
     {
         std::string word;
+        std::vector<char> tmp(64);
+
         for (int i = 0; i < model.hparams.n_vocab; i++) {
             uint32_t len;
             fin.read((char *) &len, sizeof(len));

             word.resize(len);
-            fin.read((char *) word.data(), len);
+            if (len > 0) {
+                tmp.resize(len);
+                fin.read(tmp.data(), len);
+                word.assign(tmp.data(), len);
+            } else {
+                word.clear();
+            }

             float score;
             fin.read((char *) &score, sizeof(score));

             vocab.token_to_id[word] = i;
             vocab.id_to_token[i] = word;
             vocab.score[i] = score;
-
-            //if (i < 30000) {
-            //    fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
-            //}
         }
     }

@@ -974,7 +978,7 @@ int main(int argc, char ** argv) {
         n_past += embd.size();
         embd.clear();

-        if (embd_inp.size() <= input_consumed) {
+        if ((int) embd_inp.size() <= input_consumed) {
             // out of user input, sample next token
             const float top_k = params.top_k;
             const float top_p = params.top_p;
@@ -1011,7 +1015,7 @@
             --remaining_tokens;
         } else {
             // some user input remains from prompt or interaction, forward it to processing
-            while (embd_inp.size() > input_consumed) {
+            while ((int) embd_inp.size() > input_consumed) {
                 embd.push_back(embd_inp[input_consumed]);
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(embd_inp[input_consumed]);
@@ -1036,7 +1040,7 @@

         // in interactive mode, and not currently processing queued inputs;
         // check if we should prompt the user for more
-        if (params.interactive && embd_inp.size() <= input_consumed) {
+        if (params.interactive && (int) embd_inp.size() <= input_consumed) {
             // check for reverse prompt
             for (auto antiprompt_inp : antipromptv_inp) {
                 if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) {
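Two separate fixes land in main.cpp. The vocab loader now reads each token through a scratch `std::vector<char>` and handles `len == 0` explicitly, instead of casting away `const` from `std::string::data()`; the `(int)` casts in the later hunks silence sign-compare warnings from comparing the unsigned `size()` against the signed `input_consumed`. As a side effect, the loop spells out the on-disk vocab layout: per token, a `uint32` length, the raw bytes, then a `float32` score. A hypothetical Python reader for just that section (little-endian assumed; the names are mine, not part of the repo):

```python
# Hypothetical reader mirroring the C++ vocab loop above: per token, a
# uint32 length, `length` raw bytes, then a float32 score.
# Little-endian is assumed; `f` must already be positioned at the section.
import struct

def read_vocab(f, n_vocab):
    vocab = []
    for i in range(n_vocab):
        (length,) = struct.unpack('<I', f.read(4))
        word = f.read(length).decode('utf-8', errors='replace') if length else ''
        (score,) = struct.unpack('<f', f.read(4))
        vocab.append((i, word, score))
    return vocab
```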
