
Commit 353ec25

We could use std::unordered_map over std::map (#305)
* Improve performance by changing std::map to std::unordered_map, and std::map<id, token> id_to_token to std::vector<token> id_to_token
* Fix last commit: in gpt_vocab_init, add vocab.id_to_token.resize(vocab.token_to_id.size())
* Remove #include <map>
* Nest struct token_score inside gpt_vocab
* Rename token to tok
1 parent 89d5d90 commit 353ec25
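The core of the change: token ids are dense integers in [0, n_vocab), so the id → token mapping can live in a std::vector indexed directly by id rather than an ordered std::map, and the remaining string-keyed map becomes a std::unordered_map. A minimal, self-contained sketch of the new layout, mirroring the llama_vocab declaration from utils.h and the load loop in the diffs below (the sample words are illustrative):

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// New vocabulary layout: id -> token data lives in a vector indexed
// directly by id; the string-keyed map becomes an unordered_map
// (hash lookup instead of tree traversal).
struct llama_vocab {
    using id    = int32_t;
    using token = std::string;

    struct token_score {
        token tok;
        float score;
    };

    std::unordered_map<token, id> token_to_id; // O(1) average lookup by string
    std::vector<token_score>      id_to_token; // O(1) indexed access by id
};

int main() {
    llama_vocab vocab;
    const int n_vocab = 3;
    vocab.id_to_token.resize(n_vocab); // must size the vector before indexing

    // Illustrative entries; real tokens come from the model file.
    const char * words[] = { "<unk>", "<s>", "</s>" };
    for (int i = 0; i < n_vocab; i++) {
        vocab.token_to_id[words[i]] = i;

        auto & tok_score = vocab.id_to_token[i];
        tok_score.tok   = words[i];
        tok_score.score = 0.0f;
    }
    return 0;
}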

File tree

    main.cpp
    quantize.cpp
    utils.cpp
    utils.h

4 files changed: +36 −24 lines


main.cpp

Lines changed: 10 additions & 8 deletions
@@ -9,7 +9,6 @@
 #include <cstring>
 #include <fstream>
 #include <iostream>
-#include <map>
 #include <string>
 #include <vector>

@@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
 static const int EOS_TOKEN_ID = 2;

 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
@@ -123,7 +122,7 @@ struct llama_model {

     //
     struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };

 // load the model's weights from a file
@@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
         std::vector<char> tmp(64);

         for (int i = 0; i < model.hparams.n_vocab; i++) {
@@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
             fin.read((char *) &score, sizeof(score));

             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
         }
     }

@@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
     }
     fprintf(stderr, "\n");
     if (params.interactive) {
@@ -1154,7 +1156,7 @@
         // display text
         if (!input_noecho) {
             for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
+                printf("%s", vocab.id_to_token[id].tok.c_str());
             }
             fflush(stdout);
         }
@@ -1169,7 +1171,7 @@
         // check for reverse prompt
         std::string last_output;
         for (auto id : last_n_tokens) {
-            last_output += vocab.id_to_token[id];
+            last_output += vocab.id_to_token[id].tok;
         }

         // Check if each of the reverse prompts appears at the end of the output.
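A note on the resize added above: std::map::operator[] default-constructs a missing entry on first write, but std::vector::operator[] never grows the container, so indexing an unsized vector is undefined behavior. A small sketch of the rule (hypothetical, not taken from the diff):

#include <string>
#include <vector>

int main() {
    std::vector<std::string> id_to_token;

    // Required with a vector: operator[] does not allocate, unlike
    // std::map::operator[], so size the container before writing into it.
    id_to_token.resize(2);

    id_to_token[0] = "<unk>"; // undefined behavior without the resize above
    id_to_token[1] = "<s>";
    return 0;
}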

quantize.cpp

Lines changed: 5 additions & 3 deletions
@@ -8,7 +8,6 @@
 #include <cstdio>
 #include <cstring>
 #include <fstream>
-#include <map>
 #include <string>
 #include <vector>
 #include <regex>
@@ -130,6 +129,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
         }

         std::string word;
+        vocab.id_to_token.resize(n_vocab);
         for (int i = 0; i < n_vocab; i++) {
             uint32_t len;
             finp.read ((char *) &len, sizeof(len));
@@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
             fout.write((char *) &score, sizeof(score));

             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
         }
     }
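quantize.cpp repeats the loading pattern from main.cpp. The motivation for swapping the container itself is lookup cost: std::map is a balanced tree that does O(log n) string comparisons per query, while std::unordered_map hashes the key once on average. A hypothetical micro-benchmark shape for checking the claim locally (32000 approximates LLaMA's vocabulary size; the commit itself reports no measurements):

#include <chrono>
#include <cstdio>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical micro-benchmark: time one lookup per key in each container.
// Results depend on machine and standard library; nothing here is measured
// in the commit itself.
template <typename Map>
static long long time_lookups(const Map & m, const std::vector<std::string> & keys) {
    const auto t0 = std::chrono::steady_clock::now();
    long long hits = 0;
    for (const auto & k : keys) {
        hits += m.count(k); // count() exists on both std::map and std::unordered_map
    }
    const auto t1 = std::chrono::steady_clock::now();
    if (hits != (long long) keys.size()) return -1; // keep the loop observable
    return std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count();
}

int main() {
    std::map<std::string, int> ordered;
    std::unordered_map<std::string, int> hashed;
    std::vector<std::string> keys;

    for (int i = 0; i < 32000; i++) { // roughly LLaMA's vocabulary size
        keys.push_back("token_" + std::to_string(i));
        ordered[keys.back()] = i;
        hashed[keys.back()] = i;
    }

    printf("std::map           lookups: %lld us\n", time_lookups(ordered, keys));
    printf("std::unordered_map lookups: %lld us\n", time_lookups(hashed, keys));
    return 0;
}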

utils.cpp

Lines changed: 12 additions & 8 deletions
@@ -155,8 +155,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
     }
 }

-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;

     // read file into string
     std::string json;
@@ -360,16 +360,16 @@ struct llama_tokenizer {
             return;
         }

-        auto score = vocab_.score.find((*token).second);
-
-        if (score == vocab_.score.end()) {
+        if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
             return;
         }

+        const auto &tok_score = vocab_.id_to_token[(*token).second];
+
         llama_sp_bigram bigram;
         bigram.left = left;
         bigram.right = right;
-        bigram.score = (*score).second;
+        bigram.score = tok_score.score;
         bigram.size = text.size();
         work_queue_.push(bigram);
     }
@@ -393,6 +393,8 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
     std::string word;
     std::vector<char> tmp(64);

+    vocab.id_to_token.resize(n_vocab);
+
     for (int i = 0; i < n_vocab; i++) {
         uint32_t len;
         fin.read((char *) &len, sizeof(len));
@@ -410,8 +412,10 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
         fin.read((char *) &score, sizeof(score));

         vocab.token_to_id[word] = i;
-        vocab.id_to_token[i] = word;
-        vocab.score[i] = score;
+
+        auto &tok_score = vocab.id_to_token[i];
+        tok_score.tok = word;
+        tok_score.score = score;
     }

     return true;

utils.h

Lines changed: 9 additions & 5 deletions
@@ -3,7 +3,7 @@
 #pragma once

 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -65,15 +65,19 @@ struct llama_vocab {
     using id = int32_t;
     using token = std::string;

-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-    std::map<id, float> score;
+    struct token_score {
+        token tok;
+        float score;
+    };
+
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token_score> id_to_token;
 };

 void replace(std::string & str, const std::string & needle, const std::string & replacement);

 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);

 // TODO: temporary until #77 is merged, need this now for some tokenizer tests
 bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
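With the header in this final shape, decoding an id back to text is a field access per token, as the main.cpp print paths show. A small usage sketch (detokenize is a hypothetical helper, not part of the commit):

#include <cstdio>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct llama_vocab {
    using id = int32_t;
    using token = std::string;

    struct token_score {
        token tok;
        float score;
    };

    std::unordered_map<token, id> token_to_id;
    std::vector<token_score> id_to_token;
};

// Hypothetical helper: concatenate the token strings for a sequence of ids,
// the same access pattern main.cpp uses for echo and reverse-prompt checks.
static std::string detokenize(const llama_vocab & vocab,
                              const std::vector<llama_vocab::id> & ids) {
    std::string out;
    for (const auto id : ids) {
        out += vocab.id_to_token[id].tok; // .tok instead of the old map value
    }
    return out;
}

int main() {
    llama_vocab vocab;
    vocab.id_to_token = { { "Hello", 0.0f }, { ", world", 0.0f } };
    printf("%s\n", detokenize(vocab, { 0, 1 }).c_str());
    return 0;
}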
