@@ -112,13 +112,20 @@ int main(int argc, char ** argv) {
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = ::llama_tokenize(ctx, prompt, true);
+        auto inp = ::llama_tokenize(ctx, prompt, true, false);
         if (inp.size() > n_batch) {
             inp.resize(n_batch);
         }
         inputs.push_back(inp);
     }
 
+    // add eos if not present
+    for (auto & inp : inputs) {
+        if (inp.empty() || inp.back() != llama_token_eos(model)) {
+            inp.push_back(llama_token_eos(model));
+        }
+    }
+
     // tokenization stats
     if (params.verbose_prompt) {
         for (int i = 0; i < (int) inputs.size(); i++) {
@@ -172,7 +179,7 @@ int main(int argc, char ** argv) {
     for (int j = 0; j < n_prompts; j++) {
         fprintf(stdout, "embedding %d: ", j);
         for (int i = 0; i < std::min(16, n_embd); i++) {
-            fprintf(stdout, "%f ", emb[j * n_embd + i]);
+            fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
         }
         fprintf(stdout, "\n");
     }
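
For context, the EOS-append loop added in the first hunk normalizes every tokenized prompt so it ends with exactly one EOS token before embedding. A minimal standalone sketch of the same check-then-append logic, using plain token vectors and a hypothetical EOS id standing in for `llama_token_eos(model)` (the example token values are made up):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const int32_t eos = 2; // hypothetical EOS id; the real code uses llama_token_eos(model)

    std::vector<std::vector<int32_t>> inputs = {
        {1, 15043, 3186}, // no trailing EOS -> one gets appended
        {1, 22172, 2},    // already ends with EOS -> left unchanged
        {},               // empty prompt -> becomes a lone EOS
    };

    // same check-then-append logic as the diff above
    for (auto & inp : inputs) {
        if (inp.empty() || inp.back() != eos) {
            inp.push_back(eos);
        }
    }

    for (const auto & inp : inputs) {
        for (int32_t t : inp) {
            printf("%d ", t);
        }
        printf("\n");
    }
    return 0;
}
```

The `%9.6f` change in the second hunk only affects presentation: a fixed width of 9 with 6 decimal places makes the printed embedding columns line up across rows.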