4 files changed, +14 -6 lines changed

examples/server/tests/features

@@ -23,6 +23,13 @@ Feature: llama.cpp server
     """
     Then embeddings are generated
 
+  Scenario: Tokenize / Detokenize complex
+    When tokenizing:
+    """
+      España is a èspciâl café über naïve résumé cañón élite cañas Barça 例子 東京 こんにちは 你好 中国
+    """
+    Then tokens can be detokenize and is equivalent False
+
   Scenario: OAI Embeddings compatibility
     Given a model bert-bge-small
     When an OAI compatible embeddings computation request for:
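
Why the new scenario expects `is equivalent False`: a plausible reading (not stated in the diff itself) is that the canonical-decomposition pass touched further down can rewrite precomposed characters into a base character plus combining mark, so the detokenized text can render identically yet differ code point by code point. A minimal standard-library sketch of that effect, not part of the PR:

```python
# Sketch: why a byte-exact round trip can fail for accented text even when
# the rendered string looks identical (NFC vs. NFD code point sequences).
import unicodedata

s_nfc = unicodedata.normalize("NFC", "café")   # 'é' as one code point (U+00E9)
s_nfd = unicodedata.normalize("NFD", "café")   # 'e' + combining acute (U+0301)

print(s_nfc == s_nfd)                          # False: different sequences
print(len(s_nfc), len(s_nfd))                  # 4 vs. 5 code points
print(unicodedata.normalize("NFC", s_nfd) == s_nfc)  # True after re-normalizing
```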
@@ -91,7 +91,7 @@ Feature: llama.cpp server
     """
       What is the capital of France ?
     """
-    Then tokens can be detokenize
+    Then tokens can be detokenize and is equivalent True
 
   Scenario: Models available
     Given available models
@@ -670,9 +670,10 @@ async def step_tokenize(context):
     context.tokens = tokenize_json['tokens']
 
 
-@step('tokens can be detokenize')
+@step('tokens can be detokenize and is equivalent {equivalent}')
 @async_run_until_complete
-async def step_detokenize(context):
+async def step_detokenize(context, equivalent):
+    equivalent = equivalent == 'True'
     assert len(context.tokens) > 0
     async with aiohttp.ClientSession() as session:
         async with session.post(f'{context.base_url}/detokenize',
@@ -682,8 +683,8 @@ async def step_detokenize(context):
             assert response.status == 200
             detokenize_json = await response.json()
             # SPM tokenizer adds a whitespace prefix: https://github.com/google/sentencepiece/issues/15
-            assert context.tokenized_text == detokenize_json['content'].strip()
-
+            if equivalent:
+                assert context.tokenized_text == detokenize_json['content'].strip()
 
 @step('an OPTIONS request is sent from {origin}')
 @async_run_until_complete
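
behave passes the `{equivalent}` placeholder to the step as a plain string, which is why the step converts it with `equivalent == 'True'` before using it as a flag. For reference, a standalone sketch of the same round-trip check outside behave; the base URL and the `/tokenize` / `/detokenize` payload shapes (`{"content": ...}` in, `{"tokens": [...]}` out, and the reverse) are assumptions inferred from how the step definitions use the endpoints:

```python
# Standalone sketch of the tokenize/detokenize round-trip check (assumptions:
# a llama.cpp server on base_url; /tokenize takes {"content": ...} and returns
# {"tokens": [...]}; /detokenize takes {"tokens": [...]} and returns {"content": ...}).
import asyncio
import aiohttp

async def round_trip(base_url: str, text: str, expect_equivalent: bool) -> str:
    async with aiohttp.ClientSession() as session:
        async with session.post(f'{base_url}/tokenize', json={"content": text}) as resp:
            assert resp.status == 200
            tokens = (await resp.json())['tokens']
        async with session.post(f'{base_url}/detokenize', json={"tokens": tokens}) as resp:
            assert resp.status == 200
            content = (await resp.json())['content']
    # SPM tokenizers prepend a whitespace, so strip before comparing.
    if expect_equivalent:
        assert text == content.strip()
    return content

# Example (hypothetical local server):
# asyncio.run(round_trip('http://localhost:8080', 'What is the capital of France ?', True))
```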
@@ -492,7 +492,7 @@ std::vector<uint32_t> sort_by_canonical_class(std::vector<uint32_t> & cpts) {
 std::vector<uint32_t> canonical_decomposition_cpts(std::vector<uint32_t> & cpts, uint32_t starting_offset) {
     std::vector<uint32_t> result;
     for (auto i = starting_offset; i < cpts.size(); i++) {
-        auto it = unicode_map_nfd.equal_range(cpts[i]);
+        const auto & it = unicode_map_nfd.equal_range(cpts[i]);
         if (it.first != it.second) {
             uint offset = 0;
             for (auto jt = it.first; jt != it.second; jt++) {