@@ -1323,7 +1323,9 @@ def _create_completion(
1323
1323
1324
1324
completion_id : str = f"cmpl-{ str (uuid .uuid4 ())} "
1325
1325
created : int = int (time .time ())
1326
- completion_tokens : List [int ] = []
1326
+ # If prompt is empty, initialize completion with BOS token to avoid
1327
+ # detokenization including a space at the beginning of the completion
1328
+ completion_tokens : List [int ] = [] if len (prompt ) > 0 else [self .token_bos ()]
1327
1329
# Add blank space to start of prompt to match OG llama tokenizer
1328
1330
prompt_tokens : List [int ] = (
1329
1331
(
@@ -1459,6 +1461,8 @@ def _create_completion(
1459
1461
# not sure how to handle this branch when dealing
1460
1462
# with CJK output, so keep it unchanged
1461
1463
for token in remaining_tokens :
1464
+ if token == self .token_bos ():
1465
+ continue
1462
1466
token_end_position += len (self .detokenize ([token ]))
1463
1467
# Check if stop sequence is in the token
1464
1468
if token_end_position > (
@@ -1582,6 +1586,8 @@ def _create_completion(
1582
1586
1583
1587
logprobs_or_none : Optional [CompletionLogprobs ] = None
1584
1588
if logprobs is not None :
1589
+ if token == self .token_bos ():
1590
+ continue
1585
1591
token_str = self .detokenize ([token ]).decode (
1586
1592
"utf-8" , errors = "ignore"
1587
1593
)
@@ -1709,6 +1715,8 @@ def _create_completion(
1709
1715
for token , token_str , logprobs_token in zip (
1710
1716
all_tokens , all_token_strs , all_logprobs
1711
1717
):
1718
+ if token == self .token_bos ():
1719
+ continue
1712
1720
text_offsets .append (text_offset )
1713
1721
text_offset += len (token_str )
1714
1722
tokens .append (token_str )
0 commit comments