diff --git a/examples/server/server.cpp b/examples/server/server.cpp index badeb9121324f..7813a2957d6bc 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1182,7 +1182,7 @@ struct server_context { bool process_token(completion_token_output & result, server_slot & slot) { // remember which tokens were sampled - used for repetition penalties during sampling - const std::string token_str = llama_token_to_piece(ctx, result.tok, false); + const std::string token_str = llama_token_to_piece(ctx, result.tok, params.special); slot.sampled = result.tok; // search stop word and delete it