@@ -320,3 +320,24 @@ def format_chatml(
320
320
_messages .append ((_roles ["assistant" ], None ))
321
321
_prompt = _format_chatml (system_message , _messages , _sep )
322
322
return ChatFormatterResponse (prompt = _prompt )
323
+
324
# eg, export HF_MODEL=mistralai/Mistral-7B-Instruct-v0.1
@register_chat_format("autotokenizer")
def format_autotokenizer(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Format messages with a Hugging Face tokenizer's built-in chat template.

    The model repo to load the tokenizer from is read from the ``HF_MODEL``
    environment variable (e.g. ``mistralai/Mistral-7B-Instruct-v0.1``).

    References:
    - https://huggingface.co/docs/transformers/main/chat_templating
    - https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1#instruction-format
    - https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json

    Args:
        messages: The chat messages to render into a single prompt string.
        **kwargs: Ignored; accepted for compatibility with other formatters.

    Returns:
        ChatFormatterResponse with the rendered prompt and the tokenizer's
        EOS token as the stop string.

    Raises:
        ValueError: If ``HF_MODEL`` is not set in the environment.
    """
    # Imported lazily so the module works without `transformers` installed
    # unless this chat format is actually selected.
    import os

    from transformers import AutoTokenizer

    hf_model = os.getenv("HF_MODEL")  # e.g. mistralai/Mistral-7B-Instruct-v0.1
    if not hf_model:
        # ValueError (not bare Exception): a required configuration value is
        # missing; still caught by any existing `except Exception` handlers.
        raise ValueError(
            "HF_MODEL needs to be set in env to use chat format 'autotokenizer'"
        )
    tokenizer = AutoTokenizer.from_pretrained(hf_model)
    # Prevent the template from injecting the model's default system prompt;
    # the caller's messages fully control the conversation.
    tokenizer.use_default_system_prompt = False
    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    # Return formatted prompt and eos token by default.
    return ChatFormatterResponse(prompt=prompt, stop=tokenizer.eos_token)
0 commit comments