You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
pass# the processor already has a valid chat template
115
+
elifmodel.config.model_type=="paligemma":
116
+
processor.chat_template="""{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] if item['type'] == 'text' %}{{ item['text'] }}<|im_end|>{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""
117
+
elifmodel.config.model_type=="llava":
118
+
processor.chat_template="""{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""
119
+
100
120
iftokenizer.pad_tokenisNone:
101
121
tokenizer.pad_token=tokenizer.eos_token
102
122
ifargs.ignore_bias_buffers:
@@ -124,27 +144,9 @@
124
144
ds[key] =ds[key].select(range(50))
125
145
126
146
defprocess(row):
127
-
# The prompt can be either a string or a list. In some datasets, the prompt is just a common string
128
-
# for both rejected and chosen (already included in chosen and rejected) and is not meant to be used
129
-
# separately. In other datasets, the prompt is intended to be used as a prefix for rejected and chosen,
130
-
# and in such cases, it is properly formatted as a list with keys "role" and "content".
131
-
# Example 1:
132
-
# row = {"prompt": "What does detox mean?",
133
-
# "chosen": [{"content": "What does detox mean?", "role": "user"}, {"content": "It means to get rid of the toxins.", "role": "assistant"}],
134
-
# "rejected": [{"content": "What does detox mean?", "role": "user"}, {"content": "I don't know.", "role": "assistant"}]}
0 commit comments