You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
@@ -1771,17 +1783,37 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
1771
1783
}
1772
1784
1773
1785
prompt = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n";
1774
-
1775
-
system_prompt_length = prompt.size();
1776
-
1777
1786
prompt += img_prompt;
1787
+
1788
+
prompt_attn_range.first = prompt.size();
1778
1789
prompt += conditioner_params.text;
1790
+
prompt_attn_range.second = prompt.size();
1791
+
1779
1792
prompt += "<|im_end|>\n<|im_start|>assistant\n";
1793
+
} else if (sd_version_is_flux2(version)) {
1794
+
prompt_template_encode_start_idx = 0;
1795
+
out_layers = {10, 20, 30};
1796
+
1797
+
prompt = "[SYSTEM_PROMPT]You are an AI that reasons about image descriptions. You give structured responses focusing on object relationships, object\nattribution and actions without speculation.[/SYSTEM_PROMPT][INST]";
1798
+
1799
+
prompt_attn_range.first = prompt.size();
1800
+
prompt += conditioner_params.text;
1801
+
prompt_attn_range.second = prompt.size();
1802
+
1803
+
prompt += "[/INST]";
1780
1804
} else {
1781
-
prompt = "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n" + conditioner_params.text + "<|im_end|>\n<|im_start|>assistant\n";
1805
+
prompt_template_encode_start_idx = 34;
1806
+
1807
+
prompt = "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n";
1808
+
1809
+
prompt_attn_range.first = prompt.size();
1810
+
prompt += conditioner_params.text;
1811
+
prompt_attn_range.second = prompt.size();
1812
+
1813
+
prompt += "<|im_end|>\n<|im_start|>assistant\n";
1782
1814
}
1783
1815
1784
-
auto tokens_and_weights = tokenize(prompt, 0, system_prompt_length, false);
1816
+
auto tokens_and_weights = tokenize(prompt, prompt_attn_range, 0, false);
1785
1817
auto& tokens = std::get<0>(tokens_and_weights);
1786
1818
auto& weights = std::get<1>(tokens_and_weights);
1787
1819
@@ -1790,11 +1822,12 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
1790
1822
1791
1823
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
0 commit comments