Apply rebase edits and remove ggml_cont calls that are now unnecessary

Tianyue-Zhao · Tianyue-Zhao · commit 06a07194b176 · 2025-09-09T00:12:21.000Z
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -18620,7 +18620,7 @@ struct llm_build_cogvlm : public llm_graph_context {
 
         ggml_tensor * inp_pos = build_inp_pos();
 
-        auto * inp_attn = build_attn_inp_kv_unified();
+        auto * inp_attn = build_attn_inp_kv();
 
         // check ubatch to see if we have input tokens (text)
         // or an input embedding vector (image)
@@ -18662,15 +18662,13 @@ struct llm_build_cogvlm : public llm_graph_context {
                     qkv->nb[1], 0);
                 ggml_tensor * Kcur = ggml_view_3d(ctx0, qkv, n_embd_head, n_head_kv, n_tokens, n_embd_head * sizeof(float),
                     qkv->nb[1], n_embd * ggml_element_size(qkv));
-                ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, qkv, n_embd, n_tokens,
-                    qkv->nb[1], 2 * n_embd * ggml_element_size(qkv)));
-
-                Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
+                ggml_tensor * Vcur = ggml_view_3d(ctx0, qkv, n_embd_head, n_head_kv, n_tokens, n_embd_head * sizeof(float),
+                    qkv->nb[1], 2 * n_embd * ggml_element_size(qkv));
 
                 Qcur = ggml_rope(ctx0, Qcur, inp_pos, n_embd_head, rope_type);
                 Kcur = ggml_rope(ctx0, Kcur, inp_pos, n_embd_head, rope_type);
 
-                cur = build_attn(inp_attn, wo, nullptr, Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+                cur = build_attn(inp_attn, wo, nullptr, Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
                 cb(cur, "attn_out", il);
             }
 
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
@@ -1645,16 +1645,12 @@ struct clip_graph {
 
             cur = ggml_add(ctx0, cur, layer.qkv_b);
 
-            ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_pos,
-                cur->nb[1], 0));
-            ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_pos,
-                cur->nb[1], n_embd * sizeof(float)));
-            ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_pos,
-                cur->nb[1], 2 * n_embd * sizeof(float)));
-
-            Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_pos);
-            Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_pos);
-            Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_pos);
+            ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
+                cur->nb[1], 0);
+            ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
+                cur->nb[1], n_embd * sizeof(float));
+            ggml_tensor * Vcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
+                cur->nb[1], 2 * n_embd * sizeof(float));
 
             cb(Qcur, "Qcur", il);
             cb(Kcur, "Kcur", il);
@@ -3968,7 +3964,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
             } break;
         case PROJECTOR_TYPE_COGVLM:
             {
-                n_patches_sq += 2;
+                n_patches += 2;
             } break;
         default:
             GGML_ABORT("unsupported projector type");