@@ -567,76 +567,6 @@ static void debug_dump_img_embed(struct llava_context * ctx_llava, model_output_
567567 }
568568}
569569
570-
// Debug helper: builds a window-attention mask for one fixed image geometry
// and dumps it to `output_path` as raw float32 values.
//
// The mask is a square [num_vision_tokens, num_vision_tokens] matrix stored
// row-major. Entries are 0 where attention is allowed and
// std::numeric_limits<float>::lowest() everywhere else.
//
// Tokens are grouped window by window into contiguous ranges, so the allowed
// entries form square blocks along the diagonal, one block per window.
//
// Takes no arguments and returns nothing; success or failure is reported on
// stdout / stderr.
static void dump_win_attn_mask() {
    constexpr int image_size_width  = 196;
    constexpr int image_size_height = 140;
    constexpr int patch_size        = 14;
    constexpr int attn_window_size  = 112;
    constexpr int merge_ratio       = 2;

    // The layout below relies on (pw * ph * merge_ratio^2) == (ipw * iph),
    // which only holds when both image sides split evenly into merged patches.
    static_assert(image_size_width  % (patch_size * merge_ratio) == 0, "image width must be a multiple of patch_size * merge_ratio");
    static_assert(image_size_height % (patch_size * merge_ratio) == 0, "image height must be a multiple of patch_size * merge_ratio");
    static_assert(attn_window_size / patch_size / merge_ratio > 0,     "attention window must cover at least one merged patch");

    /*
        pw * ph   = number of tokens output by the ViT after the patch merger is applied
        ipw * iph = number of vision tokens processed inside the ViT
    */
    constexpr int ipw         = image_size_width  / patch_size;
    constexpr int iph         = image_size_height / patch_size;
    constexpr int pw          = image_size_width  / patch_size / merge_ratio;
    constexpr int ph          = image_size_height / patch_size / merge_ratio;
    constexpr int grid_window = attn_window_size  / patch_size / merge_ratio;

    constexpr int merge_area = merge_ratio * merge_ratio; // ViT tokens per merged token
    constexpr int n_tokens   = ipw * iph;
    // Exact integer square; avoids the round trip through floating-point pow().
    constexpr int ne         = n_tokens * n_tokens;

    // [num_vision_tokens, num_vision_tokens] attention mask, fully masked to start with.
    std::vector<float> mask(ne, std::numeric_limits<float>::lowest());

    // Number of merged tokens already assigned to a window.
    int dst = 0;

    for (int y = 0; y < ph; y += grid_window) {
        for (int x = 0; x < pw; x += grid_window) {
            // Windows on the right / bottom edge may be clipped by the grid.
            const int win_h = std::min(grid_window, ph - y);
            const int win_w = std::min(grid_window, pw - x);

            // All tokens of the same window are grouped into one contiguous range
            // [dst_0, dst) of merged tokens, i.e. [tok_begin, tok_end) of ViT tokens.
            const int dst_0 = dst;
            dst += win_h * win_w;

            const int tok_begin = dst_0 * merge_area;
            const int tok_end   = dst   * merge_area;

            // Rows and columns of a window share the same token range, so the
            // unmasked region is a square block on the diagonal.
            for (int row = tok_begin; row < tok_end; row++) {
                const int row_offset = row * n_tokens;
                std::fill(
                    mask.begin() + row_offset + tok_begin,
                    mask.begin() + row_offset + tok_end,
                    0.0f);
            }
        }
    }

    auto output_path = " win_attn_mask_fp32.bin";

    std::ofstream outFile(output_path, std::ios::binary);
    if (!outFile.is_open()) {
        std::cerr << " Error opening file!" << std::endl;
        return;
    }

    outFile.write(reinterpret_cast<const char *>(mask.data()),
                  static_cast<std::streamsize>(mask.size() * sizeof(float)));
    outFile.close();

    // Only claim success when both the write and the final flush went through.
    if (!outFile) {
        std::cerr << " Error writing file!" << std::endl;
        return;
    }
    std::cout << " Data successfully written to " << output_path << std::endl;
}
638-
639-
640570#endif
641571
642572
0 commit comments