Merge pull request EvolvingLMMs-Lab#117 from teowu/main

Luodian · web-flow · commit d49a032fa769 · 2024-06-15T17:30:59.000+08:00
LongVideoBench for LMMs-Eval
diff --git a/lmms_eval/models/llava_vid.py b/lmms_eval/models/llava_vid.py
@@ -96,6 +96,7 @@ def __init__(
         self.mm_spatial_pool_out_channels = int(mm_spatial_pool_out_channels)
         self.mm_spatial_pool_mode = mm_spatial_pool_mode
         self.max_frames_num = int(max_frames_num)
+        print(self.max_frames_num)
         if self.overwrite == True:
             overwrite_config = {}
             overwrite_config["mm_resampler_type"] = self.mm_resampler_type
@@ -404,7 +405,7 @@ def generate_until(self, requests) -> List[str]:
                     attention_mask=attention_masks,
                     modalities="video",
                     use_cache=self.use_cache,
-                    stopping_criteria=[stopping_criteria],
+                    #stopping_criteria=[stopping_criteria],
                     do_sample=True if gen_kwargs["temperature"] > 0 else False,
                     temperature=gen_kwargs["temperature"],
                     top_p=gen_kwargs["top_p"],
diff --git a/lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml b/lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml
@@ -0,0 +1,29 @@
+dataset_path: longvideobench/LongVideoBench  # dataset to load; presumably a Hugging Face Hub repo id — verify
+dataset_kwargs:  # NOTE(review): presumably forwarded to the dataset loader — confirm against the task loader
+  token: True  # presumably enables authenticated access to the dataset — TODO confirm
+  cache_dir: longvideobench
+  video: True
+  force_download: False
+  local_files_only: False
+  # From_YouTube: True  (option left disabled; NOTE(review): confirm whether it is still needed)
+task: longvideobench_val_i  # task name; the "_i" variant uses utils.longvideobench_doc_to_visual_i below
+test_split: validation  # evaluation runs on the "validation" split
+doc_to_visual: !function utils.longvideobench_doc_to_visual_i
+doc_to_text: !function utils.longvideobench_doc_to_text
+doc_to_target: "correct_choice"  # presumably the document field holding the gold answer — verify against utils
+generation_kwargs:
+  max_new_tokens: 32  # short budget; the post_prompt below asks for only an option letter
+  temperature: 0  # zero temperature together with do_sample: False requests non-sampled decoding
+  do_sample: False
+process_results: !function utils.longvideobench_process_results
+metric_list:
+  - metric: lvb_acc
+    aggregation: !function utils.longvideobench_aggregate_results
+    higher_is_better: true  # larger lvb_acc values are better
+
+model_specific_prompt_kwargs:
+  default:  # NOTE(review): presumably the fallback entry for models without their own key — confirm
+    pre_prompt: ""  # empty string: no pre-prompt text
+    post_prompt: "Answer with the option's letter from the given choices directly.\n"
+    insert_interleave_subtitles: True  # set only in this "_i" config; NOTE(review): presumably interleaves subtitles with frames — confirm in utils
+    
diff --git a/lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml b/lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml
@@ -0,0 +1,28 @@
+dataset_path: longvideobench/LongVideoBench  # dataset to load; presumably a Hugging Face Hub repo id — verify
+dataset_kwargs:  # NOTE(review): presumably forwarded to the dataset loader — confirm against the task loader
+  token: True  # presumably enables authenticated access to the dataset — TODO confirm
+  cache_dir: longvideobench
+  video: True
+  force_download: False
+  local_files_only: False
+  # From_YouTube: True  (option left disabled; NOTE(review): confirm whether it is still needed)
+task: longvideobench_val_v  # task name; the "_v" variant uses utils.longvideobench_doc_to_visual_v below
+test_split: validation  # evaluation runs on the "validation" split
+doc_to_visual: !function utils.longvideobench_doc_to_visual_v
+doc_to_text: !function utils.longvideobench_doc_to_text
+doc_to_target: "correct_choice"  # presumably the document field holding the gold answer — verify against utils
+generation_kwargs:
+  max_new_tokens: 32  # short budget; the post_prompt below asks for only an option letter
+  temperature: 0  # zero temperature together with do_sample: False requests non-sampled decoding
+  do_sample: False
+process_results: !function utils.longvideobench_process_results
+metric_list:
+  - metric: lvb_acc
+    aggregation: !function utils.longvideobench_aggregate_results
+    higher_is_better: true  # larger lvb_acc values are better
+
+model_specific_prompt_kwargs:
+  default:  # NOTE(review): presumably the fallback entry for models without their own key — confirm
+    pre_prompt: ""  # empty string: no pre-prompt text
+    post_prompt: "Answer with the option's letter from the given choices directly.\n"
+  
diff --git a/lmms_eval/tasks/longvideobench/utils.py b/lmms_eval/tasks/longvideobench/utils.py