3434 from llava .mm_utils import process_images
3535except ImportError as e :
3636 print (e )
37- # import pdb;pdb.set_trace()
38- eval_logger .debug ("VILA is not installed. Please install VILA to use this model." )
37+
38+        eval_logger.debug(f"VILA is not installed. Please install VILA to use this model. Error: {e}")
3939
4040
4141@register_model ("vila" )
@@ -202,7 +202,7 @@ def load_video(self, video_path, max_frames_num):
202202 return [Image .fromarray (img ) for img in spare_frames ]
203203 except Exception as e :
204204 eval_logger .error (f"Failed to load video { video_path } with error: { e } " )
205- # import pdb;pdb.set_trace()
205+
206206 return [Image .new ("RGB" , (448 , 448 ), (0 , 0 , 0 ))] * max_frames_num
207207
208208 def tok_decode (self , tokens ):
@@ -279,7 +279,7 @@ def generate_until(self, requests) -> List[str]:
279279
280280 for contexts , gen_kwargs , doc_to_visual , doc_id , task , split in [reg .args for reg in requests ]:
281281 # if self.task_dict[task][split][doc_id]["duration"] != "short":
282- # # import pdb;pdb.set_trace()
282+ #
283283 # res.append("A")
284284 # pbar.update(1)
285285 # continue
@@ -289,20 +289,20 @@ def generate_until(self, requests) -> List[str]:
289289
290290 num_video_frames = self .model .config .num_video_frames
291291 videos = []
292- # import pdb;pdb.set_trace()
292+
293293 if self .max_frames_num == 0 :
294294 images = [Image .new ("RGB" , (448 , 448 ), (0 , 0 , 0 ))] * num_video_frames
295295 video = process_images (images , self .model .image_processor , self .model .config ).half ().cuda ()
296296 videos .append (video )
297297 else :
298298 for visual in visuals :
299299 # images, video_loading_succeed = LazySupervisedDataset._load_video(visual, num_video_frames, self.model)
300- # import pdb;pdb.set_trace()
300+
301301 if self .video_decode_backend == "decord" :
302302 images = self .load_video (visual , num_video_frames )
303303 elif self .video_decode_backend == "pyav" :
304304 images = read_video_pyav (visual , num_frm = num_video_frames )
305- # import pdb;pdb.set_trace()
305+
306306 video = process_images (images , self .model .image_processor , self .model .config ).half ().cuda ()
307307 videos .append (video )
308308
@@ -350,7 +350,7 @@ def generate_until(self, requests) -> List[str]:
350350 if "num_beams" not in gen_kwargs :
351351 gen_kwargs ["num_beams" ] = 1
352352
353- # import pdb;pdb.set_trace()
353+
354354 with torch .inference_mode ():
355355 output_ids = self .model .generate (
356356 input_ids = input_ids ,
@@ -370,7 +370,7 @@ def generate_until(self, requests) -> List[str]:
370370 outputs = self .tokenizer .batch_decode (output_ids , skip_special_tokens = True )[0 ].strip ()
371371 print ("Question: " , cur_prompt )
372372 print ("Answer: " , outputs )
373- # import pdb;pdb.set_trace()
373+
374374 res .append (outputs )
375375 pbar .update (1 )
376376 return res