3434 from llava .mm_utils import process_images
3535except ImportError as e :
3636 print (e )
37- # import pdb;pdb.set_trace()
38- eval_logger .debug ("VILA is not installed. Please install VILA to use this model." )
37+
38+        eval_logger.debug(f"VILA is not installed. Please install VILA to use this model. Error: {e}")
3939
4040
4141@register_model ("vila" )
@@ -202,7 +202,7 @@ def load_video(self, video_path, max_frames_num):
202202 return [Image .fromarray (img ) for img in spare_frames ]
203203 except Exception as e :
204204 eval_logger .error (f"Failed to load video { video_path } with error: { e } " )
205- # import pdb;pdb.set_trace()
205+
206206 return [Image .new ("RGB" , (448 , 448 ), (0 , 0 , 0 ))] * max_frames_num
207207
208208 def tok_decode (self , tokens ):
@@ -279,7 +279,7 @@ def generate_until(self, requests) -> List[str]:
279279
280280 for contexts , gen_kwargs , doc_to_visual , doc_id , task , split in [reg .args for reg in requests ]:
281281 # if self.task_dict[task][split][doc_id]["duration"] != "short":
282- # # import pdb;pdb.set_trace()
282+ #
283283 # res.append("A")
284284 # pbar.update(1)
285285 # continue
@@ -289,20 +289,20 @@ def generate_until(self, requests) -> List[str]:
289289
290290 num_video_frames = self .model .config .num_video_frames
291291 videos = []
292- # import pdb;pdb.set_trace()
292+
293293 if self .max_frames_num == 0 :
294294 images = [Image .new ("RGB" , (448 , 448 ), (0 , 0 , 0 ))] * num_video_frames
295295 video = process_images (images , self .model .image_processor , self .model .config ).half ().cuda ()
296296 videos .append (video )
297297 else :
298298 for visual in visuals :
299299 # images, video_loading_succeed = LazySupervisedDataset._load_video(visual, num_video_frames, self.model)
300- # import pdb;pdb.set_trace()
300+
301301 if self .video_decode_backend == "decord" :
302302 images = self .load_video (visual , num_video_frames )
303303 elif self .video_decode_backend == "pyav" :
304304 images = read_video_pyav (visual , num_frm = num_video_frames )
305- # import pdb;pdb.set_trace()
305+
306306 video = process_images (images , self .model .image_processor , self .model .config ).half ().cuda ()
307307 videos .append (video )
308308
@@ -350,7 +350,7 @@ def generate_until(self, requests) -> List[str]:
350350 if "num_beams" not in gen_kwargs :
351351 gen_kwargs ["num_beams" ] = 1
352352
353- # import pdb;pdb.set_trace()
353+
354354 with torch .inference_mode ():
355355 output_ids = self .model .generate (
356356 input_ids = input_ids ,
@@ -370,7 +370,7 @@ def generate_until(self, requests) -> List[str]:
370370 outputs = self .tokenizer .batch_decode (output_ids , skip_special_tokens = True )[0 ].strip ()
371371 print ("Question: " , cur_prompt )
372372 print ("Answer: " , outputs )
373- # import pdb;pdb.set_trace()
373+
374374 res .append (outputs )
375375 pbar .update (1 )
376376 return res