@@ -776,7 +776,6 @@ def _download_from_youtube(path):
776776 if accelerator .is_main_process :
777777 force_download = dataset_kwargs .get ("force_download" , False )
778778 force_unzip = dataset_kwargs .get ("force_unzip" , False )
779- print (force_download )
780779 cache_path = snapshot_download (repo_id = self .DATASET_PATH , repo_type = "dataset" , force_download = force_download , etag_timeout = 60 )
781780 zip_files = glob (os .path .join (cache_path , "**/*.zip" ), recursive = True )
782781 tar_files = glob (os .path .join (cache_path , "**/*.tar*" ), recursive = True )
@@ -797,15 +796,11 @@ def untar_video_data(tar_file):
797796
798797
799798 def concat_tar_parts (tar_parts , output_tar ):
800- print ("This is the output file:" , output_tar , "from:" , tar_parts )
801- try :
802- with open (output_tar , 'wb' ) as out_tar :
803- from tqdm import tqdm
804- for part in tqdm (sorted (tar_parts )):
805- with open (part , 'rb' ) as part_file :
806- out_tar .write (part_file .read ())
807- except Exception as ex :
808- print ("Error!!!" , ex )
799+ with open (output_tar , 'wb' ) as out_tar :
800+ from tqdm import tqdm
801+ for part in tqdm (sorted (tar_parts )):
802+ with open (part , 'rb' ) as part_file :
803+ out_tar .write (part_file .read ())
809804 eval_logger .info (f"Concatenated parts { tar_parts } into { output_tar } " )
810805
811806 # Unzip zip files if needed
@@ -824,7 +819,6 @@ def concat_tar_parts(tar_parts, output_tar):
824819 tar_parts_dict [base_name ] = []
825820 tar_parts_dict [base_name ].append (tar_file )
826821
827- print (tar_parts_dict )
828822
829823 # Concatenate and untar split parts
830824 for base_name , parts in tar_parts_dict .items ():
0 commit comments