Skip to content

Commit 5bf59ed

Browse files
committed
Fix #117: allow automatic download of videos packaged in tar format
1 parent 98b3955 commit 5bf59ed

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

lmms_eval/api/task.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -776,7 +776,6 @@ def _download_from_youtube(path):
776776
if accelerator.is_main_process:
777777
force_download = dataset_kwargs.get("force_download", False)
778778
force_unzip = dataset_kwargs.get("force_unzip", False)
779-
print(force_download)
780779
cache_path = snapshot_download(repo_id=self.DATASET_PATH, repo_type="dataset", force_download=force_download, etag_timeout=60)
781780
zip_files = glob(os.path.join(cache_path, "**/*.zip"), recursive=True)
782781
tar_files = glob(os.path.join(cache_path, "**/*.tar*"), recursive=True)
@@ -797,15 +796,11 @@ def untar_video_data(tar_file):
797796

798797

799798
def concat_tar_parts(tar_parts, output_tar):
800-
print("This is the output file:", output_tar, "from:", tar_parts)
801-
try:
802-
with open(output_tar, 'wb') as out_tar:
803-
from tqdm import tqdm
804-
for part in tqdm(sorted(tar_parts)):
805-
with open(part, 'rb') as part_file:
806-
out_tar.write(part_file.read())
807-
except Exception as ex:
808-
print("Error!!!", ex)
799+
with open(output_tar, 'wb') as out_tar:
800+
from tqdm import tqdm
801+
for part in tqdm(sorted(tar_parts)):
802+
with open(part, 'rb') as part_file:
803+
out_tar.write(part_file.read())
809804
eval_logger.info(f"Concatenated parts {tar_parts} into {output_tar}")
810805

811806
# Unzip zip files if needed
@@ -824,7 +819,6 @@ def concat_tar_parts(tar_parts, output_tar):
824819
tar_parts_dict[base_name] = []
825820
tar_parts_dict[base_name].append(tar_file)
826821

827-
print(tar_parts_dict)
828822

829823
# Concatenate and untar split parts
830824
for base_name, parts in tar_parts_dict.items():

0 commit comments

Comments
 (0)