From 8c1bf812e2189f573eb2b789576a860e4e72b2d1 Mon Sep 17 00:00:00 2001 From: gstrat88 Date: Tue, 11 Nov 2025 06:57:23 +0200 Subject: [PATCH] Update sort_key function to include '.arrow' extension This way, load_dataset will handle locally saved datasets --- src/datasets/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index ae3b9825970..7587b782e64 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -226,7 +226,7 @@ def infer_module_for_data_files_list( def sort_key(ext_count: tuple[tuple[str, bool], int]) -> tuple[int, bool]: """Sort by count and set ".parquet" as the favorite in case of a draw, and ignore metadata files""" (ext, is_metadata), count = ext_count - return (not is_metadata, count, ext == ".parquet", ext == ".jsonl", ext == ".json", ext == ".csv", ext) + return (not is_metadata, count, ext == ".parquet", ext == ".arrow", ext == ".jsonl", ext == ".json", ext == ".csv", ext) for (ext, _), _ in sorted(extensions_counter.items(), key=sort_key, reverse=True): if ext in _EXTENSION_TO_MODULE: