@@ -603,6 +603,18 @@ def parse_recursive_paths(
603603 """
604604 Turn a DocDict that has `filetype==recursive_paths` into the DocDict of
605605 individual files in that path.
606+
607+ Args:
608+ cli_kwargs: Base CLI arguments to inherit
609+ path: The directory path to search recursively
610+ pattern: Glob pattern to match files (e.g., "*.pdf", "**/*.txt")
611+ recursed_filetype: The filetype to assign to found files
612+ include: Optional list of regex patterns to include files, default=None
613+ exclude: Optional list of regex patterns to exclude files, default=None
614+ **extra_args: Additional arguments to pass to each document
615+
616+ Returns:
617+ List of DocDict or dict objects, each representing a found file
606618 """
607619 logger .info (f"Parsing recursive load_filetype: '{ path } '" )
608620 assert recursed_filetype not in [
@@ -683,6 +695,14 @@ def parse_json_entries(
683695 """
684696 Turn a DocDict that has `filetype==json_entries` into the individual
685697 DocDict mentioned inside the json file.
698+
699+ Args:
700+ cli_kwargs: Base CLI arguments to inherit
701+ path: The path to the JSON file containing document entries
702+ **extra_args: Additional arguments to pass to each document
703+
704+ Returns:
705+ List of DocDict or dict objects, each representing an entry from the JSON file
686706 """
687707 logger .info (f"Loading json_entries: '{ path } '" )
688708 doclist = str (Path (path ).read_text ()).splitlines ()
@@ -719,6 +739,14 @@ def parse_toml_entries(
719739 """
720740 Turn a DocDict that has `filetype==toml_entries` into the individual
721741 DocDict mentioned inside the toml file.
742+
743+ Args:
744+ cli_kwargs: Base CLI arguments to inherit
745+ path: The path to the TOML file containing document entries
746+ **extra_args: Additional arguments to pass to each document
747+
748+ Returns:
749+ List of DocDict or dict objects, each representing an entry from the TOML file
722750 """
723751 logger .info (f"Loading toml_entries: '{ path } '" )
724752 content = rtoml .load (toml = Path (path ))
@@ -759,6 +787,14 @@ def parse_link_file(
759787 `link_file` file. Note that bullet points are stripped (i.e. "- [the url]" is
760788 treated the same as "the url"), and commented lines (i.e. starting with "#")
761789 are ignored.
790+
791+ Args:
792+ cli_kwargs: Base CLI arguments to inherit
793+ path: The path to the link file containing URLs
794+ **extra_args: Additional arguments to pass to each document
795+
796+ Returns:
797+ List of DocDict objects, each representing a URL from the link file
762798 """
763799 logger .info (f"Loading link_file: '{ path } '" )
764800 doclist = str (Path (path ).read_text ()).splitlines ()
@@ -796,6 +832,14 @@ def parse_youtube_playlist(
796832 """
797833 Turn a DocDict that has `filetype==youtube_playlist` into the individual
798834 DocDict of each youtube video part of that playlist.
835+
836+ Args:
837+ cli_kwargs: Base CLI arguments to inherit
838+ path: The YouTube playlist URL
839+ **extra_args: Additional arguments to pass to each document
840+
841+ Returns:
842+ List of DocDict objects, each representing a YouTube video from the playlist
799843 """
800844 if "\\ " in path :
801845 logger .warning (f"Removed backslash found in '{ path } '" )
0 commit comments