@@ -600,6 +600,10 @@ def parse_recursive_paths(
600600 exclude : Optional [List [str ]] = None ,
601601 ** extra_args ,
602602) -> List [Union [DocDict , dict ]]:
603+ """
604+ Turn a DocDict that has `filetype==recursive_paths` into the DocDict of
605+ individual files in that path.
606+ """
603607 logger .info (f"Parsing recursive load_filetype: '{ path } '" )
604608 assert recursed_filetype not in [
605609 "recursive_paths" ,
@@ -676,6 +680,10 @@ def parse_json_entries(
676680 path : Union [str , Path ],
677681 ** extra_args ,
678682) -> List [Union [DocDict , dict ]]:
683+ """
684+ Turn a DocDict that has `filetype==json_entries` into the individual
685+ DocDict mentioned inside the json file.
686+ """
679687 logger .info (f"Loading json_entries: '{ path } '" )
680688 doclist = str (Path (path ).read_text ()).splitlines ()
681689 doclist = [p [1 :].strip () if p .startswith ("-" ) else p .strip () for p in doclist ]
@@ -708,6 +716,10 @@ def parse_toml_entries(
708716 path : Union [str , Path ],
709717 ** extra_args ,
710718) -> List [Union [DocDict , dict ]]:
719+ """
720+ Turn a DocDict that has `filetype==toml_entries` into the individual
721+ DocDict mentioned inside the toml file.
722+ """
711723 logger .info (f"Loading toml_entries: '{ path } '" )
712724 content = rtoml .load (toml = Path (path ))
713725 assert isinstance (content , dict )
@@ -741,6 +753,13 @@ def parse_link_file(
741753 path : Union [str , Path ],
742754 ** extra_args ,
743755) -> List [DocDict ]:
756+ """
757+ Turn a DocDict that has `filetype==link_file` into the individual
758+ DocDict of each url, where there is one url per line inside the
759+ `link_file` file. Note that bullet points are stripped (i.e. "- [the url]" is
760+ treated the same as "the url"), and commented lines (i.e. starting with "#")
761+ are ignored.
762+ """
744763 logger .info (f"Loading link_file: '{ path } '" )
745764 doclist = str (Path (path ).read_text ()).splitlines ()
746765 doclist = [p [1 :].strip () if p .startswith ("-" ) else p .strip () for p in doclist ]
@@ -774,6 +793,10 @@ def parse_youtube_playlist(
774793 path : Union [str , Path ],
775794 ** extra_args ,
776795) -> List [DocDict ]:
796+ """
797+ Turn a DocDict that has `filetype==youtube_playlist` into the individual
798+ DocDict of each YouTube video that is part of that playlist.
799+ """
777800 if "\\ " in path :
778801 logger .warning (f"Removed backslash found in '{ path } '" )
779802 path = path .replace ("\\ " , "" )
@@ -819,7 +842,9 @@ def parse_ddg_search(
819842 ** extra_args ,
820843) -> List [DocDict ]:
821844 """
822- Perform a DuckDuckGo search and return URLs as documents to be processed.
845+ Turn a DocDict that has `filetype==ddg` into the individual
846+ DocDict of the webpage of each DuckDuckGo search result, treating the
847+ `path` as a search query.
823848
824849 Args:
825850 cli_kwargs: Base CLI arguments to inherit
0 commit comments