Skip to content

Commit c443608

Browse files
doc: add docstrings for recursive parsers
Signed-off-by: thiswillbeyourgithub <[email protected]>
1 parent 5bb889f commit c443608

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

wdoc/utils/batch_file_loader.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,10 @@ def parse_recursive_paths(
600600
exclude: Optional[List[str]] = None,
601601
**extra_args,
602602
) -> List[Union[DocDict, dict]]:
603+
"""
604+
Turn a DocDict that has `filetype==recursive_paths` into the DocDict of
605+
individual files in that path.
606+
"""
603607
logger.info(f"Parsing recursive load_filetype: '{path}'")
604608
assert recursed_filetype not in [
605609
"recursive_paths",
@@ -676,6 +680,10 @@ def parse_json_entries(
676680
path: Union[str, Path],
677681
**extra_args,
678682
) -> List[Union[DocDict, dict]]:
683+
"""
684+
Turn a DocDict that has `filetype==json_entries` into the individual
685+
DocDict mentioned inside the json file.
686+
"""
679687
logger.info(f"Loading json_entries: '{path}'")
680688
doclist = str(Path(path).read_text()).splitlines()
681689
doclist = [p[1:].strip() if p.startswith("-") else p.strip() for p in doclist]
@@ -708,6 +716,10 @@ def parse_toml_entries(
708716
path: Union[str, Path],
709717
**extra_args,
710718
) -> List[Union[DocDict, dict]]:
719+
"""
720+
Turn a DocDict that has `filetype==toml_entries` into the individual
721+
DocDict mentioned inside the toml file.
722+
"""
711723
logger.info(f"Loading toml_entries: '{path}'")
712724
content = rtoml.load(toml=Path(path))
713725
assert isinstance(content, dict)
@@ -741,6 +753,13 @@ def parse_link_file(
741753
path: Union[str, Path],
742754
**extra_args,
743755
) -> List[DocDict]:
756+
"""
757+
Turn a DocDict that has `filetype==link_file` into the individual
758+
DocDict of each url, where there is one url per line inside the
759+
`link_file` file. Note that bullet points are stripped (i.e. "- [the url]" is
760+
treated the same as "the url"), and commented lines (i.e. starting with "#")
761+
are ignored.
762+
"""
744763
logger.info(f"Loading link_file: '{path}'")
745764
doclist = str(Path(path).read_text()).splitlines()
746765
doclist = [p[1:].strip() if p.startswith("-") else p.strip() for p in doclist]
@@ -774,6 +793,10 @@ def parse_youtube_playlist(
774793
path: Union[str, Path],
775794
**extra_args,
776795
) -> List[DocDict]:
796+
"""
797+
Turn a DocDict that has `filetype==youtube_playlist` into the individual
798+
DocDict of each youtube video that is part of that playlist.
799+
"""
777800
if "\\" in path:
778801
logger.warning(f"Removed backslash found in '{path}'")
779802
path = path.replace("\\", "")
@@ -819,7 +842,9 @@ def parse_ddg_search(
819842
**extra_args,
820843
) -> List[DocDict]:
821844
"""
822-
Perform a DuckDuckGo search and return URLs as documents to be processed.
845+
Turn a DocDict that has `filetype==ddg` into the individual
846+
DocDict of the webpage of each DuckDuckGo search result, treating the
847+
`path` as a search query.
823848
824849
Args:
825850
cli_kwargs: Base CLI arguments to inherit

0 commit comments

Comments
 (0)