diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 892b8d6dfa0c8..eb0abe1829fc6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -164,3 +164,8 @@ repos: entry: python scripts/no_bool_in_generic.py language: python files: ^pandas/core/generic\.py$ + - id: pandas-errors-documented + name: Ensure pandas errors are documented in doc/source/reference/general_utility_functions.rst + entry: python scripts/pandas_errors_documented.py + language: python + files: ^pandas/errors/__init__.py$ diff --git a/doc/source/reference/general_utility_functions.rst b/doc/source/reference/general_utility_functions.rst index 37fe980dbf68c..ee17ef3831164 100644 --- a/doc/source/reference/general_utility_functions.rst +++ b/doc/source/reference/general_utility_functions.rst @@ -35,14 +35,17 @@ Exceptions and warnings .. autosummary:: :toctree: api/ + errors.AbstractMethodError errors.AccessorRegistrationWarning errors.DtypeWarning errors.DuplicateLabelError errors.EmptyDataError errors.InvalidIndexError + errors.IntCastingNaNError errors.MergeError errors.NullFrequencyError errors.NumbaUtilError + errors.OptionError errors.OutOfBoundsDatetime errors.OutOfBoundsTimedelta errors.ParserError diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 92516a1609f10..56eda37c8122e 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -14,7 +14,7 @@ class IntCastingNaNError(ValueError): """ - raised when attempting an astype operation on an array with NaN to an integer + Raised when attempting an astype operation on an array with NaN to an integer dtype. """ diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py new file mode 100644 index 0000000000000..4024cd24c558e --- /dev/null +++ b/scripts/pandas_errors_documented.py @@ -0,0 +1,52 @@ +""" +Check that doc/source/reference/general_utility_functions.rst documents +all exceptions and warnings in pandas/errors/__init__.py. 
# This is meant to be run as a pre-commit hook - to run it manually, you can do:
#
#     pre-commit run pandas-errors-documented --all-files
from __future__ import annotations

import argparse
import ast
import pathlib
import re
import sys
from typing import Sequence

API_PATH = pathlib.Path("doc/source/reference/general_utility_functions.rst").resolve()

# Matches a reference such as ``errors.ParserError`` and captures the name that
# follows the ``errors.`` prefix.
_DOCUMENTED_ERROR = re.compile(r"\berrors\.(\w+)")


def get_defined_errors(content: str) -> set[str]:
    """
    Collect the names a module defines as classes or pulls in via ``from`` imports.

    Parameters
    ----------
    content : str
        Python source code to inspect.

    Returns
    -------
    set of str
        Names of every class definition found anywhere in the source, plus the
        names bound by ``from ... import ...`` statements.

    Raises
    ------
    SyntaxError
        If ``content`` cannot be parsed by :func:`ast.parse`.
    """
    errors = set()
    for node in ast.walk(ast.parse(content)):
        if isinstance(node, ast.ClassDef):
            errors.add(node.name)
        # ``from __future__ import ...`` names compiler features, not objects
        # exposed by the module, so it must not be reported as undocumented.
        elif isinstance(node, ast.ImportFrom) and node.module != "__future__":
            for alias in node.names:
                # With ``import X as Y`` the module exposes ``Y``, not ``X``.
                errors.add(alias.asname or alias.name)
    return errors


def get_documented_errors(content: str) -> set[str]:
    """
    Collect the names referenced as ``errors.<Name>`` in documentation text.

    Parameters
    ----------
    content : str
        Text of the reference page to inspect.

    Returns
    -------
    set of str
        Every ``<Name>`` that appears directly after an ``errors.`` prefix.
        Text that merely mentions the word "errors" is ignored.
    """
    return set(_DOCUMENTED_ERROR.findall(content))


def main(argv: Sequence[str] | None = None) -> None:
    """
    Compare the names defined in a source file against those listed in API_PATH.

    Always terminates via :func:`sys.exit`: status 1 (after writing the missing
    names to stdout) when at least one defined name is not documented, status 0
    otherwise.

    Parameters
    ----------
    argv : sequence of str, optional
        Command-line arguments; a single positional ``path`` to the Python file
        to check. ``None`` lets argparse fall back to ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path")
    args = parser.parse_args(argv)
    with open(args.path, encoding="utf-8") as f:
        file_errors = get_defined_errors(f.read())
    with open(API_PATH, encoding="utf-8") as f:
        doc_errors = get_documented_errors(f.read())
    missing = file_errors - doc_errors
    if missing:
        # Sorted so the report is stable from run to run.
        sys.stdout.write(
            f"The following exceptions and/or warnings are not documented "
            f"in {API_PATH}: {sorted(missing)}\n"
        )
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()