diff --git a/ci/code_checks.sh b/ci/code_checks.sh index a90774d2e8ff1..5be34eee69a91 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -311,8 +311,8 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA01, SA02, SA03, SA05)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA01,SA02,SA03,SA05 + MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03, SA05)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03,SA05 RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/environment.yml b/environment.yml index e244350a0bea0..5f1184e921119 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,6 @@ dependencies: # documentation - gitpython # obtain contributors from git for whatsnew - sphinx - - numpydoc>=0.9.0 # documentation (jupyter notebooks) - nbconvert>=5.4.1 @@ -105,3 +104,4 @@ dependencies: - tabulate>=0.8.3 # DataFrame.to_markdown - pip: - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master + - git+https://github.com/numpy/numpydoc diff --git a/requirements-dev.txt b/requirements-dev.txt index f4f5fed82662c..c9e4b6a1e3b1e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -16,7 +16,6 @@ mypy==0.730 pycodestyle gitpython sphinx -numpydoc>=0.9.0 nbconvert>=5.4.1 nbsphinx pandoc @@ -70,4 +69,5 @@ sqlalchemy xarray pyreadstat tabulate>=0.8.3 -git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master \ No newline at end of file +git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master +git+https://github.com/numpy/numpydoc \ No newline at end of file diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index a1bccb1dd1629..b11de0c4ad860 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -1,819 +1,52 @@ -import functools import io -import random -import string import textwrap -import numpy as np import pytest import validate_docstrings -import pandas as pd -validate_one = validate_docstrings.validate_one - - -class GoodDocStrings: - """ - Collection of good doc strings. - - This class contains a lot of docstrings that should pass the validation - script without any errors. - """ - - def plot(self, kind, color="blue", **kwargs): - """ - Generate a plot. - - Render the data in the Series as a matplotlib plot of the - specified kind. - - Parameters - ---------- - kind : str - Kind of matplotlib plot. - color : str, default 'blue' - Color name or rgb code. - **kwargs - These parameters will be passed to the matplotlib plotting - function. - """ - pass - - def swap(self, arr, i, j, *args, **kwargs): - """ - Swap two indicies on an array. - - Parameters - ---------- - arr : list - The list having indexes swapped. - i, j : int - The indexes being swapped. - *args, **kwargs - Extraneous parameters are being permitted. - """ - pass - - def sample(self): - """ - Generate and return a random number. - - The value is sampled from a continuous uniform distribution between - 0 and 1. - - Returns - ------- - float - Random number generated. - """ - return random.random() - - @functools.lru_cache(None) - def decorated_sample(self, max): - """ - Generate and return a random integer between 0 and max. - - Parameters - ---------- - max : int - The maximum value of the random number. - - Returns - ------- - int - Random number generated. - """ - return random.randint(0, max) - - def random_letters(self): - """ - Generate and return a sequence of random letters. - - The length of the returned string is also random, and is also - returned. - - Returns - ------- - length : int - Length of the returned string. - letters : str - String of random letters. - """ - length = random.randint(1, 10) - letters = "".join(random.sample(string.ascii_lowercase, length)) - return length, letters - - def sample_values(self): - """ - Generate an infinite sequence of random numbers. - - The values are sampled from a continuous uniform distribution between - 0 and 1. - - Yields - ------ - float - Random number generated. - """ - while True: - yield random.random() - - def head(self): - """ - Return the first 5 elements of the Series. - - This function is mainly useful to preview the values of the - Series without displaying the whole of it. - - Returns - ------- - Series - Subset of the original series with the 5 first values. - - See Also - -------- - Series.tail : Return the last 5 elements of the Series. - Series.iloc : Return a slice of the elements in the Series, - which can also be used to return the first or last n. - """ - return self.iloc[:5] - - def head1(self, n=5): - """ - Return the first elements of the Series. - - This function is mainly useful to preview the values of the - Series without displaying the whole of it. - - Parameters - ---------- - n : int - Number of values to return. - - Returns - ------- - Series - Subset of the original series with the n first values. - - See Also - -------- - tail : Return the last n elements of the Series. - - Examples - -------- - >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon']) - >>> s.head() - 0 Ant - 1 Bear - 2 Cow - 3 Dog - 4 Falcon - dtype: object - - With the `n` parameter, we can change the number of returned rows: - - >>> s.head(n=3) - 0 Ant - 1 Bear - 2 Cow - dtype: object - """ - return self.iloc[:n] - - def contains(self, pat, case=True, na=np.nan): - """ - Return whether each value contains `pat`. - - In this case, we are illustrating how to use sections, even - if the example is simple enough and does not require them. - - Parameters - ---------- - pat : str - Pattern to check for within each element. - case : bool, default True - Whether check should be done with case sensitivity. - na : object, default np.nan - Fill value for missing data. - - Examples - -------- - >>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan]) - >>> s.str.contains(pat='a') - 0 False - 1 False - 2 True - 3 NaN - dtype: object - - **Case sensitivity** - - With `case_sensitive` set to `False` we can match `a` with both - `a` and `A`: - - >>> s.str.contains(pat='a', case=False) - 0 True - 1 False - 2 True - 3 NaN - dtype: object - - **Missing values** - - We can fill missing values in the output using the `na` parameter: - - >>> s.str.contains(pat='a', na=False) - 0 False - 1 False - 2 True - 3 False - dtype: bool - """ - pass - - def mode(self, axis, numeric_only): - """ - Ensure reST directives don't affect checks for leading periods. - - Parameters - ---------- - axis : str - Sentence ending in period, followed by single directive. - - .. versionchanged:: 0.1.2 - - numeric_only : bool - Sentence ending in period, followed by multiple directives. - - .. versionadded:: 0.1.2 - .. deprecated:: 0.00.0 - A multiline description, - which spans another line. - """ - pass - - def good_imports(self): - """ - Ensure import other than numpy and pandas are fine. - - Examples - -------- - This example does not import pandas or import numpy. - >>> import datetime - >>> datetime.MAXYEAR - 9999 - """ - pass - - def no_returns(self): - """ - Say hello and have no returns. - """ - pass - - def empty_returns(self): - """ - Say hello and always return None. - - Since this function never returns a value, this - docstring doesn't need a return section. - """ - - def say_hello(): - return "Hello World!" - - say_hello() - if True: - return - else: - return None - - def multiple_variables_on_one_line(self, matrix, a, b, i, j): - """ - Swap two values in a matrix. - - Parameters - ---------- - matrix : list of list - A double list that represents a matrix. - a, b : int - The indicies of the first value. - i, j : int - The indicies of the second value. - """ - pass - - -class BadGenericDocStrings: - """Everything here has a bad docstring - """ - - def func(self): - - """Some function. - - With several mistakes in the docstring. - - It has a blank like after the signature `def func():`. - - The text 'Some function' should go in the line after the - opening quotes of the docstring, not in the same line. - - There is a blank line between the docstring and the first line - of code `foo = 1`. - - The closing quotes should be in the next line, not in this one.""" - - foo = 1 - bar = 2 - return foo + bar - - def astype(self, dtype): - """ - Casts Series type. - - Verb in third-person of the present simple, should be infinitive. - """ - pass - - def astype1(self, dtype): - """ - Method to cast Series type. - - Does not start with verb. - """ - pass - - def astype2(self, dtype): - """ - Cast Series type - - Missing dot at the end. - """ - pass - - def astype3(self, dtype): - """ - Cast Series type from its current type to the new type defined in - the parameter dtype. - - Summary is too verbose and doesn't fit in a single line. - """ - pass - - def two_linebreaks_between_sections(self, foo): - """ - Test linebreaks message GL03. - - Note 2 blank lines before parameters section. - - - Parameters - ---------- - foo : str - Description of foo parameter. - """ - pass - - def linebreak_at_end_of_docstring(self, foo): - """ - Test linebreaks message GL03. - - Note extra blank line at end of docstring. - - Parameters - ---------- - foo : str - Description of foo parameter. - - """ - pass - - def plot(self, kind, **kwargs): - """ - Generate a plot. - - Render the data in the Series as a matplotlib plot of the - specified kind. - - Note the blank line between the parameters title and the first - parameter. Also, note that after the name of the parameter `kind` - and before the colon, a space is missing. - - Also, note that the parameter descriptions do not start with a - capital letter, and do not finish with a dot. - - Finally, the `**kwargs` parameter is missing. - - Parameters - ---------- - - kind: str - kind of matplotlib plot - """ - pass - - def method(self, foo=None, bar=None): - """ - A sample DataFrame method. - - Do not import numpy and pandas. - - Try to use meaningful data, when it makes the example easier - to understand. - - Try to avoid positional arguments like in `df.method(1)`. They - can be alright if previously defined with a meaningful name, - like in `present_value(interest_rate)`, but avoid them otherwise. - - When presenting the behavior with different parameters, do not place - all the calls one next to the other. Instead, add a short sentence - explaining what the example shows. - - Examples - -------- - >>> import numpy as np - >>> import pandas as pd - >>> df = pd.DataFrame(np.ones((3, 3)), - ... columns=('a', 'b', 'c')) - >>> df.all(1) - 0 True - 1 True - 2 True - dtype: bool - >>> df.all(bool_only=True) - Series([], dtype: bool) - """ - pass - - def private_classes(self): - """ - This mentions NDFrame, which is not correct. - """ - - def unknown_section(self): - """ - This section has an unknown section title. - - Unknown Section - --------------- - This should raise an error in the validation. - """ - - def sections_in_wrong_order(self): - """ - This docstring has the sections in the wrong order. - - Parameters - ---------- - name : str - This section is in the right position. - - Examples - -------- - >>> print('So far Examples is good, as it goes before Parameters') - So far Examples is good, as it goes before Parameters - - See Also - -------- - function : This should generate an error, as See Also needs to go - before Examples. - """ - - def deprecation_in_wrong_order(self): - """ - This docstring has the deprecation warning in the wrong order. - - This is the extended summary. The correct order should be - summary, deprecation warning, extended summary. - - .. deprecated:: 1.0 - This should generate an error as it needs to go before - extended summary. - """ - - def method_wo_docstrings(self): - pass - - def directives_without_two_colons(self, first, second): - """ - Ensure reST directives have trailing colons. - - Parameters - ---------- - first : str - Sentence ending in period, followed by single directive w/o colons. - - .. versionchanged 0.1.2 - - second : bool - Sentence ending in period, followed by multiple directives w/o - colons. - - .. versionadded 0.1.2 - .. deprecated 0.00.0 - - """ - pass - - -class BadSummaries: - def wrong_line(self): - """Exists on the wrong line""" - pass - - def no_punctuation(self): - """ - Has the right line but forgets punctuation - """ - pass - - def no_capitalization(self): - """ - provides a lowercase summary. - """ - pass - - def no_infinitive(self): - """ - Started with a verb that is not infinitive. - """ - - def multi_line(self): - """ - Extends beyond one line - which is not correct. - """ - - def two_paragraph_multi_line(self): - """ - Extends beyond one line - which is not correct. - - Extends beyond one line, which in itself is correct but the - previous short summary should still be an issue. - """ - - -class BadParameters: - """ - Everything here has a problem with its Parameters section. - """ - - def missing_params(self, kind, **kwargs): - """ - Lacks kwargs in Parameters. - - Parameters - ---------- - kind : str - Foo bar baz. - """ - - def bad_colon_spacing(self, kind): - """ - Has bad spacing in the type line. - - Parameters - ---------- - kind: str - Needs a space after kind. - """ - - def no_description_period(self, kind): - """ - Forgets to add a period to the description. - - Parameters - ---------- - kind : str - Doesn't end with a dot - """ - - def no_description_period_with_directive(self, kind): - """ - Forgets to add a period, and also includes a directive. - - Parameters - ---------- - kind : str - Doesn't end with a dot - - .. versionadded:: 0.00.0 - """ - - def no_description_period_with_directives(self, kind): - """ - Forgets to add a period, and also includes multiple directives. - - Parameters - ---------- - kind : str - Doesn't end with a dot - - .. versionchanged:: 0.00.0 - .. deprecated:: 0.00.0 - """ - - def parameter_capitalization(self, kind): - """ - Forgets to capitalize the description. - - Parameters - ---------- - kind : str - this is not capitalized. - """ - - def blank_lines(self, kind): - """ - Adds a blank line after the section header. - - Parameters - ---------- - - kind : str - Foo bar baz. - """ - pass - - def integer_parameter(self, kind): - """ - Uses integer instead of int. - - Parameters - ---------- - kind : integer - Foo bar baz. - """ - pass - - def string_parameter(self, kind): - """ - Uses string instead of str. - - Parameters - ---------- - kind : string - Foo bar baz. - """ - pass - - def boolean_parameter(self, kind): - """ - Uses boolean instead of bool. - - Parameters - ---------- - kind : boolean - Foo bar baz. - """ - pass - - def list_incorrect_parameter_type(self, kind): - """ - Uses list of boolean instead of list of bool. - - Parameters - ---------- - kind : list of boolean, integer, float or string - Foo bar baz. - """ - pass - - def bad_parameter_spacing(self, a, b): - """ - The parameters on the same line have an extra space between them. - - Parameters - ---------- - a, b : int - Foo bar baz. - """ - pass - - -class BadReturns: - def return_not_documented(self): - """ - Lacks section for Returns - """ - return "Hello world!" - - def yield_not_documented(self): - """ - Lacks section for Yields - """ - yield "Hello world!" - - def no_type(self): - """ - Returns documented but without type. - - Returns - ------- - Some value. - """ - return "Hello world!" - - def no_description(self): - """ - Provides type but no description. - - Returns - ------- - str - """ - return "Hello world!" - - def no_punctuation(self): - """ - Provides type and description but no period. - - Returns - ------- - str - A nice greeting - """ - return "Hello world!" - - def named_single_return(self): - """ - Provides name but returns only one value. - - Returns - ------- - s : str - A nice greeting. - """ - return "Hello world!" - - def no_capitalization(self): - """ - Forgets capitalization in return values description. - - Returns - ------- - foo : str - The first returned string. - bar : str - the second returned string. - """ - return "Hello", "World!" +class BadDocstrings: + """Everything here has a bad docstring + """ - def no_period_multi(self): + def private_classes(self): """ - Forgets period in return values description. - - Returns - ------- - foo : str - The first returned string - bar : str - The second returned string. + This mentions NDFrame, which is not correct. """ - return "Hello", "World!" - -class BadSeeAlso: - def desc_no_period(self): + def prefix_pandas(self): """ - Return the first 5 elements of the Series. + Have `pandas` prefix in See Also section. See Also -------- - Series.tail : Return the last 5 elements of the Series. - Series.iloc : Return a slice of the elements in the Series, - which can also be used to return the first or last n + pandas.Series.rename : Alter Series index labels or name. + DataFrame.head : The first `n` rows of the caller object. """ pass - def desc_first_letter_lowercase(self): - """ - Return the first 5 elements of the Series. - - See Also - -------- - Series.tail : return the last 5 elements of the Series. - Series.iloc : Return a slice of the elements in the Series, - which can also be used to return the first or last n. + def redundant_import(self, foo=None, bar=None): """ - pass + A sample DataFrame method. - def prefix_pandas(self): - """ - Have `pandas` prefix in See Also section. + Should not import numpy and pandas. - See Also + Examples -------- - pandas.Series.rename : Alter Series index labels or name. - DataFrame.head : The first `n` rows of the caller object. + >>> import numpy as np + >>> import pandas as pd + >>> df = pd.DataFrame(np.ones((3, 3)), + ... columns=('a', 'b', 'c')) + >>> df.all(1) + 0 True + 1 True + 2 True + dtype: bool + >>> df.all(bool_only=True) + Series([], dtype: bool) """ pass - -class BadExamples: def unused_import(self): """ Examples @@ -877,59 +110,9 @@ def _import_path(self, klass=None, func=None): return base_path - def test_good_class(self, capsys): - errors = validate_one(self._import_path(klass="GoodDocStrings"))["errors"] - assert isinstance(errors, list) - assert not errors - - @pytest.mark.parametrize( - "func", - [ - "plot", - "swap", - "sample", - "decorated_sample", - "random_letters", - "sample_values", - "head", - "head1", - "contains", - "mode", - "good_imports", - "no_returns", - "empty_returns", - "multiple_variables_on_one_line", - ], - ) - def test_good_functions(self, capsys, func): - errors = validate_one(self._import_path(klass="GoodDocStrings", func=func))[ - "errors" - ] - assert isinstance(errors, list) - assert not errors - def test_bad_class(self, capsys): - errors = validate_one(self._import_path(klass="BadGenericDocStrings"))["errors"] - assert isinstance(errors, list) - assert errors - - @pytest.mark.parametrize( - "func", - [ - "func", - "astype", - "astype1", - "astype2", - "astype3", - "plot", - "method", - "private_classes", - "directives_without_two_colons", - ], - ) - def test_bad_generic_functions(self, capsys, func): - errors = validate_one( - self._import_path(klass="BadGenericDocStrings", func=func) # noqa:F821 + errors = validate_docstrings.pandas_validate( + self._import_path(klass="BadDocstrings") )["errors"] assert isinstance(errors, list) assert errors @@ -937,9 +120,8 @@ def test_bad_generic_functions(self, capsys, func): @pytest.mark.parametrize( "klass,func,msgs", [ - # See Also tests ( - "BadGenericDocStrings", + "BadDocstrings", "private_classes", ( "Private classes (NDFrame) should not be mentioned in public " @@ -947,200 +129,31 @@ def test_bad_generic_functions(self, capsys, func): ), ), ( - "BadGenericDocStrings", - "unknown_section", - ('Found unknown section "Unknown Section".',), - ), - ( - "BadGenericDocStrings", - "sections_in_wrong_order", - ( - "Sections are in the wrong order. Correct order is: Parameters, " - "See Also, Examples", - ), - ), - ( - "BadGenericDocStrings", - "deprecation_in_wrong_order", - ("Deprecation warning should precede extended summary",), - ), - ( - "BadGenericDocStrings", - "directives_without_two_colons", - ( - "reST directives ['versionchanged', 'versionadded', " - "'deprecated'] must be followed by two colons", - ), - ), - ( - "BadSeeAlso", - "desc_no_period", - ('Missing period at end of description for See Also "Series.iloc"',), - ), - ( - "BadSeeAlso", - "desc_first_letter_lowercase", - ('should be capitalized for See Also "Series.tail"',), - ), - # Summary tests - ( - "BadSummaries", - "wrong_line", - ("should start in the line immediately after the opening quotes",), - ), - ("BadSummaries", "no_punctuation", ("Summary does not end with a period",)), - ( - "BadSummaries", - "no_capitalization", - ("Summary does not start with a capital letter",), - ), - ( - "BadSummaries", - "no_capitalization", - ("Summary must start with infinitive verb",), - ), - ("BadSummaries", "multi_line", ("Summary should fit in a single line",)), - ( - "BadSummaries", - "two_paragraph_multi_line", - ("Summary should fit in a single line",), - ), - # Parameters tests - ( - "BadParameters", - "missing_params", - ("Parameters {**kwargs} not documented",), - ), - ( - "BadParameters", - "bad_colon_spacing", - ( - 'Parameter "kind" requires a space before the colon ' - "separating the parameter name and type", - ), - ), - ( - "BadParameters", - "no_description_period", - ('Parameter "kind" description should finish with "."',), - ), - ( - "BadParameters", - "no_description_period_with_directive", - ('Parameter "kind" description should finish with "."',), - ), - ( - "BadParameters", - "parameter_capitalization", - ('Parameter "kind" description should start with a capital letter',), - ), - ( - "BadParameters", - "integer_parameter", - ('Parameter "kind" type should use "int" instead of "integer"',), - ), - ( - "BadParameters", - "string_parameter", - ('Parameter "kind" type should use "str" instead of "string"',), - ), - ( - "BadParameters", - "boolean_parameter", - ('Parameter "kind" type should use "bool" instead of "boolean"',), - ), - ( - "BadParameters", - "list_incorrect_parameter_type", - ('Parameter "kind" type should use "bool" instead of "boolean"',), - ), - ( - "BadParameters", - "list_incorrect_parameter_type", - ('Parameter "kind" type should use "int" instead of "integer"',), - ), - ( - "BadParameters", - "list_incorrect_parameter_type", - ('Parameter "kind" type should use "str" instead of "string"',), - ), - ( - "BadParameters", - "bad_parameter_spacing", - ("Parameters {b} not documented", "Unknown parameters { b}"), - ), - pytest.param( - "BadParameters", - "blank_lines", - ("No error yet?",), - marks=pytest.mark.xfail, - ), - # Returns tests - ("BadReturns", "return_not_documented", ("No Returns section found",)), - ("BadReturns", "yield_not_documented", ("No Yields section found",)), - pytest.param("BadReturns", "no_type", ("foo",), marks=pytest.mark.xfail), - ("BadReturns", "no_description", ("Return value has no description",)), - ( - "BadReturns", - "no_punctuation", - ('Return value description should finish with "."',), - ), - ( - "BadReturns", - "named_single_return", + "BadDocstrings", + "prefix_pandas", ( - "The first line of the Returns section should contain only the " - "type, unless multiple values are being returned", + "pandas.Series.rename in `See Also` section " + "does not need `pandas` prefix", ), ), - ( - "BadReturns", - "no_capitalization", - ("Return value description should start with a capital letter",), - ), - ( - "BadReturns", - "no_period_multi", - ('Return value description should finish with "."',), - ), # Examples tests ( - "BadGenericDocStrings", - "method", + "BadDocstrings", + "redundant_import", ("Do not import numpy, as it is imported automatically",), ), ( - "BadGenericDocStrings", - "method", + "BadDocstrings", + "redundant_import", ("Do not import pandas, as it is imported automatically",), ), ( - "BadGenericDocStrings", - "method_wo_docstrings", - ("The object does not have a docstring",), - ), - # See Also tests - ( - "BadSeeAlso", - "prefix_pandas", - ( - "pandas.Series.rename in `See Also` section " - "does not need `pandas` prefix", - ), - ), - # Examples tests - ( - "BadExamples", + "BadDocstrings", "unused_import", ("flake8 error: F401 'pandas as pdf' imported but unused",), ), ( - "BadExamples", - "indentation_is_not_a_multiple_of_four", - ("flake8 error: E111 indentation is not a multiple of four",), - ), - ( - "BadExamples", + "BadDocstrings", "missing_whitespace_around_arithmetic_operator", ( "flake8 error: " @@ -1148,39 +161,28 @@ def test_bad_generic_functions(self, capsys, func): ), ), ( - "BadExamples", - "missing_whitespace_after_comma", - ("flake8 error: E231 missing whitespace after ',' (3 times)",), - ), - ( - "BadGenericDocStrings", - "two_linebreaks_between_sections", - ( - "Double line break found; please use only one blank line to " - "separate sections or paragraphs, and do not leave blank lines " - "at the end of docstrings", - ), + "BadDocstrings", + "indentation_is_not_a_multiple_of_four", + ("flake8 error: E111 indentation is not a multiple of four",), ), ( - "BadGenericDocStrings", - "linebreak_at_end_of_docstring", - ( - "Double line break found; please use only one blank line to " - "separate sections or paragraphs, and do not leave blank lines " - "at the end of docstrings", - ), + "BadDocstrings", + "missing_whitespace_after_comma", + ("flake8 error: E231 missing whitespace after ',' (3 times)",), ), ], ) def test_bad_docstrings(self, capsys, klass, func, msgs): - result = validate_one(self._import_path(klass=klass, func=func)) + result = validate_docstrings.pandas_validate( + self._import_path(klass=klass, func=func) + ) for msg in msgs: assert msg in " ".join(err[1] for err in result["errors"]) def test_validate_all_ignore_deprecated(self, monkeypatch): monkeypatch.setattr( validate_docstrings, - "validate_one", + "pandas_validate", lambda func_name: { "docstring": "docstring1", "errors": [ @@ -1285,50 +287,22 @@ def test_item_subsection(self, idx, subsection): assert result[idx][3] == subsection -class TestDocstringClass: - @pytest.mark.parametrize( - "name, expected_obj", - [ - ("pandas.isnull", pd.isnull), - ("pandas.DataFrame", pd.DataFrame), - ("pandas.Series.sum", pd.Series.sum), - ], - ) - def test_resolves_class_name(self, name, expected_obj): - d = validate_docstrings.Docstring(name) - assert d.obj is expected_obj - - @pytest.mark.parametrize("invalid_name", ["panda", "panda.DataFrame"]) - def test_raises_for_invalid_module_name(self, invalid_name): - msg = f'No module can be imported from "{invalid_name}"' - with pytest.raises(ImportError, match=msg): - validate_docstrings.Docstring(invalid_name) - - @pytest.mark.parametrize( - "invalid_name", ["pandas.BadClassName", "pandas.Series.bad_method_name"] - ) - def test_raises_for_invalid_attribute_name(self, invalid_name): - name_components = invalid_name.split(".") - obj_name, invalid_attr_name = name_components[-2], name_components[-1] - msg = f"'{obj_name}' has no attribute '{invalid_attr_name}'" - with pytest.raises(AttributeError, match=msg): - validate_docstrings.Docstring(invalid_name) - +class TestPandasDocstringClass: @pytest.mark.parametrize( "name", ["pandas.Series.str.isdecimal", "pandas.Series.str.islower"] ) def test_encode_content_write_to_file(self, name): # GH25466 - docstr = validate_docstrings.Docstring(name).validate_pep8() + docstr = validate_docstrings.PandasDocstring(name).validate_pep8() # the list of pep8 errors should be empty assert not list(docstr) class TestMainFunction: - def test_exit_status_for_validate_one(self, monkeypatch): + def test_exit_status_for_main(self, monkeypatch): monkeypatch.setattr( validate_docstrings, - "validate_one", + "pandas_validate", lambda func_name: { "docstring": "docstring1", "errors": [ @@ -1336,8 +310,7 @@ def test_exit_status_for_validate_one(self, monkeypatch): ("ER02", "err desc"), ("ER03", "err desc"), ], - "warnings": [], - "examples_errors": "", + "examples_errs": "", }, ) exit_status = validate_docstrings.main( diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index bcf3fd5d276f5..079e9a16cfd13 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -14,19 +14,14 @@ $ ./validate_docstrings.py pandas.DataFrame.head """ import argparse -import ast import doctest -import functools import glob import importlib -import inspect import json import os -import pydoc -import re import sys import tempfile -import textwrap +from typing import List, Optional import flake8.main.application @@ -52,87 +47,15 @@ import pandas # noqa: E402 isort:skip sys.path.insert(1, os.path.join(BASE_PATH, "doc", "sphinxext")) -from numpydoc.docscrape import NumpyDocString # noqa: E402 isort:skip -from pandas.io.formats.printing import pprint_thing # noqa: E402 isort:skip +from numpydoc.validate import validate, Docstring # noqa: E402 isort:skip PRIVATE_CLASSES = ["NDFrame", "IndexOpsMixin"] -DIRECTIVES = ["versionadded", "versionchanged", "deprecated"] -DIRECTIVE_PATTERN = re.compile(rf"^\s*\.\. ({'|'.join(DIRECTIVES)})(?!::)", re.I | re.M) -ALLOWED_SECTIONS = [ - "Parameters", - "Attributes", - "Methods", - "Returns", - "Yields", - "Other Parameters", - "Raises", - "Warns", - "See Also", - "Notes", - "References", - "Examples", -] ERROR_MSGS = { - "GL01": "Docstring text (summary) should start in the line immediately " - "after the opening quotes (not in the same line, or leaving a " - "blank line in between)", - "GL02": "Closing quotes should be placed in the line after the last text " - "in the docstring (do not close the quotes in the same line as " - "the text, or leave a blank line between the last text and the " - "quotes)", - "GL03": "Double line break found; please use only one blank line to " - "separate sections or paragraphs, and do not leave blank lines " - "at the end of docstrings", "GL04": "Private classes ({mentioned_private_classes}) should not be " "mentioned in public docstrings", - "GL05": 'Tabs found at the start of line "{line_with_tabs}", please use ' - "whitespace only", - "GL06": 'Found unknown section "{section}". Allowed sections are: ' - "{allowed_sections}", - "GL07": "Sections are in the wrong order. Correct order is: {correct_sections}", - "GL08": "The object does not have a docstring", - "GL09": "Deprecation warning should precede extended summary", - "GL10": "reST directives {directives} must be followed by two colons", - "SS01": "No summary found (a short summary in a single line should be " - "present at the beginning of the docstring)", - "SS02": "Summary does not start with a capital letter", - "SS03": "Summary does not end with a period", - "SS04": "Summary contains heading whitespaces", - "SS05": "Summary must start with infinitive verb, not third person " - '(e.g. use "Generate" instead of "Generates")', - "SS06": "Summary should fit in a single line", - "ES01": "No extended summary found", - "PR01": "Parameters {missing_params} not documented", - "PR02": "Unknown parameters {unknown_params}", - "PR03": "Wrong parameters order. Actual: {actual_params}. " - "Documented: {documented_params}", - "PR04": 'Parameter "{param_name}" has no type', - "PR05": 'Parameter "{param_name}" type should not finish with "."', - "PR06": 'Parameter "{param_name}" type should use "{right_type}" instead ' - 'of "{wrong_type}"', - "PR07": 'Parameter "{param_name}" has no description', - "PR08": 'Parameter "{param_name}" description should start with a ' - "capital letter", - "PR09": 'Parameter "{param_name}" description should finish with "."', - "PR10": 'Parameter "{param_name}" requires a space before the colon ' - "separating the parameter name and type", - "RT01": "No Returns section found", - "RT02": "The first line of the Returns section should contain only the " - "type, unless multiple values are being returned", - "RT03": "Return value has no description", - "RT04": "Return value description should start with a capital letter", - "RT05": 'Return value description should finish with "."', - "YD01": "No Yields section found", - "SA01": "See Also section not found", - "SA02": "Missing period at end of description for See Also " - '"{reference_name}" reference', - "SA03": "Description should be capitalized for See Also " - '"{reference_name}" reference', - "SA04": 'Missing description for See Also "{reference_name}" reference', "SA05": "{reference_name} in `See Also` section does not need `pandas` " "prefix, use {right_reference} instead.", - "EX01": "No examples section found", "EX02": "Examples do not pass tests:\n{doctest_log}", "EX03": "flake8 error: {error_code} {error_message}{times_happening}", "EX04": "Do not import {imported_library}, as it is imported " @@ -140,29 +63,10 @@ } -def error(code, **kwargs): +def pandas_error(code, **kwargs): """ - Return a tuple with the error code and the message with variables replaced. - - This is syntactic sugar so instead of: - - `('EX02', ERROR_MSGS['EX02'].format(doctest_log=log))` - - We can simply use: - - `error('EX02', doctest_log=log)` - - Parameters - ---------- - code : str - Error code. - **kwargs - Values for the variables in the error messages - - Returns - ------- - code : str - Error code. - message : str - Error message with variables replaced. + Copy of the numpydoc error function, since ERROR_MSGS can't be updated + with our custom errors yet. """ return (code, ERROR_MSGS[code].format(**kwargs)) @@ -239,347 +143,7 @@ def get_api_items(api_doc_fd): previous_line = line -class Docstring: - def __init__(self, name): - self.name = name - obj = self._load_obj(name) - self.obj = obj - self.code_obj = self._to_original_callable(obj) - self.raw_doc = obj.__doc__ or "" - self.clean_doc = pydoc.getdoc(obj) - self.doc = NumpyDocString(self.clean_doc) - - def __len__(self) -> int: - return len(self.raw_doc) - - @staticmethod - def _load_obj(name): - """ - Import Python object from its name as string. - - Parameters - ---------- - name : str - Object name to import (e.g. pandas.Series.str.upper) - - Returns - ------- - object - Python object that can be a class, method, function... - - Examples - -------- - >>> Docstring._load_obj('pandas.Series') - - """ - for maxsplit in range(1, name.count(".") + 1): - # TODO when py3 only replace by: module, *func_parts = ... - func_name_split = name.rsplit(".", maxsplit) - module = func_name_split[0] - func_parts = func_name_split[1:] - try: - obj = importlib.import_module(module) - except ImportError: - pass - else: - continue - - if "obj" not in locals(): - raise ImportError(f'No module can be imported from "{name}"') - - for part in func_parts: - obj = getattr(obj, part) - return obj - - @staticmethod - def _to_original_callable(obj): - """ - Find the Python object that contains the source code of the object. - - This is useful to find the place in the source code (file and line - number) where a docstring is defined. It does not currently work for - all cases, but it should help find some (properties...). - """ - while True: - if inspect.isfunction(obj) or inspect.isclass(obj): - f = inspect.getfile(obj) - if f.startswith("<") and f.endswith(">"): - return None - return obj - if inspect.ismethod(obj): - obj = obj.__func__ - elif isinstance(obj, functools.partial): - obj = obj.func - elif isinstance(obj, property): - obj = obj.fget - else: - return None - - @property - def type(self): - return type(self.obj).__name__ - - @property - def is_function_or_method(self): - # TODO(py27): remove ismethod - return inspect.isfunction(self.obj) or inspect.ismethod(self.obj) - - @property - def source_file_name(self): - """ - File name where the object is implemented (e.g. pandas/core/frame.py). - """ - try: - fname = inspect.getsourcefile(self.code_obj) - except TypeError: - # In some cases the object is something complex like a cython - # object that can't be easily introspected. An it's better to - # return the source code file of the object as None, than crash - pass - else: - if fname: - fname = os.path.relpath(fname, BASE_PATH) - return fname - - @property - def source_file_def_line(self): - """ - Number of line where the object is defined in its file. - """ - try: - return inspect.getsourcelines(self.code_obj)[-1] - except (OSError, TypeError): - # In some cases the object is something complex like a cython - # object that can't be easily introspected. An it's better to - # return the line number as None, than crash - pass - - @property - def github_url(self): - url = "https://github.com/pandas-dev/pandas/blob/master/" - url += f"{self.source_file_name}#L{self.source_file_def_line}" - return url - - @property - def start_blank_lines(self): - i = None - if self.raw_doc: - for i, row in enumerate(self.raw_doc.split("\n")): - if row.strip(): - break - return i - - @property - def end_blank_lines(self): - i = None - if self.raw_doc: - for i, row in enumerate(reversed(self.raw_doc.split("\n"))): - if row.strip(): - break - return i - - @property - def double_blank_lines(self): - prev = True - for row in self.raw_doc.split("\n"): - if not prev and not row.strip(): - return True - prev = row.strip() - return False - - @property - def section_titles(self): - sections = [] - self.doc._doc.reset() - while not self.doc._doc.eof(): - content = self.doc._read_to_next_section() - if ( - len(content) > 1 - and len(content[0]) == len(content[1]) - and set(content[1]) == {"-"} - ): - sections.append(content[0]) - return sections - - @property - def summary(self): - return " ".join(self.doc["Summary"]) - - @property - def num_summary_lines(self): - return len(self.doc["Summary"]) - - @property - def extended_summary(self): - if not self.doc["Extended Summary"] and len(self.doc["Summary"]) > 1: - return " ".join(self.doc["Summary"]) - return " ".join(self.doc["Extended Summary"]) - - @property - def needs_summary(self): - return not (bool(self.summary) and bool(self.extended_summary)) - - @property - def doc_parameters(self): - parameters = {} - for names, type_, desc in self.doc["Parameters"]: - for name in names.split(", "): - parameters[name] = (type_, "".join(desc)) - return parameters - - @property - def signature_parameters(self): - def add_stars(param_name: str, info: inspect.Parameter): - """ - Add stars to *args and **kwargs parameters - """ - if info.kind == inspect.Parameter.VAR_POSITIONAL: - return f"*{param_name}" - elif info.kind == inspect.Parameter.VAR_KEYWORD: - return f"**{param_name}" - else: - return param_name - - if inspect.isclass(self.obj): - if hasattr(self.obj, "_accessors") and ( - self.name.split(".")[-1] in self.obj._accessors - ): - # accessor classes have a signature but don't want to show this - return tuple() - try: - sig = inspect.signature(self.obj) - except (TypeError, ValueError): - # Some objects, mainly in C extensions do not support introspection - # of the signature - return tuple() - - params = tuple( - add_stars(parameter, sig.parameters[parameter]) - for parameter in sig.parameters - ) - if params and params[0] in ("self", "cls"): - return params[1:] - return params - - @property - def parameter_mismatches(self): - errs = [] - signature_params = self.signature_parameters - doc_params = tuple(self.doc_parameters) - missing = set(signature_params) - set(doc_params) - if missing: - errs.append(error("PR01", missing_params=pprint_thing(missing))) - extra = set(doc_params) - set(signature_params) - if extra: - errs.append(error("PR02", unknown_params=pprint_thing(extra))) - if ( - not missing - and not extra - and signature_params != doc_params - and not (not signature_params and not doc_params) - ): - errs.append( - error( - "PR03", actual_params=signature_params, documented_params=doc_params - ) - ) - - return errs - - @property - def correct_parameters(self): - return not bool(self.parameter_mismatches) - - @property - def directives_without_two_colons(self): - return DIRECTIVE_PATTERN.findall(self.raw_doc) - - def parameter_type(self, param): - return self.doc_parameters[param][0] - - def parameter_desc(self, param): - desc = self.doc_parameters[param][1] - # Find and strip out any sphinx directives - for directive in DIRECTIVES: - full_directive = f".. {directive}" - if full_directive in desc: - # Only retain any description before the directive - desc = desc[: desc.index(full_directive)] - return desc - - @property - def see_also(self): - result = {} - for funcs, desc in self.doc["See Also"]: - for func, _ in funcs: - result[func] = "".join(desc) - - return result - - @property - def examples(self): - return self.doc["Examples"] - - @property - def returns(self): - return self.doc["Returns"] - - @property - def yields(self): - return self.doc["Yields"] - - @property - def method_source(self): - try: - source = inspect.getsource(self.obj) - except TypeError: - return "" - return textwrap.dedent(source) - - @property - def method_returns_something(self): - """ - Check if the docstrings method can return something. - - Bare returns, returns valued None and returns from nested functions are - disconsidered. - - Returns - ------- - bool - Whether the docstrings method can return something. - """ - - def get_returns_not_on_nested_functions(node): - returns = [node] if isinstance(node, ast.Return) else [] - for child in ast.iter_child_nodes(node): - # Ignore nested functions and its subtrees. - if not isinstance(child, ast.FunctionDef): - child_returns = get_returns_not_on_nested_functions(child) - returns.extend(child_returns) - return returns - - tree = ast.parse(self.method_source).body - if tree: - returns = get_returns_not_on_nested_functions(tree[0]) - return_values = [r.value for r in returns] - # Replace NameConstant nodes valued None for None. - for i, v in enumerate(return_values): - if isinstance(v, ast.NameConstant) and v.value is None: - return_values[i] = None - return any(return_values) - else: - return False - - @property - def first_line_ends_in_dot(self): - if self.doc: - return self.doc.split("\n")[0][-1] == "." - - @property - def deprecated(self): - return ".. deprecated:: " in (self.summary + self.extended_summary) - +class PandasDocstring(Docstring): @property def mentioned_private_classes(self): return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc] @@ -632,237 +196,66 @@ def validate_pep8(self): yield from application.guide.stats.statistics_for("") -def get_validation_data(doc): +def pandas_validate(func_name: str): """ - Validate the docstring. + Call the numpydoc validation, and add the errors specific to pandas. Parameters ---------- - doc : Docstring - A Docstring object with the given function name. + func_name : str + Name of the object of the docstring to validate. Returns ------- - tuple - errors : list of tuple - Errors occurred during validation. - warnings : list of tuple - Warnings occurred during validation. - examples_errs : str - Examples usage displayed along the error, otherwise empty string. - - Notes - ----- - The errors codes are defined as: - - First two characters: Section where the error happens: - * GL: Global (no section, like section ordering errors) - * SS: Short summary - * ES: Extended summary - * PR: Parameters - * RT: Returns - * YD: Yields - * RS: Raises - * WN: Warns - * SA: See Also - * NT: Notes - * RF: References - * EX: Examples - - Last two characters: Numeric error code inside the section - - For example, EX02 is the second codified error in the Examples section - (which in this case is assigned to examples that do not pass the tests). - - The error codes, their corresponding error messages, and the details on how - they are validated, are not documented more than in the source code of this - function. + dict + Information about the docstring and the errors found. """ + doc = PandasDocstring(func_name) + result = validate(func_name) - errs = [] - wrns = [] - if not doc.raw_doc: - errs.append(error("GL08")) - return errs, wrns, "" - - if doc.start_blank_lines != 1: - errs.append(error("GL01")) - if doc.end_blank_lines != 1: - errs.append(error("GL02")) - if doc.double_blank_lines: - errs.append(error("GL03")) mentioned_errs = doc.mentioned_private_classes if mentioned_errs: - errs.append(error("GL04", mentioned_private_classes=", ".join(mentioned_errs))) - for line in doc.raw_doc.splitlines(): - if re.match("^ *\t", line): - errs.append(error("GL05", line_with_tabs=line.lstrip())) - - unexpected_sections = [ - section for section in doc.section_titles if section not in ALLOWED_SECTIONS - ] - for section in unexpected_sections: - errs.append( - error("GL06", section=section, allowed_sections=", ".join(ALLOWED_SECTIONS)) + result["errors"].append( + pandas_error("GL04", mentioned_private_classes=", ".join(mentioned_errs)) ) - correct_order = [ - section for section in ALLOWED_SECTIONS if section in doc.section_titles - ] - if correct_order != doc.section_titles: - errs.append(error("GL07", correct_sections=", ".join(correct_order))) - - if doc.deprecated and not doc.extended_summary.startswith(".. deprecated:: "): - errs.append(error("GL09")) - - directives_without_two_colons = doc.directives_without_two_colons - if directives_without_two_colons: - errs.append(error("GL10", directives=directives_without_two_colons)) - - if not doc.summary: - errs.append(error("SS01")) - else: - if not doc.summary[0].isupper(): - errs.append(error("SS02")) - if doc.summary[-1] != ".": - errs.append(error("SS03")) - if doc.summary != doc.summary.lstrip(): - errs.append(error("SS04")) - elif doc.is_function_or_method and doc.summary.split(" ")[0][-1] == "s": - errs.append(error("SS05")) - if doc.num_summary_lines > 1: - errs.append(error("SS06")) - - if not doc.extended_summary: - wrns.append(("ES01", "No extended summary found")) - - # PR01: Parameters not documented - # PR02: Unknown parameters - # PR03: Wrong parameters order - errs += doc.parameter_mismatches - - for param in doc.doc_parameters: - if not param.startswith("*"): # Check can ignore var / kwargs - if not doc.parameter_type(param): - if ":" in param: - errs.append(error("PR10", param_name=param.split(":")[0])) - else: - errs.append(error("PR04", param_name=param)) - else: - if doc.parameter_type(param)[-1] == ".": - errs.append(error("PR05", param_name=param)) - common_type_errors = [ - ("integer", "int"), - ("boolean", "bool"), - ("string", "str"), - ] - for wrong_type, right_type in common_type_errors: - if wrong_type in doc.parameter_type(param): - errs.append( - error( - "PR06", - param_name=param, - right_type=right_type, - wrong_type=wrong_type, - ) - ) - if not doc.parameter_desc(param): - errs.append(error("PR07", param_name=param)) - else: - if not doc.parameter_desc(param)[0].isupper(): - errs.append(error("PR08", param_name=param)) - if doc.parameter_desc(param)[-1] != ".": - errs.append(error("PR09", param_name=param)) - - if doc.is_function_or_method: - if not doc.returns: - if doc.method_returns_something: - errs.append(error("RT01")) - else: - if len(doc.returns) == 1 and doc.returns[0].name: - errs.append(error("RT02")) - for name_or_type, type_, desc in doc.returns: - if not desc: - errs.append(error("RT03")) - else: - desc = " ".join(desc) - if not desc[0].isupper(): - errs.append(error("RT04")) - if not desc.endswith("."): - errs.append(error("RT05")) - - if not doc.yields and "yield" in doc.method_source: - errs.append(error("YD01")) - - if not doc.see_also: - wrns.append(error("SA01")) - else: + if doc.see_also: for rel_name, rel_desc in doc.see_also.items(): - if rel_desc: - if not rel_desc.endswith("."): - errs.append(error("SA02", reference_name=rel_name)) - if not rel_desc[0].isupper(): - errs.append(error("SA03", reference_name=rel_name)) - else: - errs.append(error("SA04", reference_name=rel_name)) if rel_name.startswith("pandas."): - errs.append( - error( + result["errors"].append( + pandas_error( "SA05", reference_name=rel_name, right_reference=rel_name[len("pandas.") :], ) ) - examples_errs = "" - if not doc.examples: - wrns.append(error("EX01")) - else: - examples_errs = doc.examples_errors - if examples_errs: - errs.append(error("EX02", doctest_log=examples_errs)) + result["examples_errs"] = "" + if doc.examples: + result["examples_errs"] = doc.examples_errors + if result["examples_errs"]: + result["errors"].append( + pandas_error("EX02", doctest_log=result["examples_errs"]) + ) for err in doc.validate_pep8(): - errs.append( - error( + result["errors"].append( + pandas_error( "EX03", error_code=err.error_code, error_message=err.message, - times_happening=f" ({err.count} times)" if err.count > 1 else "", + times_happening=" ({} times)".format(err.count) + if err.count > 1 + else "", ) ) examples_source_code = "".join(doc.examples_source_code) for wrong_import in ("numpy", "pandas"): - if f"import {wrong_import}" in examples_source_code: - errs.append(error("EX04", imported_library=wrong_import)) - return errs, wrns, examples_errs - - -def validate_one(func_name): - """ - Validate the docstring for the given func_name - - Parameters - ---------- - func_name : function - Function whose docstring will be evaluated (e.g. pandas.read_csv). + if "import {}".format(wrong_import) in examples_source_code: + result["errors"].append( + pandas_error("EX04", imported_library=wrong_import) + ) - Returns - ------- - dict - A dictionary containing all the information obtained from validating - the docstring. - """ - doc = Docstring(func_name) - errs, wrns, examples_errs = get_validation_data(doc) - return { - "type": doc.type, - "docstring": doc.clean_doc, - "deprecated": doc.deprecated, - "file": doc.source_file_name, - "file_line": doc.source_file_def_line, - "github_link": doc.github_url, - "errors": errs, - "warnings": wrns, - "examples_errors": examples_errs, - } + return result def validate_all(prefix, ignore_deprecated=False): @@ -887,16 +280,16 @@ def validate_all(prefix, ignore_deprecated=False): result = {} seen = {} - # functions from the API docs api_doc_fnames = os.path.join(BASE_PATH, "doc", "source", "reference", "*.rst") api_items = [] for api_doc_fname in glob.glob(api_doc_fnames): with open(api_doc_fname) as f: api_items += list(get_api_items(f)) + for func_name, func_obj, section, subsection in api_items: if prefix and not func_name.startswith(prefix): continue - doc_info = validate_one(func_name) + doc_info = pandas_validate(func_name) if ignore_deprecated and doc_info["deprecated"]: continue result[func_name] = doc_info @@ -914,100 +307,86 @@ def validate_all(prefix, ignore_deprecated=False): seen[shared_code_key] = func_name - # functions from introspecting Series and DataFrame - api_item_names = set(list(zip(*api_items))[0]) - for class_ in (pandas.Series, pandas.DataFrame): - for member in inspect.getmembers(class_): - func_name = f"pandas.{class_.__name__}.{member[0]}" - if not member[0].startswith("_") and func_name not in api_item_names: - if prefix and not func_name.startswith(prefix): - continue - doc_info = validate_one(func_name) - if ignore_deprecated and doc_info["deprecated"]: - continue - result[func_name] = doc_info - result[func_name]["in_api"] = False - return result -def main(func_name, prefix, errors, output_format, ignore_deprecated): +def print_validate_all_results( + prefix: str, + errors: Optional[List[str]], + output_format: str, + ignore_deprecated: bool, +): + if output_format not in ("default", "json", "actions"): + raise ValueError(f'Unknown output_format "{output_format}"') + + result = validate_all(prefix, ignore_deprecated) + + if output_format == "json": + sys.stdout.write(json.dumps(result)) + return 0 + + prefix = "##[error]" if output_format == "actions" else "" + exit_status = 0 + for name, res in result.items(): + for err_code, err_desc in res["errors"]: + if errors and err_code not in errors: + continue + sys.stdout.write( + f'{prefix}{res["file"]}:{res["file_line"]}:' + f"{err_code}:{name}:{err_desc}\n" + ) + exit_status += 1 + + return exit_status + + +def print_validate_one_results(func_name: str): def header(title, width=80, char="#"): full_line = char * width side_len = (width - len(title) - 2) // 2 adj = "" if len(title) % 2 == 0 else " " - title_line = f"{char * side_len} {title}{adj} {char * side_len}" + title_line = "{side} {title}{adj} {side}".format( + side=char * side_len, title=title, adj=adj + ) return f"\n{full_line}\n{title_line}\n{full_line}\n\n" - exit_status = 0 - if func_name is None: - result = validate_all(prefix, ignore_deprecated) - - if output_format == "json": - output = json.dumps(result) - else: - if output_format == "default": - output_format = "{text}\n" - elif output_format == "azure": - output_format = ( - "##vso[task.logissue type=error;" - "sourcepath={path};" - "linenumber={row};" - "code={code};" - "]{text}\n" - ) - else: - raise ValueError(f'Unknown output_format "{output_format}"') - - output = "" - for name, res in result.items(): - for err_code, err_desc in res["errors"]: - # The script would be faster if instead of filtering the - # errors after validating them, it didn't validate them - # initially. But that would complicate the code too much - if errors and err_code not in errors: - continue - exit_status += 1 - output += output_format.format( - path=res["file"], - row=res["file_line"], - code=err_code, - text=f"{name}: {err_desc}", - ) + result = pandas_validate(func_name) - sys.stdout.write(output) + sys.stderr.write(header(f"Docstring ({func_name})")) + sys.stderr.write(f"{result['docstring']}\n") - else: - result = validate_one(func_name) - sys.stderr.write(header(f"Docstring ({func_name})")) - sys.stderr.write(f"{result['docstring']}\n") - sys.stderr.write(header("Validation")) - if result["errors"]: - sys.stderr.write(f"{len(result['errors'])} Errors found:\n") - for err_code, err_desc in result["errors"]: - # Failing examples are printed at the end - if err_code == "EX02": - sys.stderr.write("\tExamples do not pass tests\n") - continue - sys.stderr.write(f"\t{err_desc}\n") - if result["warnings"]: - sys.stderr.write(f"{len(result['warnings'])} Warnings found:\n") - for wrn_code, wrn_desc in result["warnings"]: - sys.stderr.write(f"\t{wrn_desc}\n") - - if not result["errors"]: - sys.stderr.write(f'Docstring for "{func_name}" correct. :)\n') - - if result["examples_errors"]: - sys.stderr.write(header("Doctests")) - sys.stderr.write(result["examples_errors"]) + sys.stderr.write(header("Validation")) + if result["errors"]: + sys.stderr.write(f'{len(result["errors"])} Errors found:\n') + for err_code, err_desc in result["errors"]: + if err_code == "EX02": # Failing examples are printed at the end + sys.stderr.write("\tExamples do not pass tests\n") + continue + sys.stderr.write(f"\t{err_desc}\n") + elif result["errors"]: + sys.stderr.write(f'Docstring for "{func_name}" correct. :)\n') - return exit_status + if result["examples_errs"]: + sys.stderr.write(header("Doctests")) + sys.stderr.write(result["examples_errs"]) + + +def main(func_name, prefix, errors, output_format, ignore_deprecated): + """ + Main entry point. Call the validation for one or for all docstrings. + """ + if func_name is None: + return print_validate_all_results( + prefix, errors, output_format, ignore_deprecated + ) + else: + print_validate_one_results(func_name) + return 0 if __name__ == "__main__": - format_opts = "default", "json", "azure" + format_opts = "default", "json", "actions" func_help = ( "function or method to validate (e.g. pandas.DataFrame.head) " "if not provided, all docstrings are validated and returned " @@ -1020,16 +399,16 @@ def header(title, width=80, char="#"): default="default", choices=format_opts, help="format of the output when validating " - "multiple docstrings (ignored when validating one)." - f"It can be {str(format_opts)[1:-1]}", + "multiple docstrings (ignored when validating one). " + "It can be {str(format_opts)[1:-1]}", ) argparser.add_argument( "--prefix", default=None, help="pattern for the " "docstring names, in order to decide which ones " - 'will be validated. A prefix "pandas.Series.str.' - "will make the script validate all the docstrings" + 'will be validated. A prefix "pandas.Series.str."' + "will make the script validate all the docstrings " "of methods starting by this pattern. It is " "ignored if parameter function is provided", )