diff --git a/numpydoc/tests/test_validate.py b/numpydoc/tests/test_validate.py new file mode 100644 index 00000000..3f162139 --- /dev/null +++ b/numpydoc/tests/test_validate.py @@ -0,0 +1,1290 @@ +import pytest +import numpydoc.validate +import numpydoc.tests + + +validate_one = numpydoc.validate.validate + + +class GoodDocStrings: + """ + Collection of good doc strings. + + This class contains a lot of docstrings that should pass the validation + script without any errors. + + See Also + -------- + AnotherClass : With its description. + + Examples + -------- + >>> result = 1 + 1 + """ + def one_liner(self): + """Allow one liner docstrings (including quotes).""" + # This should fail, but not because of the position of the quotes + pass + + def plot(self, kind, color="blue", **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Parameters + ---------- + kind : str + Kind of matplotlib plot. + color : str, default 'blue' + Color name or rgb code. + **kwargs + These parameters will be passed to the matplotlib plotting + function. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def swap(self, arr, i, j, *args, **kwargs): + """ + Swap two indicies on an array. + + The extended summary can be multiple paragraphs, but just one + is enough to pass the validation. + + Parameters + ---------- + arr : list + The list having indexes swapped. + i, j : int + The indexes being swapped. + *args, **kwargs + Extraneous parameters are being permitted. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def sample(self): + """ + Generate and return a random number. + + The value is sampled from a continuous uniform distribution between + 0 and 1. + + Returns + ------- + float + Random number generated. + + See Also + -------- + related : Something related. 
+ + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def random_letters(self): + """ + Generate and return a sequence of random letters. + + The length of the returned string is also random, and is also + returned. + + Returns + ------- + length : int + Length of the returned string. + letters : str + String of random letters. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def sample_values(self): + """ + Generate an infinite sequence of random numbers. + + The values are sampled from a continuous uniform distribution between + 0 and 1. + + Yields + ------ + float + Random number generated. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def head(self): + """ + Return the first 5 elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Returns + ------- + int + Subset of the original series with the 5 first values. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + + Examples + -------- + >>> 1 + 1 + 2 + """ + return 1 + + def head1(self, n=5): + """ + Return the first elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Parameters + ---------- + n : int + Number of values to return. + + Returns + ------- + int + Subset of the original series with the n first values. + + See Also + -------- + tail : Return the last n elements of the Series. + + Examples + -------- + >>> s = 10 + >>> s + 10 + + With the `n` parameter, we can change the number of returned rows: + + >>> s + 1 + 11 + """ + return 1 + + def summary_starts_with_number(self, n=5): + """ + 2nd rule of summaries should allow this. 
+ + 3 Starting the summary with a number instead of a capital letter. + Also in parameters, returns, see also... + + Parameters + ---------- + n : int + 4 Number of values to return. + + Returns + ------- + int + 5 Subset of the original series with the n first values. + + See Also + -------- + tail : 6 Return the last n elements of the Series. + + Examples + -------- + >>> s = 10 + >>> s + 10 + + 7 With the `n` parameter, we can change the number of returned rows: + + >>> s + 1 + 11 + """ + return 1 + + def contains(self, pat, case=True, na=float('NaN')): + """ + Return whether each value contains `pat`. + + In this case, we are illustrating how to use sections, even + if the example is simple enough and does not require them. + + Parameters + ---------- + pat : str + Pattern to check for within each element. + case : bool, default True + Whether check should be done with case sensitivity. + na : object, default np.nan + Fill value for missing data. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> s = 25 + >>> s + 25 + + **Case sensitivity** + + With `case_sensitive` set to `False` we can match `a` with both + `a` and `A`: + + >>> s + 1 + 26 + + **Missing values** + + We can fill missing values in the output using the `na` parameter: + + >>> s * 2 + 50 + """ + pass + + def mode(self, axis, numeric_only): + """ + Ensure reST directives don't affect checks for leading periods. + + The extended summary can be multiple paragraphs, but just one + is enough to pass the validation. + + Parameters + ---------- + axis : str + Sentence ending in period, followed by single directive. + + .. versionchanged:: 0.1.2 + + numeric_only : bool + Sentence ending in period, followed by multiple directives. + + .. versionadded:: 0.1.2 + .. deprecated:: 0.00.0 + A multiline description, + which spans another line. + + See Also + -------- + related : Something related. 
+ + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def good_imports(self): + """ + Ensure import other than numpy and pandas are fine. + + The extended summary can be multiple paragraphs, but just one + is enough to pass the validation. + + See Also + -------- + related : Something related. + + Examples + -------- + This example does not import pandas or import numpy. + >>> import datetime + >>> datetime.MAXYEAR + 9999 + """ + pass + + def no_returns(self): + """ + Say hello and have no returns. + + The extended summary can be multiple paragraphs, but just one + is enough to pass the validation. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + pass + + def empty_returns(self): + """ + Say hello and always return None. + + Since this function never returns a value, this + docstring doesn't need a return section. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + + def say_hello(): + return "Hello World!" + + say_hello() + if True: + return + else: + return None + + def multiple_variables_on_one_line(self, matrix, a, b, i, j): + """ + Swap two values in a matrix. + + The extended summary can be multiple paragraphs, but just one + is enough to pass the validation. + + Parameters + ---------- + matrix : list of list + A double list that represents a matrix. + a, b : int + The indicies of the first value. + i, j : int + The indicies of the second value. + + See Also + -------- + related : Something related. + + Examples + -------- + >>> result = 1 + 1 + """ + pass + + +class BadGenericDocStrings: + """Everything here has a bad docstring + """ + + def func(self): + + """Some function. + + With several mistakes in the docstring. + + It has a blank like after the signature `def func():`. + + The text 'Some function' should go in the line after the + opening quotes of the docstring, not in the same line. 
+ + There is a blank line between the docstring and the first line + of code `foo = 1`. + + The closing quotes should be in the next line, not in this one.""" + + foo = 1 + bar = 2 + return foo + bar + + def astype(self, dtype): + """ + Casts Series type. + + Verb in third-person of the present simple, should be infinitive. + """ + pass + + def astype1(self, dtype): + """ + Method to cast Series type. + + Does not start with verb. + """ + pass + + def astype2(self, dtype): + """ + Cast Series type + + Missing dot at the end. + """ + pass + + def astype3(self, dtype): + """ + Cast Series type from its current type to the new type defined in + the parameter dtype. + + Summary is too verbose and doesn't fit in a single line. + """ + pass + + def two_linebreaks_between_sections(self, foo): + """ + Test linebreaks message GL03. + + Note 2 blank lines before parameters section. + + + Parameters + ---------- + foo : str + Description of foo parameter. + """ + pass + + def linebreak_at_end_of_docstring(self, foo): + """ + Test linebreaks message GL03. + + Note extra blank line at end of docstring. + + Parameters + ---------- + foo : str + Description of foo parameter. + + """ + pass + + def plot(self, kind, **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Note the blank line between the parameters title and the first + parameter. Also, note that after the name of the parameter `kind` + and before the colon, a space is missing. + + Also, note that the parameter descriptions do not start with a + capital letter, and do not finish with a dot. + + Finally, the `**kwargs` parameter is missing. + + Parameters + ---------- + + kind: str + kind of matplotlib plot + """ + pass + + def unknown_section(self): + """ + This section has an unknown section title. + + Unknown Section + --------------- + This should raise an error in the validation. 
+ """ + + def sections_in_wrong_order(self): + """ + This docstring has the sections in the wrong order. + + Parameters + ---------- + name : str + This section is in the right position. + + Examples + -------- + >>> print('So far Examples is good, as it goes before Parameters') + So far Examples is good, as it goes before Parameters + + See Also + -------- + function : This should generate an error, as See Also needs to go + before Examples. + """ + + def deprecation_in_wrong_order(self): + """ + This docstring has the deprecation warning in the wrong order. + + This is the extended summary. The correct order should be + summary, deprecation warning, extended summary. + + .. deprecated:: 1.0 + This should generate an error as it needs to go before + extended summary. + """ + + def method_wo_docstrings(self): + pass + + def directives_without_two_colons(self, first, second): + """ + Ensure reST directives have trailing colons. + + Parameters + ---------- + first : str + Sentence ending in period, followed by single directive w/o colons. + + .. versionchanged 0.1.2 + + second : bool + Sentence ending in period, followed by multiple directives w/o + colons. + + .. versionadded 0.1.2 + .. deprecated 0.00.0 + + """ + pass + + +class BadSummaries: + def no_summary(self): + """ + Returns + ------- + int + Always one. + """ + + def heading_whitespaces(self): + """ + Summary with heading whitespaces. + + Returns + ------- + int + Always one. + """ + + def wrong_line(self): + """Quotes are on the wrong line. + + Both opening and closing.""" + pass + + def no_punctuation(self): + """ + Has the right line but forgets punctuation + """ + pass + + def no_capitalization(self): + """ + provides a lowercase summary. + """ + pass + + def no_infinitive(self): + """ + Started with a verb that is not infinitive. + """ + + def multi_line(self): + """ + Extends beyond one line + which is not correct. 
+ """ + + def two_paragraph_multi_line(self): + """ + Extends beyond one line + which is not correct. + + Extends beyond one line, which in itself is correct but the + previous short summary should still be an issue. + """ + + +class BadParameters: + """ + Everything here has a problem with its Parameters section. + """ + def no_type(self, value): + """ + Lacks the type. + + Parameters + ---------- + value + A parameter without type. + """ + + def type_with_period(self, value): + """ + Has period after type. + + Parameters + ---------- + value : str. + A parameter type should not finish with period. + """ + + def no_description(self, value): + """ + Lacks the description. + + Parameters + ---------- + value : str + """ + + def missing_params(self, kind, **kwargs): + """ + Lacks kwargs in Parameters. + + Parameters + ---------- + kind : str + Foo bar baz. + """ + + def bad_colon_spacing(self, kind): + """ + Has bad spacing in the type line. + + Parameters + ---------- + kind: str + Needs a space after kind. + """ + + def no_description_period(self, kind): + """ + Forgets to add a period to the description. + + Parameters + ---------- + kind : str + Doesn't end with a dot + """ + + def no_description_period_with_directive(self, kind): + """ + Forgets to add a period, and also includes a directive. + + Parameters + ---------- + kind : str + Doesn't end with a dot + + .. versionadded:: 0.00.0 + """ + + def no_description_period_with_directives(self, kind): + """ + Forgets to add a period, and also includes multiple directives. + + Parameters + ---------- + kind : str + Doesn't end with a dot + + .. versionchanged:: 0.00.0 + .. deprecated:: 0.00.0 + """ + + def parameter_capitalization(self, kind): + """ + Forgets to capitalize the description. + + Parameters + ---------- + kind : str + this is not capitalized. + """ + + def blank_lines(self, kind): + """ + Adds a blank line after the section header. + + Parameters + ---------- + + kind : str + Foo bar baz. 
+ """ + pass + + def integer_parameter(self, kind): + """ + Uses integer instead of int. + + Parameters + ---------- + kind : integer + Foo bar baz. + """ + pass + + def string_parameter(self, kind): + """ + Uses string instead of str. + + Parameters + ---------- + kind : string + Foo bar baz. + """ + pass + + def boolean_parameter(self, kind): + """ + Uses boolean instead of bool. + + Parameters + ---------- + kind : boolean + Foo bar baz. + """ + pass + + def list_incorrect_parameter_type(self, kind): + """ + Uses list of boolean instead of list of bool. + + Parameters + ---------- + kind : list of boolean, integer, float or string + Foo bar baz. + """ + pass + + def bad_parameter_spacing(self, a, b): + """ + The parameters on the same line have an extra space between them. + + Parameters + ---------- + a, b : int + Foo bar baz. + """ + pass + + +class BadReturns: + def return_not_documented(self): + """ + Lacks section for Returns + """ + return "Hello world!" + + def yield_not_documented(self): + """ + Lacks section for Yields + """ + yield "Hello world!" + + def no_type(self): + """ + Returns documented but without type. + + Returns + ------- + Some value. + """ + return "Hello world!" + + def no_description(self): + """ + Provides type but no descrption. + + Returns + ------- + str + """ + return "Hello world!" + + def no_punctuation(self): + """ + Provides type and description but no period. + + Returns + ------- + str + A nice greeting + """ + return "Hello world!" + + def named_single_return(self): + """ + Provides name but returns only one value. + + Returns + ------- + s : str + A nice greeting. + """ + return "Hello world!" + + def no_capitalization(self): + """ + Forgets capitalization in return values description. + + Returns + ------- + foo : str + The first returned string. + bar : str + the second returned string. + """ + return "Hello", "World!" + + def no_period_multi(self): + """ + Forgets period in return values description. 
+ + Returns + ------- + foo : str + The first returned string + bar : str + The second returned string. + """ + return "Hello", "World!" + + +class BadSeeAlso: + def no_desc(self): + """ + Return the first 5 elements of the Series. + + See Also + -------- + Series.tail + """ + pass + + def desc_no_period(self): + """ + Return the first 5 elements of the Series. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n + """ + pass + + def desc_first_letter_lowercase(self): + """ + Return the first 5 elements of the Series. + + See Also + -------- + Series.tail : return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + """ + pass + + def prefix_pandas(self): + """ + Have `pandas` prefix in See Also section. + + See Also + -------- + pandas.Series.rename : Alter Series index labels or name. + DataFrame.head : The first `n` rows of the caller object. + """ + pass + + +class BadExamples: + def missing_whitespace_around_arithmetic_operator(self): + """ + Examples + -------- + >>> 2+5 + 7 + """ + pass + + def indentation_is_not_a_multiple_of_four(self): + """ + Examples + -------- + >>> if 2 + 5: + ... pass + """ + pass + + def missing_whitespace_after_comma(self): + """ + Examples + -------- + >>> import datetime + >>> value = datetime.date(2019,1,1) + """ + pass + + +class TestValidator: + def _import_path(self, klass=None, func=None): + """ + Build the required import path for tests in this module. + + Parameters + ---------- + klass : str + Class name of object in module. + func : str + Function name of object in module. 
+ + Returns + ------- + str + Import path of specified object in this module + """ + base_path = "numpydoc.tests.test_validate" + + if klass: + base_path = ".".join([base_path, klass]) + + if func: + base_path = ".".join([base_path, func]) + + return base_path + + def test_one_liner(self, capsys): + result = validate_one(self._import_path(klass="GoodDocStrings", func='one_liner')) + errors = " ".join(err[1] for err in result["errors"]) + assert 'should start in the line immediately after the opening quotes' not in errors + assert 'should be placed in the line after the last text' not in errors + + def test_good_class(self, capsys): + errors = validate_one(self._import_path(klass="GoodDocStrings"))["errors"] + assert isinstance(errors, list) + assert not errors + + @pytest.mark.parametrize( + "func", + [ + "plot", + "swap", + "sample", + "random_letters", + "sample_values", + "head", + "head1", + "summary_starts_with_number", + "contains", + "mode", + "good_imports", + "no_returns", + "empty_returns", + "multiple_variables_on_one_line", + ], + ) + def test_good_functions(self, capsys, func): + errors = validate_one(self._import_path(klass="GoodDocStrings", func=func))[ + "errors" + ] + assert isinstance(errors, list) + assert not errors + + def test_bad_class(self, capsys): + errors = validate_one(self._import_path(klass="BadGenericDocStrings"))["errors"] + assert isinstance(errors, list) + assert errors + + @pytest.mark.parametrize( + "func", + [ + "func", + "astype", + "astype1", + "astype2", + "astype3", + "plot", + "directives_without_two_colons", + ], + ) + def test_bad_generic_functions(self, capsys, func): + errors = validate_one( + self._import_path(klass="BadGenericDocStrings", func=func) # noqa:F821 + )["errors"] + assert isinstance(errors, list) + assert errors + + @pytest.mark.parametrize( + "klass,func,msgs", + [ + # See Also tests + ( + "BadGenericDocStrings", + "unknown_section", + ('Found unknown section "Unknown Section".',), + ), + ( + 
"BadGenericDocStrings", + "sections_in_wrong_order", + ( + "Sections are in the wrong order. Correct order is: Parameters, " + "See Also, Examples", + ), + ), + ( + "BadGenericDocStrings", + "deprecation_in_wrong_order", + ("Deprecation warning should precede extended summary",), + ), + ( + "BadGenericDocStrings", + "directives_without_two_colons", + ( + "reST directives ['versionchanged', 'versionadded', " + "'deprecated'] must be followed by two colons", + ), + ), + ( + "BadSeeAlso", + "no_desc", + ('Missing description for See Also "Series.tail" reference',), + ), + ( + "BadSeeAlso", + "desc_no_period", + ('Missing period at end of description for See Also "Series.iloc"',), + ), + ( + "BadSeeAlso", + "desc_first_letter_lowercase", + ('should be capitalized for See Also "Series.tail"',), + ), + # Summary tests + ( + "BadSummaries", + "no_summary", + ("No summary found",), + ), + ( + "BadSummaries", + "heading_whitespaces", + ("Summary contains heading whitespaces",), + ), + ( + "BadSummaries", + "wrong_line", + ("should start in the line immediately after the opening quotes", + "should be placed in the line after the last text"), + ), + ("BadSummaries", "no_punctuation", ("Summary does not end with a period",)), + ( + "BadSummaries", + "no_capitalization", + ("Summary does not start with a capital letter",), + ), + ( + "BadSummaries", + "no_capitalization", + ("Summary must start with infinitive verb",), + ), + ("BadSummaries", "multi_line", ("Summary should fit in a single line",)), + ( + "BadSummaries", + "two_paragraph_multi_line", + ("Summary should fit in a single line",), + ), + # Parameters tests + ( + "BadParameters", + "no_type", + ('Parameter "value" has no type',), + ), + ( + "BadParameters", + "type_with_period", + ('Parameter "value" type should not finish with "."',), + ), + ( + "BadParameters", + "no_description", + ('Parameter "value" has no description',), + ), + ( + "BadParameters", + "missing_params", + ("Parameters {'**kwargs'} not 
documented",), + ), + ( + "BadParameters", + "bad_colon_spacing", + ( + 'Parameter "kind" requires a space before the colon ' + "separating the parameter name and type", + ), + ), + ( + "BadParameters", + "no_description_period", + ('Parameter "kind" description should finish with "."',), + ), + ( + "BadParameters", + "no_description_period_with_directive", + ('Parameter "kind" description should finish with "."',), + ), + ( + "BadParameters", + "parameter_capitalization", + ('Parameter "kind" description should start with a capital letter',), + ), + ( + "BadParameters", + "integer_parameter", + ('Parameter "kind" type should use "int" instead of "integer"',), + ), + ( + "BadParameters", + "string_parameter", + ('Parameter "kind" type should use "str" instead of "string"',), + ), + ( + "BadParameters", + "boolean_parameter", + ('Parameter "kind" type should use "bool" instead of "boolean"',), + ), + ( + "BadParameters", + "list_incorrect_parameter_type", + ('Parameter "kind" type should use "bool" instead of "boolean"',), + ), + ( + "BadParameters", + "list_incorrect_parameter_type", + ('Parameter "kind" type should use "int" instead of "integer"',), + ), + ( + "BadParameters", + "list_incorrect_parameter_type", + ('Parameter "kind" type should use "str" instead of "string"',), + ), + ( + "BadParameters", + "bad_parameter_spacing", + ("Parameters {'b'} not documented", "Unknown parameters {' b'}"), + ), + pytest.param( + "BadParameters", + "blank_lines", + ("No error yet?",), + marks=pytest.mark.xfail, + ), + # Returns tests + ("BadReturns", "return_not_documented", ("No Returns section found",)), + ("BadReturns", "yield_not_documented", ("No Yields section found",)), + pytest.param("BadReturns", "no_type", ("foo",), marks=pytest.mark.xfail), + ("BadReturns", "no_description", ("Return value has no description",)), + ( + "BadReturns", + "no_punctuation", + ('Return value description should finish with "."',), + ), + ( + "BadReturns", + "named_single_return", + ( + 
"The first line of the Returns section should contain only the " + "type, unless multiple values are being returned", + ), + ), + ( + "BadReturns", + "no_capitalization", + ("Return value description should start with a capital letter",), + ), + ( + "BadReturns", + "no_period_multi", + ('Return value description should finish with "."',), + ), + ( + "BadGenericDocStrings", + "method_wo_docstrings", + ("The object does not have a docstring",), + ), + ( + "BadGenericDocStrings", + "two_linebreaks_between_sections", + ( + "Double line break found; please use only one blank line to " + "separate sections or paragraphs, and do not leave blank lines " + "at the end of docstrings", + ), + ), + ( + "BadGenericDocStrings", + "linebreak_at_end_of_docstring", + ( + "Double line break found; please use only one blank line to " + "separate sections or paragraphs, and do not leave blank lines " + "at the end of docstrings", + ), + ), + ], + ) + def test_bad_docstrings(self, capsys, klass, func, msgs): + result = validate_one(self._import_path(klass=klass, func=func)) + for msg in msgs: + assert msg in " ".join(err[1] for err in result["errors"]) + + +class TestDocstringClass: + @pytest.mark.parametrize("invalid_name", ["unknown_mod", "unknown_mod.MyClass"]) + def test_raises_for_invalid_module_name(self, invalid_name): + msg = 'No module can be imported from "{}"'.format(invalid_name) + with pytest.raises(ImportError, match=msg): + numpydoc.validate.Docstring(invalid_name) + + @pytest.mark.parametrize( + "invalid_name", ["datetime.BadClassName", "datetime.bad_method_name"] + ) + def test_raises_for_invalid_attribute_name(self, invalid_name): + name_components = invalid_name.split(".") + obj_name, invalid_attr_name = name_components[-2], name_components[-1] + msg = "'{}' has no attribute '{}'".format(obj_name, invalid_attr_name) + with pytest.raises(AttributeError, match=msg): + numpydoc.validate.Docstring(invalid_name) diff --git a/numpydoc/validate.py b/numpydoc/validate.py new 
file mode 100644 index 00000000..f268d8b8 --- /dev/null +++ b/numpydoc/validate.py @@ -0,0 +1,593 @@ +#!/usr/bin/env python +""" +Analyze docstrings to detect errors. + +Call ``validate(object_name_to_validate)`` to get a dictionary +with all the detected errors. +""" +import ast +import collections +import importlib +import inspect +import pydoc +import re +import textwrap +from .docscrape import NumpyDocString + + +DIRECTIVES = ["versionadded", "versionchanged", "deprecated"] +DIRECTIVE_PATTERN = re.compile(r"^\s*\.\. ({})(?!::)".format('|'.join(DIRECTIVES)), + re.I | re.M) +ALLOWED_SECTIONS = [ + "Parameters", + "Attributes", + "Methods", + "Returns", + "Yields", + "Other Parameters", + "Raises", + "Warns", + "See Also", + "Notes", + "References", + "Examples", +] +ERROR_MSGS = { + "GL01": "Docstring text (summary) should start in the line immediately " + "after the opening quotes (not in the same line, or leaving a " + "blank line in between)", + "GL02": "Closing quotes should be placed in the line after the last text " + "in the docstring (do not close the quotes in the same line as " + "the text, or leave a blank line between the last text and the " + "quotes)", + "GL03": "Double line break found; please use only one blank line to " + "separate sections or paragraphs, and do not leave blank lines " + "at the end of docstrings", + "GL05": 'Tabs found at the start of line "{line_with_tabs}", please use ' + "whitespace only", + "GL06": 'Found unknown section "{section}". Allowed sections are: ' + "{allowed_sections}", + "GL07": "Sections are in the wrong order. 
Correct order is: {correct_sections}", + "GL08": "The object does not have a docstring", + "GL09": "Deprecation warning should precede extended summary", + "GL10": "reST directives {directives} must be followed by two colons", + "SS01": "No summary found (a short summary in a single line should be " + "present at the beginning of the docstring)", + "SS02": "Summary does not start with a capital letter", + "SS03": "Summary does not end with a period", + "SS04": "Summary contains heading whitespaces", + "SS05": "Summary must start with infinitive verb, not third person " + '(e.g. use "Generate" instead of "Generates")', + "SS06": "Summary should fit in a single line", + "ES01": "No extended summary found", + "PR01": "Parameters {missing_params} not documented", + "PR02": "Unknown parameters {unknown_params}", + "PR03": "Wrong parameters order. Actual: {actual_params}. " + "Documented: {documented_params}", + "PR04": 'Parameter "{param_name}" has no type', + "PR05": 'Parameter "{param_name}" type should not finish with "."', + "PR06": 'Parameter "{param_name}" type should use "{right_type}" instead ' + 'of "{wrong_type}"', + "PR07": 'Parameter "{param_name}" has no description', + "PR08": 'Parameter "{param_name}" description should start with a ' + "capital letter", + "PR09": 'Parameter "{param_name}" description should finish with "."', + "PR10": 'Parameter "{param_name}" requires a space before the colon ' + "separating the parameter name and type", + "RT01": "No Returns section found", + "RT02": "The first line of the Returns section should contain only the " + "type, unless multiple values are being returned", + "RT03": "Return value has no description", + "RT04": "Return value description should start with a capital letter", + "RT05": 'Return value description should finish with "."', + "YD01": "No Yields section found", + "SA01": "See Also section not found", + "SA02": "Missing period at end of description for See Also " + '"{reference_name}" reference', + 
"SA03": "Description should be capitalized for See Also " + '"{reference_name}" reference', + "SA04": 'Missing description for See Also "{reference_name}" reference', + "EX01": "No examples section found", +} + + +def error(code, **kwargs): + """ + Return a tuple with the error code and the message with variables replaced. + + This is syntactic sugar so instead of: + - `('PR02', ERROR_MSGS['PR02'].format(doctest_log=log))` + + We can simply use: + - `error('PR02', doctest_log=log)` + + Parameters + ---------- + code : str + Error code. + **kwargs + Values for the variables in the error messages + + Returns + ------- + code : str + Error code. + message : str + Error message with variables replaced. + """ + return (code, ERROR_MSGS[code].format(**kwargs)) + + +class Docstring: + # TODO Can all this class be merged into NumpyDocString? + def __init__(self, name): + self.name = name + obj = self._load_obj(name) + self.obj = obj + self.code_obj = inspect.unwrap(obj) + self.raw_doc = obj.__doc__ or "" + self.clean_doc = pydoc.getdoc(obj) + self.doc = NumpyDocString(self.clean_doc) + + @staticmethod + def _load_obj(name): + """ + Import Python object from its name as string. + + Parameters + ---------- + name : str + Object name to import (e.g. pandas.Series.str.upper) + + Returns + ------- + object + Python object that can be a class, method, function... 
+ + Examples + -------- + >>> Docstring._load_obj('datetime.datetime') + + """ + for maxsplit in range(1, name.count(".") + 1): + module, *func_parts = name.rsplit(".", maxsplit) + try: + obj = importlib.import_module(module) + except ImportError: + pass + else: + break + else: + raise ImportError("No module can be imported " 'from "{}"'.format(name)) + + for part in func_parts: + obj = getattr(obj, part) + return obj + + @property + def type(self): + return type(self.obj).__name__ + + @property + def is_function_or_method(self): + return inspect.isfunction(self.obj) + + @property + def source_file_name(self): + """ + File name where the object is implemented (e.g. pandas/core/frame.py). + """ + try: + fname = inspect.getsourcefile(self.code_obj) + except TypeError: + # In some cases the object is something complex like a cython + # object that can't be easily introspected. An it's better to + # return the source code file of the object as None, than crash + pass + else: + return fname + + @property + def source_file_def_line(self): + """ + Number of line where the object is defined in its file. + """ + try: + return inspect.getsourcelines(self.code_obj)[-1] + except (OSError, TypeError): + # In some cases the object is something complex like a cython + # object that can't be easily introspected. 
An it's better to + # return the line number as None, than crash + pass + + @property + def start_blank_lines(self): + i = None + if self.raw_doc: + for i, row in enumerate(self.raw_doc.split("\n")): + if row.strip(): + break + return i + + @property + def end_blank_lines(self): + i = None + if self.raw_doc: + for i, row in enumerate(reversed(self.raw_doc.split("\n"))): + if row.strip(): + break + return i + + @property + def double_blank_lines(self): + prev = True + for row in self.raw_doc.split("\n"): + if not prev and not row.strip(): + return True + prev = row.strip() + return False + + @property + def section_titles(self): + sections = [] + self.doc._doc.reset() + while not self.doc._doc.eof(): + content = self.doc._read_to_next_section() + if ( + len(content) > 1 + and len(content[0]) == len(content[1]) + and set(content[1]) == {"-"} + ): + sections.append(content[0]) + return sections + + @property + def summary(self): + return " ".join(self.doc["Summary"]) + + @property + def num_summary_lines(self): + return len(self.doc["Summary"]) + + @property + def extended_summary(self): + if not self.doc["Extended Summary"] and len(self.doc["Summary"]) > 1: + return " ".join(self.doc["Summary"]) + return " ".join(self.doc["Extended Summary"]) + + @property + def doc_parameters(self): + parameters = collections.OrderedDict() + for names, type_, desc in self.doc["Parameters"]: + for name in names.split(", "): + parameters[name] = (type_, "".join(desc)) + return parameters + + @property + def signature_parameters(self): + def add_stars(param_name, info): + """ + Add stars to *args and **kwargs parameters + """ + if info.kind == inspect.Parameter.VAR_POSITIONAL: + return "*{}".format(param_name) + elif info.kind == inspect.Parameter.VAR_KEYWORD: + return "**{}".format(param_name) + else: + return param_name + + if inspect.isclass(self.obj): + if hasattr(self.obj, "_accessors") and ( + self.name.split(".")[-1] in self.obj._accessors + ): + # accessor classes have a 
@property
def parameter_mismatches(self):
    """
    Compare the documented parameters with the ones in the signature.

    Returns
    -------
    list
        Errors created with ``error``:

        * PR01 when signature parameters are not documented.
        * PR02 when documented parameters are not in the signature.
        * PR03 when both contain the same names but in a different order
          (only reported if neither PR01 nor PR02 applies).
    """

    def as_set_text(names):
        # Same ``{'a', 'b'}`` layout that ``str(set)`` produces, but sorted:
        # the iteration order of a set of strings is not stable between
        # interpreter runs, which made the messages non-reproducible.
        return "{" + ", ".join(repr(name) for name in sorted(names)) + "}"

    errs = []
    signature_params = self.signature_parameters
    doc_params = tuple(self.doc_parameters)

    missing = set(signature_params) - set(doc_params)
    if missing:
        errs.append(error("PR01", missing_params=as_set_text(missing)))

    extra = set(doc_params) - set(signature_params)
    if extra:
        errs.append(error("PR02", unknown_params=as_set_text(extra)))

    # Two empty tuples compare equal, so no separate "both empty" guard is
    # needed before reporting an ordering problem.
    if not missing and not extra and signature_params != doc_params:
        errs.append(
            error(
                "PR03",
                actual_params=signature_params,
                documented_params=doc_params,
            )
        )

    return errs
def validate(func_name):
    """
    Validate the docstring.

    Parameters
    ----------
    func_name : function
        Function whose docstring will be evaluated (e.g. pandas.read_csv).

    Returns
    -------
    dict
        A dictionary containing all the information obtained from validating
        the docstring. It has the keys ``type``, ``docstring``,
        ``deprecated``, ``file``, ``file_line`` and ``errors``. When the
        object has no docstring at all, the only error is GL08 and an extra
        ``examples_errors`` key (an empty string) is present.

    Notes
    -----
    The error codes are defined as:
    - First two characters: Section where the error happens:
       * GL: Global (no section, like section ordering errors)
       * SS: Short summary
       * ES: Extended summary
       * PR: Parameters
       * RT: Returns
       * YD: Yields
       * RS: Raises
       * WN: Warns
       * SA: See Also
       * NT: Notes
       * RF: References
       * EX: Examples
    - Last two characters: Numeric error code inside the section

    For example, PR02 is the second codified error in the Parameters section
    (which in this case is assigned to the error when unknown parameters are
    documented).

    The error codes, their corresponding error messages, and the details on how
    they are validated, are not documented more than in the source code of this
    function.
    """
    doc = Docstring(func_name)

    errs = []
    if not doc.raw_doc:
        # Nothing to analyse: report the missing docstring and stop here.
        errs.append(error("GL08"))
        return {
            "type": doc.type,
            "docstring": doc.clean_doc,
            "deprecated": doc.deprecated,
            "file": doc.source_file_name,
            "file_line": doc.source_file_def_line,
            "errors": errs,
            "examples_errors": "",
        }

    # Blank line checks only apply to docstrings spanning several lines.
    if doc.start_blank_lines != 1 and "\n" in doc.raw_doc:
        errs.append(error("GL01"))
    if doc.end_blank_lines != 1 and "\n" in doc.raw_doc:
        errs.append(error("GL02"))
    if doc.double_blank_lines:
        errs.append(error("GL03"))
    for line in doc.raw_doc.splitlines():
        # A tab preceded only by spaces, i.e. a tab used for indentation.
        if re.match("^ *\t", line):
            errs.append(error("GL05", line_with_tabs=line.lstrip()))

    # ``section_titles`` re-reads the whole docstring on every access, so it
    # is evaluated a single time and reused below.
    section_titles = doc.section_titles
    for section in section_titles:
        if section not in ALLOWED_SECTIONS:
            errs.append(
                error(
                    "GL06",
                    section=section,
                    allowed_sections=", ".join(ALLOWED_SECTIONS),
                )
            )

    correct_order = [
        section for section in ALLOWED_SECTIONS if section in section_titles
    ]
    if correct_order != section_titles:
        errs.append(error("GL07", correct_sections=", ".join(correct_order)))

    if doc.deprecated and not doc.extended_summary.startswith(".. deprecated:: "):
        errs.append(error("GL09"))

    directives_without_two_colons = doc.directives_without_two_colons
    if directives_without_two_colons:
        errs.append(error("GL10", directives=directives_without_two_colons))

    summary = doc.summary
    if not summary:
        errs.append(error("SS01"))
    else:
        if summary[0].isalpha() and not summary[0].isupper():
            errs.append(error("SS02"))
        if summary[-1] != ".":
            errs.append(error("SS03"))
        if summary != summary.lstrip():
            errs.append(error("SS04"))
        elif doc.is_function_or_method and summary.split(" ")[0][-1] == "s":
            # First word of the summary ends in "s".
            errs.append(error("SS05"))
        if doc.num_summary_lines > 1:
            errs.append(error("SS06"))

    if not doc.extended_summary:
        errs.append(("ES01", "No extended summary found"))

    # PR01: Parameters not documented
    # PR02: Unknown parameters
    # PR03: Wrong parameters order
    errs += doc.parameter_mismatches

    common_type_errors = (
        ("integer", "int"),
        ("boolean", "bool"),
        ("string", "str"),
    )
    for param in doc.doc_parameters:
        if not param.startswith("*"):  # Check can ignore var / kwargs
            param_type = doc.parameter_type(param)
            if not param_type:
                if ":" in param:
                    errs.append(error("PR10", param_name=param.split(":")[0]))
                else:
                    errs.append(error("PR04", param_name=param))
            else:
                if param_type[-1] == ".":
                    errs.append(error("PR05", param_name=param))
                for wrong_type, right_type in common_type_errors:
                    if wrong_type in param_type:
                        errs.append(
                            error(
                                "PR06",
                                param_name=param,
                                right_type=right_type,
                                wrong_type=wrong_type,
                            )
                        )
        # The description is checked for every parameter, starred or not.
        param_desc = doc.parameter_desc(param)
        if not param_desc:
            errs.append(error("PR07", param_name=param))
        else:
            if param_desc[0].isalpha() and not param_desc[0].isupper():
                errs.append(error("PR08", param_name=param))
            if param_desc[-1] != ".":
                errs.append(error("PR09", param_name=param))

    if doc.is_function_or_method:
        returns = doc.returns
        if not returns:
            if doc.method_returns_something:
                errs.append(error("RT01"))
        else:
            if len(returns) == 1 and returns[0].name:
                errs.append(error("RT02"))
            for _name_or_type, _type, desc_lines in returns:
                # Join first: a description made only of an empty line would
                # pass a truthiness test on the list and then fail when its
                # first character is inspected.
                desc = " ".join(desc_lines)
                if not desc:
                    errs.append(error("RT03"))
                else:
                    if desc[0].isalpha() and not desc[0].isupper():
                        errs.append(error("RT04"))
                    if not desc.endswith("."):
                        errs.append(error("RT05"))

        # Plain substring search on the source text of the object.
        if not doc.yields and "yield" in doc.method_source:
            errs.append(error("YD01"))

    see_also = doc.see_also
    if not see_also:
        errs.append(error("SA01"))
    else:
        for rel_name, rel_desc in see_also.items():
            if rel_desc:
                if not rel_desc.endswith("."):
                    errs.append(error("SA02", reference_name=rel_name))
                if rel_desc[0].isalpha() and not rel_desc[0].isupper():
                    errs.append(error("SA03", reference_name=rel_name))
            else:
                errs.append(error("SA04", reference_name=rel_name))

    if not doc.examples:
        errs.append(error("EX01"))
    return {
        "type": doc.type,
        "docstring": doc.clean_doc,
        "deprecated": doc.deprecated,
        "file": doc.source_file_name,
        "file_line": doc.source_file_def_line,
        "errors": errs,
    }