Skip to content

Commit 48e31bb

Browse files
committed
PyCQA#129 - Adding a more heuristic algorithm for checking section headers
1 parent eac1caf commit 48e31bb

File tree

1 file changed

+101
-82
lines changed

1 file changed

+101
-82
lines changed

src/pydocstyle.py

Lines changed: 101 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,8 @@ def to_rst(cls):
701701
'at the first line')
702702
D213 = D2xx.create_error('D213', 'Multi-line docstring summary should start '
703703
'at the second line')
704+
D214 = D2xx.create_error('D214', 'Section or section underline is '
705+
'over-indented', 'in section %r')
704706

705707
D3xx = ErrorRegistry.create_group('D3', 'Quotes Issues')
706708
D300 = D3xx.create_error('D300', 'Use """triple double quotes"""',
@@ -719,8 +721,10 @@ def to_rst(cls):
719721
'properly capitalized', '%r, not %r')
720722
D404 = D4xx.create_error('D404', 'Section name should be properly capitalized',
721723
'%r, not %r')
722-
D405 = D4xx.create_error('D405', 'Section underline should match the length of '
723-
'the section\'s name', 'len(%r) == %r, not %r')
724+
D405 = D4xx.create_error('D405', 'Section name should not end with a colon',
725+
'%r, not %r')
726+
D406 = D4xx.create_error('D406', 'Section underline should match the length of '
727+
'the section\'s name', 'len(%r) == %r')
724728

725729
class AttrDict(dict):
726730
def __getattr__(self, item):
@@ -1281,7 +1285,7 @@ def check(filenames, select=None, ignore=None):
12811285
try:
12821286
with tokenize_open(filename) as file:
12831287
source = file.read()
1284-
for error in PEP257Checker().check_source(source, filename):
1288+
for error in ConventionChecker().check_source(source, filename):
12851289
code = getattr(error, 'code', None)
12861290
if code in checked_codes:
12871291
yield error
@@ -1371,7 +1375,7 @@ def decorator(f):
13711375
return decorator
13721376

13731377

1374-
class PEP257Checker(object):
1378+
class ConventionChecker(object):
13751379
"""Checker for PEP 257.
13761380
13771381
D10x: Missing docstrings
@@ -1381,13 +1385,27 @@ class PEP257Checker(object):
13811385
13821386
"""
13831387

1388+
ALL_NUMPY_SECTIONS = ['Short Summary',
1389+
'Extended Summary',
1390+
'Parameters',
1391+
'Returns',
1392+
'Yields',
1393+
'Other Parameters',
1394+
'Raises',
1395+
'See Also',
1396+
'Notes',
1397+
'References',
1398+
'Examples',
1399+
'Attributes',
1400+
'Methods']
1401+
13841402
def check_source(self, source, filename):
13851403
module = parse(StringIO(source), filename)
13861404
for definition in module:
13871405
for check in self.checks:
13881406
terminate = False
13891407
if isinstance(definition, check._check_for):
1390-
error = check(None, definition, definition.docstring)
1408+
error = check(self, definition, definition.docstring)
13911409
errors = error if hasattr(error, '__iter__') else [error]
13921410
for error in errors:
13931411
if error is not None:
@@ -1516,6 +1534,13 @@ def check_blank_after_summary(self, definition, docstring):
15161534
if blanks_count != 1:
15171535
return D205(blanks_count)
15181536

1537+
@staticmethod
1538+
def _get_docstring_indent(definition, docstring):
1539+
"""Return the indentation of the docstring's opening quotes."""
1540+
before_docstring, _, _ = definition.source.partition(docstring)
1541+
_, _, indent = before_docstring.rpartition('\n')
1542+
return indent
1543+
15191544
@check_for(Definition)
15201545
def check_indent(self, definition, docstring):
15211546
"""D20{6,7,8}: The entire docstring should be indented same as code.
@@ -1525,8 +1550,7 @@ def check_indent(self, definition, docstring):
15251550
15261551
"""
15271552
if docstring:
1528-
before_docstring, _, _ = definition.source.partition(docstring)
1529-
_, _, indent = before_docstring.rpartition('\n')
1553+
indent = self._get_docstring_indent(definition, docstring)
15301554
lines = docstring.split('\n')
15311555
if len(lines) > 1:
15321556
lines = lines[1:] # First line does not need indent.
@@ -1714,99 +1738,90 @@ def SKIP_check_return_type(self, function, docstring):
17141738
if 'return' not in docstring.lower():
17151739
return Error()
17161740

1717-
@check_for(Function)
1718-
def check_numpy(self, function, docstring):
1741+
@check_for(Definition)
1742+
def check_numpy_content(self, definition, docstring):
1743+
"""Check the content of the docstring for numpy conventions."""
1744+
pass
1745+
1746+
@check_for(Definition)
def check_numpy(self, definition, docstring):
    """D214, D404, D405, D406: Check numpy-style section headers.

    A line after the summary line is treated as a section header when,
    ignoring case and surrounding whitespace, it equals one of
    ``ALL_NUMPY_SECTIONS`` (optionally followed by a colon) *and* the
    line right after it consists solely of dashes.  For every such header
    the following errors may be yielded:

    * D214 - the header or its underline is indented deeper than the
      docstring's opening quotes.
    * D404 - the section name is not capitalized as in
      ``ALL_NUMPY_SECTIONS``.
    * D405 - the section name ends with a colon.
    * D406 - the underline is not exactly as long as the section name.

    Nothing is yielded for an empty or single-line docstring.

    """
    if not docstring:
        return

    lines = docstring.split("\n")
    if len(lines) < 2:
        # Not a multi-line docstring, so it cannot contain sections.
        return

    lines_generator = ScrollableGenerator(lines[1:])  # Skipping first line
    indent = self._get_docstring_indent(definition, docstring)

    for line in lines_generator:
        stripped = line.strip()
        lowered = stripped.lower()

        for section in self.ALL_NUMPY_SECTIONS:
            with_colon = section.lower() + ':'
            if lowered not in (section.lower(), with_colon):
                continue

            # The line looks like a section name; only the following
            # line can tell whether it really is a header.
            try:
                next_line = lines_generator.next()
            except StopIteration:
                # Last line of the docstring - it cannot be underlined,
                # so it is not a section header.
                return

            underline = next_line.strip()
            if set(underline) == set('-'):
                # The next line contains only dashes, so this is very
                # likely a numpy section header.
                if (leading_space(line) > indent or
                        leading_space(next_line) > indent):
                    yield D214(section)

                if section not in line:
                    yield D404(section, stripped)

                # Checked independently of D404 so that a header such as
                # "parameters:" reports both problems.
                if lowered == with_colon:
                    yield D405(section, stripped)

                if underline != "-" * len(section):
                    yield D406(section, len(section))
            else:
                # The next line does not contain only dashes, so this is
                # not a section header.  Hand the line back so that it is
                # examined as a potential header itself.
                lines_generator.scroll_back()

            # A line can match at most one section name.
            break
17591794

17601795

1761-
class DocstringStream(object):
1762-
"""Reads numpy conventions."""
1796+
class ScrollableGenerator(object):
    """An iterator over a list-like object that can be moved back.

    The object is its own iterator and supports both the Python 2
    (``next``) and the Python 3 (``__next__``) iterator protocols.  Items
    are looked up by integer index, so ``list_like`` must support
    ``list_like[i]`` and raise ``IndexError`` past its end.

    """

    def __init__(self, list_like):
        """Start iterating over `list_like` from its first item."""
        self._list_like = list_like
        # Index of the item that the next call to `next` will return.
        self._index = 0

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def __next__(self):
        """Generate the next item or raise StopIteration."""
        try:
            item = self._list_like[self._index]
        except IndexError:
            # The position is deliberately left untouched here, so that
            # `scroll_back` after exhaustion re-yields the last item(s).
            raise StopIteration()
        self._index += 1
        return item

    # Python 2 iterator protocol; also used by explicit `.next()` callers.
    next = __next__

    def scroll_back(self, num=1):
        """Move the generator `num` items backwards.

        Scrolling back past the first item stops at the first item.
        Raises ValueError if `num` is negative.

        """
        if num < 0:
            raise ValueError('num cannot be a negative number')
        self._index = max(0, self._index - num)

    def clone(self):
        """Return a copy of the generator set to the same item index.

        The copy shares the underlying list-like object with the original
        but advances independently of it.

        """
        obj_copy = self.__class__(self._list_like)
        obj_copy._index = self._index
        return obj_copy
18101825

18111826

18121827
def main(use_pep257=False):
@@ -1823,7 +1838,11 @@ def foo():
18231838
"""A.
18241839
18251840
Parameters
1826-
---------
1841+
----------
1842+
1843+
This is a string that defines some things, such as the following
1844+
parameters
1845+
a, b, d.
18271846
"""
18281847

18291848
if __name__ == '__main__':

0 commit comments

Comments
 (0)