12
12
./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
13
13
./scripts/validate_rst_title_capitalization.py doc/source/
14
14
15
- Files that cannot be validated (the script crashes while validating them; cause not yet identified):
16
- doc/source/user_guide/io.rst
17
- doc/source/whatsnew/v0.17.1.rst
18
-
19
- Reference: doctree elements
20
- http://epydoc.sourceforge.net/docutils/public/docutils.nodes.Element-class.html
21
-
22
15
"""
23
16
24
17
import argparse
25
18
import sys
26
- from docutils .parsers .rst import Parser
27
- import docutils
28
- from docutils import nodes
29
19
import re
30
20
import os
31
21
from os import walk
32
22
from typing import Generator , List
33
23
34
24
35
class suppress_stdout_stderr:
    '''
    Context manager that silences file descriptors 1 and 2 (stdout/stderr).

    Code source:
    https://stackoverflow.com/questions/11130156/

    The redirection happens at the OS file-descriptor level, so output is
    dropped even when it is written by compiled code that bypasses
    ``sys.stdout`` / ``sys.stderr``. Exceptions raised inside the block are
    not swallowed; they propagate normally once the descriptors have been
    restored.

    Descriptors are acquired in ``__enter__`` and released in ``__exit__``,
    so an instance that is never entered holds no OS resources and the same
    instance can be entered more than once (sequentially, not nested).

    '''
    def __init__(self):
        # Populated by __enter__, emptied again by __exit__.
        self.null_fds = []
        self.save_fds = []

    @staticmethod
    def _flush_python_streams():
        '''
        Flush the Python-level stdout/stderr buffers (best effort).

        Without this, text buffered before the block could be written to the
        null device, and text buffered inside the block could be written to
        the real streams after they are restored.

        '''
        for stream in (sys.stdout, sys.stderr):
            if stream is None:
                continue
            try:
                stream.flush()
            except (ValueError, OSError):
                # Stream already closed or not flushable; nothing to do.
                pass

    def __enter__(self):
        '''
        Point descriptors 1 and 2 at the null device, remembering the
        originals so that __exit__ can restore them.

        '''
        self._flush_python_streams()
        self.null_fds = [os.open(os.devnull, os.O_WRONLY) for _ in range(2)]
        self.save_fds = [os.dup(1), os.dup(2)]
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)

    def __exit__(self, *_):
        '''
        Re-assign the real stdout/stderr back to (1) and (2) and close all
        file descriptors opened by __enter__.

        '''
        self._flush_python_streams()
        try:
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
        finally:
            # Always release the helper descriptors, even if restoring failed.
            for fd in self.null_fds + self.save_fds:
                os.close(fd)
            self.null_fds = []
            self.save_fds = []
74
-
75
-
76
25
# Keynames that would not follow capitalization convention
77
26
CAPITALIZATION_EXCEPTIONS = {
78
27
'pandas' , 'Python' , 'IPython' , 'PyTables' , 'Excel' , 'JSON' ,
@@ -88,12 +37,6 @@ def __exit__(self, *_):
88
37
# Key: Document Directory, Value: Pair(Bad Title, Line Number)
89
38
bad_title_dict = {}
90
39
91
- # List of problematic tags that are exceptions to parent rule
92
- list_of_markers = {'emphasis' , 'strong' , 'reference' , 'literal' }
93
-
94
- # List of files that, when validated, cause the program to crash
95
- cannot_validate = ['doc/source/user_guide/io.rst' , 'doc/source/whatsnew/v0.17.1.rst' ]
96
-
97
40
# Error Message:
98
41
err_msg = 'Heading capitalization formatted incorrectly. Please correctly capitalize'
99
42
@@ -152,118 +95,18 @@ def follow_capitalization_convention(title: str) -> bool:
152
95
return True
153
96
154
97
155
- def find_line_number ( node : docutils . nodes ) -> int :
98
def findTitles(rst_file: str) -> tuple:
    '''
    Algorithm to identify particular text that should be considered headings in an
    RST file

    A line is reported as a heading when the line directly below it is a
    non-empty "adornment" line: one of the characters ``* = - ^ ~ # "``
    repeated, at least as long as the heading text itself. Headings that
    also carry a matching overline satisfy the same test, so no separate
    overline handling is required.

    See <https://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html> for details
    on what constitutes a string as a heading in RST

    Parameters
    ----------
    rst_file : str
        RST file to scan through for headings

    Returns
    -------
    title_list : List[str]
        Heading texts with the inline-markup characters ``*``, backtick and
        ``_`` removed.
    line_number_list : List[int]
        1-based line number of each heading text, parallel to title_list.

    '''

    # Characters accepted as section adornments (underline / overline)
    adornment_chars = frozenset('*=-^~#"')

    # '*`_' markers are removed from original string text.
    table = str.maketrans("", "", '*`_')

    # title_list is the list of headings that is encountered in the file
    title_list: List[str] = []

    # List of line numbers that corresponding headings in title_list can be found at
    line_number_list: List[int] = []

    # Read the whole file and release the handle immediately.
    # NOTE(review): UTF-8 is assumed, as is conventional for Sphinx sources.
    with open(rst_file, "r", encoding="utf-8") as rst:
        lines = rst.read().split('\n')

    # Walk over (heading candidate, possible underline) pairs of adjacent lines
    for lineno in range(1, len(lines)):
        underline = lines[lineno]
        candidate = lines[lineno - 1]

        # Both lines must have content for the pair to form a heading
        if not underline or not candidate:
            continue

        # The lower line must be one adornment character repeated
        if len(set(underline)) != 1 or underline[0] not in adornment_chars:
            continue

        # An underline shorter than its text does not count as a heading
        if len(underline) >= len(candidate):
            title_list.append(candidate.translate(table))
            # 0-based index of the underline == 1-based line of the heading
            line_number_list.append(lineno)

    return title_list, line_number_list
335
158
336
159
@@ -345,19 +168,12 @@ def fill_bad_title_dict(rst_file: str) -> None:
345
168
346
169
'''
347
170
348
- # Ensure file isn't one that causes the code to crash
349
- if rst_file in cannot_validate :
350
- return
351
-
352
171
# Ensure this file doesn't already have a bad_title_dict slot
353
172
if rst_file in bad_title_dict :
354
173
return
355
174
356
- # Parse rst_file with an RST parser
357
- document = parse_RST (rst_file )
358
-
359
- # Make a list of headings along with their line numbers from document tree
360
- title_list , line_number_list = find_titles_in_doctree (document )
175
+ # Make a list of headings along with their line numbers
176
+ title_list , line_number_list = findTitles (rst_file )
361
177
362
178
# Append the bad_title_dict if the capitalization convention not followed
363
179
for i in range (len (title_list )):
@@ -424,9 +240,9 @@ def main(source_paths: List[str], output_format: str) -> bool:
424
240
directory_list = find_rst_files (source_paths )
425
241
426
242
# Fill the bad_title_dict, which contains all incorrectly capitalized headings
427
- with suppress_stdout_stderr ():
428
- for filename in directory_list :
429
- fill_bad_title_dict (filename )
243
+ # with suppress_stdout_stderr():
244
+ for filename in directory_list :
245
+ fill_bad_title_dict (filename )
430
246
431
247
# Return an exit status of 0 if there are no bad titles in the dictionary
432
248
if (len (bad_title_dict ) == 0 ):
0 commit comments