Skip to content

Commit 043dc87

Browse files
committed
Merge pull request #25 from OpenScienceFramework/issue_25
Ordering bug with deleted text in a list
2 parents 10443ab + c42e88b commit 043dc87

File tree

4 files changed

+113
-12
lines changed

4 files changed

+113
-12
lines changed

pydocx/DocxParser.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@
1313
# http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx
1414
EMUS_PER_PIXEL = 9525
1515
USE_ALIGNMENTS = True
16+
TAGS_CONTAINING_CONTENT = (
17+
't',
18+
'pict',
19+
'drawing',
20+
'delText',
21+
'ins',
22+
)
23+
TAGS_HOLDING_CONTENT_TAGS = (
24+
'p',
25+
'tbl',
26+
)
1627

1728

1829
def remove_namespaces(document): # remove namespaces
@@ -332,17 +343,14 @@ def _set_headers(self, elements):
332343
element.heading_level = headers[style.lower()]
333344

334345
def _set_next(self, body):
335-
def _get_children(el):
346+
def _get_children_with_content(el):
336347
# We only care about children that contain content, i.e. that have a descendant whose tag is listed in TAGS_CONTAINING_CONTENT (not just text).
337348
children = []
338-
for child in self._filter_children(el, ['p', 'tbl']):
339-
has_descendant_with_tag = False
340-
if child.has_descendant_with_tag('t'):
341-
has_descendant_with_tag = True
342-
if child.has_descendant_with_tag('pict'):
343-
has_descendant_with_tag = True
344-
if child.has_descendant_with_tag('drawing'):
345-
has_descendant_with_tag = True
349+
for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS):
350+
has_descendant_with_tag = any(
351+
child.has_descendant_with_tag(tag) for
352+
tag in TAGS_CONTAINING_CONTENT
353+
)
346354
if has_descendant_with_tag:
347355
children.append(child)
348356
return children
@@ -361,11 +369,11 @@ def _assign_next(children):
361369
except IndexError:
362370
pass
363371
# Assign next for everything in the root.
364-
_assign_next(_get_children(body))
372+
_assign_next(_get_children_with_content(body))
365373

366374
# In addition set next for everything in table cells.
367375
for tc in body.find_all('tc'):
368-
_assign_next(_get_children(tc))
376+
_assign_next(_get_children_with_content(tc))
369377

370378
def parse_begin(self, el):
371379
self._set_list_attributes(el)

pydocx/tests/document_builder.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pydocx.DocxParser import EMUS_PER_PIXEL
33

44
templates = {
5+
'delete': 'text_delete.xml',
56
'drawing': 'drawing.xml',
67
'hyperlink': 'hyperlink.xml',
78
'insert': 'insert.xml',
@@ -78,6 +79,14 @@ def insert_tag(self, run_tags):
7879
}
7980
return template.render(**kwargs)
8081

82+
@classmethod
83+
def delete_tag(self, deleted_texts):
84+
template = env.get_template(templates['delete'])
85+
kwargs = {
86+
'deleted_texts': deleted_texts,
87+
}
88+
return template.render(**kwargs)
89+
8190
@classmethod
8291
def smart_tag(self, run_tags):
8392
template = env.get_template(templates['smartTag'])
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<w:del w:id="12" w:author="mfiem" w:date="2008-02-27T06:48:00Z">
2+
{% for deleted_text in deleted_texts %}
3+
<w:r w:rsidDel="005D3333">
4+
<w:rPr>
5+
<w:rFonts w:ascii="Times New Roman" w:hAnsi="Times New Roman"/>
6+
</w:rPr>
7+
<w:delText>{{ deleted_text }}</w:delText>
8+
</w:r>
9+
{% endfor %}
10+
</w:del>

pydocx/tests/test_xml.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,80 @@ def get_xml(self):
718718
return xml
719719

720720

721+
class DeleteTagInList(_TranslationTestCase):
722+
expected_output = '''
723+
<html><body>
724+
<ol data-list-type="decimal">
725+
<li>AAA<br/>
726+
<span class='delete' author='' date=''>BBB</span>
727+
</li>
728+
<li>CCC</li>
729+
</ol>
730+
</body></html>
731+
'''
732+
733+
def get_xml(self):
734+
delete_tags = DXB.delete_tag(['BBB'])
735+
p_tag = DXB.p_tag([delete_tags])
736+
737+
body = DXB.li(text='AAA', ilvl=0, numId=0)
738+
body += p_tag
739+
body += DXB.li(text='CCC', ilvl=0, numId=0)
740+
741+
xml = DXB.xml(body)
742+
return xml
743+
744+
745+
class InsertTagInList(_TranslationTestCase):
746+
expected_output = '''
747+
<html><body>
748+
<ol data-list-type="decimal">
749+
<li>AAA<br/>
750+
<span class='insert' author='' date=''>BBB</span>
751+
</li>
752+
<li>CCC</li>
753+
</ol>
754+
</body></html>
755+
'''
756+
757+
def get_xml(self):
758+
run_tags = [DXB.r_tag(i) for i in 'BBB']
759+
insert_tags = DXB.insert_tag(run_tags)
760+
p_tag = DXB.p_tag([insert_tags])
761+
762+
body = DXB.li(text='AAA', ilvl=0, numId=0)
763+
body += p_tag
764+
body += DXB.li(text='CCC', ilvl=0, numId=0)
765+
766+
xml = DXB.xml(body)
767+
return xml
768+
769+
770+
class SmartTagInList(_TranslationTestCase):
771+
expected_output = '''
772+
<html><body>
773+
<ol data-list-type="decimal">
774+
<li>AAA<br/>
775+
BBB
776+
</li>
777+
<li>CCC</li>
778+
</ol>
779+
</body></html>
780+
'''
781+
782+
def get_xml(self):
783+
run_tags = [DXB.r_tag(i) for i in 'BBB']
784+
smart_tag = DXB.smart_tag(run_tags)
785+
p_tag = DXB.p_tag([smart_tag])
786+
787+
body = DXB.li(text='AAA', ilvl=0, numId=0)
788+
body += p_tag
789+
body += DXB.li(text='CCC', ilvl=0, numId=0)
790+
791+
xml = DXB.xml(body)
792+
return xml
793+
794+
721795
class SingleListItem(_TranslationTestCase):
722796
expected_output = '''
723797
<html><body>
@@ -767,6 +841,7 @@ def get_xml(self):
767841
[DXB.p_tag('Fourth')],
768842
), merge=True)
769843
body = table
844+
770845
xml = DXB.xml(body)
771846
return xml
772847

@@ -793,7 +868,6 @@ def get_xml(self):
793868
for text, ilvl, numId in li_text:
794869
lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
795870
body = lis
796-
797871
xml = DXB.xml(body)
798872
return xml
799873

0 commit comments

Comments
 (0)