Skip to content

Ordering bug with deleted text in a list #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 21, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions pydocx/DocxParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
# http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx
EMUS_PER_PIXEL = 9525
USE_ALIGNMENTS = True
# Tag names that count as "content": _set_next keeps a child element only if
# it has at least one descendant with one of these tags.
TAGS_CONTAINING_CONTENT = (
't',
'pict',
'drawing',
'delText',
'ins',
)
# Tag names of the child elements that _set_next inspects for content.
TAGS_HOLDING_CONTENT_TAGS = (
'p',
'tbl',
)


def remove_namespaces(document): # remove namespaces
Expand Down Expand Up @@ -332,17 +343,14 @@ def _set_headers(self, elements):
element.heading_level = headers[style.lower()]

def _set_next(self, body):
def _get_children(el):
def _get_children_with_content(el):
# We only care about children if they have text in them.
children = []
for child in self._filter_children(el, ['p', 'tbl']):
has_descendant_with_tag = False
if child.has_descendant_with_tag('t'):
has_descendant_with_tag = True
if child.has_descendant_with_tag('pict'):
has_descendant_with_tag = True
if child.has_descendant_with_tag('drawing'):
has_descendant_with_tag = True
for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS):
has_descendant_with_tag = any(
child.has_descendant_with_tag(tag) for
tag in TAGS_CONTAINING_CONTENT
)
if has_descendant_with_tag:
children.append(child)
return children
Expand All @@ -361,11 +369,11 @@ def _assign_next(children):
except IndexError:
pass
# Assign next for everything in the root.
_assign_next(_get_children(body))
_assign_next(_get_children_with_content(body))

# In addition set next for everything in table cells.
for tc in body.find_all('tc'):
_assign_next(_get_children(tc))
_assign_next(_get_children_with_content(tc))

def parse_begin(self, el):
self._set_list_attributes(el)
Expand Down
9 changes: 9 additions & 0 deletions pydocx/tests/document_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pydocx.DocxParser import EMUS_PER_PIXEL

templates = {
'delete': 'text_delete.xml',
'drawing': 'drawing.xml',
'hyperlink': 'hyperlink.xml',
'insert': 'insert.xml',
Expand Down Expand Up @@ -78,6 +79,14 @@ def insert_tag(self, run_tags):
}
return template.render(**kwargs)

@classmethod
def delete_tag(self, deleted_texts):
    """Render the template registered under ``templates['delete']``.

    ``deleted_texts`` is passed to the template unchanged as the
    ``deleted_texts`` variable; the rendered markup is returned.
    """
    delete_template = env.get_template(templates['delete'])
    return delete_template.render(deleted_texts=deleted_texts)

@classmethod
def smart_tag(self, run_tags):
template = env.get_template(templates['smartTag'])
Expand Down
10 changes: 10 additions & 0 deletions pydocx/tests/templates/text_delete.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<w:del w:id="12" w:author="mfiem" w:date="2008-02-27T06:48:00Z">
{% for deleted_text in deleted_texts %}
<w:r w:rsidDel="005D3333">
<w:rPr>
<w:rFonts w:ascii="Times New Roman" w:hAnsi="Times New Roman"/>
</w:rPr>
<w:delText>{{ deleted_text }}</w:delText>
</w:r>
{% endfor %}
</w:del>
76 changes: 75 additions & 1 deletion pydocx/tests/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,80 @@ def get_xml(self):
return xml


class DeleteTagInList(_TranslationTestCase):
    """A paragraph holding only deleted text, placed between two list items."""

    expected_output = '''
<html><body>
<ol data-list-type="decimal">
<li>AAA<br/>
<span class='delete' author='' date=''>BBB</span>
</li>
<li>CCC</li>
</ol>
</body></html>
'''

    def get_xml(self):
        """Build the document: list item, deleted-text paragraph, list item."""
        deleted_paragraph = DXB.p_tag([DXB.delete_tag(['BBB'])])
        first_item = DXB.li(text='AAA', ilvl=0, numId=0)
        last_item = DXB.li(text='CCC', ilvl=0, numId=0)
        return DXB.xml(first_item + deleted_paragraph + last_item)


class InsertTagInList(_TranslationTestCase):
expected_output = '''
<html><body>
<ol data-list-type="decimal">
<li>AAA<br/>
<span class='insert' author='' date=''>BBB</span>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are the author and date attributes coming from? Those aren't valid HTML attributes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I'm surprised by the <br /> here. If I had change tracking on and typed "BBB" directly after the AAA, I would actually be trying to type AAABBB on one line. If I typed " BBB" (with a leading space), I would be trying to type AAA BBB on the same line. Is there something I'm not understanding about the way Word records tracked changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is existing behaviour and has nothing to do with this ticket. #30 will deal with that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yuck. "Existing behavior" is not a great justification for adding another bug, but I understand wanting to break it up. Seems odd to do for such a small ticket, though. Your call, so this would be done if you're cool with it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not certain how complicated it will be, and it would be much simpler to handle it as a separate issue.

</li>
<li>CCC</li>
</ol>
</body></html>
'''

def get_xml(self):
run_tags = [DXB.r_tag(i) for i in 'BBB']
insert_tags = DXB.insert_tag(run_tags)
p_tag = DXB.p_tag([insert_tags])

body = DXB.li(text='AAA', ilvl=0, numId=0)
body += p_tag
body += DXB.li(text='CCC', ilvl=0, numId=0)

xml = DXB.xml(body)
return xml


class SmartTagInList(_TranslationTestCase):
    """A paragraph holding only a smart tag, placed between two list items."""

    expected_output = '''
<html><body>
<ol data-list-type="decimal">
<li>AAA<br/>
BBB
</li>
<li>CCC</li>
</ol>
</body></html>
'''

    def get_xml(self):
        """Build the document: list item, smart-tag paragraph, list item."""
        # One run per character of 'BBB', all wrapped in a single smart tag.
        letter_runs = list(map(DXB.r_tag, 'BBB'))
        smart_paragraph = DXB.p_tag([DXB.smart_tag(letter_runs)])
        first_item = DXB.li(text='AAA', ilvl=0, numId=0)
        last_item = DXB.li(text='CCC', ilvl=0, numId=0)
        return DXB.xml(first_item + smart_paragraph + last_item)


class SingleListItem(_TranslationTestCase):
expected_output = '''
<html><body>
Expand Down Expand Up @@ -767,6 +841,7 @@ def get_xml(self):
[DXB.p_tag('Fourth')],
), merge=True)
body = table

xml = DXB.xml(body)
return xml

Expand All @@ -793,7 +868,6 @@ def get_xml(self):
for text, ilvl, numId in li_text:
lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
body = lis

xml = DXB.xml(body)
return xml

Expand Down