From 13a677f172a839c5b6b9994561d2b1b39bc8f504 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 3 Jul 2013 15:14:18 -0400 Subject: [PATCH 1/2] refs #50: Catching the SyntaxError and raising a custom exception --- pydocx/exceptions.py | 2 ++ pydocx/utils.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 pydocx/exceptions.py diff --git a/pydocx/exceptions.py b/pydocx/exceptions.py new file mode 100644 index 00000000..cdff556a --- /dev/null +++ b/pydocx/exceptions.py @@ -0,0 +1,2 @@ +class MalformedDocxException(Exception): + pass diff --git a/pydocx/utils.py b/pydocx/utils.py index 56d4bdec..45beed0b 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -3,6 +3,8 @@ from collections import defaultdict from xml.etree import cElementTree +from pydocx.exceptions import MalformedDocxException + UPPER_ROMAN_TO_HEADING_VALUE = 'h2' TAGS_CONTAINING_CONTENT = ( @@ -70,6 +72,14 @@ def _filter_children(element, tags): def remove_namespaces(document): + """ + >>> exception_raised = False + >>> try: + ... remove_namespaces('junk') + ... except MalformedDocxException: + ... exception_raised = True + >>> assert exception_raised + """ encoding_regex = re.compile( r'<\?xml.*encoding="(.+?)"', re.DOTALL | re.MULTILINE, @@ -78,7 +88,10 @@ def remove_namespaces(document): m = encoding_regex.match(document) if m: encoding = m.groups(0)[0] - root = cElementTree.fromstring(document) + try: + root = cElementTree.fromstring(document) + except SyntaxError: + raise MalformedDocxException('This document cannot be converted.') for child in el_iter(root): child.tag = child.tag.split("}")[1] child.attrib = dict( From 101f5d21eba36b8b5765123eff36d1a7c23d10cb Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 3 Jul 2013 15:16:13 -0400 Subject: [PATCH 2/2] refs #50: update note --- CHANGELOG | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index f1bc6410..63e02ddf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ Changelog ========= +* 0.3.3 + * In the event that `cElementTree` has a problem parsing the document, a + `MalformedDocxException` is raised instead of a `SyntaxError` * 0.3.2 * We were not taking into account that vertical merges should have a continue attribute, but sometimes they do not, and in those cases word