From 13a677f172a839c5b6b9994561d2b1b39bc8f504 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Wed, 3 Jul 2013 15:14:18 -0400
Subject: [PATCH 1/2] refs #50: Catching the SyntaxError and raising a custom
 exception

---
 pydocx/exceptions.py |  2 ++
 pydocx/utils.py      | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 pydocx/exceptions.py

diff --git a/pydocx/exceptions.py b/pydocx/exceptions.py
new file mode 100644
index 00000000..cdff556a
--- /dev/null
+++ b/pydocx/exceptions.py
@@ -0,0 +1,2 @@
+class MalformedDocxException(Exception):
+    """Raised when cElementTree fails to parse a document's XML."""
diff --git a/pydocx/utils.py b/pydocx/utils.py
index 56d4bdec..45beed0b 100644
--- a/pydocx/utils.py
+++ b/pydocx/utils.py
@@ -3,6 +3,8 @@
 from collections import defaultdict
 from xml.etree import cElementTree
 
+from pydocx.exceptions import MalformedDocxException
+
 
 UPPER_ROMAN_TO_HEADING_VALUE = 'h2'
 TAGS_CONTAINING_CONTENT = (
@@ -70,6 +72,14 @@ def _filter_children(element, tags):
 
 
 def remove_namespaces(document):
+    """
+    >>> exception_raised = False
+    >>> try:
+    ...     remove_namespaces('junk')
+    ... except MalformedDocxException:
+    ...     exception_raised = True
+    >>> assert exception_raised
+    """
     encoding_regex = re.compile(
         r'<\?xml.*encoding="(.+?)"',
         re.DOTALL | re.MULTILINE,
@@ -78,7 +88,10 @@ def remove_namespaces(document):
     m = encoding_regex.match(document)
     if m:
         encoding = m.groups(0)[0]
-    root = cElementTree.fromstring(document)
+    try:
+        root = cElementTree.fromstring(document)
+    except SyntaxError:
+        raise MalformedDocxException('This document cannot be converted.')
     for child in el_iter(root):
         child.tag = child.tag.split("}")[1]
         child.attrib = dict(

From 101f5d21eba36b8b5765123eff36d1a7c23d10cb Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Wed, 3 Jul 2013 15:16:13 -0400
Subject: [PATCH 2/2] refs #50: Add CHANGELOG entry for MalformedDocxException

---
 CHANGELOG | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index f1bc6410..63e02ddf 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 
 Changelog
 =========
+* 0.3.3
+    * If `cElementTree` cannot parse the document, a
+      `MalformedDocxException` is now raised instead of a `SyntaxError`.
 * 0.3.2
     * We were not taking into account that vertical merges should have a
       continue attribute, but sometimes they do not, and in those cases word