diff --git a/.gitignore b/.gitignore
index ce7a7cef..40efeefd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,6 @@ pip-log.txt
nosetests.xml
*.mo
.idea
+
+test.html
+testxml.html
diff --git a/.travis.yml b/.travis.yml
index 6a5babb4..4251ba15 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,9 +2,13 @@ language: python
python:
- "2.6"
- "2.7"
-script: python main.py
+script: ./run_tests.sh
install:
+ - python setup.py -q install
- pip install -r requirements.txt
+env:
+ - TRAVIS_EXECUTE_PERFORMANCE=1
notifications:
email:
- jason.louard.ward@gmail.com
+ - samson91787@gmail.com
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 00000000..33954f41
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,2 @@
+Sam Portnow
+Jason Ward
diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 00000000..d8aa3f16
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,38 @@
+
+Changelog
+=========
+* 0.3.4
+ * It is possible for `w:t` tags to have `text` set to `None`. This no longer causes an error when escaping that text.
+* 0.3.3
+ * In the event that `cElementTree` has a problem parsing the document, a
+ `MalformedDocxException` is raised instead of a `SyntaxError`
+* 0.3.2
+ * We were not taking into account that vertical merges should have a
+ continue attribute, but sometimes they do not, and in those cases word
+ assumes the continue attribute. We updated the parser to handle the
+ cases in which the continue attribute is not there.
+ * We now correctly handle documents with unicode characters in the
+ namespace.
+ * In rare cases, some text would be output with a style when it should not
+ have been. This issue has been fixed.
+* 0.3.1
+ * Added support for several more OOXML tags including:
+ * caps
+ * smallCaps
+ * strike
+ * dstrike
+ * vanish
+ * webHidden
+ More details in the README.
+* 0.3.0
+ * We switched from using stock *xml.etree.ElementTree* to using
+ *xml.etree.cElementTree*. This has resulted in a fairly significant speed
+ increase for python 2.6
+ * It is now possible to create your own pre processor to do additional pre
+ processing.
+ * Superscripts and subscripts are now extracted correctly.
+* 0.2.1
+ * Added a changelog
+ * Added the version in pydocx.__init__
+ * Fixed an issue with duplicating content if there was indentation or
+ justification on a p element that had multiple t tags.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..88fbbf67
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,7 @@
+include AUTHORS
+include CHANGELOG
+include LICENSE
+include MANIFEST.in
+include README.rst
+include pydocx/fixtures/*
+include pydocx/tests/templates/*
diff --git a/README.md b/README.md
deleted file mode 100644
index e3773551..00000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-pydocx
-======
\ No newline at end of file
diff --git a/README.rst b/README.rst
new file mode 100644
index 00000000..2f750299
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,228 @@
+======
+pydocx
+======
+.. image:: https://travis-ci.org/OpenScienceFramework/pydocx.png?branch=master
+ :align: left
+ :target: https://travis-ci.org/OpenScienceFramework/pydocx
+
+pydocx is a parser that breaks down the elements of a docx file and converts them
+into different markup languages. Right now, HTML is supported. Markdown and LaTeX
+will be available soon. You can extend any of the available parsers to customize them
+to your needs. You can also create your own class that inherits from DocxParser
+to implement a markup language that is not yet supported.
+
+Currently Supported
+###################
+
+* tables
+ * nested tables
+ * rowspans
+ * colspans
+ * lists in tables
+* lists
+ * list styles
+ * nested lists
+ * list of tables
+ * list of paragraphs
+* justification
+* images
+* styles
+ * bold
+ * italics
+ * underline
+ * hyperlinks
+* headings
+
+Usage
+#####
+
+DocxParser includes abstract methods that each parser overrides to satisfy its own needs. The abstract methods are as follows:
+
+::
+
+ class DocxParser:
+
+ @property
+ def parsed(self):
+ return self._parsed
+
+ @property
+ def escape(self, text):
+ return text
+
+ @abstractmethod
+ def linebreak(self):
+ return ''
+
+ @abstractmethod
+ def paragraph(self, text):
+ return text
+
+ @abstractmethod
+ def heading(self, text, heading_level):
+ return text
+
+ @abstractmethod
+ def insertion(self, text, author, date):
+ return text
+
+ @abstractmethod
+ def hyperlink(self, text, href):
+ return text
+
+ @abstractmethod
+ def image_handler(self, path):
+ return path
+
+ @abstractmethod
+ def image(self, path, x, y):
+ return self.image_handler(path)
+
+ @abstractmethod
+ def deletion(self, text, author, date):
+ return text
+
+ @abstractmethod
+ def bold(self, text):
+ return text
+
+ @abstractmethod
+ def italics(self, text):
+ return text
+
+ @abstractmethod
+ def underline(self, text):
+ return text
+
+ @abstractmethod
+ def superscript(self, text):
+ return text
+
+ @abstractmethod
+ def subscript(self, text):
+ return text
+
+ @abstractmethod
+ def tab(self):
+ return True
+
+ @abstractmethod
+ def ordered_list(self, text):
+ return text
+
+ @abstractmethod
+ def unordered_list(self, text):
+ return text
+
+ @abstractmethod
+ def list_element(self, text):
+ return text
+
+ @abstractmethod
+ def table(self, text):
+ return text
+ @abstractmethod
+ def table_row(self, text):
+ return text
+
+ @abstractmethod
+ def table_cell(self, text):
+ return text
+
+ @abstractmethod
+ def page_break(self):
+ return True
+
+ @abstractmethod
+ def indent(self, text, left='', right='', firstLine=''):
+ return text
+
+Docx2Html inherits DocxParser and implements basic HTML handling. Ex.
+
+::
+
+ class Docx2Html(DocxParser):
+
+ # Escape '&', '<', and '>' so we render the HTML correctly
+ def escape(self, text):
+ return xml.sax.saxutils.quoteattr(text)[1:-1]
+
+ # return a line break
+ def linebreak(self, pre=None):
+ return '<br />'
+
+ # add paragraph tags
+ def paragraph(self, text, pre=None):
+ return '<p>' + text + '</p>'
+
+
+However, let's say you want to add a specific style to your HTML document. To do this, you want to give each paragraph the class `my_implementation`. Simply extend Docx2Html and override what you need.
+
+::
+
+ class My_Implementation_of_Docx2Html(Docx2Html):
+
+ def paragraph(self, text, pre = None):
+ return '<p class="my_implementation">' + text + '</p>'
+
+
+
+OR, let's say FOO is your new favorite markup language. Simply write your own new parser, overriding the abstract methods of DocxParser
+
+::
+
+ class Docx2Foo(DocxParser):
+
+ # because linebreaks are denoted by '!!!!!!!!!!!!' in the FOO markup language :)
+ def linebreak(self):
+ return '!!!!!!!!!!!!'
+
+Custom Pre-Processor
+####################
+
+When creating your own Parser (as described above) you can now add in your own custom Pre Processor. To do so you will need to set the `pre_processor_class` field on the custom parser, like so:
+
+::
+
+ class Docx2Foo(DocxParser):
+ pre_processor_class = FooPrePorcessor
+
+
+The `FooPrePorcessor` will need a few things to get you going:
+
+::
+
+ class FooPrePorcessor(PydocxPrePorcessor):
+ def perform_pre_processing(self, root, *args, **kwargs):
+ super(FooPrePorcessor, self).perform_pre_processing(root, *args, **kwargs)
+ self._set_foo(root)
+
+ def _set_foo(self, root):
+ pass
+
+If you want `_set_foo` to be called you must add it to `perform_pre_processing` which is called in the base parser for pydocx.
+
+Everything done during pre-processing is executed prior to `parse` being called for the first time.
+
+
+Styles
+######
+
+The base parser `Docx2Html` relies on certain CSS classes being set for certain behaviour to occur. Currently these include:
+
+* class `pydocx-insert` -> Turns the text green.
+* class `pydocx-delete` -> Turns the text red and draws a line through the text.
+* class `pydocx-center` -> Aligns the text to the center.
+* class `pydocx-right` -> Aligns the text to the right.
+* class `pydocx-left` -> Aligns the text to the left.
+* class `pydocx-comment` -> Turns the text blue.
+* class `pydocx-underline` -> Underlines the text.
+* class `pydocx-caps` -> Makes all text uppercase.
+* class `pydocx-small-caps` -> Makes all text uppercase, however truly lowercase letters will be smaller than their uppercase counterparts.
+* class `pydocx-strike` -> Strike a line through.
+* class `pydocx-hidden` -> Hide the text.
+
+Optional Arguments
+##################
+
+You can pass in `convert_root_level_upper_roman=True` to the parser and it will convert all root level upper roman lists to headings instead.
diff --git a/main.py b/main.py
deleted file mode 100644
index c9e8e1d4..00000000
--- a/main.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from pydocx import *
-from bs4 import BeautifulSoup
-import xml.etree.ElementTree as ElementTree
-#import lxml.etree as etree
-
-with open('test.html', 'w') as f:
- f.write(docx2html('helloworld.docx'))
-with open('testxml.html','w') as f:
- f.write(BeautifulSoup(ElementTree.tostring(Docx2Html('helloworld.docx').root)).prettify())
-
-#print docx2html('helloworld.docx')
-#print docx2markdown('helloworld.docx')
\ No newline at end of file
diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py
index b3006ef0..5364289b 100644
--- a/pydocx/DocxParser.py
+++ b/pydocx/DocxParser.py
@@ -1,323 +1,686 @@
-from abc import abstractmethod, ABCMeta
-import zipfile
import logging
-import xml.etree.ElementTree as ElementTree
-from xml.etree.ElementTree import _ElementInterface
+import os
+import zipfile
+
+from abc import abstractmethod, ABCMeta
+from contextlib import contextmanager
+
+from pydocx.utils import (
+ PydocxPrePorcessor,
+ get_list_style,
+ parse_xml_from_string,
+ find_first,
+ find_all,
+ find_ancestor_with_tag,
+ has_descendant_with_tag,
+)
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("NewParser")
-def remove_namespaces(document):
- root = ElementTree.fromstring(document)
- for child in el_iter(root):
- child.tag = child.tag.split("}")[1]
- child.attrib = dict(
- (k.split("}")[1], v)
- for k, v in child.attrib.items()
- )
- return ElementTree.tostring(root)
-
-# Add some helper functions to Element to make it slightly more readable
-
-
-def has_child(self, tag):
- return True if self.find(tag) is not None else False
+# http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx
+EMUS_PER_PIXEL = 9525
+USE_ALIGNMENTS = True
+JUSTIFY_CENTER = 'center'
+JUSTIFY_LEFT = 'left'
+JUSTIFY_RIGHT = 'right'
-def has_child_all(self, tag):
- return True if self.find('.//' + tag) is not None else False
-
-
-def find_all(self, tag):
- return self.find('.//' + tag)
-
-
-def findall_all(self, tag):
- return self.findall('.//' + tag)
-
-
-def el_iter(el):
- try:
- return el.iter()
- except AttributeError:
- return el.findall('.//*')
+INDENTATION_RIGHT = 'right'
+INDENTATION_LEFT = 'left'
+INDENTATION_FIRST_LINE = 'firstLine'
+DISABLED_VALUES = ['false', '0']
+# Add some helper functions to Element to make it slightly more readable
-setattr(_ElementInterface, 'has_child', has_child)
-setattr(_ElementInterface, 'has_child_all', has_child_all)
-setattr(_ElementInterface, 'find_all', find_all)
-setattr(_ElementInterface, 'findall_all', findall_all)
-setattr(_ElementInterface, 'parent', None)
-setattr(_ElementInterface, 'parent_list', [])
-# End helpers
+@contextmanager
+def ZipFile(path): # This is not needed in python 3.2+
+ f = zipfile.ZipFile(path)
+ yield f
+ f.close()
class DocxParser:
__metaclass__ = ABCMeta
+ pre_processor_class = PydocxPrePorcessor
- def __init__(self, path):
- self._parsed = ''
- self.in_list = False
-
- f = zipfile.ZipFile(path)
- try:
+ def _build_data(self, path, *args, **kwargs):
+ with ZipFile(path) as f:
self.document_text = f.read('word/document.xml')
+ self.styles_text = f.read('word/styles.xml')
try:
+ self.fonts = f.read('/word/fontTable.xml')
+ except KeyError:
+ self.fonts = None
+ try: # Only present if there are lists
self.numbering_text = f.read('word/numbering.xml')
- except zipfile.BadZipfile:
- pass
- try:
+ except KeyError:
+ self.numbering_text = None
+ try: # Only present if there are comments
self.comment_text = f.read('word/comments.xml')
- except zipfile.BadZipfile:
- pass
- finally:
- f.close()
-
- self.root = ElementTree.fromstring(
- remove_namespaces(self.document_text),
- )
-
- def add_parent(el):
- for child in el.getchildren():
- setattr(child, 'parent', el)
- add_parent(child)
- add_parent(self.root)
-
- def create_parent_list(el, tmp=None):
- if tmp is None:
- tmp = []
- for child in el:
- tmp.append(el)
- tmp = create_parent_list(child, tmp)
- el.parent_list = tmp[:]
- try:
- tmp.pop()
- except:
- tmp = []
- return tmp
-
- create_parent_list(self.root)
-
+ except KeyError:
+ self.comment_text = None
+ self.relationship_text = f.read('word/_rels/document.xml.rels')
+ zipped_image_files = [
+ e for e in f.infolist()
+ if e.filename.startswith('word/media/')
+ ]
+ for e in zipped_image_files:
+ self._image_data[e.filename] = f.read(e.filename)
+
+ self.root = parse_xml_from_string(self.document_text)
+ self.numbering_root = None
+ self.expon = ''
+ self.degree = ''
+ if self.numbering_text:
+ self.numbering_root = parse_xml_from_string(self.numbering_text)
+ self.comment_root = None
+ if self.comment_text:
+ self.comment_root = parse_xml_from_string(self.comment_text)
+
+ def _parse_styles(self):
+ tree = parse_xml_from_string(self.styles_text)
+ result = {}
+ for style in find_all(tree, 'style'):
+ style_val = find_first(style, 'name').attrib['val']
+ result[style.attrib['styleId']] = style_val
+ return result
+
+ def _parse_rels_root(self):
+ tree = parse_xml_from_string(self.relationship_text)
+ rels_dict = {}
+ for el in tree:
+ rId = el.get('Id')
+ target = el.get('Target')
+ rels_dict[rId] = target
+ return rels_dict
+
+ def __init__(
+ self,
+ path,
+ convert_root_level_upper_roman=False,
+ *args,
+ **kwargs):
+ self._parsed = ''
+ self.block_text = ''
+ self.page_width = 0
+ self.convert_root_level_upper_roman = convert_root_level_upper_roman
+ self._image_data = {}
+ self._build_data(path, *args, **kwargs)
+ self.pre_processor = None
+
+ #divide by 20 to get to pt (Office works in 20th's of a point)
+ """
+ see http://msdn.microsoft.com/en-us/library/documentformat
+ .openxml.wordprocessing.indentation.aspx
+ """
+ if find_first(self.root, 'pgSz') is not None:
+ self.page_width = int(
+ find_first(self.root, 'pgSz').attrib['w']
+ ) / 20
+
+ #all blank when we init
self.comment_store = None
- self.numbering_store = None
- self.ignore_current = False
- self.elements = []
- self.tables_seen = []
self.visited = []
- try:
- self.numbering_root = ElementTree.fromstring(
- remove_namespaces(self.numbering_text),
- )
- except:
- pass
- self.parse_begin(self.root)
+ self.list_depth = 0
+ self.rels_dict = self._parse_rels_root()
+ self.styles_dict = self._parse_styles()
+ self.parse_begin(self.root) # begin to parse
def parse_begin(self, el):
- self._parsed += self.parse_lists(el)
-
-### parse table function and is_table flag
- def parse_lists(self, el):
- parsed = ''
- first_p = el.find_all('p')
- children = []
- for child in first_p.parent:
- if child.tag == 'p' or child.tag == 'tbl':
- children.append(child)
- p_list = children
- list_started = False
- list_type = ''
- list_chunks = []
- index_start = 0
- index_end = 1
- for i, el in enumerate(p_list):
- if not list_started and el.has_child_all('ilvl'):
- list_started = True
- list_type = self.get_list_style(
- el.find_all('numId').attrib['val'],
- )
- list_chunks.append(p_list[index_start:index_end])
- index_start = i
- index_end = i+1
- elif (
- list_started and
- el.has_child_all('ilvl') and
- not list_type == self.get_list_style(
- el.find_all('numId').attrib['val']
- )):
- list_type = self.get_list_style(
- el.find_all('numId').attrib['val'],
- )
- list_started = True
- list_chunks.append(p_list[index_start:index_end])
- index_start = i
- index_end = i+1
- elif list_started and not el.has_child_all('ilvl'):
- list_started = False
- list_chunks.append(p_list[index_start:index_end])
- index_start = i
- index_end = i+1
- else:
- index_end = i+1
- list_chunks.append(p_list[index_start:index_end])
- for chunk in list_chunks:
- chunk_parsed = ''
- for el in chunk:
- chunk_parsed += self.parse(el)
- if chunk[0].has_child_all('ilvl'):
- lst_style = self.get_list_style(
- chunk[0].find_all('numId').attrib['val'],
- )
- if lst_style['val'] == 'bullet':
- parsed += self.unordered_list(chunk_parsed)
- else:
- parsed += self.ordered_list(chunk_parsed)
- elif chunk[0].has_child_all('br'):
- parsed += self.page_break()
- else:
- parsed += chunk_parsed
-
- return parsed
+ self.pre_processor = self.pre_processor_class(
+ convert_root_level_upper_roman=self.convert_root_level_upper_roman,
+ styles_dict=self.styles_dict,
+ numbering_root=self.numbering_root,
+ )
+ self.pre_processor.perform_pre_processing(el)
+ self._parsed += self.parse(el)
def parse(self, el):
+ if el in self.visited:
+ return ''
+ self.visited.append(el)
parsed = ''
- if not self.ignore_current:
- tmp_d = dict(
- (tmpel.tag, i)
- for i, tmpel in enumerate(el.parent_list)
- )
- if (
- 'tbl' in tmp_d and
- el.parent_list[tmp_d['tbl']] not in self.tables_seen):
- self.ignore_current = True
- self.tables_seen.append(el.parent_list[tmp_d['tbl']])
- tmpout = self.table(self.parse(el.parent_list[tmp_d['tbl']]))
- self.ignore_current = False
- return tmpout
-
for child in el:
+ # recursive. So you can get all the way to the bottom
parsed += self.parse(child)
-
- if el.tag == 'br' and el.attrib['type'] == 'page':
- #TODO figure out what parsed is getting overwritten
- return self.page_break()
- # add it to the list so we don't repeat!
- if el.tag == 'ilvl' and el not in self.visited:
- self.in_list = True
- self.visited.append(el)
- ## This starts the returns
+ if el.tag == 'br' and el.attrib.get('type') == 'page':
+ return self.parse_page_break(el, parsed)
+ elif el.tag == 'tbl':
+ return self.parse_table(el, parsed)
elif el.tag == 'tr':
- return self.table_row(parsed)
+ return self.parse_table_row(el, parsed)
elif el.tag == 'tc':
- self.elements.append(el)
- return self.table_cell(parsed)
- if el.tag == 'r' and el not in self.elements:
- self.elements.append(el)
- return self.parse_r(el)
+ return self.parse_table_cell(el, parsed)
+ elif el.tag == 'm':
+ return self.matrix(parsed)
+ elif el.tag == 'mr':
+ return self.parse_matrix_row(el, parsed)
+ elif el.tag == 'rad':
+ return self.radical(self.degree, self.expon)
+ elif el.tag == 'deg':
+ return self.parse_deg(el, parsed)
+ elif el.tag == 'e':
+ return self.parse_exp(el, parsed)
+ elif el.tag == 'num':
+ return self.num(parsed)
+ elif el.tag == 'den':
+ return self.den(parsed)
+ elif el.tag == 'sup':
+ return self.superscript(parsed)
+ elif el.tag == 'sub':
+ return self.subscript(parsed)
+ elif el.tag == 'r':
+ return self.parse_r(el, parsed)
+ elif el.tag == 't':
+ return self.parse_t(el, parsed)
+ elif el.tag == 'br':
+ return self.parse_break_tag(el, parsed)
+ elif el.tag == 'delText':
+ return self.parse_deletion(el, parsed)
elif el.tag == 'p':
return self.parse_p(el, parsed)
elif el.tag == 'ins':
- return self.insertion(parsed, '', '')
+ return self.parse_insertion(el, parsed)
+ elif el.tag == 'hyperlink':
+ return self.parse_hyperlink(el, parsed)
+ elif el.tag in ('pict', 'drawing'):
+ return self.parse_image(el)
else:
return parsed
- def parse_p(self, el, text):
- parsed = text
- if self.in_list:
- self.in_list = False
- parsed = self.list_element(parsed)
- elif (
- not el.has_child_all('t') and
- 'tbl' not in [i.tag for i in el.parent_list]):
- parsed = self.linebreak()
- elif el.parent not in self.elements:
- parsed = self.paragraph(parsed)
+ def parse_page_break(self, el, text):
+ #TODO figure out what parsed is getting overwritten
+ return self.page_break()
+
+ def parse_table(self, el, text):
+ return self.table(text)
+
+ def parse_table_row(self, el, text):
+ return self.table_row(text)
+
+ def parse_table_cell(self, el, text):
+ v_merge = find_first(el, 'vMerge')
+ if v_merge is not None and (
+ 'restart' != v_merge.get('val', '')):
+ return self.empty_cell()
+ colspan = self.get_colspan(el)
+ rowspan = self._get_rowspan(el, v_merge)
+ if rowspan > 1:
+ rowspan = str(rowspan)
+ else:
+ rowspan = ''
+ return self.table_cell(
+ text, col=colspan, row=rowspan,
+ is_last_row_item=self.pre_processor.is_last_row_item(el),
+ is_list_item=has_descendant_with_tag(el, 'ilvl'))
+
+ def parse_list(self, el, text):
+ """
+ All the meat of building the list is done in _parse_list, however we
+ call this method for two reasons: It is the naming convention we are
+ following. And we need a reliable way to raise and lower the list_depth
+ (which is used to determine if we are in a list). I could have done
+ this in _parse_list, however it seemed cleaner to do it here.
+ """
+ self.list_depth += 1
+ parsed = self._parse_list(el, text)
+ self.list_depth -= 1
+ if self.pre_processor.is_in_table(el):
+ return self.parse_table_cell_contents(el, parsed)
return parsed
- def parse_r(self, el):
- is_deleted = False
- text = None
- if el.has_child('t'):
- text = self.escape(el.find('t').text)
- elif el.has_child('delText'):
- text = self.escape(el.find('delText').text)
- is_deleted = True
- if text:
- rpr = el.find('rPr')
- if rpr is not None:
- fns = []
- if rpr.has_child('b'):
- fns.append(self.bold)
- if rpr.has_child('i'):
- fns.append(self.italics)
- if rpr.has_child('u'):
- fns.append(self.underline)
- for fn in fns:
- text = fn(text)
- ppr = el.parent.find('pPr')
- if ppr is not None:
- jc = ppr.find('jc')
- if jc is not None:
- if jc.attrib['val'] == 'right':
- text = self.right_justify(text)
- if jc.attrib['val'] == 'center':
- text = self.center_justify(text)
- ind = ppr.find('ind')
- if ind is not None:
- right = None
- left = None
- firstLine = None
- if 'right' in ind.attrib:
- right = ind.attrib['right']
- right = int(right)/20
- right = str(right)
- if 'left' in ind.attrib:
- left = ind.attrib['left']
- left = int(left)/20
- left = str(left)
- if 'firstLine' in ind.attrib:
- firstLine = ind.attrib['firstLine']
- firstLine = int(firstLine)/20
- firstLine = str(firstLine)
- text = self.indent(text, right, left, firstLine)
- if is_deleted:
- text = self.deletion(text, '', '')
+ def get_list_style(self, num_id, ilvl):
+ return get_list_style(self.numbering_root, num_id, ilvl)
+
+ def _build_list(self, el, text):
+ # Get the list style for the pending list.
+ lst_style = self.get_list_style(
+ self.pre_processor.num_id(el).num_id,
+ self.pre_processor.ilvl(el),
+ )
+
+ parsed = text
+ # Create the actual list and return it.
+ if lst_style == 'bullet':
+ return self.unordered_list(parsed)
+ else:
+ return self.ordered_list(
+ parsed,
+ lst_style,
+ )
+
+ def _parse_list(self, el, text):
+ parsed = self.parse_list_item(el, text)
+ num_id = self.pre_processor.num_id(el)
+ ilvl = self.pre_processor.ilvl(el)
+ # Everything after this point assumes the first element is not also the
+ # last. If the first element is also the last then early return by
+ # building and returning the completed list.
+ if self.pre_processor.is_last_list_item_in_root(el):
+ return self._build_list(el, parsed)
+ next_el = self.pre_processor.next(el)
+
+ def is_same_list(next_el, num_id, ilvl):
+ # Bail if next_el is not an element
+ if next_el is None:
+ return False
+ if self.pre_processor.is_last_list_item_in_root(next_el):
+ return False
+ # If next_el is not a list item then roll it into the list by
+ # returning True.
+ if not self.pre_processor.is_list_item(next_el):
+ return True
+ if self.pre_processor.num_id(next_el) != num_id:
+ # The next element is a new list entirely
+ return False
+ if self.pre_processor.ilvl(next_el) < ilvl:
+ # The next element is de-indented, so this is really the last
+ # element in the list
+ return False
+ return True
+
+ while is_same_list(next_el, num_id, ilvl):
+ if next_el in self.visited:
+ # Early continue for elements we have already visited.
+ next_el = self.pre_processor.next(next_el)
+ continue
+
+ if self.pre_processor.is_list_item(next_el):
+ # Reset the ilvl
+ ilvl = self.pre_processor.ilvl(next_el)
+
+ parsed += self.parse(next_el)
+ next_el = self.pre_processor.next(next_el)
+
+ def should_parse_last_el(last_el, first_el):
+ if last_el is None:
+ return False
+ # Different list
+ if (
+ self.pre_processor.num_id(last_el) !=
+ self.pre_processor.num_id(first_el)):
+ return False
+ # Will be handled when the ilvls do match (nesting issue)
+ if (
+ self.pre_processor.ilvl(last_el) !=
+ self.pre_processor.ilvl(first_el)):
+ return False
+ # We only care about last items that have not been
+ # parsed before (first list items are
+ # always parsed at the beginning of this method.)
+ return (
+ not self.pre_processor.is_first_list_item(last_el) and
+ self.pre_processor.is_last_list_item_in_root(last_el)
+ )
+ if should_parse_last_el(next_el, el):
+ parsed += self.parse(next_el)
+
+ # If the list has no content, then we don't need to worry about the
+ # list styling, because it will be stripped out.
+ if parsed == '':
+ return parsed
+
+ return self._build_list(el, parsed)
+
+ def justification(self, el, text):
+ paragraph_tag_property = el.find('pPr')
+ if paragraph_tag_property is None:
return text
+
+ _justification = paragraph_tag_property.find('jc')
+ indentation = paragraph_tag_property.find('ind')
+ if _justification is None and indentation is None:
+ return text
+ alignment = None
+ right = None
+ left = None
+ firstLine = None
+ if _justification is not None: # text alignments
+ value = _justification.attrib['val']
+ if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]:
+ alignment = value
+ if indentation is not None:
+ if INDENTATION_RIGHT in indentation.attrib:
+ right = indentation.attrib[INDENTATION_RIGHT]
+ # divide by 20 to get to pt. multiply by (4/3) to get to px
+ right = (int(right) / 20) * float(4) / float(3)
+ right = str(right)
+ if INDENTATION_LEFT in indentation.attrib:
+ left = indentation.attrib[INDENTATION_LEFT]
+ left = (int(left) / 20) * float(4) / float(3)
+ left = str(left)
+ if INDENTATION_FIRST_LINE in indentation.attrib:
+ firstLine = indentation.attrib[INDENTATION_FIRST_LINE]
+ firstLine = (int(firstLine) / 20) * float(4) / float(3)
+ firstLine = str(firstLine)
+ if any([alignment, firstLine, left, right]):
+ return self.indent(
+ text, just=alignment, firstLine=firstLine,
+ left=left, right=right,
+ is_in_table=self.pre_processor.is_in_table(el))
+ return text
+
+ def parse_p(self, el, text):
+ if text == '':
+ return ''
+ # TODO This is still not correct, however it fixes the bug. We need to
+ # apply the classes/styles on p, td, li and h tags instead of inline,
+ # but that is for another ticket.
+ text = self.justification(el, text)
+ if self.pre_processor.is_first_list_item(el):
+ return self.parse_list(el, text)
+ if self.pre_processor.heading_level(el):
+ return self.parse_heading(el, text)
+ if self.pre_processor.is_list_item(el):
+ return self.parse_list_item(el, text)
+ if self.pre_processor.is_in_table(el):
+ return self.parse_table_cell_contents(el, text)
+ parsed = text
+ # No p tags in li tags
+ if el.find('oMathPara') is not None:
+ math = True
else:
+ math = False
+ if self.list_depth == 0:
+ parsed = self.paragraph(parsed, math)
+ return parsed
+
+ def _should_append_break_tag(self, next_el):
+ paragraph_like_tags = [
+ 'p',
+ ]
+ inline_like_tags = [
+ 'smartTag',
+ 'ins',
+ 'delText',
+ ]
+ if self.pre_processor.is_list_item(next_el):
+ return False
+ if self.pre_processor.previous(next_el) is None:
+ return False
+ tag_is_inline_like = any(
+ has_descendant_with_tag(next_el, tag) for
+ tag in inline_like_tags
+ )
+ if tag_is_inline_like:
+ return False
+ if (
+ self.pre_processor.is_last_list_item_in_root(
+ self.pre_processor.previous(next_el))):
+ return False
+ if self.pre_processor.previous(next_el).tag not in paragraph_like_tags:
+ return False
+ if next_el.tag not in paragraph_like_tags:
+ return False
+ return True
+
+ def parse_heading(self, el, parsed):
+ return self.heading(parsed, self.pre_processor.heading_level(el))
+
+ def parse_list_item(self, el, text):
+ # If for whatever reason we are not currently in a list, then start
+ # a list here. This will only happen if the num_id/ilvl combinations
+ # between lists are not well formed.
+ parsed = text
+ if self.list_depth == 0:
+ return self.parse_list(el, parsed)
+
+ def _should_parse_next_as_content(el):
+ """
+ Get the contents of the next el and append it to the
+ contents of the current el (that way things like tables
+ are actually in the li tag instead of in the ol/ul tag).
+ """
+ next_el = self.pre_processor.next(el)
+ if next_el is None:
+ return False
+ if (
+ not self.pre_processor.is_list_item(next_el) and
+ not self.pre_processor.is_last_list_item_in_root(el)
+ ):
+ return True
+ if self.pre_processor.is_first_list_item(next_el):
+ if (
+ self.pre_processor.num_id(next_el) ==
+ self.pre_processor.num_id(el)):
+ return True
+ return False
+
+ while el is not None:
+ if _should_parse_next_as_content(el):
+ el = self.pre_processor.next(el)
+ next_elements_content = self.parse(el)
+ if not next_elements_content:
+ continue
+ if self._should_append_break_tag(el):
+ parsed += self.break_tag(
+ self.pre_processor.is_in_table(el))
+ parsed += next_elements_content
+ else:
+ break
+ # Create the actual li element
+ return self.list_element(parsed)
+
+ def _get_rowspan(self, el, v_merge):
+ current_row = self.pre_processor.row_index(el)
+ current_col = self.pre_processor.column_index(el)
+ rowspan = 1
+ result = ''
+ tbl = find_ancestor_with_tag(self.pre_processor, el, 'tbl')
+ # We only want table cells whose row_index is greater than or equal to
+ # current_row and that are on the current_col
+ if tbl is None:
+ return ''
+ tcs = [
+ tc for tc in find_all(tbl, 'tc')
+ if self.pre_processor.row_index(tc) >= current_row and
+ self.pre_processor.column_index(tc) == current_col
+ ]
+ restart_in_v_merge = False
+ if v_merge is not None and 'val' in v_merge.attrib:
+ restart_in_v_merge = 'restart' in v_merge.attrib['val']
+
+ def increment_rowspan(tc):
+ if not restart_in_v_merge:
+ return False
+ if not self.pre_processor.vmerge_continue(tc):
+ return False
+ return True
+
+ for tc in tcs:
+ if increment_rowspan(tc):
+ rowspan += 1
+ else:
+ rowspan = 1
+ if rowspan > 1:
+ result = rowspan
+ return str(result)
+
+ def get_colspan(self, el):
+ grid_span = find_first(el, 'gridSpan')
+ if grid_span is None:
return ''
+ return find_first(el, 'gridSpan').attrib['val']
- def get_list_style(self, numval):
- ids = self.numbering_root.findall_all('num')
- for _id in ids:
- if _id.attrib['numId'] == numval:
- abstractid = _id.find('abstractNumId')
- abstractid = abstractid.attrib['val']
- style_information = self.numbering_root.findall_all(
- 'abstractNum',
- )
- for info in style_information:
- if info.attrib['abstractNumId'] == abstractid:
- for i in el_iter(info):
- if i.find('numFmt') is not None:
- return i.find('numFmt').attrib
-
- def get_comments(self, doc_id):
- if self.comment_store is None:
- # TODO throw appropriate error
- comment_root = ElementTree.fromstring(
- remove_namespaces(self.comment_text),
+ def parse_table_cell_contents(self, el, text):
+ parsed = text
+
+ def _should_parse_next_as_content(el):
+ next_el = self.pre_processor.next(el)
+ if next_el is None:
+ return False
+ if self.pre_processor.is_in_table(next_el):
+ return True
+ while el is not None:
+ if _should_parse_next_as_content(el):
+ el = self.pre_processor.next(el)
+ next_elements_content = self.parse(el)
+ if not next_elements_content:
+ continue
+ if self._should_append_break_tag(el):
+ parsed += self.break_tag(
+ self.pre_processor.is_in_table(el))
+ parsed += next_elements_content
+ else:
+ break
+ return parsed
+
+ def parse_hyperlink(self, el, text):
+ rId = el.get('id')
+ href = self.rels_dict.get(rId)
+ if not href:
+ return text
+ href = self.escape(href)
+ return self.hyperlink(text, href)
+
def _get_image_id(self, el):
    """
    Return the relationship id of the image described by `el`, or None
    when neither a `blip` nor an `imagedata` tag is found.
    """
    # Drawings keep the id in the `embed` attribute of their `blip` tag,
    # picts keep it in the `id` attribute of `imagedata`. Drawings are
    # checked first.
    lookups = (
        ('blip', 'embed'),
        ('imagedata', 'id'),
    )
    for tag, attribute in lookups:
        node = find_first(el, tag)
        if node is not None:
            return node.get(attribute)
    return None
+
def _convert_image_size(self, size):
    """
    Scale `size` down by `EMUS_PER_PIXEL`.
    """
    pixels = size / EMUS_PER_PIXEL
    return pixels
+
def _get_image_size(self, el):
    """
    Return the `(width, height)` of the image described by `el`.

    If we can't find a height or width, return 0 for whichever is not
    found, then rely on the `image` handler to strip those attributes. This
    functionality can change once we integrate PIL.

    Sizes read from an `ext` tag are converted with `_convert_image_size`
    and returned as `'<n>px'` strings. Sizes read from the `style`
    attribute of a `shape` tag are returned exactly as written there.
    """
    sizes = find_first(el, 'ext')
    if sizes is not None and sizes.get('cx'):
        x = '%dpx' % self._convert_image_size(int(sizes.get('cx')))
        # `cy` is optional: fall back to 0 instead of leaving `y` unbound.
        y = 0
        if sizes.get('cy'):
            y = '%dpx' % self._convert_image_size(int(sizes.get('cy')))
        return x, y
    shape = find_first(el, 'shape')
    if shape is not None and shape.get('style') is not None:
        # If either of these are not set, rely on the method `image` to not
        # use either of them.
        x = 0
        y = 0
        for declaration in shape.get('style').split(';'):
            if declaration.startswith('height:'):
                y = declaration.split(':')[1]
            if declaration.startswith('width:'):
                x = declaration.split(':')[1]
        return x, y
    return 0, 0
+
def parse_image(self, el):
    """
    Render the image referenced by `el` through the `image` hook.

    The relationship id of the image is resolved to a target via
    `rels_dict`; the target is looked up below `word` in `_image_data`.
    An empty string is returned when the relationship is unknown or no
    data is stored for the resulting path.
    """
    width, height = self._get_image_size(el)
    target = self.rels_dict.get(self._get_image_id(el))
    if not target:
        return ''
    archive_path = os.path.join('word', target)
    if archive_path not in self._image_data:
        return ''
    return self.image(
        self._image_data[archive_path],
        os.path.basename(archive_path),
        width,
        height,
    )
+
def _is_style_on(self, el):
    """
    For b, i, u (bold, italics, and underline) merely having the tag is not
    sufficient. You need to check to make sure it is not set to "false" as
    well.

    Returns True unless the lower-cased `val` attribute of `el` (empty
    string when absent) is one of `DISABLED_VALUES`.
    """
    val = el.get('val', '')
    return val.lower() not in DISABLED_VALUES
+
def parse_t(self, el, parsed):
    """
    Return the escaped text of `el`, or an empty string when the element
    has no text at all. `parsed` is not used.
    """
    content = el.text
    return '' if content is None else self.escape(content)
+
def parse_break_tag(self, el, parsed):
    """
    Return a break tag, telling the `break_tag` hook whether `el` is in a
    table. `parsed` is not used.
    """
    in_table = self.pre_processor.is_in_table(el)
    return self.break_tag(in_table)
+
def parse_deletion(self, el, parsed):
    """
    Return the text of `el` wrapped by the `deletion` hook, or an empty
    string when the element has no text. `parsed` is not used.

    The text is escaped first, exactly as `parse_t` does for regular text,
    so that special characters in deleted text cannot corrupt the output
    markup. Author and date are passed as empty strings.
    """
    if el.text is None:
        return ''
    return self.deletion(self.escape(el.text), '', '')
+
def parse_insertion(self, el, parsed):
    """
    Wrap the already parsed content in the `insertion` hook. `el` is not
    inspected; author and date are passed as empty strings.
    """
    author, date = '', ''
    return self.insertion(parsed, author, date)
+
def parse_r(self, el, parsed):
    """
    Parse the running text.

    Applies the inline styles listed in the `rPr` child of `el` to
    `parsed`, in the order the style tags appear, and returns the styled
    text. Empty input yields an empty string; a run without `rPr` is
    returned unchanged.
    """
    text = parsed
    if not text:
        return ''
    run_tag_property = el.find('rPr')
    if run_tag_property is None:
        return text

    inline_tags = {
        'b': self.bold,
        'i': self.italics,
        'u': self.underline,
        'caps': self.caps,
        'smallCaps': self.small_caps,
        'strike': self.strike,
        'dstrike': self.strike,
        'vanish': self.hide,
        'webHidden': self.hide,
    }
    for child in run_tag_property:
        # `vertAlign` is a little different from the rest: its `val`
        # selects between superscript and subscript. A missing or unknown
        # `val` applies no style instead of raising a KeyError.
        if child.tag == 'vertAlign':
            alignment = child.get('val')
            if alignment == 'superscript':
                text = self.superscript(text)
            elif alignment == 'subscript':
                text = self.subscript(text)
        elif child.tag in inline_tags and self._is_style_on(child):
            text = inline_tags[child.tag](text)

    return text
+
def parse_rad(self, el, parsed):
    """
    Contribute no output for `el`: always returns an empty string,
    whatever `el` and `parsed` are.
    """
    return ''
+
def parse_deg(self, el, parsed):
    """
    Remember the rendered degree instead of emitting it.

    The result of the `deg` hook for `parsed` is stored on `self.degree`
    and an empty string is returned. `el` is not inspected.
    """
    rendered = self.deg(parsed)
    self.degree = rendered
    return ''
+
def parse_exp(self, el, parsed):
    """
    Render `parsed` according to what `find_ancestor_with_tag` reports
    for `el`:

    * a `rad` ancestor: the `exp` result is stored on `self.expon` and an
      empty string is returned;
    * otherwise an `m` ancestor: the content becomes a matrix cell;
    * otherwise: the `exp` result is returned directly.
    """
    pre_processor = self.pre_processor
    if find_ancestor_with_tag(pre_processor, el, 'rad'):
        self.expon = self.exp(parsed)
        return ''
    if find_ancestor_with_tag(pre_processor, el, 'm'):
        is_last = pre_processor.is_last_matrix_row_item(el)
        return self.matrix_cell(parsed, is_last)
    return self.exp(parsed)
+
def parse_matrix_row(self, el, parsed):
    """
    Hand the parsed content of a matrix row to the `matrix_row` hook and
    return its result. `el` is not inspected.
    """
    row = self.matrix_row(parsed)
    return row
@property
def parsed(self):
@@ -332,13 +695,29 @@ def linebreak(self):
return ''
@abstractmethod
- def paragraph(self, text):
+ def paragraph(self, text, math):
+ return text
+
+ @abstractmethod
+ def heading(self, text, heading_level):
return text
@abstractmethod
def insertion(self, text, author, date):
return text
+ @abstractmethod
+ def hyperlink(self, text, href):
+ return text
+
+ @abstractmethod
+ def image_handler(self, path):
+ return path
+
+ @abstractmethod
+ def image(self, data, filename, x, y):
+ return self.image_handler(data)
+
@abstractmethod
def deletion(self, text, author, date):
return text
@@ -355,6 +734,30 @@ def italics(self, text):
def underline(self, text):
return text
+ @abstractmethod
+ def caps(self, text):
+ return text
+
+ @abstractmethod
+ def small_caps(self, text):
+ return text
+
+ @abstractmethod
+ def strike(self, text):
+ return text
+
+ @abstractmethod
+ def hide(self, text):
+ return text
+
+ @abstractmethod
+ def superscript(self, text):
+ return text
+
+ @abstractmethod
+ def subscript(self, text):
+ return text
+
@abstractmethod
def tab(self):
return True
@@ -388,15 +791,41 @@ def page_break(self):
return True
@abstractmethod
- def right_justify(self, text):
+ def indent(self, text, left='', right='', firstLine=''):
return text
@abstractmethod
- def center_justify(self, text):
+ def empty_cell(self):
+ return ''
+
+ @abstractmethod
+ def num(self, text):
+ return text
+
+ @abstractmethod
+ def radical(self, deg, num):
+ return True
+
+ @abstractmethod
+ def den(self, text):
+ return text
+
+ @abstractmethod
+ def deg(self, text):
return text
@abstractmethod
- def indent(self, text, left=None, right=None, firstLine=None):
+ def exp(self, text):
return text
- #TODO JUSTIFIED JUSTIFIED TEXT
+ @abstractmethod
+ def matrix_row(self, text):
+ return text
+
+ @abstractmethod
+ def matrix_cell(self, text, is_last_row_item=False):
+ return text
+
+ @abstractmethod
+ def matrix(self, text):
+ return text
diff --git a/pydocx/__init__.py b/pydocx/__init__.py
index 9b42e00f..dad89dc2 100644
--- a/pydocx/__init__.py
+++ b/pydocx/__init__.py
@@ -1,8 +1,15 @@
-from .parsers import *
+from .parsers import Docx2LaTex, Docx2Html, Docx2Markdown
+
def docx2html(path):
return Docx2Html(path).parsed
+
def docx2markdown(path):
return Docx2Markdown(path).parsed
+
+def docx2latex(path):
+ return Docx2LaTex(path).parsed
+
+VERSION = '0.3.3'
diff --git a/pydocx/exceptions.py b/pydocx/exceptions.py
new file mode 100644
index 00000000..cdff556a
--- /dev/null
+++ b/pydocx/exceptions.py
@@ -0,0 +1,2 @@
+class MalformedDocxException(Exception):
+ pass
diff --git a/pydocx/fixtures/all_configured_styles.docx b/pydocx/fixtures/all_configured_styles.docx
new file mode 100644
index 00000000..8f514372
Binary files /dev/null and b/pydocx/fixtures/all_configured_styles.docx differ
diff --git a/pydocx/fixtures/attachment_is_tiff.docx b/pydocx/fixtures/attachment_is_tiff.docx
new file mode 100644
index 00000000..774362ca
Binary files /dev/null and b/pydocx/fixtures/attachment_is_tiff.docx differ
diff --git a/pydocx/fixtures/bigger_font_size_to_header.docx b/pydocx/fixtures/bigger_font_size_to_header.docx
new file mode 100644
index 00000000..c722888b
Binary files /dev/null and b/pydocx/fixtures/bigger_font_size_to_header.docx differ
diff --git a/pydocx/fixtures/convert_p_to_h.docx b/pydocx/fixtures/convert_p_to_h.docx
new file mode 100644
index 00000000..53769e15
Binary files /dev/null and b/pydocx/fixtures/convert_p_to_h.docx differ
diff --git a/pydocx/fixtures/fake_headings_by_length.docx b/pydocx/fixtures/fake_headings_by_length.docx
new file mode 100644
index 00000000..a130f5ba
Binary files /dev/null and b/pydocx/fixtures/fake_headings_by_length.docx differ
diff --git a/pydocx/fixtures/greek_alphabet.docx b/pydocx/fixtures/greek_alphabet.docx
new file mode 100644
index 00000000..46ab5429
Binary files /dev/null and b/pydocx/fixtures/greek_alphabet.docx differ
diff --git a/pydocx/fixtures/has_image.docx b/pydocx/fixtures/has_image.docx
new file mode 100644
index 00000000..2ebd0bd0
Binary files /dev/null and b/pydocx/fixtures/has_image.docx differ
diff --git a/pydocx/fixtures/has_missing_image.docx b/pydocx/fixtures/has_missing_image.docx
new file mode 100644
index 00000000..996e6671
Binary files /dev/null and b/pydocx/fixtures/has_missing_image.docx differ
diff --git a/pydocx/fixtures/has_title.docx b/pydocx/fixtures/has_title.docx
new file mode 100644
index 00000000..a87d88ed
Binary files /dev/null and b/pydocx/fixtures/has_title.docx differ
diff --git a/pydocx/fixtures/header_footer_problem.docx b/pydocx/fixtures/header_footer_problem.docx
new file mode 100644
index 00000000..6bc49a7a
Binary files /dev/null and b/pydocx/fixtures/header_footer_problem.docx differ
diff --git a/pydocx/fixtures/headers.docx b/pydocx/fixtures/headers.docx
new file mode 100644
index 00000000..890104c7
Binary files /dev/null and b/pydocx/fixtures/headers.docx differ
diff --git a/pydocx/fixtures/headers_with_full_line_styles.docx b/pydocx/fixtures/headers_with_full_line_styles.docx
new file mode 100644
index 00000000..38d6f6a8
Binary files /dev/null and b/pydocx/fixtures/headers_with_full_line_styles.docx differ
diff --git a/pydocx/fixtures/inline_tags.docx b/pydocx/fixtures/inline_tags.docx
new file mode 100644
index 00000000..4aba2347
Binary files /dev/null and b/pydocx/fixtures/inline_tags.docx differ
diff --git a/pydocx/fixtures/justification.docx b/pydocx/fixtures/justification.docx
new file mode 100644
index 00000000..7f8a3bf1
Binary files /dev/null and b/pydocx/fixtures/justification.docx differ
diff --git a/pydocx/fixtures/list_in_table.docx b/pydocx/fixtures/list_in_table.docx
new file mode 100644
index 00000000..d1a87388
Binary files /dev/null and b/pydocx/fixtures/list_in_table.docx differ
diff --git a/pydocx/fixtures/list_to_header.docx b/pydocx/fixtures/list_to_header.docx
new file mode 100644
index 00000000..f9b3946e
Binary files /dev/null and b/pydocx/fixtures/list_to_header.docx differ
diff --git a/pydocx/fixtures/lists_with_styles.docx b/pydocx/fixtures/lists_with_styles.docx
new file mode 100644
index 00000000..c1c7ecf8
Binary files /dev/null and b/pydocx/fixtures/lists_with_styles.docx differ
diff --git a/pydocx/fixtures/localDpi.docx b/pydocx/fixtures/localDpi.docx
new file mode 100644
index 00000000..0f6d7f77
Binary files /dev/null and b/pydocx/fixtures/localDpi.docx differ
diff --git a/pydocx/fixtures/missing_content.docx b/pydocx/fixtures/missing_content.docx
new file mode 100644
index 00000000..21bed964
Binary files /dev/null and b/pydocx/fixtures/missing_content.docx differ
diff --git a/pydocx/fixtures/nested_lists.docx b/pydocx/fixtures/nested_lists.docx
new file mode 100644
index 00000000..f4000dfa
Binary files /dev/null and b/pydocx/fixtures/nested_lists.docx differ
diff --git a/pydocx/fixtures/nested_table_rowspan.docx b/pydocx/fixtures/nested_table_rowspan.docx
new file mode 100644
index 00000000..b43b8a0d
Binary files /dev/null and b/pydocx/fixtures/nested_table_rowspan.docx differ
diff --git a/pydocx/fixtures/nested_tables.docx b/pydocx/fixtures/nested_tables.docx
new file mode 100644
index 00000000..af704d4d
Binary files /dev/null and b/pydocx/fixtures/nested_tables.docx differ
diff --git a/pydocx/fixtures/resized_image.docx b/pydocx/fixtures/resized_image.docx
new file mode 100644
index 00000000..913099c4
Binary files /dev/null and b/pydocx/fixtures/resized_image.docx differ
diff --git a/pydocx/fixtures/shift_enter.docx b/pydocx/fixtures/shift_enter.docx
new file mode 100644
index 00000000..4128c0a2
Binary files /dev/null and b/pydocx/fixtures/shift_enter.docx differ
diff --git a/pydocx/fixtures/simple.docx b/pydocx/fixtures/simple.docx
new file mode 100644
index 00000000..1d2a1c23
Binary files /dev/null and b/pydocx/fixtures/simple.docx differ
diff --git a/pydocx/fixtures/simple_lists.docx b/pydocx/fixtures/simple_lists.docx
new file mode 100644
index 00000000..c09ad744
Binary files /dev/null and b/pydocx/fixtures/simple_lists.docx differ
diff --git a/pydocx/fixtures/simple_table.docx b/pydocx/fixtures/simple_table.docx
new file mode 100644
index 00000000..26de483c
Binary files /dev/null and b/pydocx/fixtures/simple_table.docx differ
diff --git a/pydocx/fixtures/special_chars.docx b/pydocx/fixtures/special_chars.docx
new file mode 100644
index 00000000..b4b9287f
Binary files /dev/null and b/pydocx/fixtures/special_chars.docx differ
diff --git a/pydocx/fixtures/split_header.docx b/pydocx/fixtures/split_header.docx
new file mode 100644
index 00000000..cc4bd5cf
Binary files /dev/null and b/pydocx/fixtures/split_header.docx differ
diff --git a/pydocx/fixtures/super_and_subscript.docx b/pydocx/fixtures/super_and_subscript.docx
new file mode 100644
index 00000000..06ea2d7a
Binary files /dev/null and b/pydocx/fixtures/super_and_subscript.docx differ
diff --git a/pydocx/fixtures/table_col_row_span.docx b/pydocx/fixtures/table_col_row_span.docx
new file mode 100644
index 00000000..856abfdf
Binary files /dev/null and b/pydocx/fixtures/table_col_row_span.docx differ
diff --git a/pydocx/fixtures/tables_in_lists.docx b/pydocx/fixtures/tables_in_lists.docx
new file mode 100644
index 00000000..11859541
Binary files /dev/null and b/pydocx/fixtures/tables_in_lists.docx differ
diff --git a/pydocx/fixtures/track_changes_on.docx b/pydocx/fixtures/track_changes_on.docx
new file mode 100644
index 00000000..dcb7ba1c
Binary files /dev/null and b/pydocx/fixtures/track_changes_on.docx differ
diff --git a/pydocx/fixtures/upper_alpha_all_bold.docx b/pydocx/fixtures/upper_alpha_all_bold.docx
new file mode 100644
index 00000000..d518b2c5
Binary files /dev/null and b/pydocx/fixtures/upper_alpha_all_bold.docx differ
diff --git a/pydocx/lxmlparser.py b/pydocx/lxmlparser.py
deleted file mode 100644
index 94b130d3..00000000
--- a/pydocx/lxmlparser.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import zipfile
-from lxml import etree
-from StringIO import StringIO
-__author__ = 'samportnow'
-
-#for el in tree.iter():
- # The way lists are handled could double visit certain elements; keep
- # track of which elements have been visited and skip any that have been
- # visited already.
- #if el in visited_nodes:
- #continue
-with zipfile.ZipFile('/Users/samportnow/Documents/pydocx/helloworld.docx') as f:
- document = f.read('word/document.xml')
- numbering= f.read('word/numbering.xml')
-parser=etree.XMLParser(ns_clean=True)
-document=StringIO(document)
-numbering=StringIO(numbering)
-numbering_tree=etree.parse(numbering,parser)
-numbering_namespace=numbering_tree.getroot().nsmap['w']
-visited_els=[]
-
-def get_parsed():
- parser=etree.XMLParser(ns_clean=True)
- tree=etree.parse(document,parser)
- namespace=tree.getroot().nsmap['w']
- #rpr is run properties for the paragraph mark
- paragraph=''
- run_text=''
- running_text=''
- for el in tree.iter():
- if el.tag=='{%s}p' %namespace:
- for wp in el.iter():
- if wp.tag =='{%s}ins' %namespace:
- for text in wp.iterchildren():
- if text not in visited_els:
- run_text +=''+get_text(text,namespace,visited_els)+'
'
- visited_els.append(text)
- if wp.tag=='{%s}r' %namespace and wp not in visited_els:
- run_text+=get_text(wp,namespace,visited_els)
- visited_els.append(wp)
- if not el.getchildren():
- run_text+='
'
- if wp.tag == '{%s}ilvl' %namespace:
- for lst in el.iter():
- if lst.find('{%s}numId' %namespace) is not None and el not in visited_els:
- numval = lst.find('{%s}numId' %namespace).attrib['{%s}val' %namespace]
- lst_type=get_list_style(numval)
- if get_text(lst,namespace,visited_els) and el not in visited_els and lst_type['{%s}val' %namespace] != 'bullet':
- if lst.getnext() is not None:
- if lst not in visited_els:
- while lst.getnext() is not None:
- if lst not in visited_els:
- text = get_text(lst,namespace,visited_els)
- next_txt = get_text(lst.getnext(),namespace,visited_els)
- running_text += text + next_txt
- visited_els.append(lst)
- visited_els.append(lst.getnext())
- lst=lst.getnext()
- else:
- run_text += '' + running_text + ''
- break
- else:
- run_text +='' + get_text(lst, namespace, visited_els) + ''
- visited_els.append(lst)
- print running_text
- return run_text
-
-
-def get_text(wp,namespace,visited_els):
- run_text= ''
- decorator = ''
- closing = ''
- if wp.find('{%s}tab' %namespace) is not None:
- run_text+='%nbsp'
- if wp.find('{%s}rPr' %namespace) is not None:
- for tag in wp.iter():
- if tag.find('{%s}u' %namespace) is not None:
- if wp.find('{%s}t' %namespace) is not None:
- decorator +=''
- closing += ''
- visited_els.append(wp.find('{%s}t' %namespace))
- if tag.find('{%s}i' %namespace) is not None:
- if wp.find('{%s}t' %namespace) is not None:
- decorator += ''
- closing += ''
- visited_els.append(wp.find('{%s}t' %namespace))
- if tag.find('{%s}b' %namespace) is not None:
- if wp.find('{%s}t' %namespace) is not None:
- decorator += ''
- closing += ''
- visited_els.append(wp.find('{%s}t' %namespace))
- run_text = wp.find('{%s}t' %namespace).text
- run_text = decorator + run_text + closing
- if wp.find('{%s}t' %namespace) is not None and wp.find('{%s}t' %namespace) not in visited_els:
- run_text+=wp.find('{%s}t' %namespace).text
- return run_text
-
-def get_list_style(numval):
- ids = numbering_tree.findall('{%s}num' %numbering_namespace)
- for id in ids:
- if id.attrib['{%s}numId' %numbering_namespace] == numval:
- abstractid=id.find('{%s}abstractNumId' %numbering_namespace)
- abstractid=abstractid.attrib['{%s}val' %numbering_namespace]
- style_information=numbering_tree.findall('{%s}abstractNum' %numbering_namespace)
- for info in style_information:
- if info.attrib['{%s}abstractNumId' %numbering_namespace] == abstractid:
- for i in info.iter():
- if i.find('{%s}numFmt' %numbering_namespace) is not None:
- return i.find('{%s}numFmt' %numbering_namespace).attrib
-
-print get_parsed()
diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py
index bfaad2a6..5950f267 100644
--- a/pydocx/parsers/Docx2Html.py
+++ b/pydocx/parsers/Docx2Html.py
@@ -1,21 +1,46 @@
-from pydocx.DocxParser import DocxParser
-
+import base64
import xml.sax.saxutils
+from pydocx.DocxParser import DocxParser
+
class Docx2Html(DocxParser):
@property
def parsed(self):
- self._parsed = self._parsed.replace('', '
')
- self._parsed = self._parsed.replace('
', '
')
- self._parsed = self._parsed.replace('
', '')
- return (
- '{content}'
- ).format(content=self._parsed)
+ content = self._parsed
+ content = "%(head)s%(content)s" % {
+ 'head': self.head(),
+ 'content': content,
+ }
+ return unicode(content)
+
+ def head(self):
+ return "%(style)s" % {
+ 'style': self.style(),
+ }
+
+ def style(self):
+ result = (
+ ''
+ ) % {
+ #multiple by (4/3) to get to px
+ 'width': (self.page_width * (4 / 3)),
+ }
+ return result
def escape(self, text):
return xml.sax.saxutils.quoteattr(text)[1:-1]
@@ -23,38 +48,111 @@ def escape(self, text):
def linebreak(self, pre=None):
return '
'
- def paragraph(self, text, pre=None):
+ def paragraph(self, text, math=False):
+ if math:
+ return '
'
return '' + text + '
'
+ def heading(self, text, heading_value):
+ return '<%(tag)s>%(text)s%(tag)s>' % {
+ 'tag': heading_value,
+ 'text': text,
+ }
+
def insertion(self, text, author, date):
return (
- "{text}"
- ).format(author=author, date=date, text=text)
+ "%(text)s"
+ ) % {
+ 'author': author,
+ 'date': date,
+ 'text': text,
+ }
+
+ def hyperlink(self, text, href):
+ if text == '':
+ return ''
+ return '%(text)s' % {
+ 'href': href,
+ 'text': text,
+ }
+
+ def image_handler(self, image_data, filename):
+ extension = filename.split('.')[-1].lower()
+ b64_encoded_src = 'data:image/%s;base64,%s' % (
+ extension,
+ base64.b64encode(image_data),
+ )
+ b64_encoded_src = self.escape(b64_encoded_src)
+ return b64_encoded_src
+
+ def image(self, image_data, filename, x, y):
+ src = self.image_handler(image_data, filename)
+ if not src:
+ return ''
+ if all([x, y]):
+ return '
' % (
+ src,
+ y,
+ x,
+ )
+ else:
+ return '
' % src
def deletion(self, text, author, date):
return (
- "{text}"
- ).format(author=author, date=date, text=text)
+ "%(text)s"
+ ) % {
+ 'author': author,
+ 'date': date,
+ 'text': text,
+ }
def list_element(self, text):
- return "- {text}
".format(text=text)
+ return "- %(text)s
" % {
+ 'text': text,
+ }
- def ordered_list(self, text):
- return "{text}
".format(text=text)
+ def ordered_list(self, text, list_style):
+ return '%(text)s
' % {
+ 'text': text,
+ 'list_style': list_style,
+ }
def unordered_list(self, text):
- return "".format(text=text)
+ return "" % {
+ 'text': text,
+ }
def bold(self, text):
- return '' + text + ''
+ return '' + text + ''
def italics(self, text):
- return '' + text + ''
+ return '' + text + ''
def underline(self, text):
- return '' + text + ''
+ return '' + text + ''
+
+ def caps(self, text):
+ return '' + text + ''
+
+ def small_caps(self, text):
+ return '' + text + ''
+
+ def strike(self, text):
+ return '' + text + ''
+
+ def hide(self, text):
+ return '' + text + ''
+
+ def superscript(self, text):
+ return '%(text)s' % {
+ 'text': text,
+ }
+
+ def subscript(self, text):
+ return '%(text)s' % {
+ 'text': text,
+ }
def tab(self):
# Insert before the text right?? So got the text and just do an insert
@@ -62,25 +160,96 @@ def tab(self):
return '    '
def table(self, text):
- return ''
+ return ''
def table_row(self, text):
return '' + text + '
'
- def table_cell(self, text):
- return '' + text + ' | '
+ def table_cell(
+ self, text, col='', row='',
+ is_last_row_item=False, is_list_item=False):
+ slug = '%(text)s | '
+ return slug % {
+ 'colspan': col,
+ 'rowspan': row,
+ 'text': text,
+ }
def page_break(self):
- return '
'
-
- def center_justify(self, text):
- return "" + text + '
'
-
- def right_justify(self, text):
- return "" + text + '
'
+ return '
'
+
+ def indent(self, text, just='', firstLine='', left='',
+ right='', hanging='', is_in_table=False):
+ slug = '%(text)s
"
+ return slug % {
+ 'text': text,
+ 'just': just,
+ 'firstLine': firstLine,
+ 'left': left,
+ 'right': right,
+ }
+
+ def break_tag(self, *args):
+ return '
'
- def indent(self, text, right, left, firstLine):
- return "{text}
".format(
- left=left,
- text=text,
- )
+ def change_orientation(self, parsed, orient):
+ return '
'
+
+ def empty_cell(self):
+ return ''
+
+ def num(self, text):
+ return '%(text)s' % {
+ 'text': text,
+ }
+
+ def den(self, text):
+ return '/%(text)s' % {
+ 'text': text,
+ }
+
+ def deg(self, text):
+ if text:
+ return '%(text)s' % {
+ 'text': text,
+ }
+ else:
+ return None
+
+ def exp(self, text):
+ return ' %s ' % text
+
+ def radical(self, deg, exp):
+ if deg:
+ return ' %(exp)s %(deg)s ' % {
+ 'exp': exp,
+ 'deg': deg,
+ }
+ else:
+ return ' %s ' % exp
+
+ def matrix(self, text):
+ return '%s' % text
+
+ def matrix_row(self, text):
+ return '%s' % text
+
+ def matrix_cell(self, text, is_last_row_item=False):
+ return '%s' % text
diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py
new file mode 100644
index 00000000..2afa08ef
--- /dev/null
+++ b/pydocx/parsers/Docx2LaTex.py
@@ -0,0 +1,310 @@
+# coding=utf-8
+
+import base64
+from pydocx.DocxParser import DocxParser
+from unicode_to_latex import unicode_to_latex
+
+
class Docx2LaTex(DocxParser):
    """
    Docx parser whose formatting hooks emit LaTeX markup.

    Table rendering relies on state shared between hooks:

    * `col_count` / `counted_columns` -- number of columns, counted by
      `table_cell` until `table_row` marks the count as done.
    * `table_info` -- per-column hints (`justify` or `list`) collected by
      `indent` and `table_cell`, consumed and cleared by `table`.
    * `line_break_in_table` -- set by `break_tag` so that the next
      `table_cell` wraps its content in a `\\parbox`.
    """

    # LaTeX environment producing each supported paragraph justification.
    _JUSTIFICATION_ENVIRONMENTS = {
        'left': 'flushleft',
        'center': 'center',
        'right': 'flushright',
    }

    def __init__(self, *args, **kwargs):
        # The rendering state is created before the base initializer is
        # invoked.
        # NOTE(review): presumably the base `__init__` already calls the
        # hooks below -- confirm against DocxParser.
        self.table_info = []
        self.counted_columns = False
        self.previous_orient = ''
        self.col_count = 0
        self.matrix_col_count = 0
        self.hit_list = False
        self.line_break_in_table = False
        super(Docx2LaTex, self).__init__(*args, **kwargs)

    @staticmethod
    def _latex_length(size):
        """
        Convert a `'<n>px'` size into points (3/4 of the pixel value);
        sizes without a `px` unit are returned unchanged.
        """
        if 'px' in size:
            return str(float(size.replace('px', '')) * 3.0 / 4.0) + 'pt'
        return size

    @property
    def parsed(self):
        """
        The complete LaTeX document (preamble plus body), UTF-8 encoded.
        """
        content = self._parsed
        content = r"%(head)s\begin{document}%(content)s\end{document}" % {
            'head': self.head(),
            'content': content}
        return content.encode('utf-8')

    def escape(self, text):
        """
        Prefix every character listed below with a backslash.
        """
        chars = ['%', '&', '#', '$', '~', '_', '^', '{', '}']
        for ch in chars:
            text = text.replace(ch, '\\' + ch)
        return text

    def linebreak(self):
        return '\n\n'

    def paragraph(self, text, math=False):
        """
        Terminate `text` with a blank line. Math paragraphs additionally
        get every key of `unicode_to_latex` found in the text replaced by
        its mapped value and are wrapped in `$...$`.
        """
        if math:
            matches = [match for match in unicode_to_latex if match in text]
            for match in matches:
                text = text.replace(match, unicode_to_latex[match])
            return '$' + text + '$' + '\n\n'
        return text + '\n\n'

    def bold(self, text):
        return r'\textbf {%s}' % text

    def italics(self, text):
        return r'\emph {%s}' % text

    def underline(self, text):
        return r'\underline {%s}' % text

    def list_element(self, text):
        return r'\item %s' % text + '\n'

    def ordered_list(self, text, list_style):
        # `list_style` is accepted for interface compatibility but unused.
        self.hit_list = True
        return r'\begin{enumerate} %s \end{enumerate}' % text

    def unordered_list(self, text):
        self.hit_list = True
        return r'\begin{itemize} %s \end{itemize}' % text

    def head(self):
        """
        Return the document preamble: document class and used packages.
        """
        return r'''\documentclass{article}\usepackage{hyperref}
        \usepackage{graphicx}\usepackage{changes}
        \usepackage{changepage}
        \usepackage{hanging}\usepackage{multirow}
        \usepackage{pbox}\usepackage{pdflscape}
        \usepackage{ulem}\usepackage{comment}\usepackage{mathtools}'''

    def heading(self, text, heading_value):
        """
        Map `h1`..`h4` onto LaTeX sectioning commands; any other heading
        value is rendered as a plain paragraph.
        """
        if heading_value == 'h1':
            return r'\section{%s}' % text + '\n\n'
        elif heading_value == 'h2':
            return r'\subsection{%s}' % text + '\n\n'
        elif heading_value == 'h3':
            return r'\paragraph{%s}' % text + '\n\n'
        elif heading_value == 'h4':
            return r'\subparagraph{%s}' % text + '\n\n'
        else:
            return text + '\n\n'

    def insertion(self, text, author, date):
        return r'\added[id=%s,remark=%s]{%s}' % (author, date, text)

    def hyperlink(self, text, href):
        if text == '':
            return ''
        return r'\href{%(href)s}{%(text)s}' % {
            'href': href,
            'text': text,
        }

    def image_handler(self, image_data, filename):
        """
        Return the image as an escaped base64 `data:` URI whose subtype is
        the lower-cased extension of `filename`.
        """
        extension = filename.split('.')[-1].lower()
        b64_encoded_src = 'data:image/%s;base64,%s' % (
            extension,
            base64.b64encode(image_data),
        )
        b64_encoded_src = self.escape(b64_encoded_src)
        return b64_encoded_src

    def image(self, image_data, filename, x, y):
        """
        Return an `\\includegraphics` command for the image.

        `x` is the width and `y` the height. The size options are only
        emitted when both are truthy; pixel sizes are converted to points.
        """
        src = self.image_handler(image_data, filename)
        if not src:
            return ''
        if all([x, y]):
            # Both dimensions are converted independently and already
            # carry their unit, so no unit is appended here.
            return r'\includegraphics[height=%s, width=%s]{%s}' % (
                self._latex_length(y),
                self._latex_length(x),
                src)
        else:
            return r'\includegraphics {%s}' % src

    def tab(self):
        return r'\qquad '

    def table(self, text):
        """
        Wrap the rendered rows in a `tabular` environment.

        The column specification is derived from `table_info`: `c` / `r`
        for centered / right justified columns, `p{3cm}` for columns
        holding a list and `l` for everything else. `table_info` is
        cleared afterwards.
        """
        setup_cols = ''
        for i in range(self.col_count):
            match = next((
                column for column in self.table_info
                if 'Column' in column and column['Column'] == i), None)
            # Decided per column, so a hint for one column never leaks
            # into the columns that follow it.
            spec = 'l'
            if match:
                if 'justify' in match:
                    if match['justify'] == 'center':
                        spec = 'c'
                    elif match['justify'] == 'right':
                        spec = 'r'
                elif match.get('list'):
                    spec = 'p{3cm}'
            setup_cols += spec
        self.table_info = []
        return '\n' + r'\begin{tabular}{%s}' % setup_cols\
            + '\n' + r'%s\end{tabular}'\
            % text + '\n\n'

    def table_row(self, text):
        # Once a row has been rendered the columns are considered counted.
        self.counted_columns = True
        return text

    def table_cell(
            self, text, col='', row='',
            is_last_row_item=False, is_list_item=False):
        """
        Render one table cell.

        `col` / `row` are the column / row spans (empty when the cell does
        not span). The last cell of a row is terminated with `\\\\` and a
        newline, every other cell with ` & `.
        """
        if is_list_item:
            self.columns = {'Column': self.col_count, 'list': True}
            self.table_info.append(self.columns)
        if col:
            col = int(col)
        if not self.counted_columns:
            # A spanning cell counts for as many columns as it spans.
            self.col_count += col or 1
        if row:
            row = int(row)
        slug = ''
        if col:
            slug += r'\multicolumn{%s}{c}' % col
        if row:
            slug += r'\multirow{%s}{*}' % row
        if self.line_break_in_table:
            slug += r'\parbox{20cm}'
        slug += '{' + text + '}'
        # The pending line break has been consumed by this cell, whether
        # or not it is the last one of its row.
        self.line_break_in_table = False
        if is_last_row_item:
            return slug + r' \\' + '\n'
        return '%s & ' % slug

    def page_break(self):
        return r'\newpage '

    def indent(self, text, just='', firstLine='',
               left='', right='', hanging='', is_in_table=False):
        """
        Apply indentation and justification to `text`.

        Inside a table nothing is emitted; the justification is recorded
        in `table_info` for the current column instead. Outside a table a
        hanging indent takes precedence over everything else; otherwise
        `left` / `right` (converted to 3/4 of their value, in points),
        `firstLine` and `just` are combined.
        """
        if is_in_table:
            self.columns = {'Column': self.col_count, 'justify': just}
            if self.columns not in self.table_info:
                self.table_info.append(self.columns)
            return text

        if hanging:
            hanging = float(hanging) * 3.0 / 4.0
            return r'\begin{hangparas}{%spt}{1} %s ' \
                r'\end{hangparas}' % (hanging, text) + '\n'

        slug = ''
        if left or right:
            if left:
                left = '%spt' % (float(left) * 3.0 / 4.0)
            if right:
                right = '%spt' % (float(right) * 3.0 / 4.0)
            slug += r'\begin{adjustwidth}{%s}{%s}' % (left, right)
        if firstLine:
            slug += r'\setlength{\parindent}{%spt}\indent ' % firstLine
        # `flushleft` aligns text to the left margin and `flushright` to
        # the right one; unknown justifications add no environment.
        environment = self._JUSTIFICATION_ENVIRONMENTS.get(just)
        if environment:
            slug += r'\begin{%s} ' % environment
        slug += text
        if environment:
            slug += r'\end{%s}' % environment
        if left or right:
            slug += r'\end{adjustwidth}'
        return slug

    def break_tag(self, is_in_table):
        if is_in_table:
            # Remembered so the enclosing cell is wrapped in a \parbox.
            self.line_break_in_table = True
        return r'\\'

    def deletion(self, text, author, date):
        return r'\deleted[id=%s,remark=%s]{%s}' % (author, date, text)

    def caps(self, text):
        return r'\MakeUppercase{%s}' % text

    def small_caps(self, text):
        # \textsc is LaTeX's small capitals command.
        return r'\textsc{%s}' % text

    def strike(self, text):
        return r'\sout{%s}' % text

    def hide(self, text):
        return r'\begin{comment}%s\end{comment}' % text

    def superscript(self, text):
        return r'\textsuperscript{%s}' % text

    def subscript(self, text):
        return r'\textsubscript{%s}' % text

    def empty_cell(self):
        return ' & '

    def radical(self, deg, exp):
        # `deg` is the output of `deg()` and therefore already carries
        # the `\sqrt[...]` prefix when it is non-empty.
        if deg:
            return deg + exp
        else:
            return r'\sqrt' + exp

    def num(self, text):
        return r'\frac{%s}' % text

    def den(self, text):
        return r'{%s}' % text

    def deg(self, text):
        if text:
            return r'\sqrt[%s]' % text
        else:
            return ''

    def exp(self, text):
        return r'{%s}' % text

    def matrix(self, text):
        return r'\begin{matrix} %s \end{matrix}' % text

    def matrix_row(self, text):
        return text

    def matrix_cell(self, text, is_last_matrix_row_item=False):
        # The last cell of a row ends the row, any other cell is followed
        # by a column separator.
        if is_last_matrix_row_item:
            return text + r'\\'
        else:
            return text + r'&'
diff --git a/pydocx/parsers/Docx2Markdown.py b/pydocx/parsers/Docx2Markdown.py
index 1bb43e16..d023df7a 100644
--- a/pydocx/parsers/Docx2Markdown.py
+++ b/pydocx/parsers/Docx2Markdown.py
@@ -1,5 +1,6 @@
from pydocx.DocxParser import DocxParser
+
class Docx2Markdown(DocxParser):
def escape(self, text):
return text
@@ -17,8 +18,9 @@ def bold(self, text):
return '**' + text + '**'
def italics(self, text):
- # TODO do we need a "pre" variable, so I can check for *italics**italics* and turn it into *italicsitatlics*?
+ # TODO do we need a "pre" variable, so I can check for
+ # *italics**italics* and turn it into *italicsitatlics*?
return '*' + text + '*'
def underline(self, text):
- return '***' +text + '***'
\ No newline at end of file
+ return '***' + text + '***'
diff --git a/pydocx/parsers/__init__.py b/pydocx/parsers/__init__.py
index a9524657..f6bb520f 100644
--- a/pydocx/parsers/__init__.py
+++ b/pydocx/parsers/__init__.py
@@ -1,2 +1,4 @@
-from .Docx2Html import *
-from .Docx2Markdown import *
\ No newline at end of file
+from pydocx.parsers.Docx2Html import Docx2Html
+from pydocx.parsers.Docx2Markdown import Docx2Markdown
+from pydocx.parsers.Docx2LaTex import Docx2LaTex
# `__all__` must contain the exported names as strings; listing the class
# objects themselves makes `from pydocx.parsers import *` fail.
__all__ = ('Docx2Html', 'Docx2Markdown', 'Docx2LaTex')
diff --git a/pydocx/parsers/unicode_to_latex.py b/pydocx/parsers/unicode_to_latex.py
new file mode 100644
index 00000000..8be9a073
--- /dev/null
+++ b/pydocx/parsers/unicode_to_latex.py
@@ -0,0 +1,2355 @@
+# original XML at http://www.w3.org/Math/characters/unicode.xml
+# XSL for conversion: https://gist.github.com/798546
+
+
+unicode_to_latex = {
+ u"\u0023": "\\#",
+ u"\u0025": "\\%",
+ u"\u0027": "\\textquotesingle ",
+ u"\u005E": "\\^{}",
+ u"\u005F": "\\_",
+ u"\u0060": "\\textasciigrave ",
+ u"\u007C": "\\vert ",
+ u"\u007E": "\\textasciitilde ",
+ u"\u00A1": "\\textexclamdown ",
+ u"\u00A2": "\\textcent ",
+ u"\u00A3": "\\textsterling ",
+ u"\u00A4": "\\textcurrency ",
+ u"\u00A5": "\\textyen ",
+ u"\u00A6": "\\textbrokenbar ",
+ u"\u00A7": "\\textsection ",
+ u"\u00A8": "\\textasciidieresis ",
+ u"\u00A9": "\\textcopyright ",
+ u"\u00AA": "\\textordfeminine ",
+ u"\u00AB": "\\guillemotleft ",
+ u"\u00AC": "\\lnot ",
+ u"\u00AD": "\\-",
+ u"\u00AE": "\\textregistered ",
+ u"\u00AF": "\\textasciimacron ",
+ u"\u00B0": "\\textdegree ",
+ u"\u00B1": "\\pm ",
+ u"\u00B2": "{^2}",
+ u"\u00B3": "{^3}",
+ u"\u00B4": "\\textasciiacute ",
+ u"\u00B5": "\\mathrm{\\mu}",
+ u"\u00B6": "\\textparagraph ",
+ u"\u00B7": "\\cdot ",
+ u"\u00B8": "\\c{}",
+ u"\u00B9": "{^1}",
+ u"\u00BA": "\\textordmasculine ",
+ u"\u00BB": "\\guillemotright ",
+ u"\u00BC": "\\textonequarter ",
+ u"\u00BD": "\\textonehalf ",
+ u"\u00BE": "\\textthreequarters ",
+ u"\u00BF": "\\textquestiondown ",
+ u"\u00C0": "\\`{A}",
+ u"\u00C1": "\\'{A}",
+ u"\u00C2": "\\^{A}",
+ u"\u00C3": "\\~{A}",
+ u"\u00C4": "\\\"{A}",
+ u"\u00C5": "\\AA ",
+ u"\u00C6": "\\AE ",
+ u"\u00C7": "\\c{C}",
+ u"\u00C8": "\\`{E}",
+ u"\u00C9": "\\'{E}",
+ u"\u00CA": "\\^{E}",
+ u"\u00CB": "\\\"{E}",
+ u"\u00CC": "\\`{I}",
+ u"\u00CD": "\\'{I}",
+ u"\u00CE": "\\^{I}",
+ u"\u00CF": "\\\"{I}",
+ u"\u00D0": "\\DH ",
+ u"\u00D1": "\\~{N}",
+ u"\u00D2": "\\`{O}",
+ u"\u00D3": "\\'{O}",
+ u"\u00D4": "\\^{O}",
+ u"\u00D5": "\\~{O}",
+ u"\u00D6": "\\\"{O}",
+ u"\u00D7": "\\texttimes ",
+ u"\u00D8": "\\O ",
+ u"\u00D9": "\\`{U}",
+ u"\u00DA": "\\'{U}",
+ u"\u00DB": "\\^{U}",
+ u"\u00DC": "\\\"{U}",
+ u"\u00DD": "\\'{Y}",
+ u"\u00DE": "\\TH ",
+ u"\u00DF": "\\ss ",
+ u"\u00E0": "\\`{a}",
+ u"\u00E1": "\\'{a}",
+ u"\u00E2": "\\^{a}",
+ u"\u00E3": "\\~{a}",
+ u"\u00E4": "\\\"{a}",
+ u"\u00E5": "\\aa ",
+ u"\u00E6": "\\ae ",
+ u"\u00E7": "\\c{c}",
+ u"\u00E8": "\\`{e}",
+ u"\u00E9": "\\'{e}",
+ u"\u00EA": "\\^{e}",
+ u"\u00EB": "\\\"{e}",
+ u"\u00EC": "\\`{\\i}",
+ u"\u00ED": "\\'{\\i}",
+ u"\u00EE": "\\^{\\i}",
+ u"\u00EF": "\\\"{\\i}",
+ u"\u00F0": "\\dh ",
+ u"\u00F1": "\\~{n}",
+ u"\u00F2": "\\`{o}",
+ u"\u00F3": "\\'{o}",
+ u"\u00F4": "\\^{o}",
+ u"\u00F5": "\\~{o}",
+ u"\u00F6": "\\\"{o}",
+ u"\u00F7": "\\div ",
+ u"\u00F8": "\\o ",
+ u"\u00F9": "\\`{u}",
+ u"\u00FA": "\\'{u}",
+ u"\u00FB": "\\^{u}",
+ u"\u00FC": "\\\"{u}",
+ u"\u00FD": "\\'{y}",
+ u"\u00FE": "\\th ",
+ u"\u00FF": "\\\"{y}",
+ u"\u0100": "\\={A}",
+ u"\u0101": "\\={a}",
+ u"\u0102": "\\u{A}",
+ u"\u0103": "\\u{a}",
+ u"\u0104": "\\k{A}",
+ u"\u0105": "\\k{a}",
+ u"\u0106": "\\'{C}",
+ u"\u0107": "\\'{c}",
+ u"\u0108": "\\^{C}",
+ u"\u0109": "\\^{c}",
+ u"\u010A": "\\.{C}",
+ u"\u010B": "\\.{c}",
+ u"\u010C": "\\v{C}",
+ u"\u010D": "\\v{c}",
+ u"\u010E": "\\v{D}",
+ u"\u010F": "\\v{d}",
+ u"\u0110": "\\DJ ",
+ u"\u0111": "\\dj ",
+ u"\u0112": "\\={E}",
+ u"\u0113": "\\={e}",
+ u"\u0114": "\\u{E}",
+ u"\u0115": "\\u{e}",
+ u"\u0116": "\\.{E}",
+ u"\u0117": "\\.{e}",
+ u"\u0118": "\\k{E}",
+ u"\u0119": "\\k{e}",
+ u"\u011A": "\\v{E}",
+ u"\u011B": "\\v{e}",
+ u"\u011C": "\\^{G}",
+ u"\u011D": "\\^{g}",
+ u"\u011E": "\\u{G}",
+ u"\u011F": "\\u{g}",
+ u"\u0120": "\\.{G}",
+ u"\u0121": "\\.{g}",
+ u"\u0122": "\\c{G}",
+ u"\u0123": "\\c{g}",
+ u"\u0124": "\\^{H}",
+ u"\u0125": "\\^{h}",
+ u"\u0126": "{\\fontencoding{LELA}\\selectfont\\char40}",
+ u"\u0127": "\\Elzxh ",
+ u"\u0128": "\\~{I}",
+ u"\u0129": "\\~{\\i}",
+ u"\u012A": "\\={I}",
+ u"\u012B": "\\={\\i}",
+ u"\u012C": "\\u{I}",
+ u"\u012D": "\\u{\\i}",
+ u"\u012E": "\\k{I}",
+ u"\u012F": "\\k{i}",
+ u"\u0130": "\\.{I}",
+ u"\u0131": "\\i ",
+ u"\u0132": "IJ",
+ u"\u0133": "ij",
+ u"\u0134": "\\^{J}",
+ u"\u0135": "\\^{\\j}",
+ u"\u0136": "\\c{K}",
+ u"\u0137": "\\c{k}",
+ u"\u0138": "{\\fontencoding{LELA}\\selectfont\\char91}",
+ u"\u0139": "\\'{L}",
+ u"\u013A": "\\'{l}",
+ u"\u013B": "\\c{L}",
+ u"\u013C": "\\c{l}",
+ u"\u013D": "\\v{L}",
+ u"\u013E": "\\v{l}",
+ u"\u013F": "{\\fontencoding{LELA}\\selectfont\\char201}",
+ u"\u0140": "{\\fontencoding{LELA}\\selectfont\\char202}",
+ u"\u0141": "\\L ",
+ u"\u0142": "\\l ",
+ u"\u0143": "\\'{N}",
+ u"\u0144": "\\'{n}",
+ u"\u0145": "\\c{N}",
+ u"\u0146": "\\c{n}",
+ u"\u0147": "\\v{N}",
+ u"\u0148": "\\v{n}",
+ u"\u0149": "'n",
+ u"\u014A": "\\NG ",
+ u"\u014B": "\\ng ",
+ u"\u014C": "\\={O}",
+ u"\u014D": "\\={o}",
+ u"\u014E": "\\u{O}",
+ u"\u014F": "\\u{o}",
+ u"\u0150": "\\H{O}",
+ u"\u0151": "\\H{o}",
+ u"\u0152": "\\OE ",
+ u"\u0153": "\\oe ",
+ u"\u0154": "\\'{R}",
+ u"\u0155": "\\'{r}",
+ u"\u0156": "\\c{R}",
+ u"\u0157": "\\c{r}",
+ u"\u0158": "\\v{R}",
+ u"\u0159": "\\v{r}",
+ u"\u015A": "\\'{S}",
+ u"\u015B": "\\'{s}",
+ u"\u015C": "\\^{S}",
+ u"\u015D": "\\^{s}",
+ u"\u015E": "\\c{S}",
+ u"\u015F": "\\c{s}",
+ u"\u0160": "\\v{S}",
+ u"\u0161": "\\v{s}",
+ u"\u0162": "\\c{T}",
+ u"\u0163": "\\c{t}",
+ u"\u0164": "\\v{T}",
+ u"\u0165": "\\v{t}",
+ u"\u0166": "{\\fontencoding{LELA}\\selectfont\\char47}",
+ u"\u0167": "{\\fontencoding{LELA}\\selectfont\\char63}",
+ u"\u0168": "\\~{U}",
+ u"\u0169": "\\~{u}",
+ u"\u016A": "\\={U}",
+ u"\u016B": "\\={u}",
+ u"\u016C": "\\u{U}",
+ u"\u016D": "\\u{u}",
+ u"\u016E": "\\r{U}",
+ u"\u016F": "\\r{u}",
+ u"\u0170": "\\H{U}",
+ u"\u0171": "\\H{u}",
+ u"\u0172": "\\k{U}",
+ u"\u0173": "\\k{u}",
+ u"\u0174": "\\^{W}",
+ u"\u0175": "\\^{w}",
+ u"\u0176": "\\^{Y}",
+ u"\u0177": "\\^{y}",
+ u"\u0178": "\\\"{Y}",
+ u"\u0179": "\\'{Z}",
+ u"\u017A": "\\'{z}",
+ u"\u017B": "\\.{Z}",
+ u"\u017C": "\\.{z}",
+ u"\u017D": "\\v{Z}",
+ u"\u017E": "\\v{z}",
+ u"\u0195": "\\texthvlig ",
+ u"\u019E": "\\textnrleg ",
+ u"\u01AA": "\\eth ",
+ u"\u01BA": "{\\fontencoding{LELA}\\selectfont\\char195}",
+ u"\u01C2": "\\textdoublepipe ",
+ u"\u01F5": "\\'{g}",
+ u"\u0250": "\\Elztrna ",
+ u"\u0252": "\\Elztrnsa ",
+ u"\u0254": "\\Elzopeno ",
+ u"\u0256": "\\Elzrtld ",
+ u"\u0258": "{\\fontencoding{LEIP}\\selectfont\\char61}",
+ u"\u0259": "\\Elzschwa ",
+ u"\u025B": "\\varepsilon ",
+ u"\u0263": "\\Elzpgamma ",
+ u"\u0264": "\\Elzpbgam ",
+ u"\u0265": "\\Elztrnh ",
+ u"\u026C": "\\Elzbtdl ",
+ u"\u026D": "\\Elzrtll ",
+ u"\u026F": "\\Elztrnm ",
+ u"\u0270": "\\Elztrnmlr ",
+ u"\u0271": "\\Elzltlmr ",
+ u"\u0272": "\\Elzltln ",
+ u"\u0273": "\\Elzrtln ",
+ u"\u0277": "\\Elzclomeg ",
+ u"\u0278": "\\textphi ",
+ u"\u0279": "\\Elztrnr ",
+ u"\u027A": "\\Elztrnrl ",
+ u"\u027B": "\\Elzrttrnr ",
+ u"\u027C": "\\Elzrl ",
+ u"\u027D": "\\Elzrtlr ",
+ u"\u027E": "\\Elzfhr ",
+ u"\u027F": "{\\fontencoding{LEIP}\\selectfont\\char202}",
+ u"\u0282": "\\Elzrtls ",
+ u"\u0283": "\\Elzesh ",
+ u"\u0287": "\\Elztrnt ",
+ u"\u0288": "\\Elzrtlt ",
+ u"\u028A": "\\Elzpupsil ",
+ u"\u028B": "\\Elzpscrv ",
+ u"\u028C": "\\Elzinvv ",
+ u"\u028D": "\\Elzinvw ",
+ u"\u028E": "\\Elztrny ",
+ u"\u0290": "\\Elzrtlz ",
+ u"\u0292": "\\Elzyogh ",
+ u"\u0294": "\\Elzglst ",
+ u"\u0295": "\\Elzreglst ",
+ u"\u0296": "\\Elzinglst ",
+ u"\u029E": "\\textturnk ",
+ u"\u02A4": "\\Elzdyogh ",
+ u"\u02A7": "\\Elztesh ",
+ u"\u02C7": "\\textasciicaron ",
+ u"\u02C8": "\\Elzverts ",
+ u"\u02CC": "\\Elzverti ",
+ u"\u02D0": "\\Elzlmrk ",
+ u"\u02D1": "\\Elzhlmrk ",
+ u"\u02D2": "\\Elzsbrhr ",
+ u"\u02D3": "\\Elzsblhr ",
+ u"\u02D4": "\\Elzrais ",
+ u"\u02D5": "\\Elzlow ",
+ u"\u02D8": "\\textasciibreve ",
+ u"\u02D9": "\\textperiodcentered ",
+ u"\u02DA": "\\r{}",
+ u"\u02DB": "\\k{}",
+ u"\u02DC": "\\texttildelow ",
+ u"\u02DD": "\\H{}",
+ u"\u02E5": "\\tone{55}",
+ u"\u02E6": "\\tone{44}",
+ u"\u02E7": "\\tone{33}",
+ u"\u02E8": "\\tone{22}",
+ u"\u02E9": "\\tone{11}",
+ u"\u0300": "\\`",
+ u"\u0301": "\\'",
+ u"\u0302": "\\^",
+ u"\u0303": "\\~",
+ u"\u0304": "\\=",
+ u"\u0306": "\\u",
+ u"\u0307": "\\.",
+ u"\u0308": "\\\"",
+ u"\u030A": "\\r",
+ u"\u030B": "\\H",
+ u"\u030C": "\\v",
+ u"\u030F": "\\cyrchar\\C",
+ u"\u0311": "{\\fontencoding{LECO}\\selectfont\\char177}",
+ u"\u0318": "{\\fontencoding{LECO}\\selectfont\\char184}",
+ u"\u0319": "{\\fontencoding{LECO}\\selectfont\\char185}",
+ u"\u0321": "\\Elzpalh ",
+ u"\u0322": "\\Elzrh ",
+ u"\u0327": "\\c",
+ u"\u0328": "\\k",
+ u"\u032A": "\\Elzsbbrg ",
+ u"\u032B": "{\\fontencoding{LECO}\\selectfont\\char203}",
+ u"\u032F": "{\\fontencoding{LECO}\\selectfont\\char207}",
+ u"\u0335": "\\Elzxl ",
+ u"\u0336": "\\Elzbar ",
+ u"\u0337": "{\\fontencoding{LECO}\\selectfont\\char215}",
+ u"\u0338": "{\\fontencoding{LECO}\\selectfont\\char216}",
+ u"\u033A": "{\\fontencoding{LECO}\\selectfont\\char218}",
+ u"\u033B": "{\\fontencoding{LECO}\\selectfont\\char219}",
+ u"\u033C": "{\\fontencoding{LECO}\\selectfont\\char220}",
+ u"\u033D": "{\\fontencoding{LECO}\\selectfont\\char221}",
+ u"\u0361": "{\\fontencoding{LECO}\\selectfont\\char225}",
+ u"\u0386": "\\'{A}",
+ u"\u0388": "\\'{E}",
+ u"\u0389": "\\'{H}",
+ u"\u038A": "\\'{}{I}",
+ u"\u038C": "\\'{}O",
+ u"\u038E": "\\mathrm{'Y}",
+ u"\u038F": "\\mathrm{'\\Omega}",
+ u"\u0390": "\\acute{\\ddot{\\iota}}",
+ u"\u0391": "\\Alpha ",
+ u"\u0392": "\\Beta ",
+ u"\u0393": "\\Gamma ",
+ u"\u0394": "\\Delta ",
+ u"\u0395": "\\Epsilon ",
+ u"\u0396": "\\Zeta ",
+ u"\u0397": "\\Eta ",
+ u"\u0398": "\\Theta ",
+ u"\u0399": "\\Iota ",
+ u"\u039A": "\\Kappa ",
+ u"\u039B": "\\Lambda ",
+ u"\u039E": "\\Xi ",
+ u"\u03A0": "\\Pi ",
+ u"\u03A1": "\\Rho ",
+ u"\u03A3": "\\Sigma ",
+ u"\u03A4": "\\Tau ",
+ u"\u03A5": "\\Upsilon ",
+ u"\u03A6": "\\Phi ",
+ u"\u03A7": "\\Chi ",
+ u"\u03A8": "\\Psi ",
+ u"\u03A9": "\\Omega ",
+ u"\u03AA": "\\mathrm{\\ddot{I}}",
+ u"\u03AB": "\\mathrm{\\ddot{Y}}",
+ u"\u03AC": "\\'{$\\alpha$}",
+ u"\u03AD": "\\acute{\\epsilon}",
+ u"\u03AE": "\\acute{\\eta}",
+ u"\u03AF": "\\acute{\\iota}",
+ u"\u03B0": "\\acute{\\ddot{\\upsilon}}",
+ u"\u03B1": "\\alpha ",
+ u"\u03B2": "\\beta ",
+ u"\u03B3": "\\gamma ",
+ u"\u03B4": "\\delta ",
+ u"\u03B5": "\\epsilon ",
+ u"\u03B6": "\\zeta ",
+ u"\u03B7": "\\eta ",
+ u"\u03B8": "\\texttheta ",
+ u"\u03B9": "\\iota ",
+ u"\u03BA": "\\kappa ",
+ u"\u03BB": "\\lambda ",
+ u"\u03BC": "\\mu ",
+ u"\u03BD": "\\nu ",
+ u"\u03BE": "\\xi ",
+ u"\u03C0": "\\pi ",
+ u"\u03C1": "\\rho ",
+ u"\u03C2": "\\varsigma ",
+ u"\u03C3": "\\sigma ",
+ u"\u03C4": "\\tau ",
+ u"\u03C5": "\\upsilon ",
+ u"\u03C6": "\\varphi ",
+ u"\u03C7": "\\chi ",
+ u"\u03C8": "\\psi ",
+ u"\u03C9": "\\omega ",
+ u"\u03CA": "\\ddot{\\iota}",
+ u"\u03CB": "\\ddot{\\upsilon}",
+ u"\u03CC": "\\'{o}",
+ u"\u03CD": "\\acute{\\upsilon}",
+ u"\u03CE": "\\acute{\\omega}",
+ u"\u03D0": "\\Pisymbol{ppi022}{87}",
+ u"\u03D1": "\\textvartheta ",
+ u"\u03D2": "\\Upsilon ",
+ u"\u03D5": "\\phi ",
+ u"\u03D6": "\\varpi ",
+ u"\u03DA": "\\Stigma ",
+ u"\u03DC": "\\Digamma ",
+ u"\u03DD": "\\digamma ",
+ u"\u03DE": "\\Koppa ",
+ u"\u03E0": "\\Sampi ",
+ u"\u03F0": "\\varkappa ",
+ u"\u03F1": "\\varrho ",
+ u"\u03F4": "\\textTheta ",
+ u"\u03F6": "\\backepsilon ",
+ u"\u0401": "\\cyrchar\\CYRYO ",
+ u"\u0402": "\\cyrchar\\CYRDJE ",
+ u"\u0403": "\\cyrchar{\\'\\CYRG}",
+ u"\u0404": "\\cyrchar\\CYRIE ",
+ u"\u0405": "\\cyrchar\\CYRDZE ",
+ u"\u0406": "\\cyrchar\\CYRII ",
+ u"\u0407": "\\cyrchar\\CYRYI ",
+ u"\u0408": "\\cyrchar\\CYRJE ",
+ u"\u0409": "\\cyrchar\\CYRLJE ",
+ u"\u040A": "\\cyrchar\\CYRNJE ",
+ u"\u040B": "\\cyrchar\\CYRTSHE ",
+ u"\u040C": "\\cyrchar{\\'\\CYRK}",
+ u"\u040E": "\\cyrchar\\CYRUSHRT ",
+ u"\u040F": "\\cyrchar\\CYRDZHE ",
+ u"\u0410": "\\cyrchar\\CYRA ",
+ u"\u0411": "\\cyrchar\\CYRB ",
+ u"\u0412": "\\cyrchar\\CYRV ",
+ u"\u0413": "\\cyrchar\\CYRG ",
+ u"\u0414": "\\cyrchar\\CYRD ",
+ u"\u0415": "\\cyrchar\\CYRE ",
+ u"\u0416": "\\cyrchar\\CYRZH ",
+ u"\u0417": "\\cyrchar\\CYRZ ",
+ u"\u0418": "\\cyrchar\\CYRI ",
+ u"\u0419": "\\cyrchar\\CYRISHRT ",
+ u"\u041A": "\\cyrchar\\CYRK ",
+ u"\u041B": "\\cyrchar\\CYRL ",
+ u"\u041C": "\\cyrchar\\CYRM ",
+ u"\u041D": "\\cyrchar\\CYRN ",
+ u"\u041E": "\\cyrchar\\CYRO ",
+ u"\u041F": "\\cyrchar\\CYRP ",
+ u"\u0420": "\\cyrchar\\CYRR ",
+ u"\u0421": "\\cyrchar\\CYRS ",
+ u"\u0422": "\\cyrchar\\CYRT ",
+ u"\u0423": "\\cyrchar\\CYRU ",
+ u"\u0424": "\\cyrchar\\CYRF ",
+ u"\u0425": "\\cyrchar\\CYRH ",
+ u"\u0426": "\\cyrchar\\CYRC ",
+ u"\u0427": "\\cyrchar\\CYRCH ",
+ u"\u0428": "\\cyrchar\\CYRSH ",
+ u"\u0429": "\\cyrchar\\CYRSHCH ",
+ u"\u042A": "\\cyrchar\\CYRHRDSN ",
+ u"\u042B": "\\cyrchar\\CYRERY ",
+ u"\u042C": "\\cyrchar\\CYRSFTSN ",
+ u"\u042D": "\\cyrchar\\CYREREV ",
+ u"\u042E": "\\cyrchar\\CYRYU ",
+ u"\u042F": "\\cyrchar\\CYRYA ",
+ u"\u0430": "\\cyrchar\\cyra ",
+ u"\u0431": "\\cyrchar\\cyrb ",
+ u"\u0432": "\\cyrchar\\cyrv ",
+ u"\u0433": "\\cyrchar\\cyrg ",
+ u"\u0434": "\\cyrchar\\cyrd ",
+ u"\u0435": "\\cyrchar\\cyre ",
+ u"\u0436": "\\cyrchar\\cyrzh ",
+ u"\u0437": "\\cyrchar\\cyrz ",
+ u"\u0438": "\\cyrchar\\cyri ",
+ u"\u0439": "\\cyrchar\\cyrishrt ",
+ u"\u043A": "\\cyrchar\\cyrk ",
+ u"\u043B": "\\cyrchar\\cyrl ",
+ u"\u043C": "\\cyrchar\\cyrm ",
+ u"\u043D": "\\cyrchar\\cyrn ",
+ u"\u043E": "\\cyrchar\\cyro ",
+ u"\u043F": "\\cyrchar\\cyrp ",
+ u"\u0440": "\\cyrchar\\cyrr ",
+ u"\u0441": "\\cyrchar\\cyrs ",
+ u"\u0442": "\\cyrchar\\cyrt ",
+ u"\u0443": "\\cyrchar\\cyru ",
+ u"\u0444": "\\cyrchar\\cyrf ",
+ u"\u0445": "\\cyrchar\\cyrh ",
+ u"\u0446": "\\cyrchar\\cyrc ",
+ u"\u0447": "\\cyrchar\\cyrch ",
+ u"\u0448": "\\cyrchar\\cyrsh ",
+ u"\u0449": "\\cyrchar\\cyrshch ",
+ u"\u044A": "\\cyrchar\\cyrhrdsn ",
+ u"\u044B": "\\cyrchar\\cyrery ",
+ u"\u044C": "\\cyrchar\\cyrsftsn ",
+ u"\u044D": "\\cyrchar\\cyrerev ",
+ u"\u044E": "\\cyrchar\\cyryu ",
+ u"\u044F": "\\cyrchar\\cyrya ",
+ u"\u0451": "\\cyrchar\\cyryo ",
+ u"\u0452": "\\cyrchar\\cyrdje ",
+ u"\u0453": "\\cyrchar{\\'\\cyrg}",
+ u"\u0454": "\\cyrchar\\cyrie ",
+ u"\u0455": "\\cyrchar\\cyrdze ",
+ u"\u0456": "\\cyrchar\\cyrii ",
+ u"\u0457": "\\cyrchar\\cyryi ",
+ u"\u0458": "\\cyrchar\\cyrje ",
+ u"\u0459": "\\cyrchar\\cyrlje ",
+ u"\u045A": "\\cyrchar\\cyrnje ",
+ u"\u045B": "\\cyrchar\\cyrtshe ",
+ u"\u045C": "\\cyrchar{\\'\\cyrk}",
+ u"\u045E": "\\cyrchar\\cyrushrt ",
+ u"\u045F": "\\cyrchar\\cyrdzhe ",
+ u"\u0460": "\\cyrchar\\CYROMEGA ",
+ u"\u0461": "\\cyrchar\\cyromega ",
+ u"\u0462": "\\cyrchar\\CYRYAT ",
+ u"\u0464": "\\cyrchar\\CYRIOTE ",
+ u"\u0465": "\\cyrchar\\cyriote ",
+ u"\u0466": "\\cyrchar\\CYRLYUS ",
+ u"\u0467": "\\cyrchar\\cyrlyus ",
+ u"\u0468": "\\cyrchar\\CYRIOTLYUS ",
+ u"\u0469": "\\cyrchar\\cyriotlyus ",
+ u"\u046A": "\\cyrchar\\CYRBYUS ",
+ u"\u046C": "\\cyrchar\\CYRIOTBYUS ",
+ u"\u046D": "\\cyrchar\\cyriotbyus ",
+ u"\u046E": "\\cyrchar\\CYRKSI ",
+ u"\u046F": "\\cyrchar\\cyrksi ",
+ u"\u0470": "\\cyrchar\\CYRPSI ",
+ u"\u0471": "\\cyrchar\\cyrpsi ",
+ u"\u0472": "\\cyrchar\\CYRFITA ",
+ u"\u0474": "\\cyrchar\\CYRIZH ",
+ u"\u0478": "\\cyrchar\\CYRUK ",
+ u"\u0479": "\\cyrchar\\cyruk ",
+ u"\u047A": "\\cyrchar\\CYROMEGARND ",
+ u"\u047B": "\\cyrchar\\cyromegarnd ",
+ u"\u047C": "\\cyrchar\\CYROMEGATITLO ",
+ u"\u047D": "\\cyrchar\\cyromegatitlo ",
+ u"\u047E": "\\cyrchar\\CYROT ",
+ u"\u047F": "\\cyrchar\\cyrot ",
+ u"\u0480": "\\cyrchar\\CYRKOPPA ",
+ u"\u0481": "\\cyrchar\\cyrkoppa ",
+ u"\u0482": "\\cyrchar\\cyrthousands ",
+ u"\u0488": "\\cyrchar\\cyrhundredthousands ",
+ u"\u0489": "\\cyrchar\\cyrmillions ",
+ u"\u048C": "\\cyrchar\\CYRSEMISFTSN ",
+ u"\u048D": "\\cyrchar\\cyrsemisftsn ",
+ u"\u048E": "\\cyrchar\\CYRRTICK ",
+ u"\u048F": "\\cyrchar\\cyrrtick ",
+ u"\u0490": "\\cyrchar\\CYRGUP ",
+ u"\u0491": "\\cyrchar\\cyrgup ",
+ u"\u0492": "\\cyrchar\\CYRGHCRS ",
+ u"\u0493": "\\cyrchar\\cyrghcrs ",
+ u"\u0494": "\\cyrchar\\CYRGHK ",
+ u"\u0495": "\\cyrchar\\cyrghk ",
+ u"\u0496": "\\cyrchar\\CYRZHDSC ",
+ u"\u0497": "\\cyrchar\\cyrzhdsc ",
+ u"\u0498": "\\cyrchar\\CYRZDSC ",
+ u"\u0499": "\\cyrchar\\cyrzdsc ",
+ u"\u049A": "\\cyrchar\\CYRKDSC ",
+ u"\u049B": "\\cyrchar\\cyrkdsc ",
+ u"\u049C": "\\cyrchar\\CYRKVCRS ",
+ u"\u049D": "\\cyrchar\\cyrkvcrs ",
+ u"\u049E": "\\cyrchar\\CYRKHCRS ",
+ u"\u049F": "\\cyrchar\\cyrkhcrs ",
+ u"\u04A0": "\\cyrchar\\CYRKBEAK ",
+ u"\u04A1": "\\cyrchar\\cyrkbeak ",
+ u"\u04A2": "\\cyrchar\\CYRNDSC ",
+ u"\u04A3": "\\cyrchar\\cyrndsc ",
+ u"\u04A4": "\\cyrchar\\CYRNG ",
+ u"\u04A5": "\\cyrchar\\cyrng ",
+ u"\u04A6": "\\cyrchar\\CYRPHK ",
+ u"\u04A7": "\\cyrchar\\cyrphk ",
+ u"\u04A8": "\\cyrchar\\CYRABHHA ",
+ u"\u04A9": "\\cyrchar\\cyrabhha ",
+ u"\u04AA": "\\cyrchar\\CYRSDSC ",
+ u"\u04AB": "\\cyrchar\\cyrsdsc ",
+ u"\u04AC": "\\cyrchar\\CYRTDSC ",
+ u"\u04AD": "\\cyrchar\\cyrtdsc ",
+ u"\u04AE": "\\cyrchar\\CYRY ",
+ u"\u04AF": "\\cyrchar\\cyry ",
+ u"\u04B0": "\\cyrchar\\CYRYHCRS ",
+ u"\u04B1": "\\cyrchar\\cyryhcrs ",
+ u"\u04B2": "\\cyrchar\\CYRHDSC ",
+ u"\u04B3": "\\cyrchar\\cyrhdsc ",
+ u"\u04B4": "\\cyrchar\\CYRTETSE ",
+ u"\u04B5": "\\cyrchar\\cyrtetse ",
+ u"\u04B6": "\\cyrchar\\CYRCHRDSC ",
+ u"\u04B7": "\\cyrchar\\cyrchrdsc ",
+ u"\u04B8": "\\cyrchar\\CYRCHVCRS ",
+ u"\u04B9": "\\cyrchar\\cyrchvcrs ",
+ u"\u04BA": "\\cyrchar\\CYRSHHA ",
+ u"\u04BB": "\\cyrchar\\cyrshha ",
+ u"\u04BC": "\\cyrchar\\CYRABHCH ",
+ u"\u04BD": "\\cyrchar\\cyrabhch ",
+ u"\u04BE": "\\cyrchar\\CYRABHCHDSC ",
+ u"\u04BF": "\\cyrchar\\cyrabhchdsc ",
+ u"\u04C0": "\\cyrchar\\CYRpalochka ",
+ u"\u04C3": "\\cyrchar\\CYRKHK ",
+ u"\u04C4": "\\cyrchar\\cyrkhk ",
+ u"\u04C7": "\\cyrchar\\CYRNHK ",
+ u"\u04C8": "\\cyrchar\\cyrnhk ",
+ u"\u04CB": "\\cyrchar\\CYRCHLDSC ",
+ u"\u04CC": "\\cyrchar\\cyrchldsc ",
+ u"\u04D4": "\\cyrchar\\CYRAE ",
+ u"\u04D5": "\\cyrchar\\cyrae ",
+ u"\u04D8": "\\cyrchar\\CYRSCHWA ",
+ u"\u04D9": "\\cyrchar\\cyrschwa ",
+ u"\u04E0": "\\cyrchar\\CYRABHDZE ",
+ u"\u04E1": "\\cyrchar\\cyrabhdze ",
+ u"\u04E8": "\\cyrchar\\CYROTLD ",
+ u"\u04E9": "\\cyrchar\\cyrotld ",
+ u"\u2002": "\\hspace{0.6em}",
+ u"\u2003": "\\hspace{1em}",
+ u"\u2004": "\\hspace{0.33em}",
+ u"\u2005": "\\hspace{0.25em}",
+ u"\u2006": "\\hspace{0.166em}",
+ u"\u2007": "\\hphantom{0}",
+ u"\u2008": "\\hphantom{,}",
+ u"\u2009": "\\hspace{0.167em}",
+ u"\u2009-0200A-0200A": "\\;",
+ u"\u200A": "\\mkern1mu ",
+ u"\u2013": "\\textendash ",
+ u"\u2014": "\\textemdash ",
+ u"\u2015": "\\rule{1em}{1pt}",
+ u"\u2016": "\\Vert ",
+ u"\u201B": "\\Elzreapos ",
+ u"\u201C": "\\textquotedblleft ",
+ u"\u201D": "\\textquotedblright ",
+ u"\u201E": ",,",
+ u"\u2020": "\\textdagger ",
+ u"\u2021": "\\textdaggerdbl ",
+ u"\u2022": "\\textbullet ",
+ u"\u2025": "..",
+ u"\u2026": "\\ldots ",
+ u"\u2030": "\\textperthousand ",
+ u"\u2031": "\\textpertenthousand ",
+ u"\u2032": "{'}",
+ u"\u2033": "{''}",
+ u"\u2034": "{'''}",
+ u"\u2035": "\\backprime ",
+ u"\u2039": "\\guilsinglleft ",
+ u"\u203A": "\\guilsinglright ",
+ u"\u2057": "''''",
+ u"\u205F": "\\mkern4mu ",
+ u"\u2060": "\\nolinebreak ",
+ u"\u20A7": "\\ensuremath{\\Elzpes}",
+ u"\u20AC": "\\mbox{\\texteuro} ",
+ u"\u20DB": "\\dddot ",
+ u"\u20DC": "\\ddddot ",
+ u"\u2102": "\\mathbb{C}",
+ u"\u210A": "\\mathscr{g}",
+ u"\u210B": "\\mathscr{H}",
+ u"\u210C": "\\mathfrak{H}",
+ u"\u210D": "\\mathbb{H}",
+ u"\u210F": "\\hslash ",
+ u"\u2110": "\\mathscr{I}",
+ u"\u2111": "\\mathfrak{I}",
+ u"\u2112": "\\mathscr{L}",
+ u"\u2113": "\\mathscr{l}",
+ u"\u2115": "\\mathbb{N}",
+ u"\u2116": "\\cyrchar\\textnumero ",
+ u"\u2118": "\\wp ",
+ u"\u2119": "\\mathbb{P}",
+ u"\u211A": "\\mathbb{Q}",
+ u"\u211B": "\\mathscr{R}",
+ u"\u211C": "\\mathfrak{R}",
+ u"\u211D": "\\mathbb{R}",
+ u"\u211E": "\\Elzxrat ",
+ u"\u2122": "\\texttrademark ",
+ u"\u2124": "\\mathbb{Z}",
+ u"\u2126": "\\Omega ",
+ u"\u2127": "\\mho ",
+ u"\u2128": "\\mathfrak{Z}",
+ u"\u2129": "\\ElsevierGlyph{2129}",
+ u"\u212B": "\\AA ",
+ u"\u212C": "\\mathscr{B}",
+ u"\u212D": "\\mathfrak{C}",
+ u"\u212F": "\\mathscr{e}",
+ u"\u2130": "\\mathscr{E}",
+ u"\u2131": "\\mathscr{F}",
+ u"\u2133": "\\mathscr{M}",
+ u"\u2134": "\\mathscr{o}",
+ u"\u2135": "\\aleph ",
+ u"\u2136": "\\beth ",
+ u"\u2137": "\\gimel ",
+ u"\u2138": "\\daleth ",
+ u"\u2153": "\\textfrac{1}{3}",
+ u"\u2154": "\\textfrac{2}{3}",
+ u"\u2155": "\\textfrac{1}{5}",
+ u"\u2156": "\\textfrac{2}{5}",
+ u"\u2157": "\\textfrac{3}{5}",
+ u"\u2158": "\\textfrac{4}{5}",
+ u"\u2159": "\\textfrac{1}{6}",
+ u"\u215A": "\\textfrac{5}{6}",
+ u"\u215B": "\\textfrac{1}{8}",
+ u"\u215C": "\\textfrac{3}{8}",
+ u"\u215D": "\\textfrac{5}{8}",
+ u"\u215E": "\\textfrac{7}{8}",
+ u"\u2190": "\\leftarrow ",
+ u"\u2191": "\\uparrow ",
+ u"\u2192": "\\rightarrow ",
+ u"\u2193": "\\downarrow ",
+ u"\u2194": "\\leftrightarrow ",
+ u"\u2195": "\\updownarrow ",
+ u"\u2196": "\\nwarrow ",
+ u"\u2197": "\\nearrow ",
+ u"\u2198": "\\searrow ",
+ u"\u2199": "\\swarrow ",
+ u"\u219A": "\\nleftarrow ",
+ u"\u219B": "\\nrightarrow ",
+ u"\u219C": "\\arrowwaveright ",
+ u"\u219D": "\\arrowwaveright ",
+ u"\u219E": "\\twoheadleftarrow ",
+ u"\u21A0": "\\twoheadrightarrow ",
+ u"\u21A2": "\\leftarrowtail ",
+ u"\u21A3": "\\rightarrowtail ",
+ u"\u21A6": "\\mapsto ",
+ u"\u21A9": "\\hookleftarrow ",
+ u"\u21AA": "\\hookrightarrow ",
+ u"\u21AB": "\\looparrowleft ",
+ u"\u21AC": "\\looparrowright ",
+ u"\u21AD": "\\leftrightsquigarrow ",
+ u"\u21AE": "\\nleftrightarrow ",
+ u"\u21B0": "\\Lsh ",
+ u"\u21B1": "\\Rsh ",
+ u"\u21B3": "\\ElsevierGlyph{21B3}",
+ u"\u21B6": "\\curvearrowleft ",
+ u"\u21B7": "\\curvearrowright ",
+ u"\u21BA": "\\circlearrowleft ",
+ u"\u21BB": "\\circlearrowright ",
+ u"\u21BC": "\\leftharpoonup ",
+ u"\u21BD": "\\leftharpoondown ",
+ u"\u21BE": "\\upharpoonright ",
+ u"\u21BF": "\\upharpoonleft ",
+ u"\u21C0": "\\rightharpoonup ",
+ u"\u21C1": "\\rightharpoondown ",
+ u"\u21C2": "\\downharpoonright ",
+ u"\u21C3": "\\downharpoonleft ",
+ u"\u21C4": "\\rightleftarrows ",
+ u"\u21C5": "\\dblarrowupdown ",
+ u"\u21C6": "\\leftrightarrows ",
+ u"\u21C7": "\\leftleftarrows ",
+ u"\u21C8": "\\upuparrows ",
+ u"\u21C9": "\\rightrightarrows ",
+ u"\u21CA": "\\downdownarrows ",
+ u"\u21CB": "\\leftrightharpoons ",
+ u"\u21CC": "\\rightleftharpoons ",
+ u"\u21CD": "\\nLeftarrow ",
+ u"\u21CE": "\\nLeftrightarrow ",
+ u"\u21CF": "\\nRightarrow ",
+ u"\u21D0": "\\Leftarrow ",
+ u"\u21D1": "\\Uparrow ",
+ u"\u21D2": "\\Rightarrow ",
+ u"\u21D3": "\\Downarrow ",
+ u"\u21D4": "\\Leftrightarrow ",
+ u"\u21D5": "\\Updownarrow ",
+ u"\u21DA": "\\Lleftarrow ",
+ u"\u21DB": "\\Rrightarrow ",
+ u"\u21DD": "\\rightsquigarrow ",
+ u"\u21F5": "\\DownArrowUpArrow ",
+ u"\u2200": "\\forall ",
+ u"\u2201": "\\complement ",
+ u"\u2202": "\\partial ",
+ u"\u2203": "\\exists ",
+ u"\u2204": "\\nexists ",
+ u"\u2205": "\\varnothing ",
+ u"\u2207": "\\nabla ",
+ u"\u2208": "\\in ",
+ u"\u2209": "\\not\\in ",
+ u"\u220B": "\\ni ",
+ u"\u220C": "\\not\\ni ",
+ u"\u220F": "\\prod ",
+ u"\u2210": "\\coprod ",
+ u"\u2211": "\\sum ",
+ u"\u2213": "\\mp ",
+ u"\u2214": "\\dotplus ",
+ u"\u2216": "\\setminus ",
+ u"\u2217": "{_\\ast}",
+ u"\u2218": "\\circ ",
+ u"\u2219": "\\bullet ",
+ u"\u221A": "\\surd ",
+ u"\u221D": "\\propto ",
+ u"\u221E": "\\infty ",
+ u"\u221F": "\\rightangle ",
+ u"\u2220": "\\angle ",
+ u"\u2221": "\\measuredangle ",
+ u"\u2222": "\\sphericalangle ",
+ u"\u2223": "\\mid ",
+ u"\u2224": "\\nmid ",
+ u"\u2225": "\\parallel ",
+ u"\u2226": "\\nparallel ",
+ u"\u2227": "\\wedge ",
+ u"\u2228": "\\vee ",
+ u"\u2229": "\\cap ",
+ u"\u222A": "\\cup ",
+ u"\u222B": "\\int ",
+ u"\u222C": "\\int\\!\\int ",
+ u"\u222D": "\\int\\!\\int\\!\\int ",
+ u"\u222E": "\\oint ",
+ u"\u222F": "\\surfintegral ",
+ u"\u2230": "\\volintegral ",
+ u"\u2231": "\\clwintegral ",
+ u"\u2232": "\\ElsevierGlyph{2232}",
+ u"\u2233": "\\ElsevierGlyph{2233}",
+ u"\u2234": "\\therefore ",
+ u"\u2235": "\\because ",
+ u"\u2237": "\\Colon ",
+ u"\u2238": "\\ElsevierGlyph{2238}",
+ u"\u223A": "\\mathbin{{:}\\!\\!{-}\\!\\!{:}}",
+ u"\u223B": "\\homothetic ",
+ u"\u223C": "\\sim ",
+ u"\u223D": "\\backsim ",
+ u"\u223E": "\\lazysinv ",
+ u"\u2240": "\\wr ",
+ u"\u2241": "\\not\\sim ",
+ u"\u2242": "\\ElsevierGlyph{2242}",
+ u"\u2242-00338": "\\NotEqualTilde ",
+ u"\u2243": "\\simeq ",
+ u"\u2244": "\\not\\simeq ",
+ u"\u2245": "\\cong ",
+ u"\u2246": "\\approxnotequal ",
+ u"\u2247": "\\not\\cong ",
+ u"\u2248": "\\approx ",
+ u"\u2249": "\\not\\approx ",
+ u"\u224A": "\\approxeq ",
+ u"\u224B": "\\tildetrpl ",
+ u"\u224B-00338": "\\not\\apid ",
+ u"\u224C": "\\allequal ",
+ u"\u224D": "\\asymp ",
+ u"\u224E": "\\Bumpeq ",
+ u"\u224E-00338": "\\NotHumpDownHump ",
+ u"\u224F": "\\bumpeq ",
+ u"\u224F-00338": "\\NotHumpEqual ",
+ u"\u2250": "\\doteq ",
+ u"\u2250-00338": "\\not\\doteq",
+ u"\u2251": "\\doteqdot ",
+ u"\u2252": "\\fallingdotseq ",
+ u"\u2253": "\\risingdotseq ",
+ u"\u2254": ":=",
+ u"\u2255": "=:",
+ u"\u2256": "\\eqcirc ",
+ u"\u2257": "\\circeq ",
+ u"\u2259": "\\estimates ",
+ u"\u225A": "\\ElsevierGlyph{225A}",
+ u"\u225B": "\\starequal ",
+ u"\u225C": "\\triangleq ",
+ u"\u225F": "\\ElsevierGlyph{225F}",
+ u"\u2260": "\\not =",
+ u"\u2261": "\\equiv ",
+ u"\u2262": "\\not\\equiv ",
+ u"\u2264": "\\leq ",
+ u"\u2265": "\\geq ",
+ u"\u2266": "\\leqq ",
+ u"\u2267": "\\geqq ",
+ u"\u2268": "\\lneqq ",
+ u"\u2268-0FE00": "\\lvertneqq ",
+ u"\u2269": "\\gneqq ",
+ u"\u2269-0FE00": "\\gvertneqq ",
+ u"\u226A": "\\ll ",
+ u"\u226A-00338": "\\NotLessLess ",
+ u"\u226B": "\\gg ",
+ u"\u226B-00338": "\\NotGreaterGreater ",
+ u"\u226C": "\\between ",
+ u"\u226D": "\\not\\kern-0.3em\\times ",
+ u"\u226E": "\\not<",
+ u"\u226F": "\\not>",
+ u"\u2270": "\\not\\leq ",
+ u"\u2271": "\\not\\geq ",
+ u"\u2272": "\\lessequivlnt ",
+ u"\u2273": "\\greaterequivlnt ",
+ u"\u2274": "\\ElsevierGlyph{2274}",
+ u"\u2275": "\\ElsevierGlyph{2275}",
+ u"\u2276": "\\lessgtr ",
+ u"\u2277": "\\gtrless ",
+ u"\u2278": "\\notlessgreater ",
+ u"\u2279": "\\notgreaterless ",
+ u"\u227A": "\\prec ",
+ u"\u227B": "\\succ ",
+ u"\u227C": "\\preccurlyeq ",
+ u"\u227D": "\\succcurlyeq ",
+ u"\u227E": "\\precapprox ",
+ u"\u227E-00338": "\\NotPrecedesTilde ",
+ u"\u227F": "\\succapprox ",
+ u"\u227F-00338": "\\NotSucceedsTilde ",
+ u"\u2280": "\\not\\prec ",
+ u"\u2281": "\\not\\succ ",
+ u"\u2282": "\\subset ",
+ u"\u2283": "\\supset ",
+ u"\u2284": "\\not\\subset ",
+ u"\u2285": "\\not\\supset ",
+ u"\u2286": "\\subseteq ",
+ u"\u2287": "\\supseteq ",
+ u"\u2288": "\\not\\subseteq ",
+ u"\u2289": "\\not\\supseteq ",
+ u"\u228A": "\\subsetneq ",
+ u"\u228A-0FE00": "\\varsubsetneqq ",
+ u"\u228B": "\\supsetneq ",
+ u"\u228B-0FE00": "\\varsupsetneq ",
+ u"\u228E": "\\uplus ",
+ u"\u228F": "\\sqsubset ",
+ u"\u228F-00338": "\\NotSquareSubset ",
+ u"\u2290": "\\sqsupset ",
+ u"\u2290-00338": "\\NotSquareSuperset ",
+ u"\u2291": "\\sqsubseteq ",
+ u"\u2292": "\\sqsupseteq ",
+ u"\u2293": "\\sqcap ",
+ u"\u2294": "\\sqcup ",
+ u"\u2295": "\\oplus ",
+ u"\u2296": "\\ominus ",
+ u"\u2297": "\\otimes ",
+ u"\u2298": "\\oslash ",
+ u"\u2299": "\\odot ",
+ u"\u229A": "\\circledcirc ",
+ u"\u229B": "\\circledast ",
+ u"\u229D": "\\circleddash ",
+ u"\u229E": "\\boxplus ",
+ u"\u229F": "\\boxminus ",
+ u"\u22A0": "\\boxtimes ",
+ u"\u22A1": "\\boxdot ",
+ u"\u22A2": "\\vdash ",
+ u"\u22A3": "\\dashv ",
+ u"\u22A4": "\\top ",
+ u"\u22A5": "\\perp ",
+ u"\u22A7": "\\truestate ",
+ u"\u22A8": "\\forcesextra ",
+ u"\u22A9": "\\Vdash ",
+ u"\u22AA": "\\Vvdash ",
+ u"\u22AB": "\\VDash ",
+ u"\u22AC": "\\nvdash ",
+ u"\u22AD": "\\nvDash ",
+ u"\u22AE": "\\nVdash ",
+ u"\u22AF": "\\nVDash ",
+ u"\u22B2": "\\vartriangleleft ",
+ u"\u22B3": "\\vartriangleright ",
+ u"\u22B4": "\\trianglelefteq ",
+ u"\u22B5": "\\trianglerighteq ",
+ u"\u22B6": "\\original ",
+ u"\u22B7": "\\image ",
+ u"\u22B8": "\\multimap ",
+ u"\u22B9": "\\hermitconjmatrix ",
+ u"\u22BA": "\\intercal ",
+ u"\u22BB": "\\veebar ",
+ u"\u22BE": "\\rightanglearc ",
+ u"\u22C0": "\\ElsevierGlyph{22C0}",
+ u"\u22C1": "\\ElsevierGlyph{22C1}",
+ u"\u22C2": "\\bigcap ",
+ u"\u22C3": "\\bigcup ",
+ u"\u22C4": "\\diamond ",
+ u"\u22C5": "\\cdot ",
+ u"\u22C6": "\\star ",
+ u"\u22C7": "\\divideontimes ",
+ u"\u22C8": "\\bowtie ",
+ u"\u22C9": "\\ltimes ",
+ u"\u22CA": "\\rtimes ",
+ u"\u22CB": "\\leftthreetimes ",
+ u"\u22CC": "\\rightthreetimes ",
+ u"\u22CD": "\\backsimeq ",
+ u"\u22CE": "\\curlyvee ",
+ u"\u22CF": "\\curlywedge ",
+ u"\u22D0": "\\Subset ",
+ u"\u22D1": "\\Supset ",
+ u"\u22D2": "\\Cap ",
+ u"\u22D3": "\\Cup ",
+ u"\u22D4": "\\pitchfork ",
+ u"\u22D6": "\\lessdot ",
+ u"\u22D7": "\\gtrdot ",
+ u"\u22D8": "\\verymuchless ",
+ u"\u22D9": "\\verymuchgreater ",
+ u"\u22DA": "\\lesseqgtr ",
+ u"\u22DB": "\\gtreqless ",
+ u"\u22DE": "\\curlyeqprec ",
+ u"\u22DF": "\\curlyeqsucc ",
+ u"\u22E2": "\\not\\sqsubseteq ",
+ u"\u22E3": "\\not\\sqsupseteq ",
+ u"\u22E5": "\\Elzsqspne ",
+ u"\u22E6": "\\lnsim ",
+ u"\u22E7": "\\gnsim ",
+ u"\u22E8": "\\precedesnotsimilar ",
+ u"\u22E9": "\\succnsim ",
+ u"\u22EA": "\\ntriangleleft ",
+ u"\u22EB": "\\ntriangleright ",
+ u"\u22EC": "\\ntrianglelefteq ",
+ u"\u22ED": "\\ntrianglerighteq ",
+ u"\u22EE": "\\vdots ",
+ u"\u22EF": "\\cdots ",
+ u"\u22F0": "\\upslopeellipsis ",
+ u"\u22F1": "\\downslopeellipsis ",
+ u"\u2305": "\\barwedge ",
+ u"\u2306": "\\perspcorrespond ",
+ u"\u2308": "\\lceil ",
+ u"\u2309": "\\rceil ",
+ u"\u230A": "\\lfloor ",
+ u"\u230B": "\\rfloor ",
+ u"\u2315": "\\recorder ",
+ u"\u2316": "\\mathchar\"2208",
+ u"\u231C": "\\ulcorner ",
+ u"\u231D": "\\urcorner ",
+ u"\u231E": "\\llcorner ",
+ u"\u231F": "\\lrcorner ",
+ u"\u2322": "\\frown ",
+ u"\u2323": "\\smile ",
+ u"\u2329": "\\langle ",
+ u"\u232A": "\\rangle ",
+ u"\u233D": "\\ElsevierGlyph{E838}",
+ u"\u23A3": "\\Elzdlcorn ",
+ u"\u23B0": "\\lmoustache ",
+ u"\u23B1": "\\rmoustache ",
+ u"\u2423": "\\textvisiblespace ",
+ u"\u2460": "\\ding{172}",
+ u"\u2461": "\\ding{173}",
+ u"\u2462": "\\ding{174}",
+ u"\u2463": "\\ding{175}",
+ u"\u2464": "\\ding{176}",
+ u"\u2465": "\\ding{177}",
+ u"\u2466": "\\ding{178}",
+ u"\u2467": "\\ding{179}",
+ u"\u2468": "\\ding{180}",
+ u"\u2469": "\\ding{181}",
+ u"\u24C8": "\\circledS ",
+ u"\u2506": "\\Elzdshfnc ",
+ u"\u2519": "\\Elzsqfnw ",
+ u"\u2571": "\\diagup ",
+ u"\u25A0": "\\ding{110}",
+ u"\u25A1": "\\square ",
+ u"\u25AA": "\\blacksquare ",
+ u"\u25AD": "\\fbox{~~}",
+ u"\u25AF": "\\Elzvrecto ",
+ u"\u25B1": "\\ElsevierGlyph{E381}",
+ u"\u25B2": "\\ding{115}",
+ u"\u25B3": "\\bigtriangleup ",
+ u"\u25B4": "\\blacktriangle ",
+ u"\u25B5": "\\vartriangle ",
+ u"\u25B8": "\\blacktriangleright ",
+ u"\u25B9": "\\triangleright ",
+ u"\u25BC": "\\ding{116}",
+ u"\u25BD": "\\bigtriangledown ",
+ u"\u25BE": "\\blacktriangledown ",
+ u"\u25BF": "\\triangledown ",
+ u"\u25C2": "\\blacktriangleleft ",
+ u"\u25C3": "\\triangleleft ",
+ u"\u25C6": "\\ding{117}",
+ u"\u25CA": "\\lozenge ",
+ u"\u25CB": "\\bigcirc ",
+ u"\u25CF": "\\ding{108}",
+ u"\u25D0": "\\Elzcirfl ",
+ u"\u25D1": "\\Elzcirfr ",
+ u"\u25D2": "\\Elzcirfb ",
+ u"\u25D7": "\\ding{119}",
+ u"\u25D8": "\\Elzrvbull ",
+ u"\u25E7": "\\Elzsqfl ",
+ u"\u25E8": "\\Elzsqfr ",
+ u"\u25EA": "\\Elzsqfse ",
+ u"\u25EF": "\\bigcirc ",
+ u"\u2605": "\\ding{72}",
+ u"\u2606": "\\ding{73}",
+ u"\u260E": "\\ding{37}",
+ u"\u261B": "\\ding{42}",
+ u"\u261E": "\\ding{43}",
+ u"\u263E": "\\rightmoon ",
+ u"\u263F": "\\mercury ",
+ u"\u2640": "\\venus ",
+ u"\u2642": "\\male ",
+ u"\u2643": "\\jupiter ",
+ u"\u2644": "\\saturn ",
+ u"\u2645": "\\uranus ",
+ u"\u2646": "\\neptune ",
+ u"\u2647": "\\pluto ",
+ u"\u2648": "\\aries ",
+ u"\u2649": "\\taurus ",
+ u"\u264A": "\\gemini ",
+ u"\u264B": "\\cancer ",
+ u"\u264C": "\\leo ",
+ u"\u264D": "\\virgo ",
+ u"\u264E": "\\libra ",
+ u"\u264F": "\\scorpio ",
+ u"\u2650": "\\sagittarius ",
+ u"\u2651": "\\capricornus ",
+ u"\u2652": "\\aquarius ",
+ u"\u2653": "\\pisces ",
+ u"\u2660": "\\ding{171}",
+ u"\u2662": "\\diamond ",
+ u"\u2663": "\\ding{168}",
+ u"\u2665": "\\ding{170}",
+ u"\u2666": "\\ding{169}",
+ u"\u2669": "\\quarternote ",
+ u"\u266A": "\\eighthnote ",
+ u"\u266D": "\\flat ",
+ u"\u266E": "\\natural ",
+ u"\u266F": "\\sharp ",
+ u"\u2701": "\\ding{33}",
+ u"\u2702": "\\ding{34}",
+ u"\u2703": "\\ding{35}",
+ u"\u2704": "\\ding{36}",
+ u"\u2706": "\\ding{38}",
+ u"\u2707": "\\ding{39}",
+ u"\u2708": "\\ding{40}",
+ u"\u2709": "\\ding{41}",
+ u"\u270C": "\\ding{44}",
+ u"\u270D": "\\ding{45}",
+ u"\u270E": "\\ding{46}",
+ u"\u270F": "\\ding{47}",
+ u"\u2710": "\\ding{48}",
+ u"\u2711": "\\ding{49}",
+ u"\u2712": "\\ding{50}",
+ u"\u2713": "\\ding{51}",
+ u"\u2714": "\\ding{52}",
+ u"\u2715": "\\ding{53}",
+ u"\u2716": "\\ding{54}",
+ u"\u2717": "\\ding{55}",
+ u"\u2718": "\\ding{56}",
+ u"\u2719": "\\ding{57}",
+ u"\u271A": "\\ding{58}",
+ u"\u271B": "\\ding{59}",
+ u"\u271C": "\\ding{60}",
+ u"\u271D": "\\ding{61}",
+ u"\u271E": "\\ding{62}",
+ u"\u271F": "\\ding{63}",
+ u"\u2720": "\\ding{64}",
+ u"\u2721": "\\ding{65}",
+ u"\u2722": "\\ding{66}",
+ u"\u2723": "\\ding{67}",
+ u"\u2724": "\\ding{68}",
+ u"\u2725": "\\ding{69}",
+ u"\u2726": "\\ding{70}",
+ u"\u2727": "\\ding{71}",
+ u"\u2729": "\\ding{73}",
+ u"\u272A": "\\ding{74}",
+ u"\u272B": "\\ding{75}",
+ u"\u272C": "\\ding{76}",
+ u"\u272D": "\\ding{77}",
+ u"\u272E": "\\ding{78}",
+ u"\u272F": "\\ding{79}",
+ u"\u2730": "\\ding{80}",
+ u"\u2731": "\\ding{81}",
+ u"\u2732": "\\ding{82}",
+ u"\u2733": "\\ding{83}",
+ u"\u2734": "\\ding{84}",
+ u"\u2735": "\\ding{85}",
+ u"\u2736": "\\ding{86}",
+ u"\u2737": "\\ding{87}",
+ u"\u2738": "\\ding{88}",
+ u"\u2739": "\\ding{89}",
+ u"\u273A": "\\ding{90}",
+ u"\u273B": "\\ding{91}",
+ u"\u273C": "\\ding{92}",
+ u"\u273D": "\\ding{93}",
+ u"\u273E": "\\ding{94}",
+ u"\u273F": "\\ding{95}",
+ u"\u2740": "\\ding{96}",
+ u"\u2741": "\\ding{97}",
+ u"\u2742": "\\ding{98}",
+ u"\u2743": "\\ding{99}",
+ u"\u2744": "\\ding{100}",
+ u"\u2745": "\\ding{101}",
+ u"\u2746": "\\ding{102}",
+ u"\u2747": "\\ding{103}",
+ u"\u2748": "\\ding{104}",
+ u"\u2749": "\\ding{105}",
+ u"\u274A": "\\ding{106}",
+ u"\u274B": "\\ding{107}",
+ u"\u274D": "\\ding{109}",
+ u"\u274F": "\\ding{111}",
+ u"\u2750": "\\ding{112}",
+ u"\u2751": "\\ding{113}",
+ u"\u2752": "\\ding{114}",
+ u"\u2756": "\\ding{118}",
+ u"\u2758": "\\ding{120}",
+ u"\u2759": "\\ding{121}",
+ u"\u275A": "\\ding{122}",
+ u"\u275B": "\\ding{123}",
+ u"\u275C": "\\ding{124}",
+ u"\u275D": "\\ding{125}",
+ u"\u275E": "\\ding{126}",
+ u"\u2761": "\\ding{161}",
+ u"\u2762": "\\ding{162}",
+ u"\u2763": "\\ding{163}",
+ u"\u2764": "\\ding{164}",
+ u"\u2765": "\\ding{165}",
+ u"\u2766": "\\ding{166}",
+ u"\u2767": "\\ding{167}",
+ u"\u2776": "\\ding{182}",
+ u"\u2777": "\\ding{183}",
+ u"\u2778": "\\ding{184}",
+ u"\u2779": "\\ding{185}",
+ u"\u277A": "\\ding{186}",
+ u"\u277B": "\\ding{187}",
+ u"\u277C": "\\ding{188}",
+ u"\u277D": "\\ding{189}",
+ u"\u277E": "\\ding{190}",
+ u"\u277F": "\\ding{191}",
+ u"\u2780": "\\ding{192}",
+ u"\u2781": "\\ding{193}",
+ u"\u2782": "\\ding{194}",
+ u"\u2783": "\\ding{195}",
+ u"\u2784": "\\ding{196}",
+ u"\u2785": "\\ding{197}",
+ u"\u2786": "\\ding{198}",
+ u"\u2787": "\\ding{199}",
+ u"\u2788": "\\ding{200}",
+ u"\u2789": "\\ding{201}",
+ u"\u278A": "\\ding{202}",
+ u"\u278B": "\\ding{203}",
+ u"\u278C": "\\ding{204}",
+ u"\u278D": "\\ding{205}",
+ u"\u278E": "\\ding{206}",
+ u"\u278F": "\\ding{207}",
+ u"\u2790": "\\ding{208}",
+ u"\u2791": "\\ding{209}",
+ u"\u2792": "\\ding{210}",
+ u"\u2793": "\\ding{211}",
+ u"\u2794": "\\ding{212}",
+ u"\u2798": "\\ding{216}",
+ u"\u2799": "\\ding{217}",
+ u"\u279A": "\\ding{218}",
+ u"\u279B": "\\ding{219}",
+ u"\u279C": "\\ding{220}",
+ u"\u279D": "\\ding{221}",
+ u"\u279E": "\\ding{222}",
+ u"\u279F": "\\ding{223}",
+ u"\u27A0": "\\ding{224}",
+ u"\u27A1": "\\ding{225}",
+ u"\u27A2": "\\ding{226}",
+ u"\u27A3": "\\ding{227}",
+ u"\u27A4": "\\ding{228}",
+ u"\u27A5": "\\ding{229}",
+ u"\u27A6": "\\ding{230}",
+ u"\u27A7": "\\ding{231}",
+ u"\u27A8": "\\ding{232}",
+ u"\u27A9": "\\ding{233}",
+ u"\u27AA": "\\ding{234}",
+ u"\u27AB": "\\ding{235}",
+ u"\u27AC": "\\ding{236}",
+ u"\u27AD": "\\ding{237}",
+ u"\u27AE": "\\ding{238}",
+ u"\u27AF": "\\ding{239}",
+ u"\u27B1": "\\ding{241}",
+ u"\u27B2": "\\ding{242}",
+ u"\u27B3": "\\ding{243}",
+ u"\u27B4": "\\ding{244}",
+ u"\u27B5": "\\ding{245}",
+ u"\u27B6": "\\ding{246}",
+ u"\u27B7": "\\ding{247}",
+ u"\u27B8": "\\ding{248}",
+ u"\u27B9": "\\ding{249}",
+ u"\u27BA": "\\ding{250}",
+ u"\u27BB": "\\ding{251}",
+ u"\u27BC": "\\ding{252}",
+ u"\u27BD": "\\ding{253}",
+ u"\u27BE": "\\ding{254}",
+ u"\u27F5": "\\longleftarrow ",
+ u"\u27F6": "\\longrightarrow ",
+ u"\u27F7": "\\longleftrightarrow ",
+ u"\u27F8": "\\Longleftarrow ",
+ u"\u27F9": "\\Longrightarrow ",
+ u"\u27FA": "\\Longleftrightarrow ",
+ u"\u27FC": "\\longmapsto ",
+ u"\u27FF": "\\sim\\joinrel\\leadsto",
+ u"\u2905": "\\ElsevierGlyph{E212}",
+ u"\u2912": "\\UpArrowBar ",
+ u"\u2913": "\\DownArrowBar ",
+ u"\u2923": "\\ElsevierGlyph{E20C}",
+ u"\u2924": "\\ElsevierGlyph{E20D}",
+ u"\u2925": "\\ElsevierGlyph{E20B}",
+ u"\u2926": "\\ElsevierGlyph{E20A}",
+ u"\u2927": "\\ElsevierGlyph{E211}",
+ u"\u2928": "\\ElsevierGlyph{E20E}",
+ u"\u2929": "\\ElsevierGlyph{E20F}",
+ u"\u292A": "\\ElsevierGlyph{E210}",
+ u"\u2933": "\\ElsevierGlyph{E21C}",
+ u"\u2933-00338": "\\ElsevierGlyph{E21D}",
+ u"\u2936": "\\ElsevierGlyph{E21A}",
+ u"\u2937": "\\ElsevierGlyph{E219}",
+ u"\u2940": "\\Elolarr ",
+ u"\u2941": "\\Elorarr ",
+ u"\u2942": "\\ElzRlarr ",
+ u"\u2944": "\\ElzrLarr ",
+ u"\u2947": "\\Elzrarrx ",
+ u"\u294E": "\\LeftRightVector ",
+ u"\u294F": "\\RightUpDownVector ",
+ u"\u2950": "\\DownLeftRightVector ",
+ u"\u2951": "\\LeftUpDownVector ",
+ u"\u2952": "\\LeftVectorBar ",
+ u"\u2953": "\\RightVectorBar ",
+ u"\u2954": "\\RightUpVectorBar ",
+ u"\u2955": "\\RightDownVectorBar ",
+ u"\u2956": "\\DownLeftVectorBar ",
+ u"\u2957": "\\DownRightVectorBar ",
+ u"\u2958": "\\LeftUpVectorBar ",
+ u"\u2959": "\\LeftDownVectorBar ",
+ u"\u295A": "\\LeftTeeVector ",
+ u"\u295B": "\\RightTeeVector ",
+ u"\u295C": "\\RightUpTeeVector ",
+ u"\u295D": "\\RightDownTeeVector ",
+ u"\u295E": "\\DownLeftTeeVector ",
+ u"\u295F": "\\DownRightTeeVector ",
+ u"\u2960": "\\LeftUpTeeVector ",
+ u"\u2961": "\\LeftDownTeeVector ",
+ u"\u296E": "\\UpEquilibrium ",
+ u"\u296F": "\\ReverseUpEquilibrium ",
+ u"\u2970": "\\RoundImplies ",
+ u"\u297C": "\\ElsevierGlyph{E214}",
+ u"\u297D": "\\ElsevierGlyph{E215}",
+ u"\u2980": "\\Elztfnc ",
+ u"\u2985": "\\ElsevierGlyph{3018}",
+ u"\u2986": "\\Elroang ",
+ u"\u2993": "<\\kern-0.58em(",
+ u"\u2994": "\\ElsevierGlyph{E291}",
+ u"\u2999": "\\Elzddfnc ",
+ u"\u299C": "\\Angle ",
+ u"\u29A0": "\\Elzlpargt ",
+ u"\u29B5": "\\ElsevierGlyph{E260}",
+ u"\u29B6": "\\ElsevierGlyph{E61B}",
+ u"\u29CA": "\\ElzLap ",
+ u"\u29CB": "\\Elzdefas ",
+ u"\u29CF": "\\LeftTriangleBar ",
+ u"\u29CF-00338": "\\NotLeftTriangleBar ",
+ u"\u29D0": "\\RightTriangleBar ",
+ u"\u29D0-00338": "\\NotRightTriangleBar ",
+ u"\u29DC": "\\ElsevierGlyph{E372}",
+ u"\u29EB": "\\blacklozenge ",
+ u"\u29F4": "\\RuleDelayed ",
+ u"\u2A04": "\\Elxuplus ",
+ u"\u2A05": "\\ElzThr ",
+ u"\u2A06": "\\Elxsqcup ",
+ u"\u2A07": "\\ElzInf ",
+ u"\u2A08": "\\ElzSup ",
+ u"\u2A0D": "\\ElzCint ",
+ u"\u2A0F": "\\clockoint ",
+ u"\u2A10": "\\ElsevierGlyph{E395}",
+ u"\u2A16": "\\sqrint ",
+ u"\u2A25": "\\ElsevierGlyph{E25A}",
+ u"\u2A2A": "\\ElsevierGlyph{E25B}",
+ u"\u2A2D": "\\ElsevierGlyph{E25C}",
+ u"\u2A2E": "\\ElsevierGlyph{E25D}",
+ u"\u2A2F": "\\ElzTimes ",
+ u"\u2A34": "\\ElsevierGlyph{E25E}",
+ u"\u2A35": "\\ElsevierGlyph{E25E}",
+ u"\u2A3C": "\\ElsevierGlyph{E259}",
+ u"\u2A3F": "\\amalg ",
+ u"\u2A53": "\\ElzAnd ",
+ u"\u2A54": "\\ElzOr ",
+ u"\u2A55": "\\ElsevierGlyph{E36E}",
+ u"\u2A56": "\\ElOr ",
+ u"\u2A5E": "\\perspcorrespond ",
+ u"\u2A5F": "\\Elzminhat ",
+ u"\u2A63": "\\ElsevierGlyph{225A}",
+ u"\u2A6E": "\\stackrel{*}{=}",
+ u"\u2A75": "\\Equal ",
+ u"\u2A7D": "\\leqslant ",
+ u"\u2A7D-00338": "\\nleqslant ",
+ u"\u2A7E": "\\geqslant ",
+ u"\u2A7E-00338": "\\ngeqslant ",
+ u"\u2A85": "\\lessapprox ",
+ u"\u2A86": "\\gtrapprox ",
+ u"\u2A87": "\\lneq ",
+ u"\u2A88": "\\gneq ",
+ u"\u2A89": "\\lnapprox ",
+ u"\u2A8A": "\\gnapprox ",
+ u"\u2A8B": "\\lesseqqgtr ",
+ u"\u2A8C": "\\gtreqqless ",
+ u"\u2A95": "\\eqslantless ",
+ u"\u2A96": "\\eqslantgtr ",
+ u"\u2A9D": "\\Pisymbol{ppi020}{117}",
+ u"\u2A9E": "\\Pisymbol{ppi020}{105}",
+ u"\u2AA1": "\\NestedLessLess ",
+ u"\u2AA1-00338": "\\NotNestedLessLess ",
+ u"\u2AA2": "\\NestedGreaterGreater ",
+ u"\u2AA2-00338": "\\NotNestedGreaterGreater ",
+ u"\u2AAF": "\\preceq ",
+ u"\u2AAF-00338": "\\not\\preceq ",
+ u"\u2AB0": "\\succeq ",
+ u"\u2AB0-00338": "\\not\\succeq ",
+ u"\u2AB5": "\\precneqq ",
+ u"\u2AB6": "\\succneqq ",
+ u"\u2AB7": "\\precapprox ",
+ u"\u2AB8": "\\succapprox ",
+ u"\u2AB9": "\\precnapprox ",
+ u"\u2ABA": "\\succnapprox ",
+ u"\u2AC5": "\\subseteqq ",
+ u"\u2AC5-00338": "\\nsubseteqq ",
+ u"\u2AC6": "\\supseteqq ",
+ u"\u2AC6-00338": "\\nsupseteqq",
+ u"\u2ACB": "\\subsetneqq ",
+ u"\u2ACC": "\\supsetneqq ",
+ u"\u2AEB": "\\ElsevierGlyph{E30D}",
+ u"\u2AF6": "\\Elztdcol ",
+ u"\u2AFD": "{{/}\\!\\!{/}}",
+ u"\u2AFD-020E5": "{\\rlap{\\textbackslash}{{/}\\!\\!{/}}}",
+ u"\u300A": "\\ElsevierGlyph{300A}",
+ u"\u300B": "\\ElsevierGlyph{300B}",
+ u"\u3018": "\\ElsevierGlyph{3018}",
+ u"\u3019": "\\ElsevierGlyph{3019}",
+ u"\u301A": "\\openbracketleft ",
+ u"\u301B": "\\openbracketright ",
+ u"\uFB00": "ff",
+ u"\uFB01": "fi",
+ u"\uFB02": "fl",
+ u"\uFB03": "ffi",
+ u"\uFB04": "ffl",
+ u"\uD400": "\\mathbf{A}",
+ u"\uD401": "\\mathbf{B}",
+ u"\uD402": "\\mathbf{C}",
+ u"\uD403": "\\mathbf{D}",
+ u"\uD404": "\\mathbf{E}",
+ u"\uD405": "\\mathbf{F}",
+ u"\uD406": "\\mathbf{G}",
+ u"\uD407": "\\mathbf{H}",
+ u"\uD408": "\\mathbf{I}",
+ u"\uD409": "\\mathbf{J}",
+ u"\uD40A": "\\mathbf{K}",
+ u"\uD40B": "\\mathbf{L}",
+ u"\uD40C": "\\mathbf{M}",
+ u"\uD40D": "\\mathbf{N}",
+ u"\uD40E": "\\mathbf{O}",
+ u"\uD40F": "\\mathbf{P}",
+ u"\uD410": "\\mathbf{Q}",
+ u"\uD411": "\\mathbf{R}",
+ u"\uD412": "\\mathbf{S}",
+ u"\uD413": "\\mathbf{T}",
+ u"\uD414": "\\mathbf{U}",
+ u"\uD415": "\\mathbf{V}",
+ u"\uD416": "\\mathbf{W}",
+ u"\uD417": "\\mathbf{X}",
+ u"\uD418": "\\mathbf{Y}",
+ u"\uD419": "\\mathbf{Z}",
+ u"\uD41A": "\\mathbf{a}",
+ u"\uD41B": "\\mathbf{b}",
+ u"\uD41C": "\\mathbf{c}",
+ u"\uD41D": "\\mathbf{d}",
+ u"\uD41E": "\\mathbf{e}",
+ u"\uD41F": "\\mathbf{f}",
+ u"\uD420": "\\mathbf{g}",
+ u"\uD421": "\\mathbf{h}",
+ u"\uD422": "\\mathbf{i}",
+ u"\uD423": "\\mathbf{j}",
+ u"\uD424": "\\mathbf{k}",
+ u"\uD425": "\\mathbf{l}",
+ u"\uD426": "\\mathbf{m}",
+ u"\uD427": "\\mathbf{n}",
+ u"\uD428": "\\mathbf{o}",
+ u"\uD429": "\\mathbf{p}",
+ u"\uD42A": "\\mathbf{q}",
+ u"\uD42B": "\\mathbf{r}",
+ u"\uD42C": "\\mathbf{s}",
+ u"\uD42D": "\\mathbf{t}",
+ u"\uD42E": "\\mathbf{u}",
+ u"\uD42F": "\\mathbf{v}",
+ u"\uD430": "\\mathbf{w}",
+ u"\uD431": "\\mathbf{x}",
+ u"\uD432": "\\mathbf{y}",
+ u"\uD433": "\\mathbf{z}",
+ u"\uD434": "\\mathsl{A}",
+ u"\uD435": "\\mathsl{B}",
+ u"\uD436": "\\mathsl{C}",
+ u"\uD437": "\\mathsl{D}",
+ u"\uD438": "\\mathsl{E}",
+ u"\uD439": "\\mathsl{F}",
+ u"\uD43A": "\\mathsl{G}",
+ u"\uD43B": "\\mathsl{H}",
+ u"\uD43C": "\\mathsl{I}",
+ u"\uD43D": "\\mathsl{J}",
+ u"\uD43E": "\\mathsl{K}",
+ u"\uD43F": "\\mathsl{L}",
+ u"\uD440": "\\mathsl{M}",
+ u"\uD441": "\\mathsl{N}",
+ u"\uD442": "\\mathsl{O}",
+ u"\uD443": "\\mathsl{P}",
+ u"\uD444": "\\mathsl{Q}",
+ u"\uD445": "\\mathsl{R}",
+ u"\uD446": "\\mathsl{S}",
+ u"\uD447": "\\mathsl{T}",
+ u"\uD448": "\\mathsl{U}",
+ u"\uD449": "\\mathsl{V}",
+ u"\uD44A": "\\mathsl{W}",
+ u"\uD44B": "\\mathsl{X}",
+ u"\uD44C": "\\mathsl{Y}",
+ u"\uD44D": "\\mathsl{Z}",
+ u"\uD44E": "\\mathsl{a}",
+ u"\uD44F": "\\mathsl{b}",
+ u"\uD450": "\\mathsl{c}",
+ u"\uD451": "\\mathsl{d}",
+ u"\uD452": "\\mathsl{e}",
+ u"\uD453": "\\mathsl{f}",
+ u"\uD454": "\\mathsl{g}",
+ u"\uD456": "\\mathsl{i}",
+ u"\uD457": "\\mathsl{j}",
+ u"\uD458": "\\mathsl{k}",
+ u"\uD459": "\\mathsl{l}",
+ u"\uD45A": "\\mathsl{m}",
+ u"\uD45B": "\\mathsl{n}",
+ u"\uD45C": "\\mathsl{o}",
+ u"\uD45D": "\\mathsl{p}",
+ u"\uD45E": "\\mathsl{q}",
+ u"\uD45F": "\\mathsl{r}",
+ u"\uD460": "\\mathsl{s}",
+ u"\uD461": "\\mathsl{t}",
+ u"\uD462": "\\mathsl{u}",
+ u"\uD463": "\\mathsl{v}",
+ u"\uD464": "\\mathsl{w}",
+ u"\uD465": "\\mathsl{x}",
+ u"\uD466": "\\mathsl{y}",
+ u"\uD467": "\\mathsl{z}",
+ u"\uD468": "\\mathbit{A}",
+ u"\uD469": "\\mathbit{B}",
+ u"\uD46A": "\\mathbit{C}",
+ u"\uD46B": "\\mathbit{D}",
+ u"\uD46C": "\\mathbit{E}",
+ u"\uD46D": "\\mathbit{F}",
+ u"\uD46E": "\\mathbit{G}",
+ u"\uD46F": "\\mathbit{H}",
+ u"\uD470": "\\mathbit{I}",
+ u"\uD471": "\\mathbit{J}",
+ u"\uD472": "\\mathbit{K}",
+ u"\uD473": "\\mathbit{L}",
+ u"\uD474": "\\mathbit{M}",
+ u"\uD475": "\\mathbit{N}",
+ u"\uD476": "\\mathbit{O}",
+ u"\uD477": "\\mathbit{P}",
+ u"\uD478": "\\mathbit{Q}",
+ u"\uD479": "\\mathbit{R}",
+ u"\uD47A": "\\mathbit{S}",
+ u"\uD47B": "\\mathbit{T}",
+ u"\uD47C": "\\mathbit{U}",
+ u"\uD47D": "\\mathbit{V}",
+ u"\uD47E": "\\mathbit{W}",
+ u"\uD47F": "\\mathbit{X}",
+ u"\uD480": "\\mathbit{Y}",
+ u"\uD481": "\\mathbit{Z}",
+ u"\uD482": "\\mathbit{a}",
+ u"\uD483": "\\mathbit{b}",
+ u"\uD484": "\\mathbit{c}",
+ u"\uD485": "\\mathbit{d}",
+ u"\uD486": "\\mathbit{e}",
+ u"\uD487": "\\mathbit{f}",
+ u"\uD488": "\\mathbit{g}",
+ u"\uD489": "\\mathbit{h}",
+ u"\uD48A": "\\mathbit{i}",
+ u"\uD48B": "\\mathbit{j}",
+ u"\uD48C": "\\mathbit{k}",
+ u"\uD48D": "\\mathbit{l}",
+ u"\uD48E": "\\mathbit{m}",
+ u"\uD48F": "\\mathbit{n}",
+ u"\uD490": "\\mathbit{o}",
+ u"\uD491": "\\mathbit{p}",
+ u"\uD492": "\\mathbit{q}",
+ u"\uD493": "\\mathbit{r}",
+ u"\uD494": "\\mathbit{s}",
+ u"\uD495": "\\mathbit{t}",
+ u"\uD496": "\\mathbit{u}",
+ u"\uD497": "\\mathbit{v}",
+ u"\uD498": "\\mathbit{w}",
+ u"\uD499": "\\mathbit{x}",
+ u"\uD49A": "\\mathbit{y}",
+ u"\uD49B": "\\mathbit{z}",
+ u"\uD49C": "\\mathscr{A}",
+ u"\uD49E": "\\mathscr{C}",
+ u"\uD49F": "\\mathscr{D}",
+ u"\uD4A2": "\\mathscr{G}",
+ u"\uD4A5": "\\mathscr{J}",
+ u"\uD4A6": "\\mathscr{K}",
+ u"\uD4A9": "\\mathscr{N}",
+ u"\uD4AA": "\\mathscr{O}",
+ u"\uD4AB": "\\mathscr{P}",
+ u"\uD4AC": "\\mathscr{Q}",
+ u"\uD4AE": "\\mathscr{S}",
+ u"\uD4AF": "\\mathscr{T}",
+ u"\uD4B0": "\\mathscr{U}",
+ u"\uD4B1": "\\mathscr{V}",
+ u"\uD4B2": "\\mathscr{W}",
+ u"\uD4B3": "\\mathscr{X}",
+ u"\uD4B4": "\\mathscr{Y}",
+ u"\uD4B5": "\\mathscr{Z}",
+ u"\uD4B6": "\\mathscr{a}",
+ u"\uD4B7": "\\mathscr{b}",
+ u"\uD4B8": "\\mathscr{c}",
+ u"\uD4B9": "\\mathscr{d}",
+ u"\uD4BB": "\\mathscr{f}",
+ u"\uD4BD": "\\mathscr{h}",
+ u"\uD4BE": "\\mathscr{i}",
+ u"\uD4BF": "\\mathscr{j}",
+ u"\uD4C0": "\\mathscr{k}",
+ u"\uD4C1": "\\mathscr{l}",
+ u"\uD4C2": "\\mathscr{m}",
+ u"\uD4C3": "\\mathscr{n}",
+ u"\uD4C5": "\\mathscr{p}",
+ u"\uD4C6": "\\mathscr{q}",
+ u"\uD4C7": "\\mathscr{r}",
+ u"\uD4C8": "\\mathscr{s}",
+ u"\uD4C9": "\\mathscr{t}",
+ u"\uD4CA": "\\mathscr{u}",
+ u"\uD4CB": "\\mathscr{v}",
+ u"\uD4CC": "\\mathscr{w}",
+ u"\uD4CD": "\\mathscr{x}",
+ u"\uD4CE": "\\mathscr{y}",
+ u"\uD4CF": "\\mathscr{z}",
+ u"\uD4D0": "\\mathmit{A}",
+ u"\uD4D1": "\\mathmit{B}",
+ u"\uD4D2": "\\mathmit{C}",
+ u"\uD4D3": "\\mathmit{D}",
+ u"\uD4D4": "\\mathmit{E}",
+ u"\uD4D5": "\\mathmit{F}",
+ u"\uD4D6": "\\mathmit{G}",
+ u"\uD4D7": "\\mathmit{H}",
+ u"\uD4D8": "\\mathmit{I}",
+ u"\uD4D9": "\\mathmit{J}",
+ u"\uD4DA": "\\mathmit{K}",
+ u"\uD4DB": "\\mathmit{L}",
+ u"\uD4DC": "\\mathmit{M}",
+ u"\uD4DD": "\\mathmit{N}",
+ u"\uD4DE": "\\mathmit{O}",
+ u"\uD4DF": "\\mathmit{P}",
+ u"\uD4E0": "\\mathmit{Q}",
+ u"\uD4E1": "\\mathmit{R}",
+ u"\uD4E2": "\\mathmit{S}",
+ u"\uD4E3": "\\mathmit{T}",
+ u"\uD4E4": "\\mathmit{U}",
+ u"\uD4E5": "\\mathmit{V}",
+ u"\uD4E6": "\\mathmit{W}",
+ u"\uD4E7": "\\mathmit{X}",
+ u"\uD4E8": "\\mathmit{Y}",
+ u"\uD4E9": "\\mathmit{Z}",
+ u"\uD4EA": "\\mathmit{a}",
+ u"\uD4EB": "\\mathmit{b}",
+ u"\uD4EC": "\\mathmit{c}",
+ u"\uD4ED": "\\mathmit{d}",
+ u"\uD4EE": "\\mathmit{e}",
+ u"\uD4EF": "\\mathmit{f}",
+ u"\uD4F0": "\\mathmit{g}",
+ u"\uD4F1": "\\mathmit{h}",
+ u"\uD4F2": "\\mathmit{i}",
+ u"\uD4F3": "\\mathmit{j}",
+ u"\uD4F4": "\\mathmit{k}",
+ u"\uD4F5": "\\mathmit{l}",
+ u"\uD4F6": "\\mathmit{m}",
+ u"\uD4F7": "\\mathmit{n}",
+ u"\uD4F8": "\\mathmit{o}",
+ u"\uD4F9": "\\mathmit{p}",
+ u"\uD4FA": "\\mathmit{q}",
+ u"\uD4FB": "\\mathmit{r}",
+ u"\uD4FC": "\\mathmit{s}",
+ u"\uD4FD": "\\mathmit{t}",
+ u"\uD4FE": "\\mathmit{u}",
+ u"\uD4FF": "\\mathmit{v}",
+ u"\uD500": "\\mathmit{w}",
+ u"\uD501": "\\mathmit{x}",
+ u"\uD502": "\\mathmit{y}",
+ u"\uD503": "\\mathmit{z}",
+ u"\uD504": "\\mathfrak{A}",
+ u"\uD505": "\\mathfrak{B}",
+ u"\uD507": "\\mathfrak{D}",
+ u"\uD508": "\\mathfrak{E}",
+ u"\uD509": "\\mathfrak{F}",
+ u"\uD50A": "\\mathfrak{G}",
+ u"\uD50D": "\\mathfrak{J}",
+ u"\uD50E": "\\mathfrak{K}",
+ u"\uD50F": "\\mathfrak{L}",
+ u"\uD510": "\\mathfrak{M}",
+ u"\uD511": "\\mathfrak{N}",
+ u"\uD512": "\\mathfrak{O}",
+ u"\uD513": "\\mathfrak{P}",
+ u"\uD514": "\\mathfrak{Q}",
+ u"\uD516": "\\mathfrak{S}",
+ u"\uD517": "\\mathfrak{T}",
+ u"\uD518": "\\mathfrak{U}",
+ u"\uD519": "\\mathfrak{V}",
+ u"\uD51A": "\\mathfrak{W}",
+ u"\uD51B": "\\mathfrak{X}",
+ u"\uD51C": "\\mathfrak{Y}",
+ u"\uD51E": "\\mathfrak{a}",
+ u"\uD51F": "\\mathfrak{b}",
+ u"\uD520": "\\mathfrak{c}",
+ u"\uD521": "\\mathfrak{d}",
+ u"\uD522": "\\mathfrak{e}",
+ u"\uD523": "\\mathfrak{f}",
+ u"\uD524": "\\mathfrak{g}",
+ u"\uD525": "\\mathfrak{h}",
+ u"\uD526": "\\mathfrak{i}",
+ u"\uD527": "\\mathfrak{j}",
+ u"\uD528": "\\mathfrak{k}",
+ u"\uD529": "\\mathfrak{l}",
+ u"\uD52A": "\\mathfrak{m}",
+ u"\uD52B": "\\mathfrak{n}",
+ u"\uD52C": "\\mathfrak{o}",
+ u"\uD52D": "\\mathfrak{p}",
+ u"\uD52E": "\\mathfrak{q}",
+ u"\uD52F": "\\mathfrak{r}",
+ u"\uD530": "\\mathfrak{s}",
+ u"\uD531": "\\mathfrak{t}",
+ u"\uD532": "\\mathfrak{u}",
+ u"\uD533": "\\mathfrak{v}",
+ u"\uD534": "\\mathfrak{w}",
+ u"\uD535": "\\mathfrak{x}",
+ u"\uD536": "\\mathfrak{y}",
+ u"\uD537": "\\mathfrak{z}",
+ u"\uD538": "\\mathbb{A}",
+ u"\uD539": "\\mathbb{B}",
+ u"\uD53B": "\\mathbb{D}",
+ u"\uD53C": "\\mathbb{E}",
+ u"\uD53D": "\\mathbb{F}",
+ u"\uD53E": "\\mathbb{G}",
+ u"\uD540": "\\mathbb{I}",
+ u"\uD541": "\\mathbb{J}",
+ u"\uD542": "\\mathbb{K}",
+ u"\uD543": "\\mathbb{L}",
+ u"\uD544": "\\mathbb{M}",
+ u"\uD546": "\\mathbb{O}",
+ u"\uD54A": "\\mathbb{S}",
+ u"\uD54B": "\\mathbb{T}",
+ u"\uD54C": "\\mathbb{U}",
+ u"\uD54D": "\\mathbb{V}",
+ u"\uD54E": "\\mathbb{W}",
+ u"\uD54F": "\\mathbb{X}",
+ u"\uD550": "\\mathbb{Y}",
+ u"\uD552": "\\mathbb{a}",
+ u"\uD553": "\\mathbb{b}",
+ u"\uD554": "\\mathbb{c}",
+ u"\uD555": "\\mathbb{d}",
+ u"\uD556": "\\mathbb{e}",
+ u"\uD557": "\\mathbb{f}",
+ u"\uD558": "\\mathbb{g}",
+ u"\uD559": "\\mathbb{h}",
+ u"\uD55A": "\\mathbb{i}",
+ u"\uD55B": "\\mathbb{j}",
+ u"\uD55C": "\\mathbb{k}",
+ u"\uD55D": "\\mathbb{l}",
+ u"\uD55E": "\\mathbb{m}",
+ u"\uD55F": "\\mathbb{n}",
+ u"\uD560": "\\mathbb{o}",
+ u"\uD561": "\\mathbb{p}",
+ u"\uD562": "\\mathbb{q}",
+ u"\uD563": "\\mathbb{r}",
+ u"\uD564": "\\mathbb{s}",
+ u"\uD565": "\\mathbb{t}",
+ u"\uD566": "\\mathbb{u}",
+ u"\uD567": "\\mathbb{v}",
+ u"\uD568": "\\mathbb{w}",
+ u"\uD569": "\\mathbb{x}",
+ u"\uD56A": "\\mathbb{y}",
+ u"\uD56B": "\\mathbb{z}",
+ u"\uD56C": "\\mathslbb{A}",
+ u"\uD56D": "\\mathslbb{B}",
+ u"\uD56E": "\\mathslbb{C}",
+ u"\uD56F": "\\mathslbb{D}",
+ u"\uD570": "\\mathslbb{E}",
+ u"\uD571": "\\mathslbb{F}",
+ u"\uD572": "\\mathslbb{G}",
+ u"\uD573": "\\mathslbb{H}",
+ u"\uD574": "\\mathslbb{I}",
+ u"\uD575": "\\mathslbb{J}",
+ u"\uD576": "\\mathslbb{K}",
+ u"\uD577": "\\mathslbb{L}",
+ u"\uD578": "\\mathslbb{M}",
+ u"\uD579": "\\mathslbb{N}",
+ u"\uD57A": "\\mathslbb{O}",
+ u"\uD57B": "\\mathslbb{P}",
+ u"\uD57C": "\\mathslbb{Q}",
+ u"\uD57D": "\\mathslbb{R}",
+ u"\uD57E": "\\mathslbb{S}",
+ u"\uD57F": "\\mathslbb{T}",
+ u"\uD580": "\\mathslbb{U}",
+ u"\uD581": "\\mathslbb{V}",
+ u"\uD582": "\\mathslbb{W}",
+ u"\uD583": "\\mathslbb{X}",
+ u"\uD584": "\\mathslbb{Y}",
+ u"\uD585": "\\mathslbb{Z}",
+ u"\uD586": "\\mathslbb{a}",
+ u"\uD587": "\\mathslbb{b}",
+ u"\uD588": "\\mathslbb{c}",
+ u"\uD589": "\\mathslbb{d}",
+ u"\uD58A": "\\mathslbb{e}",
+ u"\uD58B": "\\mathslbb{f}",
+ u"\uD58C": "\\mathslbb{g}",
+ u"\uD58D": "\\mathslbb{h}",
+ u"\uD58E": "\\mathslbb{i}",
+ u"\uD58F": "\\mathslbb{j}",
+ u"\uD590": "\\mathslbb{k}",
+ u"\uD591": "\\mathslbb{l}",
+ u"\uD592": "\\mathslbb{m}",
+ u"\uD593": "\\mathslbb{n}",
+ u"\uD594": "\\mathslbb{o}",
+ u"\uD595": "\\mathslbb{p}",
+ u"\uD596": "\\mathslbb{q}",
+ u"\uD597": "\\mathslbb{r}",
+ u"\uD598": "\\mathslbb{s}",
+ u"\uD599": "\\mathslbb{t}",
+ u"\uD59A": "\\mathslbb{u}",
+ u"\uD59B": "\\mathslbb{v}",
+ u"\uD59C": "\\mathslbb{w}",
+ u"\uD59D": "\\mathslbb{x}",
+ u"\uD59E": "\\mathslbb{y}",
+ u"\uD59F": "\\mathslbb{z}",
+ u"\uD5A0": "\\mathsf{A}",
+ u"\uD5A1": "\\mathsf{B}",
+ u"\uD5A2": "\\mathsf{C}",
+ u"\uD5A3": "\\mathsf{D}",
+ u"\uD5A4": "\\mathsf{E}",
+ u"\uD5A5": "\\mathsf{F}",
+ u"\uD5A6": "\\mathsf{G}",
+ u"\uD5A7": "\\mathsf{H}",
+ u"\uD5A8": "\\mathsf{I}",
+ u"\uD5A9": "\\mathsf{J}",
+ u"\uD5AA": "\\mathsf{K}",
+ u"\uD5AB": "\\mathsf{L}",
+ u"\uD5AC": "\\mathsf{M}",
+ u"\uD5AD": "\\mathsf{N}",
+ u"\uD5AE": "\\mathsf{O}",
+ u"\uD5AF": "\\mathsf{P}",
+ u"\uD5B0": "\\mathsf{Q}",
+ u"\uD5B1": "\\mathsf{R}",
+ u"\uD5B2": "\\mathsf{S}",
+ u"\uD5B3": "\\mathsf{T}",
+ u"\uD5B4": "\\mathsf{U}",
+ u"\uD5B5": "\\mathsf{V}",
+ u"\uD5B6": "\\mathsf{W}",
+ u"\uD5B7": "\\mathsf{X}",
+ u"\uD5B8": "\\mathsf{Y}",
+ u"\uD5B9": "\\mathsf{Z}",
+ u"\uD5BA": "\\mathsf{a}",
+ u"\uD5BB": "\\mathsf{b}",
+ u"\uD5BC": "\\mathsf{c}",
+ u"\uD5BD": "\\mathsf{d}",
+ u"\uD5BE": "\\mathsf{e}",
+ u"\uD5BF": "\\mathsf{f}",
+ u"\uD5C0": "\\mathsf{g}",
+ u"\uD5C1": "\\mathsf{h}",
+ u"\uD5C2": "\\mathsf{i}",
+ u"\uD5C3": "\\mathsf{j}",
+ u"\uD5C4": "\\mathsf{k}",
+ u"\uD5C5": "\\mathsf{l}",
+ u"\uD5C6": "\\mathsf{m}",
+ u"\uD5C7": "\\mathsf{n}",
+ u"\uD5C8": "\\mathsf{o}",
+ u"\uD5C9": "\\mathsf{p}",
+ u"\uD5CA": "\\mathsf{q}",
+ u"\uD5CB": "\\mathsf{r}",
+ u"\uD5CC": "\\mathsf{s}",
+ u"\uD5CD": "\\mathsf{t}",
+ u"\uD5CE": "\\mathsf{u}",
+ u"\uD5CF": "\\mathsf{v}",
+ u"\uD5D0": "\\mathsf{w}",
+ u"\uD5D1": "\\mathsf{x}",
+ u"\uD5D2": "\\mathsf{y}",
+ u"\uD5D3": "\\mathsf{z}",
+ u"\uD5D4": "\\mathsfbf{A}",
+ u"\uD5D5": "\\mathsfbf{B}",
+ u"\uD5D6": "\\mathsfbf{C}",
+ u"\uD5D7": "\\mathsfbf{D}",
+ u"\uD5D8": "\\mathsfbf{E}",
+ u"\uD5D9": "\\mathsfbf{F}",
+ u"\uD5DA": "\\mathsfbf{G}",
+ u"\uD5DB": "\\mathsfbf{H}",
+ u"\uD5DC": "\\mathsfbf{I}",
+ u"\uD5DD": "\\mathsfbf{J}",
+ u"\uD5DE": "\\mathsfbf{K}",
+ u"\uD5DF": "\\mathsfbf{L}",
+ u"\uD5E0": "\\mathsfbf{M}",
+ u"\uD5E1": "\\mathsfbf{N}",
+ u"\uD5E2": "\\mathsfbf{O}",
+ u"\uD5E3": "\\mathsfbf{P}",
+ u"\uD5E4": "\\mathsfbf{Q}",
+ u"\uD5E5": "\\mathsfbf{R}",
+ u"\uD5E6": "\\mathsfbf{S}",
+ u"\uD5E7": "\\mathsfbf{T}",
+ u"\uD5E8": "\\mathsfbf{U}",
+ u"\uD5E9": "\\mathsfbf{V}",
+ u"\uD5EA": "\\mathsfbf{W}",
+ u"\uD5EB": "\\mathsfbf{X}",
+ u"\uD5EC": "\\mathsfbf{Y}",
+ u"\uD5ED": "\\mathsfbf{Z}",
+ u"\uD5EE": "\\mathsfbf{a}",
+ u"\uD5EF": "\\mathsfbf{b}",
+ u"\uD5F0": "\\mathsfbf{c}",
+ u"\uD5F1": "\\mathsfbf{d}",
+ u"\uD5F2": "\\mathsfbf{e}",
+ u"\uD5F3": "\\mathsfbf{f}",
+ u"\uD5F4": "\\mathsfbf{g}",
+ u"\uD5F5": "\\mathsfbf{h}",
+ u"\uD5F6": "\\mathsfbf{i}",
+ u"\uD5F7": "\\mathsfbf{j}",
+ u"\uD5F8": "\\mathsfbf{k}",
+ u"\uD5F9": "\\mathsfbf{l}",
+ u"\uD5FA": "\\mathsfbf{m}",
+ u"\uD5FB": "\\mathsfbf{n}",
+ u"\uD5FC": "\\mathsfbf{o}",
+ u"\uD5FD": "\\mathsfbf{p}",
+ u"\uD5FE": "\\mathsfbf{q}",
+ u"\uD5FF": "\\mathsfbf{r}",
+ u"\uD600": "\\mathsfbf{s}",
+ u"\uD601": "\\mathsfbf{t}",
+ u"\uD602": "\\mathsfbf{u}",
+ u"\uD603": "\\mathsfbf{v}",
+ u"\uD604": "\\mathsfbf{w}",
+ u"\uD605": "\\mathsfbf{x}",
+ u"\uD606": "\\mathsfbf{y}",
+ u"\uD607": "\\mathsfbf{z}",
+ u"\uD608": "\\mathsfsl{A}",
+ u"\uD609": "\\mathsfsl{B}",
+ u"\uD60A": "\\mathsfsl{C}",
+ u"\uD60B": "\\mathsfsl{D}",
+ u"\uD60C": "\\mathsfsl{E}",
+ u"\uD60D": "\\mathsfsl{F}",
+ u"\uD60E": "\\mathsfsl{G}",
+ u"\uD60F": "\\mathsfsl{H}",
+ u"\uD610": "\\mathsfsl{I}",
+ u"\uD611": "\\mathsfsl{J}",
+ u"\uD612": "\\mathsfsl{K}",
+ u"\uD613": "\\mathsfsl{L}",
+ u"\uD614": "\\mathsfsl{M}",
+ u"\uD615": "\\mathsfsl{N}",
+ u"\uD616": "\\mathsfsl{O}",
+ u"\uD617": "\\mathsfsl{P}",
+ u"\uD618": "\\mathsfsl{Q}",
+ u"\uD619": "\\mathsfsl{R}",
+ u"\uD61A": "\\mathsfsl{S}",
+ u"\uD61B": "\\mathsfsl{T}",
+ u"\uD61C": "\\mathsfsl{U}",
+ u"\uD61D": "\\mathsfsl{V}",
+ u"\uD61E": "\\mathsfsl{W}",
+ u"\uD61F": "\\mathsfsl{X}",
+ u"\uD620": "\\mathsfsl{Y}",
+ u"\uD621": "\\mathsfsl{Z}",
+ u"\uD622": "\\mathsfsl{a}",
+ u"\uD623": "\\mathsfsl{b}",
+ u"\uD624": "\\mathsfsl{c}",
+ u"\uD625": "\\mathsfsl{d}",
+ u"\uD626": "\\mathsfsl{e}",
+ u"\uD627": "\\mathsfsl{f}",
+ u"\uD628": "\\mathsfsl{g}",
+ u"\uD629": "\\mathsfsl{h}",
+ u"\uD62A": "\\mathsfsl{i}",
+ u"\uD62B": "\\mathsfsl{j}",
+ u"\uD62C": "\\mathsfsl{k}",
+ u"\uD62D": "\\mathsfsl{l}",
+ u"\uD62E": "\\mathsfsl{m}",
+ u"\uD62F": "\\mathsfsl{n}",
+ u"\uD630": "\\mathsfsl{o}",
+ u"\uD631": "\\mathsfsl{p}",
+ u"\uD632": "\\mathsfsl{q}",
+ u"\uD633": "\\mathsfsl{r}",
+ u"\uD634": "\\mathsfsl{s}",
+ u"\uD635": "\\mathsfsl{t}",
+ u"\uD636": "\\mathsfsl{u}",
+ u"\uD637": "\\mathsfsl{v}",
+ u"\uD638": "\\mathsfsl{w}",
+ u"\uD639": "\\mathsfsl{x}",
+ u"\uD63A": "\\mathsfsl{y}",
+ u"\uD63B": "\\mathsfsl{z}",
+ u"\uD63C": "\\mathsfbfsl{A}",
+ u"\uD63D": "\\mathsfbfsl{B}",
+ u"\uD63E": "\\mathsfbfsl{C}",
+ u"\uD63F": "\\mathsfbfsl{D}",
+ u"\uD640": "\\mathsfbfsl{E}",
+ u"\uD641": "\\mathsfbfsl{F}",
+ u"\uD642": "\\mathsfbfsl{G}",
+ u"\uD643": "\\mathsfbfsl{H}",
+ u"\uD644": "\\mathsfbfsl{I}",
+ u"\uD645": "\\mathsfbfsl{J}",
+ u"\uD646": "\\mathsfbfsl{K}",
+ u"\uD647": "\\mathsfbfsl{L}",
+ u"\uD648": "\\mathsfbfsl{M}",
+ u"\uD649": "\\mathsfbfsl{N}",
+ u"\uD64A": "\\mathsfbfsl{O}",
+ u"\uD64B": "\\mathsfbfsl{P}",
+ u"\uD64C": "\\mathsfbfsl{Q}",
+ u"\uD64D": "\\mathsfbfsl{R}",
+ u"\uD64E": "\\mathsfbfsl{S}",
+ u"\uD64F": "\\mathsfbfsl{T}",
+ u"\uD650": "\\mathsfbfsl{U}",
+ u"\uD651": "\\mathsfbfsl{V}",
+ u"\uD652": "\\mathsfbfsl{W}",
+ u"\uD653": "\\mathsfbfsl{X}",
+ u"\uD654": "\\mathsfbfsl{Y}",
+ u"\uD655": "\\mathsfbfsl{Z}",
+ u"\uD656": "\\mathsfbfsl{a}",
+ u"\uD657": "\\mathsfbfsl{b}",
+ u"\uD658": "\\mathsfbfsl{c}",
+ u"\uD659": "\\mathsfbfsl{d}",
+ u"\uD65A": "\\mathsfbfsl{e}",
+ u"\uD65B": "\\mathsfbfsl{f}",
+ u"\uD65C": "\\mathsfbfsl{g}",
+ u"\uD65D": "\\mathsfbfsl{h}",
+ u"\uD65E": "\\mathsfbfsl{i}",
+ u"\uD65F": "\\mathsfbfsl{j}",
+ u"\uD660": "\\mathsfbfsl{k}",
+ u"\uD661": "\\mathsfbfsl{l}",
+ u"\uD662": "\\mathsfbfsl{m}",
+ u"\uD663": "\\mathsfbfsl{n}",
+ u"\uD664": "\\mathsfbfsl{o}",
+ u"\uD665": "\\mathsfbfsl{p}",
+ u"\uD666": "\\mathsfbfsl{q}",
+ u"\uD667": "\\mathsfbfsl{r}",
+ u"\uD668": "\\mathsfbfsl{s}",
+ u"\uD669": "\\mathsfbfsl{t}",
+ u"\uD66A": "\\mathsfbfsl{u}",
+ u"\uD66B": "\\mathsfbfsl{v}",
+ u"\uD66C": "\\mathsfbfsl{w}",
+ u"\uD66D": "\\mathsfbfsl{x}",
+ u"\uD66E": "\\mathsfbfsl{y}",
+ u"\uD66F": "\\mathsfbfsl{z}",
+ u"\uD670": "\\mathtt{A}",
+ u"\uD671": "\\mathtt{B}",
+ u"\uD672": "\\mathtt{C}",
+ u"\uD673": "\\mathtt{D}",
+ u"\uD674": "\\mathtt{E}",
+ u"\uD675": "\\mathtt{F}",
+ u"\uD676": "\\mathtt{G}",
+ u"\uD677": "\\mathtt{H}",
+ u"\uD678": "\\mathtt{I}",
+ u"\uD679": "\\mathtt{J}",
+ u"\uD67A": "\\mathtt{K}",
+ u"\uD67B": "\\mathtt{L}",
+ u"\uD67C": "\\mathtt{M}",
+ u"\uD67D": "\\mathtt{N}",
+ u"\uD67E": "\\mathtt{O}",
+ u"\uD67F": "\\mathtt{P}",
+ u"\uD680": "\\mathtt{Q}",
+ u"\uD681": "\\mathtt{R}",
+ u"\uD682": "\\mathtt{S}",
+ u"\uD683": "\\mathtt{T}",
+ u"\uD684": "\\mathtt{U}",
+ u"\uD685": "\\mathtt{V}",
+ u"\uD686": "\\mathtt{W}",
+ u"\uD687": "\\mathtt{X}",
+ u"\uD688": "\\mathtt{Y}",
+ u"\uD689": "\\mathtt{Z}",
+ u"\uD68A": "\\mathtt{a}",
+ u"\uD68B": "\\mathtt{b}",
+ u"\uD68C": "\\mathtt{c}",
+ u"\uD68D": "\\mathtt{d}",
+ u"\uD68E": "\\mathtt{e}",
+ u"\uD68F": "\\mathtt{f}",
+ u"\uD690": "\\mathtt{g}",
+ u"\uD691": "\\mathtt{h}",
+ u"\uD692": "\\mathtt{i}",
+ u"\uD693": "\\mathtt{j}",
+ u"\uD694": "\\mathtt{k}",
+ u"\uD695": "\\mathtt{l}",
+ u"\uD696": "\\mathtt{m}",
+ u"\uD697": "\\mathtt{n}",
+ u"\uD698": "\\mathtt{o}",
+ u"\uD699": "\\mathtt{p}",
+ u"\uD69A": "\\mathtt{q}",
+ u"\uD69B": "\\mathtt{r}",
+ u"\uD69C": "\\mathtt{s}",
+ u"\uD69D": "\\mathtt{t}",
+ u"\uD69E": "\\mathtt{u}",
+ u"\uD69F": "\\mathtt{v}",
+ u"\uD6A0": "\\mathtt{w}",
+ u"\uD6A1": "\\mathtt{x}",
+ u"\uD6A2": "\\mathtt{y}",
+ u"\uD6A3": "\\mathtt{z}",
+ u"\uD6A8": "\\mathbf{\\Alpha}",
+ u"\uD6A9": "\\mathbf{\\Beta}",
+ u"\uD6AA": "\\mathbf{\\Gamma}",
+ u"\uD6AB": "\\mathbf{\\Delta}",
+ u"\uD6AC": "\\mathbf{\\Epsilon}",
+ u"\uD6AD": "\\mathbf{\\Zeta}",
+ u"\uD6AE": "\\mathbf{\\Eta}",
+ u"\uD6AF": "\\mathbf{\\Theta}",
+ u"\uD6B0": "\\mathbf{\\Iota}",
+ u"\uD6B1": "\\mathbf{\\Kappa}",
+ u"\uD6B2": "\\mathbf{\\Lambda}",
+ u"\uD6B5": "\\mathbf{\\Xi}",
+ u"\uD6B7": "\\mathbf{\\Pi}",
+ u"\uD6B8": "\\mathbf{\\Rho}",
+ u"\uD6B9": "\\mathbf{\\vartheta}",
+ u"\uD6BA": "\\mathbf{\\Sigma}",
+ u"\uD6BB": "\\mathbf{\\Tau}",
+ u"\uD6BC": "\\mathbf{\\Upsilon}",
+ u"\uD6BD": "\\mathbf{\\Phi}",
+ u"\uD6BE": "\\mathbf{\\Chi}",
+ u"\uD6BF": "\\mathbf{\\Psi}",
+ u"\uD6C0": "\\mathbf{\\Omega}",
+ u"\uD6C1": "\\mathbf{\\nabla}",
+ u"\uD6C2": "\\mathbf{\\Alpha}",
+ u"\uD6C3": "\\mathbf{\\Beta}",
+ u"\uD6C4": "\\mathbf{\\Gamma}",
+ u"\uD6C5": "\\mathbf{\\Delta}",
+ u"\uD6C6": "\\mathbf{\\Epsilon}",
+ u"\uD6C7": "\\mathbf{\\Zeta}",
+ u"\uD6C8": "\\mathbf{\\Eta}",
+ u"\uD6C9": "\\mathbf{\\theta}",
+ u"\uD6CA": "\\mathbf{\\Iota}",
+ u"\uD6CB": "\\mathbf{\\Kappa}",
+ u"\uD6CC": "\\mathbf{\\Lambda}",
+ u"\uD6CF": "\\mathbf{\\Xi}",
+ u"\uD6D1": "\\mathbf{\\Pi}",
+ u"\uD6D2": "\\mathbf{\\Rho}",
+ u"\uD6D3": "\\mathbf{\\varsigma}",
+ u"\uD6D4": "\\mathbf{\\Sigma}",
+ u"\uD6D5": "\\mathbf{\\Tau}",
+ u"\uD6D6": "\\mathbf{\\Upsilon}",
+ u"\uD6D7": "\\mathbf{\\Phi}",
+ u"\uD6D8": "\\mathbf{\\Chi}",
+ u"\uD6D9": "\\mathbf{\\Psi}",
+ u"\uD6DA": "\\mathbf{\\Omega}",
+ u"\uD6DB": "\\partial ",
+ u"\uD6DC": "\\in",
+ u"\uD6DD": "\\mathbf{\\vartheta}",
+ u"\uD6DE": "\\mathbf{\\varkappa}",
+ u"\uD6DF": "\\mathbf{\\phi}",
+ u"\uD6E0": "\\mathbf{\\varrho}",
+ u"\uD6E1": "\\mathbf{\\varpi}",
+ u"\uD6E2": "\\mathsl{\\Alpha}",
+ u"\uD6E3": "\\mathsl{\\Beta}",
+ u"\uD6E4": "\\mathsl{\\Gamma}",
+ u"\uD6E5": "\\mathsl{\\Delta}",
+ u"\uD6E6": "\\mathsl{\\Epsilon}",
+ u"\uD6E7": "\\mathsl{\\Zeta}",
+ u"\uD6E8": "\\mathsl{\\Eta}",
+ u"\uD6E9": "\\mathsl{\\Theta}",
+ u"\uD6EA": "\\mathsl{\\Iota}",
+ u"\uD6EB": "\\mathsl{\\Kappa}",
+ u"\uD6EC": "\\mathsl{\\Lambda}",
+ u"\uD6EF": "\\mathsl{\\Xi}",
+ u"\uD6F1": "\\mathsl{\\Pi}",
+ u"\uD6F2": "\\mathsl{\\Rho}",
+ u"\uD6F3": "\\mathsl{\\vartheta}",
+ u"\uD6F4": "\\mathsl{\\Sigma}",
+ u"\uD6F5": "\\mathsl{\\Tau}",
+ u"\uD6F6": "\\mathsl{\\Upsilon}",
+ u"\uD6F7": "\\mathsl{\\Phi}",
+ u"\uD6F8": "\\mathsl{\\Chi}",
+ u"\uD6F9": "\\mathsl{\\Psi}",
+ u"\uD6FA": "\\mathsl{\\Omega}",
+ u"\uD6FB": "\\mathsl{\\nabla}",
+ u"\uD6FC": "\\mathsl{\\Alpha}",
+ u"\uD6FD": "\\mathsl{\\Beta}",
+ u"\uD6FE": "\\mathsl{\\Gamma}",
+ u"\uD6FF": "\\mathsl{\\Delta}",
+ u"\uD700": "\\mathsl{\\Epsilon}",
+ u"\uD701": "\\mathsl{\\Zeta}",
+ u"\uD702": "\\mathsl{\\Eta}",
+ u"\uD703": "\\mathsl{\\Theta}",
+ u"\uD704": "\\mathsl{\\Iota}",
+ u"\uD705": "\\mathsl{\\Kappa}",
+ u"\uD706": "\\mathsl{\\Lambda}",
+ u"\uD709": "\\mathsl{\\Xi}",
+ u"\uD70B": "\\mathsl{\\Pi}",
+ u"\uD70C": "\\mathsl{\\Rho}",
+ u"\uD70D": "\\mathsl{\\varsigma}",
+ u"\uD70E": "\\mathsl{\\Sigma}",
+ u"\uD70F": "\\mathsl{\\Tau}",
+ u"\uD710": "\\mathsl{\\Upsilon}",
+ u"\uD711": "\\mathsl{\\Phi}",
+ u"\uD712": "\\mathsl{\\Chi}",
+ u"\uD713": "\\mathsl{\\Psi}",
+ u"\uD714": "\\mathsl{\\Omega}",
+ u"\uD715": "\\partial ",
+ u"\uD716": "\\in",
+ u"\uD717": "\\mathsl{\\vartheta}",
+ u"\uD718": "\\mathsl{\\varkappa}",
+ u"\uD719": "\\mathsl{\\phi}",
+ u"\uD71A": "\\mathsl{\\varrho}",
+ u"\uD71B": "\\mathsl{\\varpi}",
+ u"\uD71C": "\\mathbit{\\Alpha}",
+ u"\uD71D": "\\mathbit{\\Beta}",
+ u"\uD71E": "\\mathbit{\\Gamma}",
+ u"\uD71F": "\\mathbit{\\Delta}",
+ u"\uD720": "\\mathbit{\\Epsilon}",
+ u"\uD721": "\\mathbit{\\Zeta}",
+ u"\uD722": "\\mathbit{\\Eta}",
+ u"\uD723": "\\mathbit{\\Theta}",
+ u"\uD724": "\\mathbit{\\Iota}",
+ u"\uD725": "\\mathbit{\\Kappa}",
+ u"\uD726": "\\mathbit{\\Lambda}",
+ u"\uD729": "\\mathbit{\\Xi}",
+ u"\uD72B": "\\mathbit{\\Pi}",
+ u"\uD72C": "\\mathbit{\\Rho}",
+ u"\uD72D": "\\mathbit{O}",
+ u"\uD72E": "\\mathbit{\\Sigma}",
+ u"\uD72F": "\\mathbit{\\Tau}",
+ u"\uD730": "\\mathbit{\\Upsilon}",
+ u"\uD731": "\\mathbit{\\Phi}",
+ u"\uD732": "\\mathbit{\\Chi}",
+ u"\uD733": "\\mathbit{\\Psi}",
+ u"\uD734": "\\mathbit{\\Omega}",
+ u"\uD735": "\\mathbit{\\nabla}",
+ u"\uD736": "\\mathbit{\\Alpha}",
+ u"\uD737": "\\mathbit{\\Beta}",
+ u"\uD738": "\\mathbit{\\Gamma}",
+ u"\uD739": "\\mathbit{\\Delta}",
+ u"\uD73A": "\\mathbit{\\Epsilon}",
+ u"\uD73B": "\\mathbit{\\Zeta}",
+ u"\uD73C": "\\mathbit{\\Eta}",
+ u"\uD73D": "\\mathbit{\\Theta}",
+ u"\uD73E": "\\mathbit{\\Iota}",
+ u"\uD73F": "\\mathbit{\\Kappa}",
+ u"\uD740": "\\mathbit{\\Lambda}",
+ u"\uD743": "\\mathbit{\\Xi}",
+ u"\uD745": "\\mathbit{\\Pi}",
+ u"\uD746": "\\mathbit{\\Rho}",
+ u"\uD747": "\\mathbit{\\varsigma}",
+ u"\uD748": "\\mathbit{\\Sigma}",
+ u"\uD749": "\\mathbit{\\Tau}",
+ u"\uD74A": "\\mathbit{\\Upsilon}",
+ u"\uD74B": "\\mathbit{\\Phi}",
+ u"\uD74C": "\\mathbit{\\Chi}",
+ u"\uD74D": "\\mathbit{\\Psi}",
+ u"\uD74E": "\\mathbit{\\Omega}",
+ u"\uD74F": "\\partial ",
+ u"\uD750": "\\in",
+ u"\uD751": "\\mathbit{\\vartheta}",
+ u"\uD752": "\\mathbit{\\varkappa}",
+ u"\uD753": "\\mathbit{\\phi}",
+ u"\uD754": "\\mathbit{\\varrho}",
+ u"\uD755": "\\mathbit{\\varpi}",
+ u"\uD756": "\\mathsfbf{\\Alpha}",
+ u"\uD757": "\\mathsfbf{\\Beta}",
+ u"\uD758": "\\mathsfbf{\\Gamma}",
+ u"\uD759": "\\mathsfbf{\\Delta}",
+ u"\uD75A": "\\mathsfbf{\\Epsilon}",
+ u"\uD75B": "\\mathsfbf{\\Zeta}",
+ u"\uD75C": "\\mathsfbf{\\Eta}",
+ u"\uD75D": "\\mathsfbf{\\Theta}",
+ u"\uD75E": "\\mathsfbf{\\Iota}",
+ u"\uD75F": "\\mathsfbf{\\Kappa}",
+ u"\uD760": "\\mathsfbf{\\Lambda}",
+ u"\uD763": "\\mathsfbf{\\Xi}",
+ u"\uD765": "\\mathsfbf{\\Pi}",
+ u"\uD766": "\\mathsfbf{\\Rho}",
+ u"\uD767": "\\mathsfbf{\\vartheta}",
+ u"\uD768": "\\mathsfbf{\\Sigma}",
+ u"\uD769": "\\mathsfbf{\\Tau}",
+ u"\uD76A": "\\mathsfbf{\\Upsilon}",
+ u"\uD76B": "\\mathsfbf{\\Phi}",
+ u"\uD76C": "\\mathsfbf{\\Chi}",
+ u"\uD76D": "\\mathsfbf{\\Psi}",
+ u"\uD76E": "\\mathsfbf{\\Omega}",
+ u"\uD76F": "\\mathsfbf{\\nabla}",
+ u"\uD770": "\\mathsfbf{\\Alpha}",
+ u"\uD771": "\\mathsfbf{\\Beta}",
+ u"\uD772": "\\mathsfbf{\\Gamma}",
+ u"\uD773": "\\mathsfbf{\\Delta}",
+ u"\uD774": "\\mathsfbf{\\Epsilon}",
+ u"\uD775": "\\mathsfbf{\\Zeta}",
+ u"\uD776": "\\mathsfbf{\\Eta}",
+ u"\uD777": "\\mathsfbf{\\Theta}",
+ u"\uD778": "\\mathsfbf{\\Iota}",
+ u"\uD779": "\\mathsfbf{\\Kappa}",
+ u"\uD77A": "\\mathsfbf{\\Lambda}",
+ u"\uD77D": "\\mathsfbf{\\Xi}",
+ u"\uD77F": "\\mathsfbf{\\Pi}",
+ u"\uD780": "\\mathsfbf{\\Rho}",
+ u"\uD781": "\\mathsfbf{\\varsigma}",
+ u"\uD782": "\\mathsfbf{\\Sigma}",
+ u"\uD783": "\\mathsfbf{\\Tau}",
+ u"\uD784": "\\mathsfbf{\\Upsilon}",
+ u"\uD785": "\\mathsfbf{\\Phi}",
+ u"\uD786": "\\mathsfbf{\\Chi}",
+ u"\uD787": "\\mathsfbf{\\Psi}",
+ u"\uD788": "\\mathsfbf{\\Omega}",
+ u"\uD789": "\\partial ",
+ u"\uD78A": "\\in",
+ u"\uD78B": "\\mathsfbf{\\vartheta}",
+ u"\uD78C": "\\mathsfbf{\\varkappa}",
+ u"\uD78D": "\\mathsfbf{\\phi}",
+ u"\uD78E": "\\mathsfbf{\\varrho}",
+ u"\uD78F": "\\mathsfbf{\\varpi}",
+ u"\uD790": "\\mathsfbfsl{\\Alpha}",
+ u"\uD791": "\\mathsfbfsl{\\Beta}",
+ u"\uD792": "\\mathsfbfsl{\\Gamma}",
+ u"\uD793": "\\mathsfbfsl{\\Delta}",
+ u"\uD794": "\\mathsfbfsl{\\Epsilon}",
+ u"\uD795": "\\mathsfbfsl{\\Zeta}",
+ u"\uD796": "\\mathsfbfsl{\\Eta}",
+ u"\uD797": "\\mathsfbfsl{\\vartheta}",
+ u"\uD798": "\\mathsfbfsl{\\Iota}",
+ u"\uD799": "\\mathsfbfsl{\\Kappa}",
+ u"\uD79A": "\\mathsfbfsl{\\Lambda}",
+ u"\uD79D": "\\mathsfbfsl{\\Xi}",
+ u"\uD79F": "\\mathsfbfsl{\\Pi}",
+ u"\uD7A0": "\\mathsfbfsl{\\Rho}",
+ u"\uD7A1": "\\mathsfbfsl{\\vartheta}",
+ u"\uD7A2": "\\mathsfbfsl{\\Sigma}",
+ u"\uD7A3": "\\mathsfbfsl{\\Tau}",
+ u"\uD7A4": "\\mathsfbfsl{\\Upsilon}",
+ u"\uD7A5": "\\mathsfbfsl{\\Phi}",
+ u"\uD7A6": "\\mathsfbfsl{\\Chi}",
+ u"\uD7A7": "\\mathsfbfsl{\\Psi}",
+ u"\uD7A8": "\\mathsfbfsl{\\Omega}",
+ u"\uD7A9": "\\mathsfbfsl{\\nabla}",
+ u"\uD7AA": "\\mathsfbfsl{\\Alpha}",
+ u"\uD7AB": "\\mathsfbfsl{\\Beta}",
+ u"\uD7AC": "\\mathsfbfsl{\\Gamma}",
+ u"\uD7AD": "\\mathsfbfsl{\\Delta}",
+ u"\uD7AE": "\\mathsfbfsl{\\Epsilon}",
+ u"\uD7AF": "\\mathsfbfsl{\\Zeta}",
+ u"\uD7B0": "\\mathsfbfsl{\\Eta}",
+ u"\uD7B1": "\\mathsfbfsl{\\vartheta}",
+ u"\uD7B2": "\\mathsfbfsl{\\Iota}",
+ u"\uD7B3": "\\mathsfbfsl{\\Kappa}",
+ u"\uD7B4": "\\mathsfbfsl{\\Lambda}",
+ u"\uD7B7": "\\mathsfbfsl{\\Xi}",
+ u"\uD7B9": "\\mathsfbfsl{\\Pi}",
+ u"\uD7BA": "\\mathsfbfsl{\\Rho}",
+ u"\uD7BB": "\\mathsfbfsl{\\varsigma}",
+ u"\uD7BC": "\\mathsfbfsl{\\Sigma}",
+ u"\uD7BD": "\\mathsfbfsl{\\Tau}",
+ u"\uD7BE": "\\mathsfbfsl{\\Upsilon}",
+ u"\uD7BF": "\\mathsfbfsl{\\Phi}",
+ u"\uD7C0": "\\mathsfbfsl{\\Chi}",
+ u"\uD7C1": "\\mathsfbfsl{\\Psi}",
+ u"\uD7C2": "\\mathsfbfsl{\\Omega}",
+ u"\uD7C3": "\\partial ",
+ u"\uD7C4": "\\in",
+ u"\uD7C5": "\\mathsfbfsl{\\vartheta}",
+ u"\uD7C6": "\\mathsfbfsl{\\varkappa}",
+ u"\uD7C7": "\\mathsfbfsl{\\phi}",
+ u"\uD7C8": "\\mathsfbfsl{\\varrho}",
+ u"\uD7C9": "\\mathsfbfsl{\\varpi}",
+ u"\uD7CE": "\\mathbf{0}",
+ u"\uD7CF": "\\mathbf{1}",
+ u"\uD7D0": "\\mathbf{2}",
+ u"\uD7D1": "\\mathbf{3}",
+ u"\uD7D2": "\\mathbf{4}",
+ u"\uD7D3": "\\mathbf{5}",
+ u"\uD7D4": "\\mathbf{6}",
+ u"\uD7D5": "\\mathbf{7}",
+ u"\uD7D6": "\\mathbf{8}",
+ u"\uD7D7": "\\mathbf{9}",
+ u"\uD7D8": "\\mathbb{0}",
+ u"\uD7D9": "\\mathbb{1}",
+ u"\uD7DA": "\\mathbb{2}",
+ u"\uD7DB": "\\mathbb{3}",
+ u"\uD7DC": "\\mathbb{4}",
+ u"\uD7DD": "\\mathbb{5}",
+ u"\uD7DE": "\\mathbb{6}",
+ u"\uD7DF": "\\mathbb{7}",
+ u"\uD7E0": "\\mathbb{8}",
+ u"\uD7E1": "\\mathbb{9}",
+ u"\uD7E2": "\\mathsf{0}",
+ u"\uD7E3": "\\mathsf{1}",
+ u"\uD7E4": "\\mathsf{2}",
+ u"\uD7E5": "\\mathsf{3}",
+ u"\uD7E6": "\\mathsf{4}",
+ u"\uD7E7": "\\mathsf{5}",
+ u"\uD7E8": "\\mathsf{6}",
+ u"\uD7E9": "\\mathsf{7}",
+ u"\uD7EA": "\\mathsf{8}",
+ u"\uD7EB": "\\mathsf{9}",
+ u"\uD7EC": "\\mathsfbf{0}",
+ u"\uD7ED": "\\mathsfbf{1}",
+ u"\uD7EE": "\\mathsfbf{2}",
+ u"\uD7EF": "\\mathsfbf{3}",
+ u"\uD7F0": "\\mathsfbf{4}",
+ u"\uD7F1": "\\mathsfbf{5}",
+ u"\uD7F2": "\\mathsfbf{6}",
+ u"\uD7F3": "\\mathsfbf{7}",
+ u"\uD7F4": "\\mathsfbf{8}",
+ u"\uD7F5": "\\mathsfbf{9}",
+ u"\uD7F6": "\\mathtt{0}",
+ u"\uD7F7": "\\mathtt{1}",
+ u"\uD7F8": "\\mathtt{2}",
+ u"\uD7F9": "\\mathtt{3}",
+ u"\uD7FA": "\\mathtt{4}",
+ u"\uD7FB": "\\mathtt{5}",
+ u"\uD7FC": "\\mathtt{6}",
+ u"\uD7FD": "\\mathtt{7}",
+ u"\uD7FE": "\\mathtt{8}",
+ u"\uD7FF": "\\mathtt{9}",
+}
diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py
new file mode 100644
index 00000000..0d03d0bf
--- /dev/null
+++ b/pydocx/tests/__init__.py
@@ -0,0 +1,316 @@
+#from unittest import TestCase
+import re
+from contextlib import contextmanager
+
+from pydocx.parsers.Docx2Html import Docx2Html
+from pydocx.parsers.Docx2LaTex import Docx2LaTex
+from pydocx.utils import (
+ parse_xml_from_string,
+)
+from pydocx.tests.document_builder import DocxBuilder as DXB
+from unittest import TestCase
+
+# Style text that is interpolated into the first placeholder of BASE_HTML.
+STYLE = (
+ ''
+)
+
+# Skeleton that expected HTML is wrapped in before comparison. STYLE fills
+# the first %s right away; the escaped %%s is left behind as a plain %s so
+# that each test can substitute its own expected body later.
+BASE_HTML = '''
+
+
+
+ %s
+
+ %%s
+
+''' % STYLE
+
+# Preamble and closing line that expected LaTeX is wrapped in before
+# comparison. The raw strings keep the backslashes literal; the plain "%s"
+# in the middle is the slot for the expected body.
+BASE_LATEX = r'''\documentclass{article}\usepackage{hyperref}
+\usepackage{graphicx}\usepackage{changes}
+\usepackage{changepage}
+\usepackage{hanging}\usepackage{multirow}
+\usepackage{pbox}\usepackage{pdflscape}
+\usepackage{ulem}\usepackage{comment}\usepackage{mathtools}
+\begin{document}''' + "%s" + r'''\end{document}
+'''
+
+
+def assert_html_equal(actual_html, expected_html):
+    """
+    Assert that two HTML strings are equal after both have been passed
+    through collapse_html.
+
+    On failure the assertion message is the untouched ``actual_html``.
+    """
+    normalized_actual = collapse_html(actual_html)
+    normalized_expected = collapse_html(expected_html)
+    assert normalized_actual == normalized_expected, actual_html
+
+
+def assert_latex_equal(actual_latex, expected_latex):
+    """
+    Assert that two LaTeX strings are equal after both have been passed
+    through collapse_latex.
+
+    On failure the assertion message is the untouched ``actual_latex``.
+    """
+    normalized_actual = collapse_latex(actual_latex)
+    normalized_expected = collapse_latex(expected_latex)
+    assert normalized_actual == normalized_expected, actual_latex
+
+
+# Normalise whitespace in a LaTeX string so that two renderings can be
+# compared by assert_latex_equal.
+def collapse_latex(latex):
+
+ def smart_space(match):
+ # Rejoin the two captured groups with a single space between them.
+ # Unlike collapse_html there is no tag check here: the separator is
+ # always ' '.
+ before = match.group(1)
+ after = match.group(2)
+ space = ' '
+ return before + space + after
+ # Rewrite every match of the pattern below through smart_space.
+ # NOTE(review): the pattern contains no literal newline and its second
+ # group is empty, so it can also match the empty string -- confirm this
+ # is the intended pattern.
+ latex = re.sub(
+ r'(>?)\s*\s*()',
+ smart_space,
+ latex,
+ )
+ # Trim leading and trailing whitespace from the result.
+ return latex.strip()
+
+
+def collapse_html(html):
+    """
+    Remove insignificant whitespace from the html.
+
+    Every line break, together with the whitespace around it, is replaced
+    by a single space -- or by nothing at all when the break comes directly
+    after a ``>`` or directly before a ``<``. The result is stripped of
+    leading and trailing whitespace.
+
+    >>> collapse_html('<h1>\\n    Heading\\n</h1>')
+    '<h1>Heading</h1>'
+    >>> collapse_html('<p>\\n  Paragraph with\\n  multiple lines.\\n</p>')
+    '<p>Paragraph with multiple lines.</p>'
+    """
+    def smart_space(match):
+        # Put a space in between lines, unless either side of the line break
+        # butts up against a tag.
+        before = match.group(1)
+        after = match.group(2)
+        if before == '>' or after == '<':
+            return before + after
+        return before + ' ' + after
+    # Group 1 optionally captures a '>' right before the break and group 2 a
+    # '<' right after it; without the second capture the "before a tag" case
+    # could never be detected.
+    html = re.sub(
+        r'(>?)\s*\n\s*(<?)',
+        smart_space,
+        html,
+    )
+    return html.strip()
+
+
+class _XMLDocxTestMixin(object):
+    """
+    Shared plumbing for parser subclasses that are handed their data
+    directly instead of a path to a document.
+
+    The mixin must come first in the bases so that its methods take
+    precedence over the real parser's.
+    """
+    def __init__(self, *args, **kwargs):
+        # Pass in nothing for the path
+        super(_XMLDocxTestMixin, self).__init__(path=None, *args, **kwargs)
+
+    def _build_data(
+            self,
+            path,
+            document_xml=None,
+            rels_dict=None,
+            numbering_dict=None,
+            styles_dict=None,
+            *args, **kwargs):
+        """
+        Populate the parser state from in-memory values.
+
+        ``path`` is accepted for signature compatibility and ignored.
+        """
+        self._test_rels_dict = rels_dict
+        if rels_dict:
+            # Register every relationship target as its own image data.
+            # NOTE(review): self._image_data is expected to exist already --
+            # presumably created by the parent parser; confirm.
+            for value in rels_dict.values():
+                self._image_data['word/%s' % value] = 'word/%s' % value
+        self.numbering_root = None
+        if numbering_dict is not None:
+            self.numbering_root = parse_xml_from_string(
+                DXB.numbering(numbering_dict),
+            )
+        self.numbering_dict = numbering_dict
+        # Intentionally not calling super
+        if document_xml is not None:
+            self.root = parse_xml_from_string(document_xml)
+        self.zip_path = ''
+
+        # This is the standard page width for a word document, Also the page
+        # width that we are looking for in the test.
+        self.page_width = 612
+
+        self.styles_dict = styles_dict
+
+    def _parse_rels_root(self, *args, **kwargs):
+        """Return the relationship dict given to _build_data, or {}."""
+        if self._test_rels_dict is None:
+            return {}
+        return self._test_rels_dict
+
+    def get_list_style(self, num_id, ilvl):
+        """
+        Look up the list style for ``num_id``/``ilvl`` in the numbering
+        dict, falling back to 'decimal' when either key is missing.
+        """
+        try:
+            return self.numbering_dict[num_id][ilvl]
+        except KeyError:
+            return 'decimal'
+
+    def _parse_styles(self):
+        """Return the styles dict given to _build_data, or {}."""
+        if self.styles_dict is None:
+            return {}
+        return self.styles_dict
+
+
+class XMLDocx2Latex(_XMLDocxTestMixin, Docx2LaTex):
+    """
+    Create the object without passing in a path to the document, set them
+    manually.
+    """
+
+
+class XMLDocx2Html(_XMLDocxTestMixin, Docx2Html):
+    """
+    Create the object without passing in a path to the document, set them
+    manually.
+    """
+
+
+# Numbering used by test cases that do not supply their own: the outer keys
+# are the first index used by get_list_style (num_id), the inner keys the
+# second (ilvl).
+DEFAULT_NUMBERING_DICT = {
+    '1': {
+        '0': 'decimal',
+        '1': 'decimal',
+    },
+    '2': {
+        '0': 'lowerLetter',
+        '1': 'lowerLetter',
+    },
+}
+
+
+class _TranslationTestCase(TestCase):
+    """
+    Base class for tests that build a document in memory, run it through
+    both the HTML and the LaTeX parser and compare the results with
+    ``expected_output`` and ``latex_expected_output``.
+
+    Subclasses implement ``get_xml`` and override the class attributes
+    below as needed.
+    """
+    expected_output = None
+    latex_expected_output = None
+    relationship_dict = None
+    styles_dict = None
+    numbering_dict = DEFAULT_NUMBERING_DICT
+    run_expected_output = True
+    parser = XMLDocx2Html
+    latex_parser = XMLDocx2Latex
+    use_base_html = True
+    convert_root_level_upper_roman = False
+
+    def get_xml(self):
+        """Return the document XML under test; subclasses must override."""
+        raise NotImplementedError()
+
+    @contextmanager
+    def toggle_run_expected_output(self):
+        """
+        Flip ``run_expected_output`` for the duration of the ``with`` block.
+
+        The flag is flipped back even when the block raises.
+        """
+        self.run_expected_output = not self.run_expected_output
+        try:
+            yield
+        finally:
+            self.run_expected_output = not self.run_expected_output
+
+    def test_expected_output(self):
+        """
+        Convert the XML from ``get_xml`` with both parsers and compare.
+
+        Raises NotImplementedError when ``expected_output`` is not defined
+        and returns without checking anything when ``run_expected_output``
+        is false.
+        """
+        if self.expected_output is None:
+            raise NotImplementedError('expected_output is not defined')
+        if not self.run_expected_output:
+            return
+
+        # Create the xml
+        tree = self.get_xml()
+
+        # Both parsers are constructed from exactly the same inputs.
+        parser_kwargs = dict(
+            convert_root_level_upper_roman=self.convert_root_level_upper_roman,
+            document_xml=tree,
+            rels_dict=self.relationship_dict,
+            numbering_dict=self.numbering_dict,
+            styles_dict=self.styles_dict,
+        )
+
+        def image_handler(self, src, *args, **kwargs):
+            return src
+
+        # Verify the final output.
+        # The handler is assigned on the parser *class*, so it stays in
+        # place for every later use of that class as well.
+        parser = self.parser
+        parser.image_handler = image_handler
+        html = parser(**parser_kwargs).parsed
+        if self.use_base_html:
+            assert_html_equal(html, BASE_HTML % self.expected_output)
+        else:
+            assert_html_equal(html, self.expected_output)
+
+        latex_parser = self.latex_parser
+        latex_parser.image_handler = image_handler
+        latex = latex_parser(**parser_kwargs).parsed
+        assert_latex_equal(latex, BASE_LATEX % self.latex_expected_output)
diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py
new file mode 100644
index 00000000..b3b2689d
--- /dev/null
+++ b/pydocx/tests/document_builder.py
@@ -0,0 +1,346 @@
+from jinja2 import Environment, PackageLoader
+from pydocx.DocxParser import EMUS_PER_PIXEL
+
+# Maps the short names used by DocxBuilder to the template file that is
+# rendered for each of them.
+templates = {
+ 'deg': 'deg.xml',
+ 'delete': 'text_delete.xml',
+ 'drawing': 'drawing.xml',
+ 'exp': 'exp.xml',
+ 'hyperlink': 'hyperlink.xml',
+ 'insert': 'insert.xml',
+ 'linebreak': 'linebreak.xml',
+ 'main': 'base.xml',
+ 'math': 'math.xml',
+ 'math_para': 'math_para.xml',
+ 'math_tag': 'math_tag.xml',
+ 'matrix': 'matrix.xml',
+ 'matrix_row': 'matrix_row.xml',
+ 'numbering': 'numbering.xml',
+ 'p': 'p.xml',
+ 'pict': 'pict.xml',
+ 'r': 'r.xml',
+ 'rad': 'rad.xml',
+ 'rpr': 'rpr.xml',
+ 'sdt': 'sdt.xml',
+ 'sectPr': 'sectPr.xml',
+ 'smartTag': 'smart_tag.xml',
+ 'style': 'style.xml',
+ 'styles': 'styles.xml',
+ 't': 't.xml',
+ 'table': 'table.xml',
+ 'tc': 'tc.xml',
+ 'tr': 'tr.xml',
+}
+
+# Jinja2 environment that loads the files above from the 'templates'
+# directory of the pydocx.tests package.
+env = Environment(
+ loader=PackageLoader(
+ 'pydocx.tests',
+ 'templates',
+ ),
+)
+
+
+class DocxBuilder(object):
+    """
+    Builds the XML fragments used by the tests by rendering the Jinja2
+    templates registered in ``templates``.
+
+    Every public method is a classmethod that returns the rendered template
+    as a string, so fragments can be nested by passing one method's result
+    to another.
+    """
+
+    @classmethod
+    def _render(cls, template_name, **context):
+        # Render the template registered under ``template_name`` with the
+        # given context variables.
+        template = env.get_template(templates[template_name])
+        return template.render(**context)
+
+    @classmethod
+    def xml(cls, body):
+        """Render the 'main' template around ``body``."""
+        return cls._render('main', body=body)
+
+    @classmethod
+    def p_tag(
+            cls,
+            text,
+            style='style0',
+            jc=None,
+    ):
+        """
+        Render the 'p' template.
+
+        ``text`` may be a string (wrapped in a single run), a list of
+        already rendered runs, or anything else (a single empty run).
+        """
+        if isinstance(text, str):
+            run_tags = [cls.r_tag([cls.t_tag(text)])]
+        elif isinstance(text, list):
+            run_tags = text
+        else:
+            run_tags = [cls.r_tag([])]
+        return cls._render('p', run_tags=run_tags, style=style, jc=jc)
+
+    @classmethod
+    def linebreak(cls):
+        """Render the 'linebreak' template."""
+        return cls._render('linebreak')
+
+    @classmethod
+    def t_tag(cls, text):
+        """Render the 't' template for ``text``."""
+        return cls._render('t', text=text)
+
+    @classmethod
+    def r_tag(
+            cls,
+            elements,
+            rpr=None,
+    ):
+        """
+        Render the 'r' template around ``elements``.
+
+        When ``rpr`` is None, the result of ``rpr_tag()`` is used.
+        """
+        if rpr is None:
+            rpr = cls.rpr_tag()
+        return cls._render('r', elements=elements, rpr=rpr)
+
+    @classmethod
+    def rpr_tag(cls, inline_styles=None, *args, **kwargs):
+        """
+        Render the 'rpr' template for the ``inline_styles`` dict.
+
+        Raises AssertionError when a key is not one of the known styles.
+        Extra positional and keyword arguments are accepted and ignored.
+        """
+        if inline_styles is None:
+            inline_styles = {}
+        valid_styles = (
+            'b',
+            'i',
+            'u',
+            'caps',
+            'smallCaps',
+            'strike',
+            'dstrike',
+            'vanish',
+            'webHidden',
+            'vertAlign',
+        )
+        for key in inline_styles:
+            if key not in valid_styles:
+                raise AssertionError('%s is not a valid style' % key)
+        return cls._render('rpr', tags=inline_styles)
+
+    @classmethod
+    def hyperlink_tag(cls, r_id, run_tags):
+        """Render the 'hyperlink' template."""
+        return cls._render('hyperlink', r_id=r_id, run_tags=run_tags)
+
+    @classmethod
+    def insert_tag(cls, run_tags):
+        """Render the 'insert' template."""
+        return cls._render('insert', run_tags=run_tags)
+
+    @classmethod
+    def delete_tag(cls, deleted_texts):
+        """Render the 'delete' template."""
+        return cls._render('delete', deleted_texts=deleted_texts)
+
+    @classmethod
+    def smart_tag(cls, run_tags):
+        """Render the 'smartTag' template."""
+        return cls._render('smartTag', run_tags=run_tags)
+
+    @classmethod
+    def sdt_tag(cls, p_tag):
+        """Render the 'sdt' template."""
+        return cls._render('sdt', p_tag=p_tag)
+
+    @classmethod
+    def li(cls, text, ilvl, numId, bold=False):
+        """
+        Render the 'p' template as a list item.
+
+        ``text`` is either a string or a list of ``(run_text, run_bold)``
+        pairs; anything else raises AssertionError.
+        """
+        # NOTE(review): ``bold`` / ``run_bold`` are forwarded unchanged as
+        # r_tag's ``rpr`` argument -- confirm whether an rpr_tag should be
+        # built from them instead.
+        if isinstance(text, str):
+            run_tags = [cls.r_tag([cls.t_tag(text)], bold)]
+        elif isinstance(text, list):
+            # Each run is built from its own text, not from the list of
+            # runs collected so far.
+            run_tags = [
+                cls.r_tag([cls.t_tag(run_text)], run_bold)
+                for run_text, run_bold in text
+            ]
+        else:
+            raise AssertionError('text must be a string or a list')
+        return cls._render(
+            'p',
+            run_tags=run_tags,
+            is_list=True,
+            ilvl=ilvl,
+            numId=numId,
+        )
+
+    @classmethod
+    def table_cell(cls, paragraph, merge=False, merge_continue=False):
+        """Render the 'tc' template."""
+        return cls._render(
+            'tc',
+            paragraph=paragraph,
+            merge=merge,
+            merge_continue=merge_continue,
+        )
+
+    @classmethod
+    def table_row(cls, tcs):
+        """Render the 'tr' template around the cells ``tcs``."""
+        return cls._render('tr', table_cells=tcs)
+
+    @classmethod
+    def table(cls, trs):
+        """Render the 'table' template around the rows ``trs``."""
+        return cls._render('table', table_rows=trs)
+
+    @classmethod
+    def drawing(cls, r_id, height=None, width=None):
+        """
+        Render the 'drawing' template.
+
+        ``height`` and ``width``, when given, are multiplied by
+        EMUS_PER_PIXEL before being passed to the template.
+        """
+        if height is not None:
+            height = height * EMUS_PER_PIXEL
+        if width is not None:
+            width = width * EMUS_PER_PIXEL
+        return cls._render('drawing', r_id=r_id, height=height, width=width)
+
+    @classmethod
+    def pict(cls, r_id=None, height=None, width=None):
+        """Render the 'pict' template; the sizes are passed through as is."""
+        return cls._render('pict', r_id=r_id, height=height, width=width)
+
+    @classmethod
+    def sectPr_tag(cls, p_tag):
+        """Render the 'sectPr' template."""
+        return cls._render('sectPr', p_tag=p_tag)
+
+    @classmethod
+    def styles_xml(cls, style_tags):
+        """Render the 'styles' template."""
+        return cls._render('styles', style_tags=style_tags)
+
+    @classmethod
+    def style(cls, style_id, value):
+        """Render the 'style' template."""
+        return cls._render('style', style_id=style_id, value=value)
+
+    @classmethod
+    def numbering(cls, numbering_dict):
+        """Render the 'numbering' template."""
+        return cls._render('numbering', numbering_dict=numbering_dict)
+
+    @classmethod
+    def math(cls, rad=None, exp=None, deg=None):
+        """Render the 'math' template."""
+        return cls._render('math', rad=rad, exp=exp, deg=deg)
+
+    @classmethod
+    def exp(cls, run_text=None):
+        """Render the 'exp' template."""
+        return cls._render('exp', run_text=run_text)
+
+    @classmethod
+    def deg(cls, run_text=None):
+        """Render the 'deg' template."""
+        return cls._render('deg', run_text=run_text)
+
+    @classmethod
+    def rad(cls, exp, deg):
+        """Render the 'rad' template."""
+        return cls._render('rad', exp=exp, deg=deg)
+
+    @classmethod
+    def math_paragraph(cls, math_para):
+        """Render the 'math_para' template."""
+        return cls._render('math_para', math_para=math_para)
+
+    @classmethod
+    def matrix_row(cls, matrix_cells):
+        """Render the 'matrix_row' template."""
+        return cls._render('matrix_row', matrix_cells=matrix_cells)
+
+    @classmethod
+    def matrix(cls, matrix_rows):
+        """Render the 'matrix' template."""
+        return cls._render('matrix', matrix_rows=matrix_rows)
+
+    @classmethod
+    def math_tag(cls, math):
+        """Render the 'math_tag' template."""
+        return cls._render('math_tag', math=math)
diff --git a/pydocx/tests/templates/base.xml b/pydocx/tests/templates/base.xml
new file mode 100644
index 00000000..60027500
--- /dev/null
+++ b/pydocx/tests/templates/base.xml
@@ -0,0 +1,4 @@
+
+
+ {{ body }}
+
diff --git a/pydocx/tests/templates/deg.xml b/pydocx/tests/templates/deg.xml
new file mode 100644
index 00000000..25cf2ff5
--- /dev/null
+++ b/pydocx/tests/templates/deg.xml
@@ -0,0 +1,5 @@
+
+ {% if run_text %}
+ {{ run_text }}
+ {% endif %}
+
\ No newline at end of file
diff --git a/pydocx/tests/templates/drawing.xml b/pydocx/tests/templates/drawing.xml
new file mode 100644
index 00000000..dfd470b4
--- /dev/null
+++ b/pydocx/tests/templates/drawing.xml
@@ -0,0 +1,65 @@
+
+
+
+
+
+
+
+
+
+
+ 2397125
+
+
+ 0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/pydocx/tests/templates/exp.xml b/pydocx/tests/templates/exp.xml
new file mode 100644
index 00000000..e2f2b24e
--- /dev/null
+++ b/pydocx/tests/templates/exp.xml
@@ -0,0 +1,5 @@
+
+ {% if run_text %}
+ {{ run_text }}
+ {% endif %}
+
\ No newline at end of file
diff --git a/pydocx/tests/templates/hyperlink.xml b/pydocx/tests/templates/hyperlink.xml
new file mode 100644
index 00000000..83645948
--- /dev/null
+++ b/pydocx/tests/templates/hyperlink.xml
@@ -0,0 +1,5 @@
+
+ {% for run_tag in run_tags %}
+ {{ run_tag }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/insert.xml b/pydocx/tests/templates/insert.xml
new file mode 100644
index 00000000..afeb2691
--- /dev/null
+++ b/pydocx/tests/templates/insert.xml
@@ -0,0 +1,5 @@
+
+ {% for run_tag in run_tags %}
+ {{ run_tag }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/linebreak.xml b/pydocx/tests/templates/linebreak.xml
new file mode 100644
index 00000000..ab92e811
--- /dev/null
+++ b/pydocx/tests/templates/linebreak.xml
@@ -0,0 +1 @@
+
diff --git a/pydocx/tests/templates/math.xml b/pydocx/tests/templates/math.xml
new file mode 100644
index 00000000..4eb00e8f
--- /dev/null
+++ b/pydocx/tests/templates/math.xml
@@ -0,0 +1,13 @@
+
+
+ {% if rad %}
+ {{ rad }}
+ {% endif %}
+ {% if deg %}
+ {{ deg }}
+ {% endif %}
+ {% if exp %}
+ {{ exp }}
+ {% endif %}
+
+
\ No newline at end of file
diff --git a/pydocx/tests/templates/math_para.xml b/pydocx/tests/templates/math_para.xml
new file mode 100644
index 00000000..34f86b92
--- /dev/null
+++ b/pydocx/tests/templates/math_para.xml
@@ -0,0 +1,5 @@
+
+ {% if math_para %}
+ {{ math_para }}
+ {% endif %}
+
diff --git a/pydocx/tests/templates/math_tag.xml b/pydocx/tests/templates/math_tag.xml
new file mode 100644
index 00000000..56a27508
--- /dev/null
+++ b/pydocx/tests/templates/math_tag.xml
@@ -0,0 +1,5 @@
+
+
+ {{math}}
+
+
\ No newline at end of file
diff --git a/pydocx/tests/templates/matrix.xml b/pydocx/tests/templates/matrix.xml
new file mode 100644
index 00000000..801cd06d
--- /dev/null
+++ b/pydocx/tests/templates/matrix.xml
@@ -0,0 +1,5 @@
+
+ {% for matrix_row in matrix_rows %}
+ {{ matrix_row }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/matrix_row.xml b/pydocx/tests/templates/matrix_row.xml
new file mode 100644
index 00000000..f91690b6
--- /dev/null
+++ b/pydocx/tests/templates/matrix_row.xml
@@ -0,0 +1,5 @@
+
+ {% for matrix_cell in matrix_cells %}
+ {{ matrix_cell }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/numbering.xml b/pydocx/tests/templates/numbering.xml
new file mode 100644
index 00000000..4eaac3cc
--- /dev/null
+++ b/pydocx/tests/templates/numbering.xml
@@ -0,0 +1,23 @@
+
+
+ {% for num_id, ilvl_data in numbering_dict.items() %}
+
+ {% for ilvl, format in ilvl_data.items() %}
+
+
+
+
+
+
+
+
+
+ {% endfor %}
+
+ {% endfor %}
+ {% for num_id in numbering_dict %}
+
+
+
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/p.xml b/pydocx/tests/templates/p.xml
new file mode 100644
index 00000000..7a78a060
--- /dev/null
+++ b/pydocx/tests/templates/p.xml
@@ -0,0 +1,19 @@
+
+
+
+ {% if is_list %}
+
+ {% if ilvl != None %}
+
+ {% endif %}
+ {% if numId != None %}
+
+ {% endif %}
+
+ {% endif %}
+ {% if jc %}{% endif %}
+
+ {% for run_tag in run_tags %}
+ {{ run_tag }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/pict.xml b/pydocx/tests/templates/pict.xml
new file mode 100644
index 00000000..26f772a3
--- /dev/null
+++ b/pydocx/tests/templates/pict.xml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ {% if r_id %}{% endif %}
+
+
+
+
diff --git a/pydocx/tests/templates/r.xml b/pydocx/tests/templates/r.xml
new file mode 100644
index 00000000..2f28a66b
--- /dev/null
+++ b/pydocx/tests/templates/r.xml
@@ -0,0 +1,6 @@
+
+ {{ rpr }}
+ {% for element in elements %}
+ {{ element }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/rad.xml b/pydocx/tests/templates/rad.xml
new file mode 100644
index 00000000..a29c203d
--- /dev/null
+++ b/pydocx/tests/templates/rad.xml
@@ -0,0 +1,8 @@
+
+ {% if deg %}
+ {{ deg }}
+ {% endif %}
+ {% if exp %}
+ {{ exp }}
+ {% endif %}
+
\ No newline at end of file
diff --git a/pydocx/tests/templates/rpr.xml b/pydocx/tests/templates/rpr.xml
new file mode 100644
index 00000000..f49eb08b
--- /dev/null
+++ b/pydocx/tests/templates/rpr.xml
@@ -0,0 +1,5 @@
+
+ {% for tag, value in tags.items() %}
+
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/sdt.xml b/pydocx/tests/templates/sdt.xml
new file mode 100644
index 00000000..fe9a7e77
--- /dev/null
+++ b/pydocx/tests/templates/sdt.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{ p_tag }}
+
+
diff --git a/pydocx/tests/templates/sectPr.xml b/pydocx/tests/templates/sectPr.xml
new file mode 100644
index 00000000..16a12050
--- /dev/null
+++ b/pydocx/tests/templates/sectPr.xml
@@ -0,0 +1,3 @@
+
+ {{ p_tag }}
+
diff --git a/pydocx/tests/templates/smart_tag.xml b/pydocx/tests/templates/smart_tag.xml
new file mode 100644
index 00000000..e45ee5b9
--- /dev/null
+++ b/pydocx/tests/templates/smart_tag.xml
@@ -0,0 +1,5 @@
+
+ {% for run_tag in run_tags %}
+ {{ run_tag }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/style.xml b/pydocx/tests/templates/style.xml
new file mode 100644
index 00000000..5fa9f00f
--- /dev/null
+++ b/pydocx/tests/templates/style.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/pydocx/tests/templates/styles.xml b/pydocx/tests/templates/styles.xml
new file mode 100644
index 00000000..a30e752e
--- /dev/null
+++ b/pydocx/tests/templates/styles.xml
@@ -0,0 +1,6 @@
+
+
+ {% for style in style_tags %}
+ {{ style }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/t.xml b/pydocx/tests/templates/t.xml
new file mode 100644
index 00000000..81d562b7
--- /dev/null
+++ b/pydocx/tests/templates/t.xml
@@ -0,0 +1,5 @@
+{% if text %}
+{{ text }}
+{% else %}
+
+{% endif %}
diff --git a/pydocx/tests/templates/table.xml b/pydocx/tests/templates/table.xml
new file mode 100644
index 00000000..e47783b6
--- /dev/null
+++ b/pydocx/tests/templates/table.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {% for table_row in table_rows %}
+ {{ table_row }}
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/tc.xml b/pydocx/tests/templates/tc.xml
new file mode 100644
index 00000000..eff9ce0d
--- /dev/null
+++ b/pydocx/tests/templates/tc.xml
@@ -0,0 +1,28 @@
+
+
+
+ {% if merge_continue %}
+
+
+ {% endif %}
+ {% if merge %}
+
+
+ {% endif %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {% if paragraph %}
+ {{ paragraph }}
+ {% endif %}
+
diff --git a/pydocx/tests/templates/text_delete.xml b/pydocx/tests/templates/text_delete.xml
new file mode 100644
index 00000000..783b3ad3
--- /dev/null
+++ b/pydocx/tests/templates/text_delete.xml
@@ -0,0 +1,10 @@
+
+ {% for deleted_text in deleted_texts %}
+
+
+
+
+ {{ deleted_text }}
+
+ {% endfor %}
+
diff --git a/pydocx/tests/templates/tr.xml b/pydocx/tests/templates/tr.xml
new file mode 100644
index 00000000..6e2f6925
--- /dev/null
+++ b/pydocx/tests/templates/tr.xml
@@ -0,0 +1,8 @@
+
+
+
+
+ {% for table_cell in table_cells %}
+ {{ table_cell }}
+ {% endfor %}
+
diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py
new file mode 100644
index 00000000..d7b49b9c
--- /dev/null
+++ b/pydocx/tests/test_docx.py
@@ -0,0 +1,773 @@
+import base64
+
+from os import path
+
+from nose.plugins.skip import SkipTest
+
+from pydocx.tests import assert_html_equal, BASE_HTML
+from pydocx.parsers.Docx2Html import Docx2Html
+from pydocx.DocxParser import ZipFile
+
+
+def convert(path, *args, **kwargs):
+ return Docx2Html(path, *args, **kwargs).parsed
+
+
+def test_extract_html():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'simple.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+ Simple text
+
+
+ - one
+ - two
+ - three
+
+
+
+ Cell1 |
+ Cell2 |
+
+
+ Cell3 |
+ Cell4 |
+
+
+ ''')
+
+
+def test_nested_list():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'nested_lists.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+ - one
+ - two
+ - three
+
+ - AAA
+ - BBB
+ - CCC
+
+ - alpha
+
+
+
+
+ - four
+
+
+ - xxx
+
+ - yyy
+
+
+
+
+ ''')
+
+
+def test_simple_list():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'simple_lists.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+ - One
+
+
+ ''')
+
+
+def test_inline_tags():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'inline_tags.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % (
+ 'This sentence has some bold, '
+ 'some italics and some '
+ 'underline, '
+ 'as well as a hyperlink.
'
+ ))
+
+
+def test_all_configured_styles():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'all_configured_styles.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ aaa
+ bbb
+ ccc
+ ddd
+ eee
+ fff
+ ggg
+ hhh
+ iii
+ ''')
+
+
+def test_super_and_subscript():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'super_and_subscript.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAABBB
+ CCCDDD
+ ''')
+
+
+def test_unicode():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'greek_alphabet.docx',
+ )
+ actual_html = convert(file_path)
+ assert actual_html is not None
+ assert u'\u0391\u03b1' in actual_html
+
+
+def test_special_chars():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'special_chars.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ & < > link
''') # noqa
+
+
+def test_table_col_row_span():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'table_col_row_span.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+
+ AAA |
+
+
+ BBB |
+ CCC |
+
+
+ DDD |
+
+
+
+ EEE
+ |
+ FFF |
+
+
+
+ GGG
+ |
+
+
+
+
+ 1 |
+ 2 |
+ 3 |
+ 4 |
+
+
+ 5 |
+ 6 |
+ 7 |
+
+
+ 8 |
+ 9 |
+
+
+ 10 |
+ 11 |
+ 12 |
+ 13 |
+
+
+ ''')
+
+
+def test_nested_table_rowspan():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'nested_table_rowspan.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+
+ AAA |
+
+
+ BBB |
+
+
+
+ CCC |
+ DDD |
+
+
+ EEE |
+
+
+ |
+
+
+ ''')
+
+
+def test_nested_tables():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'nested_tables.docx',
+ )
+ actual_html = convert(file_path)
+ # TODO: Find out why the br tag is there.
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+
+ AAA |
+ BBB |
+
+
+ CCC |
+
+
+
+ DDD |
+ EEE |
+
+
+ FFF |
+ GGG |
+
+
+ |
+
+
+ ''')
+
+
+def test_list_in_table():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'list_in_table.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+
+
+
+ - AAA
+ - BBB
+ - CCC
+
+ |
+
+
+ ''')
+
+
+def test_tables_in_lists():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'tables_in_lists.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+ - AAA
+ - BBB
+
+
+ CCC |
+ DDD |
+
+
+ EEE |
+ FFF |
+
+
+
+ - GGG
+
+ ''')
+
+
+def test_track_changes_on():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'track_changes_on.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ This was some content.
+ ''')
+
+
+def test_headers():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'headers.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ This is an H1
+ This is an H2
+ This is an H3
+ This is an H4
+ This is an H5
+ This is an H6
+ This is an H7
+ This is an H8
+ This is an H9
+ This is an H10
+ ''')
+
+
+def test_split_headers():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'split_header.docx',
+ )
+
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
BBB
CCC
+ ''')
+
+
+def get_image_data(docx_file_path, image_name):
+ """
+ Return base 64 encoded data for the image_name that is stored in the
+ docx_file_path.
+ """
+ with ZipFile(docx_file_path) as f:
+ images = [
+ e for e in f.infolist()
+ if e.filename == 'word/media/%s' % image_name
+ ]
+ if not images:
+ raise AssertionError('%s not in %s' % (image_name, docx_file_path))
+ data = f.read(images[0].filename)
+ return base64.b64encode(data)
+
+
+def test_has_image():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'has_image.docx',
+ )
+
+ actual_html = convert(file_path)
+ image_data = get_image_data(file_path, 'image1.gif')
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+ AAA
+
+
+ ''' % image_data)
+
+
+def test_local_dpi():
+ # The image in this file does not have a set height or width; show that the
+ # HTML is still generated without them.
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'localDpi.docx',
+ )
+ actual_html = convert(file_path)
+ image_data = get_image_data(file_path, 'image1.jpeg')
+ assert_html_equal(actual_html, BASE_HTML % '''
+ 
+ ''' % image_data)
+
+
+def test_has_image_using_image_handler():
+ raise SkipTest('This needs to be converted to an xml test')
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'has_image.docx',
+ )
+
+ def image_handler(*args, **kwargs):
+ return 'test'
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
+ ''')
+
+
+def test_headers_with_full_line_styles():
+ raise SkipTest('This test is not yet passing')
+ # Show that if a natural header is completely bold/italic, the
+ # bold/italics will get stripped out.
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'headers_with_full_line_styles.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
+ BBB
+ CCC
+ ''')
+
+
+def test_convert_p_to_h():
+ raise SkipTest('This test is not yet passing')
+ # Show when it is correct to convert a p tag to an h tag based on
+ # bold/italics
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'convert_p_to_h.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
+ BBB
+ CCC
+
+ - DDD
+ - EEE
+ - FFF
+
+
+
+ GGG |
+ HHH |
+
+
+ III |
+ JJJ |
+
+
+ ''')
+
+
+def test_fake_headings_by_length():
+ raise SkipTest('This test is not yet passing')
+ # Show that converting p tags to h tags has a length limit. If the p tag is
+ # supposed to be converted to an h tag but has more than seven words in the
+ # paragraph, do not convert it.
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'fake_headings_by_length.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ Heading.
+ Still a heading.
+
+ This is not a heading because it is too many words.
+
+ ''')
+
+
+def test_shift_enter():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'shift_enter.docx',
+ )
+
+ # Test just the conversion, without clean_html, to make sure the first
+ # break tag is present.
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
BBB
+ CCC
+
+ - DDD
EEE
+ - FFF
+
+
+
+ GGG HHH |
+ III JJJ |
+
+
+ KKK |
+ LLL |
+
+
+ ''')
+
+
+def test_lists_with_styles():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'lists_with_styles.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+ - AAA
+ - BBB
+
+ - CCC
+ - DDD
+
+ - EEE
+
+ - FFF
+
+
+
+
+
+
+
+ ''')
+
+
+def test_list_to_header():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'list_to_header.docx',
+ )
+ actual_html = convert(file_path, convert_root_level_upper_roman=True)
+ # It should be noted that list item `GGG` is upper roman in the word
+ # document to show that only top level upper romans get converted.
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
+
+ - BBB
+
+ CCC
+
+ - DDD
+
+ EEE
+
+ - FFF
+
+ - GGG
+
+
+
+ ''')
+
+
+def test_has_title():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'has_title.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ Title
+ Text
+ ''')
+
+
+def test_upper_alpha_all_bold():
+ raise SkipTest('This test is not yet passing')
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'upper_alpha_all_bold.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+ AAA
+ BBB
+ CCC
+ ''')
+
+
+def test_simple_table():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'simple_table.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+
+
+ Cell1
+ Cell3
+ |
+ Cell2
+ And I am writing in the table
+ |
+
+
+ Cell4 |
+
+
+ ''')
+
+
+def test_justification():
+ file_path = path.join(
+ path.abspath(path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'justification.docx',
+ )
+ actual_html = convert(file_path)
+ assert_html_equal(actual_html, BASE_HTML % '''
+
+
Center Justified
+
+
+
Right justified
+
+
+
+ Right justified and pushed in from right
+
+
+
+
+ Center justified and pushed in from left and it is
+ great and it is the coolest thing of all time and I like it and
+ I think it is cool
+
+
+
+
+ Left justified and pushed in from left
+
+
+ ''')
+
+
+def _converter(*args, **kwargs):
+ # Having a converter that does nothing is the same as if abiword fails to
+ # convert.
+ pass
+
+
+#def test_converter_broken():
+# file_path = 'test.doc'
+# assert_raises(
+# ConversionFailed,
+# lambda: convert(file_path, converter=_converter),
+# )
+
+
+def test_fall_back():
+ raise SkipTest('This test is not yet passing')
+ file_path = 'test.doc'
+
+ def fall_back(*args, **kwargs):
+ return 'success'
+ html = convert(file_path, fall_back=fall_back, converter=_converter)
+ assert html == 'success'
+
+
+#@mock.patch('docx2html.core.read_html_file')
+#@mock.patch('docx2html.core.get_zip_file_handler')
+#def test_html_files(patch_zip_handler, patch_read):
+def test_html_files():
+ raise SkipTest('This test is not yet passing')
+
+ def raise_assertion(*args, **kwargs):
+ raise AssertionError('Should not have called get_zip_file_handler')
+ #patch_zip_handler.side_effect = raise_assertion
+
+ def return_text(*args, **kwargs):
+ return 'test'
+ #patch_read.side_effect = return_text
+
+ # Try with an html file
+ file_path = 'test.html'
+
+ html = convert(file_path)
+ assert html == 'test'
+
+ # Try again with an htm file.
+ file_path = 'test.htm'
+
+ html = convert(file_path)
+ assert html == 'test'
diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py
new file mode 100644
index 00000000..9c947e53
--- /dev/null
+++ b/pydocx/tests/test_xml.py
@@ -0,0 +1,1551 @@
+# -*- coding: utf-8 -*-
+import os
+import time
+
+from nose.plugins.skip import SkipTest
+
+from pydocx.tests.document_builder import DocxBuilder as DXB
+from pydocx.tests import (
+ XMLDocx2Html,
+ _TranslationTestCase,
+)
+from pydocx.utils import parse_xml_from_string, find_all
+
+
+class BoldTestCase(_TranslationTestCase):
+ expected_output = """
+ AAA
+ BBB
+ CCC
+ """
+ latex_expected_output = r'''
+ \textbf{AAA}'''\
+ + "\n" + '''BBB''' + "\n" + 'CCC'
+
+ def get_xml(self):
+ tags = [
+ DXB.p_tag(
+ [
+ DXB.r_tag(
+ [DXB.t_tag('AAA')],
+ rpr=DXB.rpr_tag({'b': None}),
+ ),
+ ],
+ ),
+ DXB.p_tag(
+ [
+ DXB.r_tag(
+ [DXB.t_tag('BBB')],
+ rpr=DXB.rpr_tag({'b': 'false'}),
+ ),
+ ],
+ ),
+ DXB.p_tag(
+ [
+ DXB.r_tag(
+ [DXB.t_tag('CCC')],
+ rpr=DXB.rpr_tag({'b': '0'}),
+ ),
+ ],
+ ),
+ ]
+
+ body = ''
+ for tag in tags:
+ body += tag
+ xml = DXB.xml(body)
+ return xml
+
+
+class HyperlinkVanillaTestCase(_TranslationTestCase):
+
+ relationship_dict = {
+ 'rId0': 'www.google.com',
+ }
+
+ expected_output = '''
+ link.
+ '''
+
+ latex_expected_output = r'''
+ \href{www.google.com}{link}.
+ '''
+
+ def get_xml(self):
+ run_tags = []
+ run_tags.append(DXB.r_tag([DXB.t_tag('link')]))
+ run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)]
+ run_tags.append(DXB.r_tag([DXB.t_tag('.')]))
+ body = DXB.p_tag(run_tags)
+ xml = DXB.xml(body)
+ return xml
+
+
+class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase):
+ relationship_dict = {
+ 'rId0': 'www.google.com',
+ }
+
+ expected_output = '''
+ link.
+ '''
+
+ latex_expected_output = r'''
+ \href{www.google.com}{link}.
+ '''
+
+ def get_xml(self):
+ run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'link']
+ run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)]
+ run_tags.append(DXB.r_tag([DXB.t_tag('.')]))
+ body = DXB.p_tag(run_tags)
+ xml = DXB.xml(body)
+ return xml
+
+
+class HyperlinkNoTextTestCase(_TranslationTestCase):
+ relationship_dict = {
+ 'rId0': 'www.google.com',
+ }
+
+ expected_output = ''
+
+ latex_expected_output = ''
+
+ def get_xml(self):
+ run_tags = []
+ run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)]
+ body = DXB.p_tag(run_tags)
+ xml = DXB.xml(body)
+ return xml
+
+
+class HyperlinkNotInRelsDictTestCase(_TranslationTestCase):
+ relationship_dict = {
+ # 'rId0': 'www.google.com', missing
+ }
+
+ expected_output = 'link.
'
+
+ latex_expected_output = r'''
+ link.
+ '''
+
+ def get_xml(self):
+ run_tags = []
+ run_tags.append(DXB.r_tag([DXB.t_tag('link')]))
+ run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)]
+ run_tags.append(DXB.r_tag([DXB.t_tag('.')]))
+ body = DXB.p_tag(run_tags)
+ xml = DXB.xml(body)
+ return xml
+
+
+class HyperlinkWithBreakTestCase(_TranslationTestCase):
+ relationship_dict = {
+ 'rId0': 'www.google.com',
+ }
+
+ expected_output = 'link
'
+
+ latex_expected_output = r'''
+ \href{www.google.com}{link\\}
+ '''
+
+ def get_xml(self):
+ run_tags = []
+ run_tags.append(DXB.r_tag([DXB.t_tag('link')]))
+ run_tags.append(DXB.r_tag([DXB.linebreak()]))
+ run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)]
+ body = DXB.p_tag(run_tags)
+ xml = DXB.xml(body)
+ return xml
+
+
+class ImageLocal(_TranslationTestCase):
+ relationship_dict = {
+ 'rId0': 'media/image1.jpeg',
+ 'rId1': 'media/image2.jpeg',
+ }
+ expected_output = '''
+ 
+ 
+ '''
+
+ latex_expected_output = r'''
+ \includegraphics {word/media/image1.jpeg}
+ ''' + '\n' + '''
+ \includegraphics {word/media/image2.jpeg}
+ '''
+
+ def get_xml(self):
+ drawing = DXB.drawing(height=None, width=None, r_id='rId0')
+ pict = DXB.pict(height=None, width=None, r_id='rId1')
+ tags = [
+ drawing,
+ pict,
+ ]
+ body = ''
+ for el in tags:
+ body += el
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class ImageTestCase(_TranslationTestCase):
+ relationship_dict = {
+ 'rId0': 'media/image1.jpeg',
+ 'rId1': 'media/image2.jpeg',
+ }
+ expected_output = '''
+
+
+
+
+
+
+ '''
+
+ latex_expected_output = r'''
+ \includegraphics[height=20pxpt, width=30.0pt]{word/media/image1.jpeg}
+ ''' + '\n' + '''
+ \includegraphics[height=21ptpt, width=41pt]{word/media/image2.jpeg}
+ '''
+
+ def get_xml(self):
+ drawing = DXB.drawing(height=20, width=40, r_id='rId0')
+ pict = DXB.pict(height=21, width=41, r_id='rId1')
+ tags = [
+ drawing,
+ pict,
+ ]
+ body = ''
+ for el in tags:
+ body += el
+
+ xml = DXB.xml(body)
+ return xml
+
+ def test_get_image_id(self):
+ parser = XMLDocx2Html(
+ document_xml=self.get_xml(),
+ rels_dict=self.relationship_dict,
+ )
+ tree = parse_xml_from_string(self.get_xml())
+ els = []
+ els.extend(find_all(tree, 'drawing'))
+ els.extend(find_all(tree, 'pict'))
+ image_ids = []
+ for el in els:
+ image_ids.append(parser._get_image_id(el))
+ expected = [
+ 'rId0',
+ 'rId1',
+ ]
+ self.assertEqual(
+ set(image_ids),
+ set(expected),
+ )
+
+ def test_get_image_sizes(self):
+ parser = XMLDocx2Html(
+ document_xml=self.get_xml(),
+ rels_dict=self.relationship_dict,
+ )
+ tree = parse_xml_from_string(self.get_xml())
+ els = []
+ els.extend(find_all(tree, 'drawing'))
+ els.extend(find_all(tree, 'pict'))
+ image_ids = []
+ for el in els:
+ image_ids.append(parser._get_image_size(el))
+ expected = [
+ ('40px', '20px'),
+ ('41pt', '21pt'),
+ ]
+ self.assertEqual(
+ set(image_ids),
+ set(expected),
+ )
+
+
+class ImageNotInRelsDictTestCase(_TranslationTestCase):
+ relationship_dict = {
+ # 'rId0': 'media/image1.jpeg',
+ }
+ expected_output = ''
+
+ latex_expected_output = ''
+
+ def get_xml(self):
+ drawing = DXB.drawing(height=20, width=40, r_id='rId0')
+ body = drawing
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class ImageNoSizeTestCase(_TranslationTestCase):
+ relationship_dict = {
+ 'rId0': os.path.join(
+ os.path.abspath(os.path.dirname(__file__)),
+ '..',
+ 'fixtures',
+ 'bullet_go_gray.png',
+ )
+ }
+ image_sizes = {
+ 'rId0': (0, 0),
+ }
+ expected_output = '''
+
+
+
+
+
+ ''' % relationship_dict['rId0']
+
+ latex_expected_output = r'\includegraphics{%s}' % relationship_dict['rId0']
+
+ @staticmethod
+ def image_handler(image_id, relationship_dict):
+ return relationship_dict.get(image_id)
+
+ def get_xml(self):
+ raise SkipTest(
+ 'Since we are not using PIL, we do not need this test yet.',
+ )
+ drawing = DXB.drawing('rId0')
+ tags = [
+ drawing,
+ ]
+ body = ''
+ for el in tags:
+ body += el
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class TableTag(_TranslationTestCase):
+ expected_output = '''
+
+
+ AAA |
+ BBB |
+
+
+ CCC |
+ DDD |
+
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{tabular}{ll}
+ {AAA} & {BBB} \\
+ {CCC} & {DDD} \\
+ \end{tabular}
+ '''
+
+ def get_xml(self):
+ cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA'))
+ cell2 = DXB.table_cell(paragraph=DXB.p_tag('CCC'))
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB'))
+ cell4 = DXB.table_cell(paragraph=DXB.p_tag('DDD'))
+ rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])]
+ table = DXB.table(rows)
+ body = table
+ xml = DXB.xml(body)
+ return xml
+
+
+class RowSpanTestCase(_TranslationTestCase):
+
+ expected_output = '''
+
+
+ AAA |
+ BBB |
+
+
+ CCC |
+
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{tabular}{ll}
+ \multirow{2}{*}{AAA} & {BBB} \\
+ & {CCC} \\
+ \end{tabular}
+ '''
+
+ def get_xml(self):
+ cell1 = DXB.table_cell(
+ paragraph=DXB.p_tag('AAA'), merge=True, merge_continue=False)
+ cell2 = DXB.table_cell(
+ paragraph=DXB.p_tag(None), merge=False, merge_continue=True)
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB'))
+ cell4 = DXB.table_cell(paragraph=DXB.p_tag('CCC'))
+ rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])]
+ table = DXB.table(rows)
+ body = table
+ xml = DXB.xml(body)
+ return xml
+
+
+class NestedTableTag(_TranslationTestCase):
+ expected_output = '''
+
+
+ AAA |
+ BBB |
+
+
+ CCC |
+
+
+
+ DDD |
+ EEE |
+
+
+ FFF |
+ GGG |
+
+
+ |
+
+
+ '''
+
+ latex_expected_output = r'''\begin{tabular}{ll}
+ {AAA} & {BBB} \\
+ {CCC} & {
+ \begin{tabular}{ll}
+ {DDD} & {EEE} \\
+ {FFF} & {GGG} \\
+ \end{tabular}
+ } \\
+ \end{tabular}'''
+
+ def get_xml(self):
+ cell1 = DXB.table_cell(paragraph=DXB.p_tag('DDD'))
+ cell2 = DXB.table_cell(paragraph=DXB.p_tag('FFF'))
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('EEE'))
+ cell4 = DXB.table_cell(paragraph=DXB.p_tag('GGG'))
+ rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])]
+ nested_table = DXB.table(rows)
+ cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA'))
+ cell2 = DXB.table_cell(paragraph=DXB.p_tag('CCC'))
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB'))
+ cell4 = DXB.table_cell(nested_table)
+ rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])]
+ table = DXB.table(rows)
+ body = table
+ xml = DXB.xml(body)
+ return xml
+
+
+class TableWithInvalidTag(_TranslationTestCase):
+ expected_output = '''
+
+
+ AAA |
+ BBB |
+
+
+ |
+ DDD |
+
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{tabular}{ l l }
+ {AAA} & {BBB} \\
+ {} & {DDD} \\
+ \end{tabular}
+ '''
+
+ def get_xml(self):
+ cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA'))
+ cell2 = DXB.table_cell('CCC')
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB'))
+ cell4 = DXB.table_cell(paragraph=DXB.p_tag('DDD'))
+ rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])]
+ table = DXB.table(rows)
+ body = table
+ xml = DXB.xml(body)
+ return xml
+
+
+class TableWithListAndParagraph(_TranslationTestCase):
+ expected_output = '''
+
+
+
+
+ - AAA
+ - BBB
+
+ CCC
+ DDD
+ |
+
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{tabular}{p{3cm}}
+ \parbox{20cm}{\begin{enumerate} \item AAA
+ \item BBB
+ \end{enumerate}CCC\\DDD} \\
+ \end{tabular}'''
+
+ def get_xml(self):
+ li_text = [
+ ('AAA', 0, 1),
+ ('BBB', 0, 1),
+ ]
+ lis = ''
+ for text, ilvl, numId in li_text:
+ lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+ els = [
+ lis,
+ DXB.p_tag('CCC'),
+ DXB.p_tag('DDD'),
+ ]
+ td = ''
+ for el in els:
+ td += el
+ cell1 = DXB.table_cell(td)
+ row = DXB.table_row([cell1])
+ table = DXB.table([row])
+ body = table
+ xml = DXB.xml(body)
+ return xml
+
+
+class SimpleListTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+ - BBB
+ - CCC
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \item BBB
+ \item CCC
+ \end {enumerate}
+ '''
+ # Ensure it's not failing somewhere and falling back to decimal
+ numbering_dict = {
+ '1': {
+ '0': 'lowerLetter',
+ }
+ }
+
+ def get_xml(self):
+ li_text = [
+ ('AAA', 0, 1),
+ ('BBB', 0, 1),
+ ('CCC', 0, 1),
+ ]
+ lis = ''
+ for text, ilvl, numId in li_text:
+ lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+
+ xml = DXB.xml(lis)
+ return xml
+
+
+class SingleListItemTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+
+ '''
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \end {enumerate}
+ '''
+
+ # Ensure it's not failing somewhere and falling back to decimal
+ numbering_dict = {
+ '1': {
+ '0': 'lowerLetter',
+ }
+ }
+
+ def get_xml(self):
+ li_text = [
+ ('AAA', 0, 1),
+ ]
+ lis = ''
+ for text, ilvl, numId in li_text:
+ lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+
+ xml = DXB.xml(lis)
+ return xml
+
+
+class ListWithContinuationTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
BBB
+ - CCC
+
+
+ DDD |
+ EEE |
+
+
+ FFF |
+ GGG |
+
+
+
+ - HHH
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA \\ BBB
+ \item CCC
+ \begin{tabular} {ll}
+ {DDD} & {EEE} \\
+ {FFF} & {GGG} \\
+ \end{tabular}
+ \item HHH
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ cell1 = DXB.table_cell(paragraph=DXB.p_tag('DDD'))
+ cell2 = DXB.table_cell(paragraph=DXB.p_tag('FFF'))
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('EEE'))
+ cell4 = DXB.table_cell(paragraph=DXB.p_tag('GGG'))
+ rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])]
+ table = DXB.table(rows)
+ tags = [
+ DXB.li(text='AAA', ilvl=0, numId=1),
+ DXB.p_tag('BBB'),
+ DXB.li(text='CCC', ilvl=0, numId=1),
+ table,
+ DXB.li(text='HHH', ilvl=0, numId=1),
+ ]
+ body = ''
+ for el in tags:
+ body += el
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class ListWithMultipleContinuationTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+
+
+
+ - DDD
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \begin{tabular} {l}
+ {BBB}\\
+ \end{tabular}
+ \begin{tabular} {l}
+ {CCC}\\
+ \end{tabular}
+ \item DDD
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ cell = DXB.table_cell(paragraph=DXB.p_tag('BBB'))
+ row = DXB.table_row([cell])
+ table1 = DXB.table([row])
+ cell = DXB.table_cell(paragraph=DXB.p_tag('CCC'))
+ row = DXB.table_row([cell])
+ table2 = DXB.table([row])
+ tags = [
+ DXB.li(text='AAA', ilvl=0, numId=1),
+ table1,
+ table2,
+ DXB.li(text='DDD', ilvl=0, numId=1),
+ ]
+ body = ''
+ for el in tags:
+ body += el
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class MangledIlvlTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+
+
+ - BBB
+
+ - CCC
+
+
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \end{enumerate}
+ \begin{enumerate}
+ \item BBB
+ \begin{enumerate}
+ \item CCC
+ \end{enumerate}
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ li_text = [
+ ('AAA', 0, 2),
+ ('BBB', 1, 1),
+ ('CCC', 0, 1),
+ ]
+ lis = ''
+ for text, ilvl, numId in li_text:
+ lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+
+ xml = DXB.xml(lis)
+ return xml
+
+
+class SeperateListsTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+
+
+ - BBB
+
+
+ - CCC
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \end{enumerate}
+ \begin{enumerate}
+ \item BBB
+ \end{enumerate}
+ \begin{enumerate}
+ \item CCC
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ li_text = [
+ ('AAA', 0, 2),
+ # Because AAA and CCC are part of the same list (same list id)
+ # and BBB is different, these need to be split into three
+ # lists (or lose everything from BBB onward).
+ ('BBB', 0, 1),
+ ('CCC', 0, 2),
+ ]
+ lis = ''
+ for text, ilvl, numId in li_text:
+ lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+
+ xml = DXB.xml(lis)
+ return xml
+
+
+class InvalidIlvlOrderTestCase(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+
+ - BBB
+
+ - CCC
+
+
+
+
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \begin{enumerate}
+ \item BBB
+ \begin{enumerate}
+ \item CCC
+ \end {enumerate}
+ \end{enumerate}
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ tags = [
+ DXB.li(text='AAA', ilvl=1, numId=1),
+ DXB.li(text='BBB', ilvl=3, numId=1),
+ DXB.li(text='CCC', ilvl=2, numId=1),
+ ]
+ body = ''
+ for el in tags:
+ body += el
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class DeeplyNestedTableTestCase(_TranslationTestCase):
+ expected_output = ''
+ run_expected_output = False
+
+    def get_xml(self):
+        """Return document XML containing a table nested 50 levels deep."""
+        nested = DXB.p_tag('AAA')
+        for _ in range(50):
+            # Wrap the previous level in a one-cell, one-row table; feeding
+            # the result back in is what makes each pass add a level.
+            cell = DXB.table_cell(nested)
+            row = DXB.table_row([cell])
+            nested = DXB.table([row])
+        return DXB.xml(nested)
+
+ def test_performance(self):
+ with self.toggle_run_expected_output():
+ start_time = time.time()
+ try:
+ self.test_expected_output()
+ except AssertionError:
+ pass
+ end_time = time.time()
+ total_time = end_time - start_time
+ # This finishes in under a second on python 2.7
+ assert total_time < 3, total_time
+
+
+class NonStandardTextTagsTestCase(_TranslationTestCase):
+ expected_output = '''
+ insert
+ smarttag
+ '''
+
+ latex_expected_output = r'''
+ \added[id=, remark=]{insert} smarttag
+ '''
+
+ def get_xml(self):
+ run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'insert ']
+ insert_tag = DXB.insert_tag(run_tags)
+ run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'smarttag']
+ smart_tag = DXB.smart_tag(run_tags)
+
+ run_tags = [insert_tag, smart_tag]
+ body = DXB.p_tag(run_tags)
+ xml = DXB.xml(body)
+ return xml
+
+
+class RTagWithNoText(_TranslationTestCase):
+ expected_output = ''
+ latex_expected_output = ''
+
+ def get_xml(self):
+ p_tag = DXB.p_tag(None) # No text
+ run_tags = [p_tag]
+ # The bug is only present in a hyperlink
+ run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)]
+ body = DXB.p_tag(run_tags)
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class DeleteTagInList(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+ BBB
+
+ - CCC
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA \deleted[id=, remark=]{BBB}
+ \item CCC
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ delete_tags = DXB.delete_tag(['BBB'])
+ p_tag = DXB.p_tag([delete_tags])
+
+ body = DXB.li(text='AAA', ilvl=0, numId=0)
+ body += p_tag
+ body += DXB.li(text='CCC', ilvl=0, numId=0)
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class InsertTagInList(_TranslationTestCase):
+ expected_output = '''
+
+ - AAABBB
+
+ - CCC
+
+ '''
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA\added[id=,remark=]{BBB}
+ \item CCC
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB']
+ insert_tags = DXB.insert_tag(run_tags)
+ p_tag = DXB.p_tag([insert_tags])
+
+ body = DXB.li(text='AAA', ilvl=0, numId=0)
+ body += p_tag
+ body += DXB.li(text='CCC', ilvl=0, numId=0)
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class SmartTagInList(_TranslationTestCase):
+ expected_output = '''
+
+ - AAABBB
+
+ - CCC
+
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAABBB
+ \item CCC
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB']
+ smart_tag = DXB.smart_tag(run_tags)
+ p_tag = DXB.p_tag([smart_tag])
+
+ body = DXB.li(text='AAA', ilvl=0, numId=0)
+ body += p_tag
+ body += DXB.li(text='CCC', ilvl=0, numId=0)
+
+ xml = DXB.xml(body)
+ return xml
+
+
+class SingleListItem(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+
+ BBB
+ '''
+
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA
+ \end{enumerate}''' + '\n' + 'BBB'
+
+ numbering_dict = {
+ '1': {
+ '0': 'lowerLetter',
+ }
+ }
+
+ def get_xml(self):
+ li = DXB.li(text='AAA', ilvl=0, numId=1)
+ p_tags = [
+ DXB.p_tag('BBB'),
+ ]
+ body = li
+ for p_tag in p_tags:
+ body += p_tag
+ xml = DXB.xml(body)
+ return xml
+
+
+class SimpleTableTest(_TranslationTestCase):
+ expected_output = '''
+
+
+ Blank |
+ Column 1 |
+ Column 2 |
+
+
+ Row 1 |
+ First |
+ Second |
+
+
+ Row 2 |
+ Third |
+ Fourth |
+
+
'''
+
+ latex_expected_output = r'''
+ \begin{tabular} { lll }
+ {Blank} & {Column 1} & {Column 2} \\
+ {Row 1} & {First} & {Second} \\
+ {Row 2} & {Third} & {Fourth} \\
+ \end{tabular}'''
+
+ def get_xml(self):
+ cell1 = DXB.table_cell(paragraph=DXB.p_tag('Blank'))
+ cell2 = DXB.table_cell(paragraph=DXB.p_tag('Row 1'))
+ cell3 = DXB.table_cell(paragraph=DXB.p_tag('Row 2'))
+ cell4 = DXB.table_cell(paragraph=DXB.p_tag('Column 1'))
+ cell5 = DXB.table_cell(paragraph=DXB.p_tag('First'))
+ cell6 = DXB.table_cell(paragraph=DXB.p_tag('Third'))
+ cell7 = DXB.table_cell(paragraph=DXB.p_tag('Column 2'))
+ cell8 = DXB.table_cell(paragraph=DXB.p_tag('Second'))
+ cell9 = DXB.table_cell(paragraph=DXB.p_tag('Fourth'))
+ rows = [DXB.table_row([cell1, cell4, cell7]),
+ DXB.table_row([cell2, cell5, cell8]),
+ DXB.table_row([cell3, cell6, cell9])]
+ table = DXB.table(rows)
+ body = table
+ xml = DXB.xml(body)
+ return xml
+
+
+class MissingIlvl(_TranslationTestCase):
+ expected_output = '''
+
+ - AAA
+ BBB
+
+ - CCC
+
+ '''
+ latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAA \\
+ BBB
+ \item CCC
+ \end{enumerate}
+ '''
+
+ def get_xml(self):
+ li_text = [
+ ('AAA', 0, 1),
+ ('BBB', None, 1), # Because why not.
+ ('CCC', 0, 1),
+ ]
+ lis = ''
+ for text, ilvl, numId in li_text:
+ lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+ body = lis
+ xml = DXB.xml(body)
+ return xml
+
+
+class SameNumIdInTable(_TranslationTestCase):
+    # A root-level list and a list inside a table cell share numId 1.
+    expected_output = '''
+
+ - AAA
+
+
+ - CCC
+
+ '''
+    latex_expected_output = r'''
+ \begin{enumerate} \item AAA
+ \begin{tabular}{p{3cm}}
+ {\begin{enumerate} \item BBB
+ \end{enumerate}} \\
+ \end{tabular}
+ \item CCC
+ \end{enumerate}
+ '''
+
+    # Ensure it is not failing somewhere and falling back to decimal.
+    numbering_dict = {'1': {'0': 'lowerLetter'}}
+
+    def get_xml(self):
+        # The nested list lives in the only cell of a one-row table.
+        nested_items = ''.join(
+            DXB.li(text=text, ilvl=ilvl, numId=numId)
+            for text, ilvl, numId in [('BBB', 0, 1)]
+        )
+        row = DXB.table_row([DXB.table_cell(nested_items)])
+        table = DXB.table([row])
+        # Root level: list item, the table, then another list item.
+        body = ''.join([
+            DXB.li(text='AAA', ilvl=0, numId=1),
+            table,
+            DXB.li(text='CCC', ilvl=0, numId=1),
+        ])
+        return DXB.xml(body)
+
+
+class SDTTestCase(_TranslationTestCase):
+    # An `sdt` tag wrapping a paragraph sits between two list items.
+    expected_output = '''
+
+ - AAABBB
+
+ - CCC
+
+ '''
+    latex_expected_output = r'''
+ \begin{enumerate}
+ \item AAABBB
+ \item CCC
+ \end{enumerate}
+ '''
+
+    def get_xml(self):
+        parts = [
+            DXB.li(text='AAA', ilvl=0, numId=0),
+            DXB.sdt_tag(p_tag=DXB.p_tag(text='BBB')),
+            DXB.li(text='CCC', ilvl=0, numId=0),
+        ]
+        return DXB.xml(''.join(parts))
+
+
+class HeadingTestCase(_TranslationTestCase):
+    # Paragraphs styled as headings 1-6 plus one with an unknown style.
+    expected_output = '''
+ AAA
+ BBB
+ CCC
+ DDD
+ EEE
+ GGG
+ HHH
+ '''
+
+    latex_expected_output = r'''\section{AAA}
+ ''' + '\n' + '''
+ \subsection{BBB}
+ ''' + '\n' + '''
+ \paragraph{CCC}
+ ''' + '\n' + '''
+ \subparagraph{DDD}
+ ''' + '\n' + '''
+ EEE
+ ''' + '\n' + '''
+ GGG
+ ''' + '\n' + '''
+ HHH
+ '''
+
+    # Style id -> style name.
+    styles_dict = {
+        'style0': 'heading 1',
+        'style1': 'heading 2',
+        'style2': 'heading 3',
+        'style3': 'heading 4',
+        'style4': 'heading 5',
+        'style5': 'heading 6',
+    }
+
+    def get_xml(self):
+        # (text, style id) per paragraph; 'garbage' is not in styles_dict.
+        styled_texts = [
+            ('AAA', 'style0'),
+            ('BBB', 'style1'),
+            ('CCC', 'style2'),
+            ('DDD', 'style3'),
+            ('EEE', 'style4'),
+            ('GGG', 'style5'),
+            ('HHH', 'garbage'),
+        ]
+        body = ''.join(
+            DXB.p_tag(text=text, style=style)
+            for text, style in styled_texts
+        )
+        return DXB.xml(body)
+
+
+class RomanNumeralToHeadingTestCase(_TranslationTestCase):
+    # Three lists whose level 0 is upperRoman, with the conversion of root
+    # level upperRoman items switched on.
+    convert_root_level_upper_roman = True
+    numbering_dict = {
+        '1': {'0': 'upperRoman', '1': 'decimal', '2': 'upperRoman'},
+        '2': {'0': 'upperRoman', '1': 'decimal', '2': 'upperRoman'},
+        '3': {'0': 'upperRoman', '1': 'decimal', '2': 'upperRoman'},
+    }
+    expected_output = '''
+ AAA
+
+ - BBB
+
+ CCC
+
+ - DDD
+
+ EEE
+
+ - FFF
+
+ - GGG
+
+
+
+ '''
+
+    latex_expected_output = r'''
+ \subsection{AAA}\begin{enumerate} \item BBB
+ \end{enumerate}\subsection{CCC}\begin{enumerate} \item DDD
+ \end{enumerate}\subsection{EEE}\begin{enumerate}
+ \item FFF\begin{enumerate} \item GGG
+ \end{enumerate}
+ \end{enumerate}'''
+
+    def get_xml(self):
+        # (text, ilvl, numId) for each list item, in document order.
+        items = [
+            ('AAA', 0, 1),
+            ('BBB', 1, 1),
+            ('CCC', 0, 2),
+            ('DDD', 1, 2),
+            ('EEE', 0, 3),
+            ('FFF', 1, 3),
+            ('GGG', 2, 3),
+        ]
+        body = ''.join(
+            DXB.li(text=text, ilvl=ilvl, numId=numId)
+            for text, ilvl, numId in items
+        )
+        return DXB.xml(body)
+
+
+class MultipleTTagsInRTag(_TranslationTestCase):
+    # One run holding three separate text tags.
+    expected_output = '''
+ ABC
+ '''
+    latex_expected_output = 'ABC'
+
+    def get_xml(self):
+        text_tags = [DXB.t_tag(letter) for letter in 'ABC']
+        run = DXB.r_tag(text_tags)
+        paragraph = DXB.p_tag([run], jc='start')
+        return DXB.xml(paragraph)
+
+
+class SuperAndSubScripts(_TranslationTestCase):
+    # Runs with `vertAlign` set to superscript and subscript.
+    expected_output = '''
+ AAABBB
+ CCCDDD
+ '''
+
+    latex_expected_output = r'''
+ AAA \textsuperscript{BBB}
+ ''' + '\n' + r'\textsubscript{CCC} DDD'
+
+    def get_xml(self):
+        def run(text, vert_align=None):
+            # A run with a single text tag, optionally vertically aligned.
+            text_tags = [DXB.t_tag(text)]
+            if vert_align is None:
+                return DXB.r_tag(text_tags)
+            return DXB.r_tag(
+                text_tags,
+                rpr=DXB.rpr_tag({'vertAlign': vert_align}),
+            )
+
+        first = DXB.p_tag([run('AAA'), run('BBB', 'superscript')])
+        second = DXB.p_tag([run('CCC', 'subscript'), run('DDD')])
+        return DXB.xml(''.join([first, second]))
+
+
+class AvaliableInlineTags(_TranslationTestCase):
+    # One paragraph per supported run property.
+    expected_output = '''
+ aaa
+ bbb
+ ccc
+ ddd
+ eee
+ fff
+ ggg
+ hhh
+ iii
+ jjj
+ '''
+
+    latex_expected_output = r'''\textbf {aaa}
+ \underline {bbb}
+ \emph {ccc}
+ \MakeUppercase{ddd}
+ \textsx{eee}
+ \sout{fff}
+ \sout{ggg}
+ \begin{comment}hhh\end{comment}
+ \begin{comment}iii\end{comment}
+ \textsuperscript{jjj}
+ '''
+
+    def get_xml(self):
+        # (text, run properties) for each paragraph, in document order.
+        runs = [
+            ('aaa', {'b': None}),
+            ('bbb', {'u': None}),
+            ('ccc', {'i': None}),
+            ('ddd', {'caps': None}),
+            ('eee', {'smallCaps': None}),
+            ('fff', {'strike': None}),
+            ('ggg', {'dstrike': None}),
+            ('hhh', {'vanish': None}),
+            ('iii', {'webHidden': None}),
+            ('jjj', {'vertAlign': 'superscript'}),
+        ]
+        paragraphs = [
+            DXB.p_tag([
+                DXB.r_tag([DXB.t_tag(text)], rpr=DXB.rpr_tag(properties)),
+            ])
+            for text, properties in runs
+        ]
+        return DXB.xml(''.join(paragraphs))
+
+
+class Math(_TranslationTestCase):
+    # A radical whose expression is the run "1+3".
+
+    expected_output = '''
+
+ '''
+    latex_expected_output = r'''
+ $\sqrt{1+3}$
+ '''
+
+    def get_xml(self):
+        degree = DXB.deg()
+        expression = DXB.exp(run_text=DXB.r_tag([DXB.t_tag('1+3')]))
+        radical = DXB.rad(deg=degree, exp=expression)
+        paragraph = DXB.math_paragraph(DXB.math(rad=radical))
+        return DXB.xml(paragraph)
+
+
+class UnicodeTestCase(_TranslationTestCase):
+    # The document is handed to the parser as UTF-8 encoded bytes.
+    # NOTE(review): the text passed to `t_tag` below is empty while
+    # `expected_output` contains U+10001F; presumably a character reference
+    # was lost from this literal. Confirm against the upstream test.
+    expected_output = u"""
+ \U0010001f
+ """
+    latex_expected_output = '''
+
+ '''
+
+    def get_xml(self):
+        paragraphs = [
+            DXB.p_tag([DXB.r_tag([DXB.t_tag(r'')])]),
+        ]
+        document = DXB.xml(''.join(paragraphs))
+        return document.encode('utf-8')
+
+
+class MatrixTestCase(_TranslationTestCase):
+    # A 2x2 matrix holding the digits 1-4, row by row.
+
+    expected_output = '''
+
+ '''
+
+    latex_expected_output = r'''
+ $\begin{matrix} 1&2
+ \\3&4\\ \end{matrix}$
+ '''
+
+    def get_xml(self):
+        cells = [
+            DXB.exp(run_text=DXB.r_tag([DXB.t_tag(digit)]))
+            for digit in '1234'
+        ]
+        rows = [
+            DXB.matrix_row(cells[:2]),
+            DXB.matrix_row(cells[2:]),
+        ]
+        math = DXB.math(DXB.matrix(rows))
+        return DXB.xml(DXB.math_paragraph(math))
+
+
+class NoTextInTTagTestCase(_TranslationTestCase):
+    # A text tag built with `None` as its text.
+    expected_output = u"""
+ """
+    latex_expected_output = ''
+
+    def get_xml(self):
+        paragraphs = [
+            DXB.p_tag([DXB.r_tag([DXB.t_tag(None)])]),
+        ]
+        document = DXB.xml(''.join(paragraphs))
+        return document.encode('utf-8')
diff --git a/pydocx/utils.py b/pydocx/utils.py
new file mode 100644
index 00000000..29b9da54
--- /dev/null
+++ b/pydocx/utils.py
@@ -0,0 +1,471 @@
+import re
+
+from collections import defaultdict
+from xml.etree import cElementTree
+
+from pydocx.exceptions import MalformedDocxException
+
+
+# Heading level assigned to root-level upperRoman list items when they are
+# converted to headings.
+UPPER_ROMAN_TO_HEADING_VALUE = 'h2'
+
+# Tags whose presence below an element means the element has content.
+TAGS_CONTAINING_CONTENT = ('t', 'pict', 'drawing', 'delText', 'ins')
+
+# Tags considered when linking siblings with next/previous.
+TAGS_HOLDING_CONTENT_TAGS = ('p', 'tbl', 'sdt')
+
+
+def el_iter(el):
+    """
+    Return an iterable over `el` itself followed by all of its descendants.
+
+    `iter()` is preferred; `getiterator()` is its older spelling and walks
+    the same elements. The former fallback, `findall('.//*')`, left out `el`
+    itself, so callers saw a different set of elements depending on which
+    method the element offered. The last resort below therefore adds `el`
+    back in front of its descendants.
+    """
+    for method_name in ('iter', 'getiterator'):
+        walker = getattr(el, method_name, None)
+        if walker is not None:
+            return walker()
+    return [el] + list(el.findall('.//*'))
+
+
+def find_first(el, tag):
+    """
+    Return the first descendant of `el` named `tag`, or None if there is
+    no such descendant.
+    """
+    descendant_path = './/' + tag
+    return el.find(descendant_path)
+
+
+def find_all(el, tag):
+    """
+    Return every descendant of `el` named `tag`.
+    """
+    descendant_path = './/' + tag
+    return el.findall(descendant_path)
+
+
+def find_ancestor_with_tag(pre_processor, el, tag):
+    """
+    Return the nearest ancestor of `el` whose tag is `tag`, or None.
+
+    Ancestors are looked up through `pre_processor.parent`; `el` itself is
+    never returned.
+    """
+    ancestor = pre_processor.parent(el)
+    while ancestor is not None:
+        if ancestor.tag == tag:
+            return ancestor
+        ancestor = pre_processor.parent(ancestor)
+    return None
+
+
+def has_descendant_with_tag(el, tag):
+    """
+    Return True when `el` has at least one descendant named `tag`.
+    """
+    # `find` stops at the first match, so the whole tree is not collected.
+    first_match = el.find('.//' + tag)
+    return first_match is not None
+
+
+def _filter_children(element, tags):
+    """
+    Return the direct children of `element` whose tag is in `tags`, in
+    document order.
+    """
+    # Iterating an element yields its direct children. `getchildren()` is
+    # deprecated and no longer exists on newer ElementTree versions.
+    return [child for child in element if child.tag in tags]
+
+
+def remove_namespaces(document):
+    """
+    Parse `document` and return it re-serialized with the namespace part
+    removed from every tag and attribute name.
+
+    The output uses the encoding named in the XML declaration of `document`
+    and falls back to 'us-ascii' when there is none.
+
+    Raises `MalformedDocxException` when the document cannot be parsed.
+
+    >>> exception_raised = False
+    >>> try:
+    ...     remove_namespaces('junk')
+    ... except MalformedDocxException:
+    ...     exception_raised = True
+    >>> assert exception_raised
+    """
+    # `[^>]*?` keeps the search inside the XML declaration. A greedy `.*`
+    # would pick up the last `encoding="..."` found anywhere in the document.
+    encoding_regex = re.compile(r'<\?xml[^>]*?encoding=["\'](.+?)["\']')
+    encoding = 'us-ascii'
+    declaration = encoding_regex.match(document)
+    if declaration:
+        encoding = declaration.group(1)
+    try:
+        root = cElementTree.fromstring(document)
+    except SyntaxError:
+        raise MalformedDocxException('This document cannot be converted.')
+    for child in el_iter(root):
+        # Names look like `{namespace}name`. Taking the last piece also
+        # copes with names that carry no namespace at all.
+        child.tag = child.tag.split("}")[-1]
+        child.attrib = dict(
+            (name.split("}")[-1], value)
+            for name, value in child.attrib.items()
+        )
+    return cElementTree.tostring(root, encoding=encoding)
+
+
+def get_list_style(numbering_root, num_id, ilvl):
+    """
+    Return the `numFmt` value for list `num_id` at level `ilvl`, or None
+    when nothing matching is found under `numbering_root`.
+    """
+    # This is needed on both the custom lxml parser and the pydocx parser. So
+    # make it a function.
+    for num in find_all(numbering_root, 'num'):
+        if num.attrib['numId'] != num_id:
+            continue
+        # The `num` entry points at the abstract definition with the levels.
+        abstract_num_id = num.find('abstractNumId').attrib['val']
+        for abstract_num in find_all(numbering_root, 'abstractNum'):
+            if abstract_num.attrib['abstractNumId'] != abstract_num_id:
+                continue
+            for node in el_iter(abstract_num):
+                level = node.attrib.get('ilvl')
+                # Only nodes that declare a different level are skipped.
+                if level is not None and level != ilvl:
+                    continue
+                num_fmt = node.find('numFmt')
+                if num_fmt is not None:
+                    return num_fmt.attrib['val']
+    return None
+
+
+class NamespacedNumId(object):
+    """
+    A list `numId` paired with a table count.
+
+    Instances compare through their text form `'<num_id>:<num_tables>'`, so
+    two instances are equal when both parts match.
+    """
+
+    def __init__(self, num_id, num_tables, *args, **kwargs):
+        self._num_id = num_id
+        self._num_tables = num_tables
+
+    def __unicode__(self, *args, **kwargs):
+        return '%s:%d' % (
+            self._num_id,
+            self._num_tables,
+        )
+
+    def __repr__(self, *args, **kwargs):
+        return self.__unicode__(*args, **kwargs)
+
+    def __eq__(self, other):
+        if other is None:
+            return False
+        return repr(self) == repr(other)
+
+    def __ne__(self, other):
+        # NOTE(review): both `==` and `!=` answer False for None. This is
+        # kept as is because callers may rely on it; confirm before changing.
+        if other is None:
+            return False
+        return repr(self) != repr(other)
+
+    def __hash__(self):
+        # Hash the same text that __eq__ compares, so equal ids collapse in
+        # sets and dict keys. Without this, defining __eq__ alone leaves
+        # instances unhashable on Python 3.
+        return hash(repr(self))
+
+    @property
+    def num_id(self):
+        # The raw numId, without the table count.
+        return self._num_id
+
+
+class PydocxPrePorcessor(object):
+    """
+    Collect per-element facts about a parsed document tree.
+
+    `perform_pre_processing` walks the tree and stores what it learns in
+    `meta_data`, a mapping of element -> dict of facts. The public methods
+    other than `perform_pre_processing` are lookups into that mapping.
+    """
+
+    def __init__(
+            self,
+            convert_root_level_upper_roman=False,
+            styles_dict=None,
+            numbering_root=None,
+            *args, **kwargs):
+        # element -> {'parent': ..., 'is_list_item': ..., ...}
+        self.meta_data = defaultdict(dict)
+        self.convert_root_level_upper_roman = convert_root_level_upper_roman
+        self.styles_dict = styles_dict
+        self.numbering_root = numbering_root
+
+    def perform_pre_processing(self, root, *args, **kwargs):
+        """
+        Populate `meta_data` for the elements beneath `root`.
+
+        Parents are recorded first because the later passes look up
+        ancestors through `parent`.
+        """
+        self._add_parent(root)
+        self._set_list_attributes(root)
+        self._set_table_attributes(root)
+        self._set_matrix_attributes(root)
+        self._set_is_in_table(root)
+
+        body = find_first(root, 'body')
+        p_elements = list(find_all(body, 'p'))
+        list_elements = [
+            child for child in p_elements
+            if self.is_list_item(child)
+        ]
+        # Find the first and last li elements
+        num_ids = set([self.num_id(i) for i in list_elements])
+        ilvls = set([self.ilvl(i) for i in list_elements])
+        self._set_first_list_item(num_ids, ilvls, list_elements)
+        self._set_last_list_item(num_ids, list_elements)
+
+        self._set_headers(p_elements)
+        self._convert_upper_roman(body)
+        self._set_next(body)
+
+    # Lookups. Each returns the recorded fact for `el`, or the shown default
+    # (None unless stated) when nothing was recorded.
+
+    def is_first_list_item(self, el):
+        return self.meta_data[el].get('is_first_list_item', False)
+
+    def is_last_list_item_in_root(self, el):
+        return self.meta_data[el].get('is_last_list_item_in_root', False)
+
+    def is_list_item(self, el):
+        return self.meta_data[el].get('is_list_item', False)
+
+    def num_id(self, el):
+        # Only list items have a num_id.
+        if not self.is_list_item(el):
+            return None
+        return self.meta_data[el].get('num_id')
+
+    def ilvl(self, el):
+        # Only list items have an ilvl.
+        if not self.is_list_item(el):
+            return None
+        return self.meta_data[el].get('ilvl')
+
+    def heading_level(self, el):
+        return self.meta_data[el].get('heading_level')
+
+    def is_in_table(self, el):
+        return self.meta_data[el].get('is_in_table')
+
+    def is_last_row_item(self, el):
+        return self.meta_data[el].get('is_last_row_item')
+
+    def row_index(self, el):
+        return self.meta_data[el].get('row_index')
+
+    def column_index(self, el):
+        return self.meta_data[el].get('column_index')
+
+    def is_last_matrix_row_item(self, el):
+        return self.meta_data[el].get('is_last_matrix_row_item')
+
+    def matrix_row_index(self, el):
+        return self.meta_data[el].get('matrix_row_index')
+
+    def matrix_column_index(self, el):
+        return self.meta_data[el].get('matrix_column_index')
+
+    def vmerge_continue(self, el):
+        return self.meta_data[el].get('vmerge_continue')
+
+    def next(self, el):
+        # The membership test avoids creating an entry for unknown elements.
+        if el not in self.meta_data:
+            return
+        return self.meta_data[el].get('next')
+
+    def previous(self, el):
+        # The membership test avoids creating an entry for unknown elements.
+        if el not in self.meta_data:
+            return
+        return self.meta_data[el].get('previous')
+
+    def parent(self, el):
+        return self.meta_data[el].get('parent')
+
+    def _add_parent(self, el):
+        # Record the parent of every descendant of `el`. Iterating an
+        # element yields its direct children.
+        for child in el:
+            self.meta_data[child]['parent'] = el
+            self._add_parent(child)
+
+    def _set_list_attributes(self, el):
+        # Mark every paragraph that carries both a numId and an ilvl as a
+        # list item and record its num_id and ilvl.
+        for num_id_el in find_all(el, 'numId'):
+            parent = find_ancestor_with_tag(self, num_id_el, 'p')
+            if parent is None:
+                continue
+            # Deleted text in a list will have a numId but no ilvl.
+            ilvl_el = find_first(parent, 'ilvl')
+            if ilvl_el is None:
+                continue
+            self.meta_data[parent]['is_list_item'] = True
+            self.meta_data[parent]['num_id'] = self._generate_num_id(parent)
+            self.meta_data[parent]['ilvl'] = ilvl_el.attrib['val']
+
+    def _generate_num_id(self, el):
+        '''
+        Fun fact: It is possible to have a list in the root, that holds a table
+        that holds a list and for both lists to have the same numId. When this
+        happens we should namespace the nested list with the number of tables
+        it is in to ensure it is considered a new list. Otherwise all sorts of
+        terrible html gets generated.
+        '''
+        num_id = find_first(el, 'numId').attrib['val']
+
+        # First, go up the parent until we get None and count the number of
+        # tables there are.
+        num_tables = 0
+        while self.parent(el) is not None:
+            if el.tag == 'tbl':
+                num_tables += 1
+            el = self.parent(el)
+        return NamespacedNumId(
+            num_id=num_id,
+            num_tables=num_tables,
+        )
+
+    def _set_first_list_item(self, num_ids, ilvls, list_elements):
+        # Lists are grouped by having the same `num_id` and `ilvl`. The first
+        # list item is the first list item found for each `num_id` and `ilvl`
+        # combination.
+        for num_id in num_ids:
+            for ilvl in ilvls:
+                filtered_list_elements = [
+                    i for i in list_elements
+                    if (
+                        self.num_id(i) == num_id and
+                        self.ilvl(i) == ilvl
+                    )
+                ]
+                if not filtered_list_elements:
+                    continue
+                first_el = filtered_list_elements[0]
+                self.meta_data[first_el]['is_first_list_item'] = True
+
+    def _set_last_list_item(self, num_ids, list_elements):
+        # Mark the last list item found for each `num_id`. This is used to
+        # ensure that once a list is finished we do not roll the following
+        # non list elements into it.
+        for num_id in num_ids:
+            filtered_list_elements = [
+                i for i in list_elements
+                if self.num_id(i) == num_id
+            ]
+            if not filtered_list_elements:
+                continue
+            last_el = filtered_list_elements[-1]
+            self.meta_data[last_el]['is_last_list_item_in_root'] = True
+
+    def _set_matrix_attributes(self, el):
+        # Record the row/column position of every matrix cell and flag the
+        # last cell of each row.
+        for matrix in find_all(el, 'm'):
+            for i, row in enumerate(_filter_children(matrix, ['mr'])):
+                cells = _filter_children(row, ['e'])
+                if not cells:
+                    # A row without cells has no last item to flag.
+                    continue
+                self.meta_data[cells[-1]]['is_last_matrix_row_item'] = True
+                for j, child in enumerate(cells):
+                    self.meta_data[child]['matrix_row_index'] = i
+                    self.meta_data[child]['matrix_column_index'] = j
+
+    def _set_table_attributes(self, el):
+        # Record the row/column position of every table cell, flag the last
+        # cell of each row and note cells that continue a vertical merge.
+        for table in find_all(el, 'tbl'):
+            for i, row in enumerate(_filter_children(table, ['tr'])):
+                tcs = _filter_children(row, ['tc'])
+                if not tcs:
+                    # A row without cells has no last item to flag.
+                    continue
+                self.meta_data[tcs[-1]]['is_last_row_item'] = True
+                for j, child in enumerate(tcs):
+                    self.meta_data[child]['row_index'] = i
+                    self.meta_data[child]['column_index'] = j
+                    v_merge = find_first(child, 'vMerge')
+                    # A vMerge without any attribute counts as a continue.
+                    if (
+                        v_merge is not None and
+                        ('continue' == v_merge.get('val', '') or
+                            v_merge.attrib == {})
+                    ):
+                        self.meta_data[child]['vmerge_continue'] = True
+
+    def _set_is_in_table(self, el):
+        # Flag every paragraph that has a table cell among its ancestors.
+        for p in find_all(el, 'p'):
+            if find_ancestor_with_tag(self, p, 'tc') is not None:
+                self.meta_data[p]['is_in_table'] = True
+
+    def _set_headers(self, elements):
+        # These are the styles for headers and what the html tag should be if
+        # we have one.
+        headers = {
+            'heading 1': 'h1',
+            'heading 2': 'h2',
+            'heading 3': 'h3',
+            'heading 4': 'h4',
+            'heading 5': 'h5',
+            'heading 6': 'h6',
+            'heading 7': 'h6',
+            'heading 8': 'h6',
+            'heading 9': 'h6',
+            'heading 10': 'h6',
+        }
+        # `styles_dict` defaults to None in __init__; treat that as "no
+        # styles known" instead of failing on `.get`.
+        styles_dict = self.styles_dict or {}
+        for element in elements:
+            p_style = find_first(element, 'pStyle')
+            # This element is using the default style which is not a heading.
+            if p_style is None:
+                continue
+            style = styles_dict.get(p_style.attrib.get('val', ''))
+
+            # Check to see if this element is actually a header.
+            if not style or style.lower() not in headers:
+                continue
+            meta = self.meta_data[element]
+            # Set all the list item variables to false.
+            meta['is_list_item'] = False
+            meta['is_first_list_item'] = False
+            meta['is_last_list_item_in_root'] = False
+            # Prime the heading_level
+            meta['heading_level'] = headers[style.lower()]
+
+    def _convert_upper_roman(self, body):
+        # When enabled, turn the lowest-level items of root lists styled
+        # `upperRoman` into headings.
+        if not self.convert_root_level_upper_roman:
+            return
+        first_root_list_items = [
+            # Only root level elements.
+            el for el in body
+            # And only first_list_items
+            if self.is_first_list_item(el)
+        ]
+        visited_num_ids = []
+        for root_list_item in first_root_list_items:
+            num_id = self.num_id(root_list_item)
+            # `num_id` is None when an earlier pass of this loop already
+            # turned the item into a heading; there is nothing left to do.
+            if num_id is None or num_id in visited_num_ids:
+                continue
+            visited_num_ids.append(num_id)
+            lst_style = get_list_style(
+                self.numbering_root,
+                num_id.num_id,
+                self.ilvl(root_list_item),
+            )
+            if lst_style != 'upperRoman':
+                continue
+            same_list_items = [
+                el for el in find_all(body, 'p')
+                if self.num_id(el) == num_id
+            ]
+            ilvl = min(self.ilvl(el) for el in same_list_items)
+            # Collect the targets before changing any meta data, because
+            # `ilvl` returns None once an item stops being a list item.
+            root_upper_roman_list_items = [
+                el for el in same_list_items
+                if self.ilvl(el) == ilvl
+            ]
+            for list_item in root_upper_roman_list_items:
+                meta = self.meta_data[list_item]
+                meta['is_list_item'] = False
+                meta['is_first_list_item'] = False
+                meta['is_last_list_item_in_root'] = False
+
+                meta['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE
+
+    def _set_next(self, body):
+        # Link content-bearing siblings with `next` and `previous`, both in
+        # the body and inside every table cell.
+        def _get_children_with_content(el):
+            # We only care about children if they have text in them.
+            children = []
+            for child in _filter_children(el, TAGS_HOLDING_CONTENT_TAGS):
+                _has_descendant_with_tag = any(
+                    has_descendant_with_tag(child, tag) for
+                    tag in TAGS_CONTAINING_CONTENT
+                )
+                if _has_descendant_with_tag:
+                    children.append(child)
+            return children
+
+        def _assign_next(children):
+            # Walk adjacent pairs. Indexing with `i - 1` would wrap around
+            # at the first child and make the last child its `previous`.
+            for current_el, following_el in zip(children, children[1:]):
+                self.meta_data[current_el]['next'] = following_el
+                self.meta_data[following_el]['previous'] = current_el
+
+        # Assign next for everything in the root.
+        _assign_next(_get_children_with_content(body))
+
+        # In addition set next for everything in table cells.
+        for tc in find_all(body, 'tc'):
+            _assign_next(_get_children_with_content(tc))
+
+
+def parse_xml_from_string(xml):
+    """
+    Strip the namespaces from `xml` and return the parsed root element.
+    """
+    without_namespaces = remove_namespaces(xml)
+    return cElementTree.fromstring(without_namespaces)
diff --git a/requirements.txt b/requirements.txt
index f9954ad0..77421ff8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,4 @@
-beautifulsoup4>=4.1.0
+Jinja2>=2.0
+coverage==3.6
+nose==1.3.0
+flake8
diff --git a/run_tests.sh b/run_tests.sh
new file mode 100755
index 00000000..da46b811
--- /dev/null
+++ b/run_tests.sh
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Run the tests (with doctests and coverage for the pydocx package), passing
+# any extra command-line arguments on to nosetests. If they pass, lint every
+# Python file below the current directory with flake8.
+#
+# "$@" keeps arguments containing spaces intact, `find .` names the start
+# directory explicitly, and `-exec ... {} +` hands the paths to flake8
+# without splitting them on whitespace.
+nosetests --verbose --with-doctest --with-coverage --cover-package pydocx "$@" &&
+find . -name '*.py' -exec flake8 {} +
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..d0285271
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+
+try:
+ from setuptools import setup, find_packages
+except ImportError:
+ from ez_setup import use_setuptools
+ use_setuptools()
+ from setuptools import setup, find_packages # noqa
+
+def rel_file(*args):
+    """
+    Join `args` onto the directory that contains this file and return the
+    resulting path.
+    """
+    here = os.path.dirname(os.path.abspath(__file__))
+    return os.path.join(here, *args)
+
+
+def get_file(filename):
+    """
+    Return the contents of `filename`, resolved through `rel_file`.
+    """
+    path = rel_file(filename)
+    with open(path) as handle:
+        contents = handle.read()
+    return contents
+
+
+def get_description():
+    """
+    Return the README followed directly by the CHANGELOG.
+    """
+    readme = get_file('README.rst')
+    changelog = get_file('CHANGELOG')
+    return readme + changelog
+
+# Package metadata handed to setuptools.
+setup(
+ name="PyDocX",
+ # Edit here and pydocx.__init__
+ # NOTE(review): the CHANGELOG added in this change set already lists
+ # 0.3.4 -- confirm whether this (and pydocx.__init__) should be bumped.
+ version="0.3.3",
+ description="docx (OOXML) to html converter",
+ author="Jason Ward, Sam Portnow",
+ author_email="jason.louard.ward@gmail.com, samson91787@gmail.com",
+ url="http://github.com/OpenScienceFramework/pydocx",
+ platforms=["any"],
+ license="BSD",
+ packages=find_packages(),
+ # Include the XML files under tests/templates in the pydocx package.
+ package_data={
+ 'pydocx': [
+ 'tests/templates/*.xml',
+ ],
+ },
+ scripts=[],
+ zip_safe=False,
+ install_requires=[],
+ cmdclass={},
+ classifiers=[
+ "Development Status :: 3 - Alpha",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 2 :: Only",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: BSD License",
+ "Operating System :: OS Independent",
+ "Topic :: Text Processing :: Markup :: HTML",
+ "Topic :: Text Processing :: Markup :: XML",
+ ],
+ # README.rst followed by CHANGELOG, read from next to this file.
+ long_description=get_description(),
+)