CenterForOpenScience · jlward · May 21, 2013 · May 21, 2013 · May 21, 2013 · May 21, 2013
diff --git a/README.md b/README.md
@@ -113,8 +113,7 @@ DocxParser includes abstracts methods that each parser overwrites to satsify its
 
 		@abstractmethod
 		def table(self, text):
-			return text
-
+			return text 
 		@abstractmethod
 		def table_row(self, text):
 			return text
@@ -161,4 +160,16 @@ OR, let's say FOO is your new favorite markup language. Simply customize your ow
 
             def linebreak(self):
                 return '!!!!!!!!!!!!' #  because linebreaks in are denoted by '!!!!!!!!!!!!'
-                                      #  with the FOO markup langauge  :)
+                                      #  with the FOO markup language  :)
+
+# Styles
+
+The base parser `Docx2Html` relies on certain CSS classes being set for certain behaviour to occur. Currently these include:
+
+* class `insert` -> Turns the text green.
+* class `delete` -> Turns the text red and draws a line through the text.
+* class `center` -> Aligns the text to the center.
+* class `right` -> Aligns the text to the right.
+* class `left` -> Aligns the text to the left.
+* class `comment` -> Turns the text blue.
+* class `pydocx-underline` -> Underlines the text.
diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py
@@ -9,9 +9,6 @@ class Docx2Html(DocxParser):
     @property
     def parsed(self):
         content = self._parsed
-        content = content.replace('<p></p><p></p>', '<br />')
-        content = content.replace('</p><br /><p>', '</p><p>')
-        content = content.replace('</p><br /><ul>', '</p><ul>')
         content = "<html>%(head)s<body>%(content)s</body></html>" % {
             'head': self.head(),
             'content': content,
@@ -28,6 +25,7 @@ def style(self):
         {{color:red; text-decoration:line-through}}.center
         {{text-align:center}}.right{{text-align:right}}
         .left{{text-align:left}} .comment{{color:blue}}
+        .pydocx-underline {text-decoration: underline;}
         body{{width:%(width)spx; margin:0px auto;
         }}</style>''') % {
             'width': (self.page_width * (4 / 3)),
@@ -109,13 +107,13 @@ def unordered_list(self, text):
         }
 
     def bold(self, text):
-        return '<b>' + text + '</b>'
+        return '<strong>' + text + '</strong>'
 
     def italics(self, text):
-        return '<i>' + text + '</i>'
+        return '<em>' + text + '</em>'
 
     def underline(self, text):
-        return '<u>' + text + '</u>'
+        return '<span class="pydocx-underline">' + text + '</span>'
 
     def tab(self):
         # Insert before the text right?? So got the text and just do an insert
@@ -142,7 +140,7 @@ def table_cell(self, text, col='', row=''):
         }
 
     def page_break(self):
-        return '<hr>'
+        return '<hr />'
 
     def indent(self, text, just='', firstLine='', left='', right=''):
         slug = '<div'
@@ -167,4 +165,4 @@ def indent(self, text, just='', firstLine='', left='', right=''):
         }
 
     def break_tag(self):
-        return '<br/>'
+        return '<br />'
diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py
@@ -143,8 +143,13 @@ def test_inline_tags():
         'inline_tags.docx',
     )
     actual_html = convert(file_path)
-    assert_html_equal(actual_html, '''
-    <html><body><p>This sentence has some <b>bold</b>, some <i>italics</i> and some <u>underline</u>, as well as a <a href="http://www.google.com/">hyperlink</a>.</p></body></html>''')  # noqa
+    assert_html_equal(actual_html, (
+        '<html><body><p>This sentence has some <strong>bold</strong>, '
+        'some <em>italics</em> and some '
+        '<span class="pydocx-underline">underline</span>, '
+        'as well as a <a href="http://www.google.com/">hyperlink</a>'
+        '.</p></body></html>'
+    ))
 
 
 def test_unicode():
@@ -639,16 +644,16 @@ def test_shift_enter():
     actual_html = convert(file_path)
     assert_html_equal(actual_html, '''
     <html><body>
-        <p>AAA<br/>BBB</p>
+        <p>AAA<br />BBB</p>
         <p>CCC</p>
         <ol data-list-type="decimal">
-            <li>DDD<br/>EEE</li>
+            <li>DDD<br />EEE</li>
             <li>FFF</li>
         </ol>
         <table>
             <tr>
-                <td>GGG<br/>HHH</td>
-                <td>III<br/>JJJ</td>
+                <td>GGG<br />HHH</td>
+                <td>III<br />JJJ</td>
             </tr>
             <tr>
                 <td>KKK</td>
@@ -767,7 +772,7 @@ def test_simple_table():
     assert_html_equal(actual_html, '''
     <html><body>
     <table>
-    <tr><td>Cell1<br/>Cell3</td><td>Cell2<br/>
+    <tr><td>Cell1<br />Cell3</td><td>Cell2<br />
     And I am writing in the table</td></tr>
     <tr><td></td><td>Cell4</td></tr>
     </table>

diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py
@@ -16,7 +16,7 @@
 class BoldTestCase(_TranslationTestCase):
     expected_output = """
         <html><body>
-            <p><b>AAA</b></p>
+            <p><strong>AAA</strong></p>
             <p>BBB</p>
         </body></html>
     """
@@ -121,7 +121,7 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase):
 
     expected_output = '''
     <html><body>
-        <p><a href="www.google.com">link<br/></a></p>
+        <p><a href="www.google.com">link<br /></a></p>
     </body></html>
     '''
 
@@ -382,7 +382,7 @@ class TableWithListAndParagraph(_TranslationTestCase):
                         <li>AAA</li>
                         <li>BBB</li>
                     </ol>
-                    CCC<br/>
+                    CCC<br />
                     DDD
                 </td>
             </tr>
@@ -478,7 +478,7 @@ class ListWithContinuationTestCase(_TranslationTestCase):
     expected_output = '''
         <html><body>
             <ol data-list-type="decimal">
-                <li>AAA<br/>BBB</li>
+                <li>AAA<br />BBB</li>
                 <li>CCC
                     <table>
                         <tr>
@@ -722,7 +722,7 @@ class DeleteTagInList(_TranslationTestCase):
     expected_output = '''
     <html><body>
         <ol data-list-type="decimal">
-            <li>AAA<br/>
+            <li>AAA<br />
                 <span class='delete' author='' date=''>BBB</span>
             </li>
             <li>CCC</li>
@@ -746,7 +746,7 @@ class InsertTagInList(_TranslationTestCase):
     expected_output = '''
     <html><body>
         <ol data-list-type="decimal">
-            <li>AAA<br/>
+            <li>AAA<br />
                 <span class='insert' author='' date=''>BBB</span>
             </li>
             <li>CCC</li>
@@ -771,7 +771,7 @@ class SmartTagInList(_TranslationTestCase):
     expected_output = '''
     <html><body>
         <ol data-list-type="decimal">
-            <li>AAA<br/>
+            <li>AAA<br />
                 BBB
             </li>
             <li>CCC</li>
@@ -850,7 +850,7 @@ class MissingIlvl(_TranslationTestCase):
     expected_output = '''
     <html><body>
         <ol data-list-type="decimal">
-            <li>AAA<br/>
+            <li>AAA<br />
                 BBB
             </li>
             <li>CCC</li>
@@ -923,7 +923,7 @@ class SDTTestCase(_TranslationTestCase):
     expected_output = '''
     <html><body>
         <ol data-list-type="decimal">
-            <li>AAA<br/>
+            <li>AAA<br />
                 BBB
             </li>
             <li>CCC</li>