PolicyStat · jlward · Mar 19, 2013 · Mar 19, 2013 · Mar 19, 2013 · Mar 19, 2013
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,12 @@
 Changelog
 =========
 
+* 0.1.7
+    * If the indentation levels of a set of lists (with the same list id) were
+      mangled (starting off with a higher indentation level followed by a
+      lower one), then the entire sub list (the list with the lower indentation
+      level) would not be added to the root list. As a result, the mangled list
+      was dropped from the final output. This issue has been addressed.
 * 0.1.6
     * Header detection was relying on case. However it is possible for a lower
       case version of headers to show up. Those are now handled correctly.

diff --git a/docx2html/core.py b/docx2html/core.py
@@ -165,8 +165,7 @@ def is_natural_header(el, styles_dict):
     if (
             style_id in styles_dict and
             'header' in styles_dict[style_id] and
-            styles_dict[style_id]['header']
-        ):
+            styles_dict[style_id]['header']):
         return styles_dict[style_id]['header']
 
 
@@ -287,6 +286,7 @@ def get_li_nodes(li, meta_data):
     yield li
     w_namespace = get_namespace(li, 'w')
     current_numId = get_numId(li, w_namespace)
+    starting_ilvl = get_ilvl(li, w_namespace)
     el = li
     while True:
         el = el.getnext()
@@ -301,6 +301,11 @@ def get_li_nodes(li, meta_data):
         if _is_top_level_upper_roman(el, meta_data):
             break
 
+        if (
+                is_li(el, meta_data) and
+                (starting_ilvl > get_ilvl(el, w_namespace))):
+            break
+
         # If the list id of the next tag is different that the previous that
         # means a new list being made (not nested)
         if is_last_li(el, meta_data, current_numId):
@@ -309,7 +314,6 @@ def get_li_nodes(li, meta_data):
                 # Not a subsequent list.
                 yield el
             break
-
         yield el
 
 
@@ -1018,8 +1022,7 @@ def get_tr_data(tr, meta_data, row_spans):
             # ignored.
             if (
                     v_merge is not None and
-                    v_merge.get('%sval' % w_namespace) != 'restart'
-                ):
+                    v_merge.get('%sval' % w_namespace) != 'restart'):
                 continue
 
             # Loop through each and build a list of all the content.
@@ -1072,8 +1075,7 @@ def get_tr_data(tr, meta_data, row_spans):
             # here.
             if (
                     v_merge is not None and
-                    v_merge.get('%sval' % w_namespace) == 'restart'
-                ):
+                    v_merge.get('%sval' % w_namespace) == 'restart'):
                 rowspan = next(row_spans)
                 td_el.set('rowspan', '%d' % rowspan)
 

diff --git a/docx2html/tests/test_docx.py b/docx2html/tests/test_docx.py
@@ -33,8 +33,7 @@ def test_extract_html():
         'simple.docx',
     )
     actual_html = convert(file_path)
-    assert_html_equal(actual_html,
-    '''
+    assert_html_equal(actual_html, '''
     <html>
         <p>
           Simple text
@@ -66,8 +65,7 @@ def test_nested_list():
         'nested_lists.docx',
     )
     actual_html = convert(file_path)
-    assert_html_equal(actual_html,
-    '''
+    assert_html_equal(actual_html, '''
     <html>
         <ol data-list-type="decimal">
             <li>one</li>
@@ -111,8 +109,7 @@ def test_simple_list():
         'simple_lists.docx',
     )
     actual_html = convert(file_path)
-    assert_html_equal(actual_html,
-    '''
+    assert_html_equal(actual_html, '''
     <html>
         <ol data-list-type="decimal">
             <li>One</li>
@@ -132,8 +129,7 @@ def test_inline_tags():
         'inline_tags.docx',
     )
     actual_html = convert(file_path)
-    assert_html_equal(actual_html,
-    '''
+    assert_html_equal(actual_html, '''
     <html><p>This sentence has some <strong>bold</strong>, some <em>italics</em> and some <strong>underline</strong>, as well as a <a href="http://www.google.com/">hyperlink</a>.</p></html>''')  # noqa
 
 

diff --git a/docx2html/tests/test_xml.py b/docx2html/tests/test_xml.py
@@ -593,3 +593,32 @@ def test_get_headings(self):
         styles_xml = etree.fromstring(xml)
         styles_dict = get_style_dict(styles_xml)
         self.assertEqual(styles_dict['heading 1']['header'], 'h2')
+
+
+class MangledIlvlTestCase(_TranslationTestCase):
+    expected_output = '''
+    <html>
+        <ol data-list-type="decimal">
+            <li>AAA</li>
+        </ol>
+        <ol data-list-type="decimal">
+            <li>BBB</li>
+        </ol>
+        <ol data-list-type="decimal">
+            <li>CCC</li>
+        </ol>
+    </html>
+    '''
+
+    def get_xml(self):
+        li_text = [  # one (text, ilvl, numId) tuple per list item
+            ('AAA', 0, 2),
+            ('BBB', 1, 1),  # starts numId 1 at ilvl 1
+            ('CCC', 0, 1),  # same numId as BBB, lower ilvl: the mangled case
+        ]
+        lis = ''
+        for text, ilvl, numId in li_text:
+            lis += DXB.li(text=text, ilvl=ilvl, numId=numId)
+
+        xml = DXB.xml(lis)  # NOTE(review): presumably wraps the items in a document body; confirm in DXB
+        return etree.fromstring(xml)