diff --git a/CHANGELOG b/CHANGELOG index 854cca9..7f6a437 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,12 @@ Changelog ========= +* 0.1.7 + * If the indentation level of a set of lists (with the same list id) were + mangled (Starting off with a higher indentation level followed by a + lower) then the entire sub list (the list with the lower indentation + level) would not be added to the root list. This would result in removing + the mangled list from the final output. This issue has been addressed. * 0.1.6 * Header detection was relying on case. However it is possible for a lower case version of headers to show up. Those are now handled correctly. diff --git a/docx2html/core.py b/docx2html/core.py index 72e1efb..b020cc5 100644 --- a/docx2html/core.py +++ b/docx2html/core.py @@ -165,8 +165,7 @@ def is_natural_header(el, styles_dict): if ( style_id in styles_dict and 'header' in styles_dict[style_id] and - styles_dict[style_id]['header'] - ): + styles_dict[style_id]['header']): return styles_dict[style_id]['header'] @@ -287,6 +286,7 @@ def get_li_nodes(li, meta_data): yield li w_namespace = get_namespace(li, 'w') current_numId = get_numId(li, w_namespace) + starting_ilvl = get_ilvl(li, w_namespace) el = li while True: el = el.getnext() @@ -301,6 +301,11 @@ def get_li_nodes(li, meta_data): if _is_top_level_upper_roman(el, meta_data): break + if ( + is_li(el, meta_data) and + (starting_ilvl > get_ilvl(el, w_namespace))): + break + # If the list id of the next tag is different that the previous that # means a new list being made (not nested) if is_last_li(el, meta_data, current_numId): @@ -309,7 +314,6 @@ def get_li_nodes(li, meta_data): # Not a subsequent list. yield el break - yield el @@ -1018,8 +1022,7 @@ def get_tr_data(tr, meta_data, row_spans): # ignored. if ( v_merge is not None and - v_merge.get('%sval' % w_namespace) != 'restart' - ): + v_merge.get('%sval' % w_namespace) != 'restart'): continue # Loop through each and build a list of all the content. @@ -1072,8 +1075,7 @@ def get_tr_data(tr, meta_data, row_spans): # here. if ( v_merge is not None and - v_merge.get('%sval' % w_namespace) == 'restart' - ): + v_merge.get('%sval' % w_namespace) == 'restart'): rowspan = next(row_spans) td_el.set('rowspan', '%d' % rowspan) diff --git a/docx2html/tests/test_docx.py b/docx2html/tests/test_docx.py index bb2a566..5863353 100644 --- a/docx2html/tests/test_docx.py +++ b/docx2html/tests/test_docx.py @@ -33,8 +33,7 @@ def test_extract_html(): 'simple.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, - ''' + assert_html_equal(actual_html, '''
Simple text @@ -66,8 +65,7 @@ def test_nested_list(): 'nested_lists.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, - ''' + assert_html_equal(actual_html, '''
This sentence has some bold, some italics and some underline, as well as a hyperlink.
''') # noqa diff --git a/docx2html/tests/test_xml.py b/docx2html/tests/test_xml.py index 586b42e..52bca28 100644 --- a/docx2html/tests/test_xml.py +++ b/docx2html/tests/test_xml.py @@ -593,3 +593,32 @@ def test_get_headings(self): styles_xml = etree.fromstring(xml) styles_dict = get_style_dict(styles_xml) self.assertEqual(styles_dict['heading 1']['header'], 'h2') + + +class MangledIlvlTestCase(_TranslationTestCase): + expected_output = ''' + +