Skip to content
This repository was archived by the owner on Oct 17, 2018. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
Changelog
=========

* 0.1.7
* If the indentation level of a set of lists (with the same list id) was
mangled (starting off with a higher indentation level followed by a
lower one), then the entire sub list (the list with the lower indentation
level) would not be added to the root list. This would result in removing
the mangled list from the final output. This issue has been addressed.
* 0.1.6
* Header detection was relying on case. However, it is possible for a
lowercase version of headers to show up. Those are now handled correctly.
Expand Down
16 changes: 9 additions & 7 deletions docx2html/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,7 @@ def is_natural_header(el, styles_dict):
if (
style_id in styles_dict and
'header' in styles_dict[style_id] and
styles_dict[style_id]['header']
):
styles_dict[style_id]['header']):
return styles_dict[style_id]['header']


Expand Down Expand Up @@ -287,6 +286,7 @@ def get_li_nodes(li, meta_data):
yield li
w_namespace = get_namespace(li, 'w')
current_numId = get_numId(li, w_namespace)
starting_ilvl = get_ilvl(li, w_namespace)
el = li
while True:
el = el.getnext()
Expand All @@ -301,6 +301,11 @@ def get_li_nodes(li, meta_data):
if _is_top_level_upper_roman(el, meta_data):
break

if (
is_li(el, meta_data) and
(starting_ilvl > get_ilvl(el, w_namespace))):
break

# If the list id of the next tag is different from the previous one, that
# means a new list is being made (not nested)
if is_last_li(el, meta_data, current_numId):
Expand All @@ -309,7 +314,6 @@ def get_li_nodes(li, meta_data):
# Not a subsequent list.
yield el
break

yield el


Expand Down Expand Up @@ -1018,8 +1022,7 @@ def get_tr_data(tr, meta_data, row_spans):
# ignored.
if (
v_merge is not None and
v_merge.get('%sval' % w_namespace) != 'restart'
):
v_merge.get('%sval' % w_namespace) != 'restart'):
continue

# Loop through each and build a list of all the content.
Expand Down Expand Up @@ -1072,8 +1075,7 @@ def get_tr_data(tr, meta_data, row_spans):
# here.
if (
v_merge is not None and
v_merge.get('%sval' % w_namespace) == 'restart'
):
v_merge.get('%sval' % w_namespace) == 'restart'):
rowspan = next(row_spans)
td_el.set('rowspan', '%d' % rowspan)

Expand Down
12 changes: 4 additions & 8 deletions docx2html/tests/test_docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def test_extract_html():
'simple.docx',
)
actual_html = convert(file_path)
assert_html_equal(actual_html,
'''
assert_html_equal(actual_html, '''
<html>
<p>
Simple text
Expand Down Expand Up @@ -66,8 +65,7 @@ def test_nested_list():
'nested_lists.docx',
)
actual_html = convert(file_path)
assert_html_equal(actual_html,
'''
assert_html_equal(actual_html, '''
<html>
<ol data-list-type="decimal">
<li>one</li>
Expand Down Expand Up @@ -111,8 +109,7 @@ def test_simple_list():
'simple_lists.docx',
)
actual_html = convert(file_path)
assert_html_equal(actual_html,
'''
assert_html_equal(actual_html, '''
<html>
<ol data-list-type="decimal">
<li>One</li>
Expand All @@ -132,8 +129,7 @@ def test_inline_tags():
'inline_tags.docx',
)
actual_html = convert(file_path)
assert_html_equal(actual_html,
'''
assert_html_equal(actual_html, '''
<html><p>This sentence has some <strong>bold</strong>, some <em>italics</em> and some <strong>underline</strong>, as well as a <a href="http://www.google.com/">hyperlink</a>.</p></html>''') # noqa


Expand Down
29 changes: 29 additions & 0 deletions docx2html/tests/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,3 +593,32 @@ def test_get_headings(self):
styles_xml = etree.fromstring(xml)
styles_dict = get_style_dict(styles_xml)
self.assertEqual(styles_dict['heading 1']['header'], 'h2')


class MangledIlvlTestCase(_TranslationTestCase):
    """Translation case for list items whose ``ilvl`` values are out of order.

    The document built by ``get_xml`` holds three list items. The second one
    (numId 1) has ilvl 1 and is followed by an item with the same numId at
    ilvl 0. ``expected_output`` places each item in its own decimal ``<ol>``.

    NOTE(review): how ``expected_output`` and ``get_xml`` are consumed is
    decided by ``_TranslationTestCase``, which is not visible here -- confirm
    against the base class.
    """

    expected_output = '''
<html>
<ol data-list-type="decimal">
<li>AAA</li>
</ol>
<ol data-list-type="decimal">
<li>BBB</li>
</ol>
<ol data-list-type="decimal">
<li>CCC</li>
</ol>
</html>
'''

    def get_xml(self):
        """Return the parsed XML tree for the three list items."""
        # Each entry is (text, ilvl, numId), mirroring the DXB.li keywords.
        item_specs = (
            ('AAA', 0, 2),
            ('BBB', 1, 1),
            ('CCC', 0, 1),
        )
        list_markup = ''.join(
            DXB.li(text=label, ilvl=level, numId=list_id)
            for label, level, list_id in item_specs
        )
        return etree.fromstring(DXB.xml(list_markup))