From 3f0c1ce1555d3dd1e93c81c56d7149775e6e814b Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 27 Feb 2013 15:05:58 -0500 Subject: [PATCH 1/3] refs #15: added a test showing that header detection was not case insensitive --- docx2html/tests/document_builder.py | 22 ++++++++++++++++++++++ docx2html/tests/templates/style.xml | 15 +++++++++++++++ docx2html/tests/templates/styles.xml | 6 ++++++ docx2html/tests/test_xml.py | 18 ++++++++++++++++++ 4 files changed, 61 insertions(+) create mode 100644 docx2html/tests/templates/style.xml create mode 100644 docx2html/tests/templates/styles.xml diff --git a/docx2html/tests/document_builder.py b/docx2html/tests/document_builder.py index 5da1495..eaedfb5 100644 --- a/docx2html/tests/document_builder.py +++ b/docx2html/tests/document_builder.py @@ -11,6 +11,8 @@ 'table': 'table.xml', 'tc': 'tc.xml', 'tr': 'tr.xml', + 'styles': 'styles.xml', + 'style': 'style.xml', } env = Environment( @@ -118,3 +120,23 @@ def sectPr_tag(self, p_tag): 'p_tag': p_tag, } return template.render(**kwargs) + + @classmethod + def styles_xml(self, style_tags): + template = env.get_template(templates['styles']) + + kwargs = { + 'style_tags': style_tags, + } + return template.render(**kwargs) + + @classmethod + def style(self, style_id, value): + template = env.get_template(templates['style']) + + kwargs = { + 'style_id': style_id, + 'value': value, + } + + return template.render(**kwargs) diff --git a/docx2html/tests/templates/style.xml b/docx2html/tests/templates/style.xml new file mode 100644 index 0000000..5fa9f00 --- /dev/null +++ b/docx2html/tests/templates/style.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/docx2html/tests/templates/styles.xml b/docx2html/tests/templates/styles.xml new file mode 100644 index 0000000..a30e752 --- /dev/null +++ b/docx2html/tests/templates/styles.xml @@ -0,0 +1,6 @@ + + + {% for style in style_tags %} + {{ style }} + {% endfor %} + diff --git a/docx2html/tests/test_xml.py 
b/docx2html/tests/test_xml.py index 4225ef0..586b42e 100644 --- a/docx2html/tests/test_xml.py +++ b/docx2html/tests/test_xml.py @@ -6,6 +6,7 @@ from docx2html.core import ( _is_top_level_upper_roman, create_html, + get_style_dict, get_font_size, get_image_id, get_li_nodes, @@ -575,3 +576,20 @@ def get_xml(self): body = li + footer_tag xml = DXB.xml(body) return etree.fromstring(xml) + + +class StylesParsingTestCase(_TranslationTestCase): + expected_output = '' + + def get_xml(self): + return etree.fromstring(DXB.xml('')) + + def test_get_headings(self): + + styles = [ + DXB.style('heading 1', 'heading 1'), + ] + xml = DXB.styles_xml(styles) + styles_xml = etree.fromstring(xml) + styles_dict = get_style_dict(styles_xml) + self.assertEqual(styles_dict['heading 1']['header'], 'h2') From b9758be45606403ab215987873f778f5d18797f9 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 27 Feb 2013 15:06:25 -0500 Subject: [PATCH 2/3] refs #15: header detection is now case insensitive --- docx2html/core.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docx2html/core.py b/docx2html/core.py index 30f24ca..72e1efb 100644 --- a/docx2html/core.py +++ b/docx2html/core.py @@ -659,16 +659,16 @@ def get_style_dict(tree): # This is a partial document and actual h1 is the document title, which # will be displayed elsewhere. 
headers = { - 'Heading 1': 'h2', - 'Heading 2': 'h3', - 'Heading 3': 'h4', - 'Heading 4': 'h5', - 'Heading 5': 'h6', - 'Heading 6': 'h6', - 'Heading 7': 'h6', - 'Heading 8': 'h6', - 'Heading 9': 'h6', - 'Heading 10': 'h6', + 'heading 1': 'h2', + 'heading 2': 'h3', + 'heading 3': 'h4', + 'heading 4': 'h5', + 'heading 5': 'h6', + 'heading 6': 'h6', + 'heading 7': 'h6', + 'heading 8': 'h6', + 'heading 9': 'h6', + 'heading 10': 'h6', } if tree is None: return {} @@ -685,7 +685,7 @@ def get_style_dict(tree): name = el.find('%sname' % w_namespace) if name is None: continue - value = name.get('%sval' % w_namespace) + value = name.get('%sval' % w_namespace).lower() if value in headers: el_result['header'] = headers[value] From ba810219f7e7ae044600ffa5a7efb5a82c05335f Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 27 Feb 2013 15:15:40 -0500 Subject: [PATCH 3/3] refs #15: update note --- CHANGELOG | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 49fb4be..854cca9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,9 @@ Changelog ========= +* 0.1.6 + * Header detection was case-sensitive, but lower-case heading style names + can also show up. Those are now handled correctly. * 0.1.4 * Added a function to remove tags, in addition stripped 'sectPr' tags since they have to do with headers and footers.