From 3be3a148047a3dbb8b215e7e5ecca4db859a0833 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 11:27:54 -0400 Subject: [PATCH 01/13] refs #3: Added a few stub tests. --- html2docx/tests/test_builder.py | 16 +++++++++++++++- html2docx/tests/test_tables.py | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 html2docx/tests/test_tables.py diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py index ceb3c49..ae771a0 100644 --- a/html2docx/tests/test_builder.py +++ b/html2docx/tests/test_builder.py @@ -1,7 +1,12 @@ from xml.etree import cElementTree from unittest import TestCase -from html2docx.builder import RunProperties, ParagraphParser, Paragraph +from html2docx.builder import ( + Paragraph, + ParagraphParser, + RunProperties, + TableCell, +) class RunPropertiesTestCase(TestCase): @@ -75,3 +80,12 @@ def test_empty(self): xml = paragraph.xml self.assertEqual(xml, expected_xml) + + +class TableCellTestCase(TestCase): + def test_empty(self): + table_cell = TableCell() + expected_xml = '' + + xml = table_cell.xml + self.assertEqual(xml, expected_xml) diff --git a/html2docx/tests/test_tables.py b/html2docx/tests/test_tables.py new file mode 100644 index 0000000..a40cf25 --- /dev/null +++ b/html2docx/tests/test_tables.py @@ -0,0 +1,15 @@ +from html2docx.tests import build_run + + +test_cases = [ + ( + 'Test simple table.', + '
AAA
', + ), +] + + +def test(): + for test_name, html in test_cases: + run = build_run(test_name, html) + yield run From b5ccfb39aec734708b272f628975c2a5c0d4dc4f Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 11:36:27 -0400 Subject: [PATCH 02/13] refs #3: That should have been removed in the last ticket. --- html2docx/builder.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/html2docx/builder.py b/html2docx/builder.py index 9b60392..5788f23 100644 --- a/html2docx/builder.py +++ b/html2docx/builder.py @@ -46,10 +46,6 @@ def build_runs(self): ooxml_style = self.html_to_ooxml_tag_conversions.get(style) if ooxml_style: setattr(run.properties, ooxml_style, True) - if 'strong' in styles: - run.properties.bold = True - if 'em' in styles: - run.properties.italics = True yield run @property From db15ce8284808fbb2710ec5ef42aee19c017e605 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 11:46:47 -0400 Subject: [PATCH 03/13] refs #3: Added a test for styled table cells. 
--- html2docx/tests/test_builder.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py index ae771a0..8d0910d 100644 --- a/html2docx/tests/test_builder.py +++ b/html2docx/tests/test_builder.py @@ -6,6 +6,7 @@ ParagraphParser, RunProperties, TableCell, + TableCellParser, ) @@ -89,3 +90,19 @@ def test_empty(self): xml = table_cell.xml self.assertEqual(xml, expected_xml) + + def test_simple(self): + element = cElementTree.fromstring('AAA') + parser = TableCellParser(element) + xml = parser.tag.xml + expected_xml = 'AAA' # noqa + + self.assertEqual(xml, expected_xml) + + def test_with_style(self): + element = cElementTree.fromstring('AAA') + parser = TableCellParser(element) + xml = parser.tag.xml + expected_xml = 'AAA' # noqa + + self.assertEqual(xml, expected_xml) From b86c5bae1e3cb4f75460f8b2ebaa8699b3c9f1db Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 11:47:09 -0400 Subject: [PATCH 04/13] refs #3: Basic table cells working. 
--- html2docx/builder.py | 36 ++++++++++++++++++++++++++++----- html2docx/tests/test_builder.py | 18 +++++++++-------- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/html2docx/builder.py b/html2docx/builder.py index 5788f23..b4f0401 100644 --- a/html2docx/builder.py +++ b/html2docx/builder.py @@ -7,11 +7,8 @@ def xml(self): return cElementTree.tostring(self.tree) -class ParagraphParser(object): - html_to_ooxml_tag_conversions = { - 'strong': 'bold', - 'em': 'italics', - } +class BaseParser(object): + abstract = True def __init__(self, element): self.element = element @@ -39,6 +36,13 @@ def _parse(self, element, styles): if element.tail: yield element.tail, styles[-1] + +class ParagraphParser(BaseParser): + html_to_ooxml_tag_conversions = { + 'strong': 'bold', + 'em': 'italics', + } + def build_runs(self): for text, styles in self.parse(self.element): run = Run(text) @@ -126,3 +130,25 @@ def italics(self, value): self._italics = True else: self._italics = False + + +class TableCellParser(BaseParser): + @property + def tag(self): + paragraph = ParagraphParser(self.element) + return TableCell(paragraph) + + +class TableCell(BaseTag): + tag_name = 'w:tc' + + def __init__(self, element=None): + self.element = element + + @property + def tree(self): + element = cElementTree.Element(self.tag_name) + if self.element is None: + return element + element.append(self.element.tag.tree) + return element diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py index 8d0910d..7ef21b7 100644 --- a/html2docx/tests/test_builder.py +++ b/html2docx/tests/test_builder.py @@ -83,14 +83,7 @@ def test_empty(self): self.assertEqual(xml, expected_xml) -class TableCellTestCase(TestCase): - def test_empty(self): - table_cell = TableCell() - expected_xml = '' - - xml = table_cell.xml - self.assertEqual(xml, expected_xml) - +class TableCellParserTestCase(TestCase): def test_simple(self): element = cElementTree.fromstring('AAA') parser = 
TableCellParser(element) @@ -106,3 +99,12 @@ def test_with_style(self): expected_xml = 'AAA' # noqa self.assertEqual(xml, expected_xml) + + +class TableCellTestCase(TestCase): + def test_empty(self): + table_cell = TableCell() + expected_xml = '' + + xml = table_cell.xml + self.assertEqual(xml, expected_xml) From 3cce6a74d58a2bbd25772862a98f08d64880bbf2 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:03:35 -0400 Subject: [PATCH 05/13] refs #3: Added table row test cases. --- html2docx/tests/test_builder.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py index 7ef21b7..6328b35 100644 --- a/html2docx/tests/test_builder.py +++ b/html2docx/tests/test_builder.py @@ -7,6 +7,7 @@ RunProperties, TableCell, TableCellParser, + TableRowParser, ) @@ -108,3 +109,29 @@ def test_empty(self): xml = table_cell.xml self.assertEqual(xml, expected_xml) + + +class TableRowParserTestCase(TestCase): + def test_simple(self): + element = cElementTree.fromstring('AAA') + parser = TableRowParser(element) + xml = parser.tag.xml + expected_xml = 'AAA' # noqa + + self.assertEqual(xml, expected_xml) + + def test_with_style(self): + element = cElementTree.fromstring('AAA') # noqa + parser = TableRowParser(element) + xml = parser.tag.xml + expected_xml = 'AAA' # noqa + + self.assertEqual(xml, expected_xml) + + def test_multiple_cells(self): + element = cElementTree.fromstring('AAABBB') + parser = TableRowParser(element) + xml = parser.tag.xml + expected_xml = 'AAABBB' # noqa + + self.assertEqual(xml, expected_xml) From 310b5e768996fe0d90a82667e71e98be3dbf08dc Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:05:29 -0400 Subject: [PATCH 06/13] refs #3: 100% code coverage. 
--- html2docx/builder.py | 25 +++++++++++++++++++++++++ html2docx/tests/test_builder.py | 10 ++++++++++ 2 files changed, 35 insertions(+) diff --git a/html2docx/builder.py b/html2docx/builder.py index b4f0401..9161e0b 100644 --- a/html2docx/builder.py +++ b/html2docx/builder.py @@ -132,6 +132,31 @@ def italics(self, value): self._italics = False +class TableRowParser(BaseParser): + @property + def tag(self): + table_cells = [] + for table_row in self.element.findall('td'): + table_cells.append(TableCellParser(table_row)) + return TableRow(table_cells) + + +class TableRow(BaseTag): + tag_name = 'w:tr' + + def __init__(self, table_cells=None): + self.table_cells = table_cells + + @property + def tree(self): + element = cElementTree.Element(self.tag_name) + if self.table_cells is None: + return element + for table_cell in self.table_cells: + element.append(table_cell.tag.tree) + return element + + class TableCellParser(BaseParser): @property def tag(self): diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py index 6328b35..b090f2b 100644 --- a/html2docx/tests/test_builder.py +++ b/html2docx/tests/test_builder.py @@ -7,6 +7,7 @@ RunProperties, TableCell, TableCellParser, + TableRow, TableRowParser, ) @@ -135,3 +136,12 @@ def test_multiple_cells(self): expected_xml = 'AAABBB' # noqa self.assertEqual(xml, expected_xml) + + +class TableRowTestCase(TestCase): + def test_empty(self): + table_row = TableRow() + expected_xml = '' + + xml = table_row.xml + self.assertEqual(xml, expected_xml) From ab67017e7ce17bf566cc7e618571fe2a08afef02 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:11:10 -0400 Subject: [PATCH 07/13] refs #3: Table test cases. 
--- html2docx/tests/test_builder.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py index b090f2b..fc8324a 100644 --- a/html2docx/tests/test_builder.py +++ b/html2docx/tests/test_builder.py @@ -5,8 +5,10 @@ Paragraph, ParagraphParser, RunProperties, + Table, TableCell, TableCellParser, + TableParser, TableRow, TableRowParser, ) @@ -145,3 +147,38 @@ def test_empty(self): xml = table_row.xml self.assertEqual(xml, expected_xml) + + +class TableParserTestCase(TestCase): + def test_simple(self): + element = cElementTree.fromstring('
AAA
') # noqa + parser = TableParser(element) + xml = parser.tag.xml + expected_xml = 'AAA' # noqa + + self.assertEqual(xml, expected_xml) + + def test_with_style(self): + element = cElementTree.fromstring('
AAA
') # noqa + parser = TableParser(element) + xml = parser.tag.xml + expected_xml = 'AAA' # noqa + + self.assertEqual(xml, expected_xml) + + def test_multiple_cells(self): + element = cElementTree.fromstring('
AAABBB
CCCDDD
') # noqa + parser = TableParser(element) + xml = parser.tag.xml + expected_xml = 'AAABBBCCCDDD' # noqa + + self.assertEqual(xml, expected_xml) + + +class TableTestCase(TestCase): + def test_empty(self): + table_row = Table() + expected_xml = '' + + xml = table_row.xml + self.assertEqual(xml, expected_xml) From d1f0165a8334bfa81fccd2606a67fd71b8d8e629 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:12:00 -0400 Subject: [PATCH 08/13] refs #3: Added table support --- html2docx/builder.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/html2docx/builder.py b/html2docx/builder.py index 9161e0b..a6fed8e 100644 --- a/html2docx/builder.py +++ b/html2docx/builder.py @@ -132,12 +132,37 @@ def italics(self, value): self._italics = False +class TableParser(BaseParser): + @property + def tag(self): + table_rows = [] + for table_row in self.element.findall('tr'): + table_rows.append(TableRowParser(table_row)) + return Table(table_rows) + + +class Table(BaseTag): + tag_name = 'w:tbl' + + def __init__(self, table_rows=None): + self.table_rows = table_rows + + @property + def tree(self): + element = cElementTree.Element(self.tag_name) + if self.table_rows is None: + return element + for table_row in self.table_rows: + element.append(table_row.tag.tree) + return element + + class TableRowParser(BaseParser): @property def tag(self): table_cells = [] - for table_row in self.element.findall('td'): - table_cells.append(TableCellParser(table_row)) + for table_cell in self.element.findall('td'): + table_cells.append(TableCellParser(table_cell)) return TableRow(table_cells) From 94812976ee2c38f7a6b43e005f41f66dfb389a95 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:14:58 -0400 Subject: [PATCH 09/13] refs #3: Got the table parser plugged in. 
--- html2docx/core.py | 13 ++++++++++--- html2docx/tests/__init__.py | 3 +++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/html2docx/core.py b/html2docx/core.py index 2d8fd47..7fb7e0b 100644 --- a/html2docx/core.py +++ b/html2docx/core.py @@ -3,7 +3,13 @@ from jinja2 import Environment, PackageLoader from html2docx.utils import ZipFile -from html2docx.builder import ParagraphParser +from html2docx.builder import ParagraphParser, TableParser + + +tag_to_parser_conversions = { + 'p': ParagraphParser, + 'table': TableParser +} class HTML2Docx(object): @@ -47,8 +53,9 @@ def _convert(self): if el in self.visited: continue self.visited.update([el]) - if el.tag == 'p': - parser = ParagraphParser(el) + Parser = tag_to_parser_conversions.get(el.tag) + if Parser: + parser = Parser(el) self.document_state.append(parser.tag) self.visited.update(el.getiterator()) diff --git a/html2docx/tests/__init__.py b/html2docx/tests/__init__.py index e6b05fb..4bf9fda 100644 --- a/html2docx/tests/__init__.py +++ b/html2docx/tests/__init__.py @@ -56,6 +56,9 @@ class TestDocx2Html(Docx2Html): def style(*args, **kwargs): return '' + def table(self, text): + return '%s
' % text + def build_run(test_name, html): boiler_plate = '%s' From b6337eb07e560cbdf433a08b7f69e044499b1125 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:17:30 -0400 Subject: [PATCH 10/13] refs #3: Added better round trip table test cases. --- html2docx/tests/test_tables.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/html2docx/tests/test_tables.py b/html2docx/tests/test_tables.py index a40cf25..c7658da 100644 --- a/html2docx/tests/test_tables.py +++ b/html2docx/tests/test_tables.py @@ -6,6 +6,18 @@ 'Test simple table.', '
AAA
', ), + ( + 'Test multiple rows.', + '
AAA
BBB
', + ), + ( + 'Test multiple cells.', + '
AAABBB
', + ), + ( + 'Test multiple rows and cells.', + '
AAABBB
CCCDDD
', # noqa + ), ] From f862d82fc35a02f872a02909538cb0890b32bece Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:21:19 -0400 Subject: [PATCH 11/13] refs #3: Added a few more complex test cases. --- html2docx/tests/test_complex.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 html2docx/tests/test_complex.py diff --git a/html2docx/tests/test_complex.py b/html2docx/tests/test_complex.py new file mode 100644 index 0000000..3e76b82 --- /dev/null +++ b/html2docx/tests/test_complex.py @@ -0,0 +1,24 @@ +from html2docx.tests import build_run + + +test_cases = [ + ( + 'Test paragraph, table, paragraph.', + '

AAA

BBB

CCC

', + ), + ( + 'Test table, table, paragraph', + '
AAA
BBB

CCC

', # noqa + ), + # Nesting doesn't really work yet. + # ( + # 'Test Nested Table', + # '
AAA
BBB
', # noqa + # ), +] + + +def test(): + for test_name, html in test_cases: + run = build_run(test_name, html) + yield run From d3be797eb091b11395d15ab6f1e9cbde2a27e1a5 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Sun, 13 Apr 2014 13:33:08 -0400 Subject: [PATCH 12/13] refs #3: Updated the run tests script. --- run_tests.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 167ea3f..54749d5 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,5 +1,3 @@ #! /bin/sh -RUN_TESTS='nosetests -v -v --with-coverage --cover-erase --cover-package=. html2docx' -echo $RUN_TESTS -$RUN_TESTS +nosetests -v -v --with-coverage --cover-erase --cover-package=. html2docx && find -name '*.py' | xargs flake8 From 27fdfa9adafa5def7f563a3770226980788917c3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 4 Jun 2014 11:13:20 -0400 Subject: [PATCH 13/13] refs #3: Updated the run test script to do code coverage better. --- run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tests.sh b/run_tests.sh index 54749d5..5b68bf0 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,3 +1,3 @@ #! /bin/sh -nosetests -v -v --with-coverage --cover-erase --cover-package=. html2docx && find -name '*.py' | xargs flake8 +nosetests -v -v --with-coverage --cover-erase --cover-package=html2docx html2docx && find -name '*.py' | xargs flake8