diff --git a/html2docx/builder.py b/html2docx/builder.py
index 9b60392..a6fed8e 100644
--- a/html2docx/builder.py
+++ b/html2docx/builder.py
@@ -7,11 +7,8 @@ def xml(self):
return cElementTree.tostring(self.tree)
-class ParagraphParser(object):
- html_to_ooxml_tag_conversions = {
- 'strong': 'bold',
- 'em': 'italics',
- }
+class BaseParser(object):
+ abstract = True
def __init__(self, element):
self.element = element
@@ -39,6 +36,13 @@ def _parse(self, element, styles):
if element.tail:
yield element.tail, styles[-1]
+
+class ParagraphParser(BaseParser):
+ html_to_ooxml_tag_conversions = {
+ 'strong': 'bold',
+ 'em': 'italics',
+ }
+
def build_runs(self):
for text, styles in self.parse(self.element):
run = Run(text)
@@ -46,10 +50,6 @@ def build_runs(self):
ooxml_style = self.html_to_ooxml_tag_conversions.get(style)
if ooxml_style:
setattr(run.properties, ooxml_style, True)
- if 'strong' in styles:
- run.properties.bold = True
- if 'em' in styles:
- run.properties.italics = True
yield run
@property
@@ -130,3 +130,75 @@ def italics(self, value):
self._italics = True
else:
self._italics = False
+
+
+class TableParser(BaseParser):
+    """Build a ``Table`` from the direct ``tr`` children of an element."""
+
+    @property
+    def tag(self):
+        rows = [TableRowParser(tr) for tr in self.element.findall('tr')]
+        return Table(rows)
+
+
+class Table(BaseTag):
+    """``w:tbl`` tag; ``table_rows`` of ``None`` yields an empty table."""
+    tag_name = 'w:tbl'
+
+    def __init__(self, table_rows=None):
+        self.table_rows = table_rows
+
+    @property
+    def tree(self):
+        root = cElementTree.Element(self.tag_name)
+        rows = () if self.table_rows is None else self.table_rows
+        for row in rows:
+            root.append(row.tag.tree)
+        return root
+
+
+class TableRowParser(BaseParser):
+    """Build a ``TableRow`` from the direct ``td`` children of an element."""
+
+    @property
+    def tag(self):
+        cells = [TableCellParser(td) for td in self.element.findall('td')]
+        return TableRow(cells)
+
+
+class TableRow(BaseTag):
+    """``w:tr`` tag; ``table_cells`` of ``None`` yields an empty row."""
+    tag_name = 'w:tr'
+
+    def __init__(self, table_cells=None):
+        self.table_cells = table_cells
+
+    @property
+    def tree(self):
+        row = cElementTree.Element(self.tag_name)
+        cells = () if self.table_cells is None else self.table_cells
+        for cell in cells:
+            row.append(cell.tag.tree)
+        return row
+
+
+class TableCellParser(BaseParser):
+    """Build a ``TableCell`` whose content is the element as a paragraph."""
+    @property
+    def tag(self):
+        return TableCell(ParagraphParser(self.element))
+
+
+class TableCell(BaseTag):
+    """``w:tc`` tag wrapping the tree of one optional content parser."""
+    tag_name = 'w:tc'
+
+    def __init__(self, element=None):
+        self.element = element
+
+    @property
+    def tree(self):
+        cell = cElementTree.Element(self.tag_name)
+        if self.element is not None:  # no content: leave the cell empty
+            cell.append(self.element.tag.tree)
+        return cell
diff --git a/html2docx/core.py b/html2docx/core.py
index 2d8fd47..7fb7e0b 100644
--- a/html2docx/core.py
+++ b/html2docx/core.py
@@ -3,7 +3,13 @@
from jinja2 import Environment, PackageLoader
from html2docx.utils import ZipFile
-from html2docx.builder import ParagraphParser
+from html2docx.builder import ParagraphParser, TableParser
+
+
+tag_to_parser_conversions = {  # HTML tag name -> parser class for it
+    'p': ParagraphParser,
+    'table': TableParser,
+}
class HTML2Docx(object):
@@ -47,8 +53,9 @@ def _convert(self):
if el in self.visited:
continue
self.visited.update([el])
- if el.tag == 'p':
- parser = ParagraphParser(el)
+ Parser = tag_to_parser_conversions.get(el.tag)
+ if Parser:
+ parser = Parser(el)
self.document_state.append(parser.tag)
self.visited.update(el.getiterator())
diff --git a/html2docx/tests/__init__.py b/html2docx/tests/__init__.py
index e6b05fb..4bf9fda 100644
--- a/html2docx/tests/__init__.py
+++ b/html2docx/tests/__init__.py
@@ -56,6 +56,9 @@ class TestDocx2Html(Docx2Html):
def style(*args, **kwargs):
return ''
+ def table(self, text):
+ return '
' % text
+
def build_run(test_name, html):
boiler_plate = '%s'
diff --git a/html2docx/tests/test_builder.py b/html2docx/tests/test_builder.py
index ceb3c49..fc8324a 100644
--- a/html2docx/tests/test_builder.py
+++ b/html2docx/tests/test_builder.py
@@ -1,7 +1,17 @@
from xml.etree import cElementTree
from unittest import TestCase
-from html2docx.builder import RunProperties, ParagraphParser, Paragraph
+from html2docx.builder import (
+ Paragraph,
+ ParagraphParser,
+ RunProperties,
+ Table,
+ TableCell,
+ TableCellParser,
+ TableParser,
+ TableRow,
+ TableRowParser,
+)
class RunPropertiesTestCase(TestCase):
@@ -75,3 +85,100 @@ def test_empty(self):
xml = paragraph.xml
self.assertEqual(xml, expected_xml)
+
+
+class TableCellParserTestCase(TestCase):
+ def test_simple(self):
+ element = cElementTree.fromstring('AAA | ')
+ parser = TableCellParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAA' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+ def test_with_style(self):
+ element = cElementTree.fromstring('AAA | ')
+ parser = TableCellParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAA' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+
+class TableCellTestCase(TestCase):
+ def test_empty(self):
+ table_cell = TableCell()
+ expected_xml = ''
+
+ xml = table_cell.xml
+ self.assertEqual(xml, expected_xml)
+
+
+class TableRowParserTestCase(TestCase):
+ def test_simple(self):
+ element = cElementTree.fromstring('| AAA |
')
+ parser = TableRowParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAA' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+ def test_with_style(self):
+ element = cElementTree.fromstring('| AAA |
') # noqa
+ parser = TableRowParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAA' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+ def test_multiple_cells(self):
+ element = cElementTree.fromstring('| AAA | BBB |
')
+ parser = TableRowParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAABBB' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+
+class TableRowTestCase(TestCase):
+ def test_empty(self):
+ table_row = TableRow()
+ expected_xml = ''
+
+ xml = table_row.xml
+ self.assertEqual(xml, expected_xml)
+
+
+class TableParserTestCase(TestCase):
+ def test_simple(self):
+ element = cElementTree.fromstring('') # noqa
+ parser = TableParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAA' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+ def test_with_style(self):
+ element = cElementTree.fromstring('') # noqa
+ parser = TableParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAA' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+ def test_multiple_cells(self):
+ element = cElementTree.fromstring('') # noqa
+ parser = TableParser(element)
+ xml = parser.tag.xml
+ expected_xml = 'AAABBBCCCDDD' # noqa
+
+ self.assertEqual(xml, expected_xml)
+
+
+class TableTestCase(TestCase):
+    def test_empty(self):
+        """Check the XML of a ``Table`` constructed with no rows."""
+        table = Table()
+        expected_xml = ''
+        xml = table.xml
+        self.assertEqual(xml, expected_xml)
diff --git a/html2docx/tests/test_complex.py b/html2docx/tests/test_complex.py
new file mode 100644
index 0000000..3e76b82
--- /dev/null
+++ b/html2docx/tests/test_complex.py
@@ -0,0 +1,24 @@
+from html2docx.tests import build_run
+
+
+test_cases = [
+ (
+ 'Test paragraph, table, paragraph.',
+ 'AAA
CCC
',
+ ),
+ (
+ 'Test table, table, paragraph',
+ 'CCC
', # noqa
+ ),
+ # Nesting doesn't really work yet.
+ # (
+ # 'Test Nested Table',
+ # '', # noqa
+ # ),
+]
+
+
+def test():
+    """Yield ``build_run``'s result for every entry in ``test_cases``."""
+    for name, markup in test_cases:
+        yield build_run(name, markup)
diff --git a/html2docx/tests/test_tables.py b/html2docx/tests/test_tables.py
new file mode 100644
index 0000000..c7658da
--- /dev/null
+++ b/html2docx/tests/test_tables.py
@@ -0,0 +1,27 @@
+from html2docx.tests import build_run
+
+
+test_cases = [
+ (
+ 'Test simple table.',
+ '',
+ ),
+ (
+ 'Test multiple rows.',
+ '',
+ ),
+ (
+ 'Test multiple cells.',
+ '',
+ ),
+ (
+ 'Test multiple rows and cells.',
+ '', # noqa
+ ),
+]
+
+
+def test():
+    """Yield ``build_run``'s result for every entry in ``test_cases``."""
+    for name, markup in test_cases:
+        yield build_run(name, markup)
diff --git a/run_tests.sh b/run_tests.sh
index 167ea3f..5b68bf0 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -1,5 +1,3 @@
#! /bin/sh
-RUN_TESTS='nosetests -v -v --with-coverage --cover-erase --cover-package=. html2docx'
-echo $RUN_TESTS
-$RUN_TESTS
+nosetests -v -v --with-coverage --cover-erase --cover-package=html2docx html2docx && find . -name '*.py' -exec flake8 {} +  # explicit "." start path: required by POSIX/BSD find; -exec avoids xargs word-splitting