diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py
index e13604f1..c24e0ae8 100644
--- a/pydocx/DocxParser.py
+++ b/pydocx/DocxParser.py
@@ -13,6 +13,17 @@
# http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx
EMUS_PER_PIXEL = 9525
USE_ALIGNMENTS = True
+TAGS_CONTAINING_CONTENT = (  # a child with any of these as a descendant is kept by _set_next
+ 't',
+ 'pict',
+ 'drawing',
+ 'delText',
+ 'ins',
+)
+TAGS_HOLDING_CONTENT_TAGS = (  # child tag names passed to _filter_children in _set_next
+ 'p',
+ 'tbl',
+)
def remove_namespaces(document): # remove namespaces
@@ -332,17 +343,14 @@ def _set_headers(self, elements):
element.heading_level = headers[style.lower()]
def _set_next(self, body):
-        def _get_children(el):
+        def _get_children_with_content(el):
-            # We only care about children if they have text in them.
+            # Keep only children with a descendant in TAGS_CONTAINING_CONTENT.
             children = []
-            for child in self._filter_children(el, ['p', 'tbl']):
-                has_descendant_with_tag = False
-                if child.has_descendant_with_tag('t'):
-                    has_descendant_with_tag = True
-                if child.has_descendant_with_tag('pict'):
-                    has_descendant_with_tag = True
-                if child.has_descendant_with_tag('drawing'):
-                    has_descendant_with_tag = True
+            for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS):
+                has_content = any(
+                    child.has_descendant_with_tag(tag) for
+                    tag in TAGS_CONTAINING_CONTENT
+                )
-                if has_descendant_with_tag:
+                if has_content:
                     children.append(child)
             return children
@@ -361,11 +369,11 @@ def _assign_next(children):
except IndexError:
pass
# Assign next for everything in the root.
- _assign_next(_get_children(body))
+ _assign_next(_get_children_with_content(body))
# In addition set next for everything in table cells.
for tc in body.find_all('tc'):
- _assign_next(_get_children(tc))
+ _assign_next(_get_children_with_content(tc))
def parse_begin(self, el):
self._set_list_attributes(el)
diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py
index 2c19f369..3a011159 100644
--- a/pydocx/tests/document_builder.py
+++ b/pydocx/tests/document_builder.py
@@ -2,6 +2,7 @@
from pydocx.DocxParser import EMUS_PER_PIXEL
templates = {
+ 'delete': 'text_delete.xml',
'drawing': 'drawing.xml',
'hyperlink': 'hyperlink.xml',
'insert': 'insert.xml',
@@ -78,6 +79,14 @@ def insert_tag(self, run_tags):
}
return template.render(**kwargs)
+    @classmethod
+    def delete_tag(self, deleted_texts):
+        # Render the 'delete' template with the given deleted texts.
+        delete_template = env.get_template(templates['delete'])
+        return delete_template.render(
+            deleted_texts=deleted_texts,
+        )
+
@classmethod
def smart_tag(self, run_tags):
template = env.get_template(templates['smartTag'])
diff --git a/pydocx/tests/templates/text_delete.xml b/pydocx/tests/templates/text_delete.xml
new file mode 100644
index 00000000..783b3ad3
--- /dev/null
+++ b/pydocx/tests/templates/text_delete.xml
@@ -0,0 +1,10 @@
+