diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index e13604f1..c24e0ae8 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -13,6 +13,17 @@ # http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx EMUS_PER_PIXEL = 9525 USE_ALIGNMENTS = True +TAGS_CONTAINING_CONTENT = ( + 't', + 'pict', + 'drawing', + 'delText', + 'ins', +) +TAGS_HOLDING_CONTENT_TAGS = ( + 'p', + 'tbl', +) def remove_namespaces(document): # remove namespaces @@ -332,17 +343,14 @@ def _set_headers(self, elements): element.heading_level = headers[style.lower()] def _set_next(self, body): - def _get_children(el): + def _get_children_with_content(el): # We only care about children if they have text in them. children = [] - for child in self._filter_children(el, ['p', 'tbl']): - has_descendant_with_tag = False - if child.has_descendant_with_tag('t'): - has_descendant_with_tag = True - if child.has_descendant_with_tag('pict'): - has_descendant_with_tag = True - if child.has_descendant_with_tag('drawing'): - has_descendant_with_tag = True + for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS): + has_descendant_with_tag = any( + child.has_descendant_with_tag(tag) for + tag in TAGS_CONTAINING_CONTENT + ) if has_descendant_with_tag: children.append(child) return children @@ -361,11 +369,11 @@ def _assign_next(children): except IndexError: pass # Assign next for everything in the root. - _assign_next(_get_children(body)) + _assign_next(_get_children_with_content(body)) # In addition set next for everything in table cells. for tc in body.find_all('tc'): - _assign_next(_get_children(tc)) + _assign_next(_get_children_with_content(tc)) def parse_begin(self, el): self._set_list_attributes(el) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 2c19f369..3a011159 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -2,6 +2,7 @@ from pydocx.DocxParser import EMUS_PER_PIXEL templates = { + 'delete': 'text_delete.xml', 'drawing': 'drawing.xml', 'hyperlink': 'hyperlink.xml', 'insert': 'insert.xml', @@ -78,6 +79,14 @@ def insert_tag(self, run_tags): } return template.render(**kwargs) + @classmethod + def delete_tag(self, deleted_texts): + template = env.get_template(templates['delete']) + kwargs = { + 'deleted_texts': deleted_texts, + } + return template.render(**kwargs) + @classmethod def smart_tag(self, run_tags): template = env.get_template(templates['smartTag']) diff --git a/pydocx/tests/templates/text_delete.xml b/pydocx/tests/templates/text_delete.xml new file mode 100644 index 00000000..783b3ad3 --- /dev/null +++ b/pydocx/tests/templates/text_delete.xml @@ -0,0 +1,10 @@ + + {% for deleted_text in deleted_texts %} + + + + + {{ deleted_text }} + + {% endfor %} + diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index c1a5bf8a..801f4210 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -718,6 +718,80 @@ def get_xml(self): return xml +class DeleteTagInList(_TranslationTestCase): + expected_output = ''' + +
    +
  1. AAA
    + BBB +
  2. +
  3. CCC
  4. +
+ + ''' + + def get_xml(self): + delete_tags = DXB.delete_tag(['BBB']) + p_tag = DXB.p_tag([delete_tags]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + +class InsertTagInList(_TranslationTestCase): + expected_output = ''' + +
    +
  1. AAA
    + BBB +
  2. +
  3. CCC
  4. +
+ + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'BBB'] + insert_tags = DXB.insert_tag(run_tags) + p_tag = DXB.p_tag([insert_tags]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + +class SmartTagInList(_TranslationTestCase): + expected_output = ''' + +
    +
  1. AAA
    + BBB +
  2. +
  3. CCC
  4. +
+ + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'BBB'] + smart_tag = DXB.smart_tag(run_tags) + p_tag = DXB.p_tag([smart_tag]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + class SingleListItem(_TranslationTestCase): expected_output = ''' @@ -767,6 +841,7 @@ def get_xml(self): [DXB.p_tag('Fourth')], ), merge=True) body = table + xml = DXB.xml(body) return xml @@ -793,7 +868,6 @@ def get_xml(self): for text, ilvl, numId in li_text: lis += DXB.li(text=text, ilvl=ilvl, numId=numId) body = lis - xml = DXB.xml(body) return xml