From 314a6ba6403a32aef35d7d14d64455cb5733c74f Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 17 May 2013 11:34:41 -0400 Subject: [PATCH 1/5] refs #25: added a test showing the ordering bug --- pydocx/tests/document_builder.py | 9 +++++++++ pydocx/tests/templates/text_delete.xml | 10 ++++++++++ pydocx/tests/test_xml.py | 23 +++++++++++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 pydocx/tests/templates/text_delete.xml diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 7286cc8e..c4520853 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -2,6 +2,7 @@ from pydocx.DocxParser import EMUS_PER_PIXEL templates = { + 'delete': 'text_delete.xml', 'drawing': 'drawing.xml', 'hyperlink': 'hyperlink.xml', 'insert': 'insert.xml', @@ -77,6 +78,14 @@ def insert_tag(self, run_tags): } return template.render(**kwargs) + @classmethod + def delete_tag(self, deleted_texts): + template = env.get_template(templates['delete']) + kwargs = { + 'deleted_texts': deleted_texts, + } + return template.render(**kwargs) + @classmethod def smart_tag(self, run_tags): template = env.get_template(templates['smartTag']) diff --git a/pydocx/tests/templates/text_delete.xml b/pydocx/tests/templates/text_delete.xml new file mode 100644 index 00000000..783b3ad3 --- /dev/null +++ b/pydocx/tests/templates/text_delete.xml @@ -0,0 +1,10 @@ + + {% for deleted_text in deleted_texts %} + + + + + {{ deleted_text }} + + {% endfor %} + diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 8b6d04aa..767c4908 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -716,3 +716,26 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class DeleteTagInList(_TranslationTestCase): + expected_output = ''' + +
    +
  1. AAA
    + BBB +
  2. +
  3. CCC
  4. +
+ + ''' + + def get_xml(self): + delete_tags = DXB.delete_tag(['BBB']) + p_tag = DXB.p_tag([delete_tags]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + xml = DXB.xml(body) + return xml From 3e7b18eae7e5204185e073b555cbe511ebc6458e Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 17 May 2013 11:34:52 -0400 Subject: [PATCH 2/5] refs #25: fixed the ordering bug. --- pydocx/DocxParser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 1654b912..4a9e9ee2 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -311,6 +311,8 @@ def _get_children(el): has_descendant_with_tag = True if child.has_descendant_with_tag('drawing'): has_descendant_with_tag = True + if child.has_descendant_with_tag('delText'): + has_descendant_with_tag = True if has_descendant_with_tag: children.append(child) return children From 0e96655910a8f85bcdad8b2d2982b8c4366e8ca2 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 11:14:35 -0400 Subject: [PATCH 3/5] refs #25: added a test showing that inserted text in lists is still broken --- pydocx/tests/test_xml.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 1f2c35a9..124c4d43 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -742,6 +742,31 @@ def get_xml(self): return xml +class InsertTagInList(_TranslationTestCase): + expected_output = ''' + +
    +
  1. AAA
    + BBB +
  2. +
  3. CCC
  4. +
+ + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'BBB'] + insert_tags = DXB.insert_tag(run_tags) + p_tag = DXB.p_tag([insert_tags]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + class SingleListItem(_TranslationTestCase): expected_output = ''' From 2f0ed87b0528ed0bfa94396bea648457b914f0c4 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 11:14:51 -0400 Subject: [PATCH 4/5] refs #25: refactor and fixed inserted text in lists --- pydocx/DocxParser.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 2d21111f..c24e0ae8 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -13,6 +13,17 @@ # http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx EMUS_PER_PIXEL = 9525 USE_ALIGNMENTS = True +TAGS_CONTAINING_CONTENT = ( + 't', + 'pict', + 'drawing', + 'delText', + 'ins', +) +TAGS_HOLDING_CONTENT_TAGS = ( + 'p', + 'tbl', +) def remove_namespaces(document): # remove namespaces @@ -332,19 +343,14 @@ def _set_headers(self, elements): element.heading_level = headers[style.lower()] def _set_next(self, body): - def _get_children(el): + def _get_children_with_content(el): # We only care about children if they have text in them. children = [] - for child in self._filter_children(el, ['p', 'tbl']): - has_descendant_with_tag = False - if child.has_descendant_with_tag('t'): - has_descendant_with_tag = True - if child.has_descendant_with_tag('pict'): - has_descendant_with_tag = True - if child.has_descendant_with_tag('drawing'): - has_descendant_with_tag = True - if child.has_descendant_with_tag('delText'): - has_descendant_with_tag = True + for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS): + has_descendant_with_tag = any( + child.has_descendant_with_tag(tag) for + tag in TAGS_CONTAINING_CONTENT + ) if has_descendant_with_tag: children.append(child) return children @@ -363,11 +369,11 @@ def _assign_next(children): except IndexError: pass # Assign next for everything in the root. - _assign_next(_get_children(body)) + _assign_next(_get_children_with_content(body)) # In addition set next for everything in table cells. for tc in body.find_all('tc'): - _assign_next(_get_children(tc)) + _assign_next(_get_children_with_content(tc)) def parse_begin(self, el): self._set_list_attributes(el) From bc4196be377c2486d81ccb32879d78943179c945 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 11:19:22 -0400 Subject: [PATCH 5/5] refs #25: added a test showing that smart tags in a list work fine. --- pydocx/tests/test_xml.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 124c4d43..801f4210 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -767,6 +767,31 @@ def get_xml(self): return xml +class SmartTagInList(_TranslationTestCase): + expected_output = ''' + +
    +
  1. AAA
    + BBB +
  2. +
  3. CCC
  4. +
+ + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'BBB'] + smart_tag = DXB.smart_tag(run_tags) + p_tag = DXB.p_tag([smart_tag]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + class SingleListItem(_TranslationTestCase): expected_output = '''