From 65a76f5289626c30162298e4bda577ea7bc9c24d Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Thu, 25 Aug 2016 15:13:06 +0530 Subject: [PATCH 1/4] Don't drop internal links --- pydocx/export/html.py | 10 +++++++++- pydocx/openxml/wordprocessing/__init__.py | 2 ++ pydocx/openxml/wordprocessing/bookmark.py | 14 ++++++++++++++ pydocx/openxml/wordprocessing/paragraph.py | 8 ++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 pydocx/openxml/wordprocessing/bookmark.py diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 40498a89..01d4a2dc 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -270,6 +270,11 @@ def get_heading_tag(self, paragraph): heading_style.name.lower(), self.default_heading_level, ) + if paragraph.bookmark_name: + attrs = { + 'id': paragraph.bookmark_name + } + return HtmlTag(tag, **attrs) return HtmlTag(tag) def export_paragraph(self, paragraph): @@ -507,7 +512,10 @@ def get_hyperlink_tag(self, target_uri): def export_hyperlink(self, hyperlink): results = super(PyDocXHTMLExporter, self).export_hyperlink(hyperlink) - tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) + if hyperlink.target_uri: + tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) + else: + tag = self.get_hyperlink_tag(target_uri='#' + hyperlink.anchor) if tag: results = tag.apply(results, allow_empty=False) diff --git a/pydocx/openxml/wordprocessing/__init__.py b/pydocx/openxml/wordprocessing/__init__.py index 515f64ca..4fce72a2 100644 --- a/pydocx/openxml/wordprocessing/__init__.py +++ b/pydocx/openxml/wordprocessing/__init__.py @@ -1,6 +1,7 @@ # coding: utf-8 from pydocx.openxml.wordprocessing.abstract_num import AbstractNum from pydocx.openxml.wordprocessing.body import Body +from pydocx.openxml.wordprocessing.bookmark import Bookmark from pydocx.openxml.wordprocessing.br import Break from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.deleted_text import DeletedText @@ -47,6 +48,7 @@ __all__ = [ 'AbstractNum', 'Body', + 'Bookmark', 'Break', 'DeletedRun', 'DeletedText', diff --git a/pydocx/openxml/wordprocessing/bookmark.py b/pydocx/openxml/wordprocessing/bookmark.py new file mode 100644 index 00000000..1e7bf417 --- /dev/null +++ b/pydocx/openxml/wordprocessing/bookmark.py @@ -0,0 +1,14 @@ +# coding: utf-8 +from __future__ import ( + absolute_import, + print_function, + unicode_literals, +) + +from pydocx.models import XmlModel, XmlAttribute + + +class Bookmark(XmlModel): + XML_TAG = 'bookmarkStart' + + name = XmlAttribute(name='name') diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index af59dd7b..fe5443e3 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -16,6 +16,7 @@ from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.sdt_run import SdtRun from pydocx.openxml.wordprocessing.simple_field import SimpleField +from pydocx.openxml.wordprocessing.bookmark import Bookmark class Paragraph(XmlModel): @@ -31,6 +32,7 @@ class Paragraph(XmlModel): DeletedRun, SdtRun, SimpleField, + Bookmark ) def __init__(self, **kwargs): @@ -121,6 +123,12 @@ def runs(self): if isinstance(p_child, Run): yield p_child + @property + def bookmark_name(self): + for p_child in self.children: + if isinstance(p_child, Bookmark): + return p_child.name + def get_text(self, tab_char=None): ''' Return a string of all of the contained Text nodes concatenated From ab1056ef042a4648ae51c88583f2a845145cdfa7 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Thu, 25 Aug 2016 17:28:35 +0530 Subject: [PATCH 2/4] First check if anchor exists --- pydocx/export/html.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 01d4a2dc..070ae1f3 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -512,10 +512,10 @@ def get_hyperlink_tag(self, target_uri): def export_hyperlink(self, hyperlink): results = super(PyDocXHTMLExporter, self).export_hyperlink(hyperlink) - if hyperlink.target_uri: - tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) - else: + if not hyperlink.target_uri and hyperlink.anchor: tag = self.get_hyperlink_tag(target_uri='#' + hyperlink.anchor) + else: + tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) if tag: results = tag.apply(results, allow_empty=False) From 7e8ce845bf30a34c1d3babe2068273b98b3eb84a Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Fri, 26 Aug 2016 12:32:29 +0530 Subject: [PATCH 3/4] minor refactoring --- pydocx/export/html.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 070ae1f3..fd8ebce1 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -271,10 +271,7 @@ def get_heading_tag(self, paragraph): self.default_heading_level, ) if paragraph.bookmark_name: - attrs = { - 'id': paragraph.bookmark_name - } - return HtmlTag(tag, **attrs) + return HtmlTag(tag, id=paragraph.bookmark_name) return HtmlTag(tag) def export_paragraph(self, paragraph): From ae32fb3509e663d985a2a13019bc2396b4ee439d Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Fri, 26 Aug 2016 12:48:14 +0530 Subject: [PATCH 4/4] Add tests for internal links --- tests/export/html/test_heading.py | 27 +++++++++++++++++++++++++++ tests/export/html/test_hyperlink.py | 18 ++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/tests/export/html/test_heading.py b/tests/export/html/test_heading.py index efd7ab1c..b77dfc2d 100644 --- a/tests/export/html/test_heading.py +++ b/tests/export/html/test_heading.py @@ -744,3 +744,30 @@ def test_single_lvl_list_has_precedence_over_headings(self): ''' self.assert_document_generates_html(document, expected_html) + + def test_heading_with_bookmark(self): + document_xml = ''' +

+ + + + + + + aaa + +

+ ''' + + style_xml = ''' + + ''' + + document = WordprocessingDocumentFactory() + document.add(StyleDefinitionsPart, style_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = '

aaa

' + self.assert_document_generates_html(document, expected_html) diff --git a/tests/export/html/test_hyperlink.py b/tests/export/html/test_hyperlink.py index a88ab748..dbbe4a0c 100644 --- a/tests/export/html/test_hyperlink.py +++ b/tests/export/html/test_hyperlink.py @@ -194,3 +194,21 @@ def test_with_anchor(self): expected_html = '

link.

' self.assert_document_generates_html(document, expected_html) + + def test_internal_link(self): + document_xml = ''' +

+ + + link + + +

+ ''' + + document = WordprocessingDocumentFactory() + + document.add(MainDocumentPart, document_xml) + + expected_html = '

link

' + self.assert_document_generates_html(document, expected_html)