From 1af481f90ce877bd6db2ed5e05379c6db5ab4e4c Mon Sep 17 00:00:00 2001 From: Ethan Date: Fri, 22 Jul 2022 17:02:22 +0800 Subject: [PATCH] add_hyperlink --- docs/dev/analysis/features/text/hyperlink.rst | 301 ++++++++++++++++++ docs/dev/analysis/features/text/index.rst | 1 + docs/user/hyperlink.rst | 199 ++++++++++++ docx/oxml/__init__.py | 13 +- docx/oxml/text/hyperlink.py | 37 +++ docx/oxml/text/paragraph.py | 1 + docx/text/hyperlink.py | 54 ++++ docx/text/paragraph.py | 26 +- 8 files changed, 618 insertions(+), 14 deletions(-) create mode 100644 docs/dev/analysis/features/text/hyperlink.rst create mode 100644 docs/user/hyperlink.rst create mode 100644 docx/oxml/text/hyperlink.py create mode 100644 docx/text/hyperlink.py diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 00000000..aa8788da --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,301 @@ + +Hyperlink +========= + +Word allows hyperlinks to be placed in a document. + +The target of a hyperlink may be external, such as a web site, or internal, +to another location in the document. + +A hyperlink can contain multiple runs of text, each with its own distinct +text formatting (font). + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal +hyperlink has only an anchor. + +.. 
highlight:: python + +**Add the external hyperlink** `http://us.com#about`:: + + >>> hyperlink = paragraph.add_hyperlink('About', address='http://us.com', anchor='about') + >>> hyperlink + <docx.text.hyperlink.Hyperlink object at 0x...> + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.anchor + 'about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', anchor='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.anchor + 'Section_1' + >>> hyperlink.address + None + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' + >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [<docx.text.run.Run object at 0x...>, + <docx.text.run.Run object at 0x...>, + <docx.text.run.Run object at 0x...>] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()] + [<docx.text.run.Run object at 0x...>, <docx.text.hyperlink.Hyperlink object at 0x...>] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. 
+ +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other +stuff, including nested hyperlinks, at least as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/index.rst b/docs/dev/analysis/features/text/index.rst index 2fff0392..c87732c1 100644 --- a/docs/dev/analysis/features/text/index.rst +++ b/docs/dev/analysis/features/text/index.rst @@ -13,3 +13,4 @@ Text underline run-content breaks + hyperlink diff --git a/docs/user/hyperlink.rst b/docs/user/hyperlink.rst new file mode 100644 index 00000000..ef8a717c --- /dev/null +++ b/docs/user/hyperlink.rst @@ -0,0 +1,199 @@ +Hyperlink +========= + +Word allows hyperlinks to be placed in a document. + +The target of a hyperlink may be external, such as a web site, or internal, +to another location in the document. + +A hyperlink can contain multiple runs of text, each with its own distinct +text formatting (font). + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal +hyperlink has only an anchor. + +.. highlight:: python + +**Add the external hyperlink** `http://us.com#about`:: + + >>> hyperlink = paragraph.add_hyperlink('About', address='http://us.com', anchor='about') + >>> hyperlink + + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.anchor + 'about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', anchor='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.anchor + 'Section_1' + >>> hyperlink.address + None + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' 
+ >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [<docx.text.run.Run object at 0x...>, + <docx.text.run.Run object at 0x...>, + <docx.text.run.Run object at 0x...>] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()] + [<docx.text.run.Run object at 0x...>, <docx.text.hyperlink.Hyperlink object at 0x...>] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. 
highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other +stuff, including nested hyperlinks, at least as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py index 25994bf4..6e5b34b3 100644 --- a/docx/oxml/__init__.py +++ b/docx/oxml/__init__.py @@ -194,14 +194,7 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls("w:vMerge", CT_VMerge) from .text.font import CT_Highlight # noqa -from .text.font import ( - CT_Color, - CT_Fonts, - CT_HpsMeasure, - CT_RPr, - CT_Underline, - CT_VerticalAlignRun, -) +from .text.font import CT_Color, CT_Fonts, CT_HpsMeasure, CT_RPr, CT_Underline, CT_VerticalAlignRun register_element_cls("w:b", CT_OnOff) register_element_cls("w:bCs", CT_OnOff) @@ -236,6 +229,10 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls("w:p", CT_P) +from .text.hyperlink import CT_Hyperlink + +register_element_cls("w:hyperlink", CT_Hyperlink) + from .text.parfmt import CT_Spacing # noqa from .text.parfmt import CT_Ind, CT_Jc, CT_PPr, CT_TabStop, CT_TabStops diff --git 
a/docx/oxml/text/hyperlink.py b/docx/oxml/text/hyperlink.py new file mode 100644 index 00000000..f8ffa3dc --- /dev/null +++ b/docx/oxml/text/hyperlink.py @@ -0,0 +1,37 @@ +""" +Custom element classes related to hyperlinks (CT_Hyperlink). +""" + +from ..ns import qn +from ..simpletypes import ST_RelationshipId, ST_String +from ..xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore + + +class CT_Hyperlink(BaseOxmlElement): + """ + ``<w:hyperlink>`` element, containing the properties and text for an external hyperlink. + """ + + r = ZeroOrMore("w:r") + rid = OptionalAttribute("r:id", ST_RelationshipId) + anchor = OptionalAttribute("w:anchor", ST_String) + + @property + def relationship(self): + """ + String contained in ``r:id`` attribute of ``<w:hyperlink>``. It should + point to a URL in the document's relationships. + """ + val = self.get(qn("r:id")) + return val + + @relationship.setter + def relationship(self, rId): + self.set(qn("r:id"), rId) + + def clear_content(self): + """ + Remove all child elements. + """ + for child in self[:]: + self.remove(child) diff --git a/docx/oxml/text/paragraph.py b/docx/oxml/text/paragraph.py index 8386420f..2f57aa22 100644 --- a/docx/oxml/text/paragraph.py +++ b/docx/oxml/text/paragraph.py @@ -15,6 +15,7 @@ class CT_P(BaseOxmlElement): pPr = ZeroOrOne("w:pPr") r = ZeroOrMore("w:r") + hyperlink = ZeroOrMore("w:hyperlink") def _insert_pPr(self, pPr): self.insert(0, pPr) diff --git a/docx/text/hyperlink.py b/docx/text/hyperlink.py new file mode 100644 index 00000000..ff5d5ca5 --- /dev/null +++ b/docx/text/hyperlink.py @@ -0,0 +1,54 @@ +""" +Hyperlink proxy objects. +""" +from ..opc.constants import RELATIONSHIP_TYPE as RT +from ..shared import Parented +from .run import Run + + +class Hyperlink(Parented): + """ + Proxy object wrapping ``<w:hyperlink>`` element. 
+ """ + + def __init__(self, hyperlink, parent): + super(Hyperlink, self).__init__(parent) + self._hyperlink = self.element = hyperlink + + @property + def address(self): + rId = self._hyperlink.relationship + return self.part.target_ref(rId) if rId else None + + @address.setter + def address(self, url): + rId = self.part.relate_to(url, RT.HYPERLINK, is_external=True) + self._hyperlink.relationship = rId + + @property + def anchor(self): + return self._hyperlink.anchor + + @anchor.setter + def anchor(self, anchor): + self._hyperlink.anchor = anchor + + def iter_runs(self): + return [Run(r, self) for r in self._hyperlink.r_lst] + + def insert_run(self, text, style=None): + _r = self._hyperlink.add_r() + run = Run(_r, self) + run.text = text + if style: + run.style = style + return run + + @property + def text(self): + return "".join([run.text for run in self.iter_runs()]) + + @text.setter + def text(self, text): + self._hyperlink.clear_content() + self.insert_run(text) diff --git a/docx/text/paragraph.py b/docx/text/paragraph.py index d349e537..3bfdb74f 100644 --- a/docx/text/paragraph.py +++ b/docx/text/paragraph.py @@ -1,15 +1,11 @@ -# encoding: utf-8 - """ Paragraph-related proxy types. 
""" - -from __future__ import absolute_import, division, print_function, unicode_literals - from ..enum.style import WD_STYLE_TYPE +from ..shared import Parented +from .hyperlink import Hyperlink from .parfmt import ParagraphFormat from .run import Run -from ..shared import Parented class Paragraph(Parented): @@ -21,6 +17,24 @@ def __init__(self, p, parent): super(Paragraph, self).__init__(parent) self._p = self._element = p + def add_hyperlink(self, text, address=None, anchor=None, style=None): + + _h = self._p.add_hyperlink() + _r = _h.add_r() + hyperlink = Hyperlink(_h, self) + run = Run(_r, hyperlink) + + run.text = text + if style: + run.style = style + + if address: + hyperlink.address = address + if anchor: + hyperlink.anchor = anchor + + return hyperlink + def add_run(self, text=None, style=None): """ Append a run to this paragraph containing *text* and having character