From 4baa9ce07e9982ec757354ca61585515df2cdd7e Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 20:29:55 +0200 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Parse=20entities=20to=20`t?= =?UTF-8?q?ext=5Fspecial`=20token?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before the `text_join` core rule joins it to the text. This stops it from being affected by typographic transforms. Implements upstream: https://github.com/markdown-it/markdown-it/commit/3fc0deb38b5a8b2eb8f46c727cc4e299e5ae5f9c --- markdown_it/rules_inline/entity.py | 67 +++++++++++++------------ tests/test_port/fixtures/smartquotes.md | 13 +++++ tests/test_port/fixtures/typographer.md | 7 +++ 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index d3b5f6bb..ec9d3965 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -16,35 +16,38 @@ def entity(state: StateInline, silent: bool) -> bool: if state.src[pos] != "&": return False - if (pos + 1) < maximum: - if state.src[pos + 1] == "#": - match = DIGITAL_RE.search(state.src[pos:]) - if match: - if not silent: - match1 = match.group(1) - code = ( - int(match1[1:], 16) - if match1[0].lower() == "x" - else int(match1, 10) - ) - state.pending += ( - fromCodePoint(code) - if isValidEntityCode(code) - else fromCodePoint(0xFFFD) - ) - - state.pos += len(match.group(0)) - return True - - else: - match = NAMED_RE.search(state.src[pos:]) - if match and match.group(1) in entities: - if not silent: - state.pending += entities[match.group(1)] - state.pos += len(match.group(0)) - return True - - if not silent: - state.pending += "&" - state.pos += 1 - return True + if pos + 1 >= maximum: + return False + + if state.src[pos + 1] == "#": + if match := DIGITAL_RE.search(state.src[pos:]): + if not silent: + match1 = match.group(1) + code = ( + int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10) + ) + + token = state.push("text_special", "", 0) + token.content = ( + fromCodePoint(code) + if isValidEntityCode(code) + else fromCodePoint(0xFFFD) + ) + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + else: + if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities: + if not silent: + token = state.push("text_special", "", 0) + token.content = entities[match.group(1)] + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + return False diff --git a/tests/test_port/fixtures/smartquotes.md b/tests/test_port/fixtures/smartquotes.md index e77175aa..8ed314e2 100644 --- a/tests/test_port/fixtures/smartquotes.md +++ b/tests/test_port/fixtures/smartquotes.md @@ -177,3 +177,16 @@ Should be escapable:

"foo"

"foo"

. + +Should not replace entities: +. +"foo" + +"foo" + +"foo" +. +

"foo"

+

"foo"

+

"foo"

+. diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index 59e48941..d72a7c2f 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -130,3 +130,10 @@ regression tests for #624

1–2–3

1 – – 3

. + +shouldn't replace entities +. +(c) (c) (c) +. +

(c) (c) ©

+.