From 0b1038318c015a63846527e66e8c65f4efcb3c98 Mon Sep 17 00:00:00 2001
From: Crozzers
Date: Sat, 22 Oct 2022 18:19:41 +0100
Subject: [PATCH 1/3] Fix hashing nested HTML blocks
Nested HTML blocks of the same type (e.g. nested divs) would be hashed incorrectly
because `_strict_tag_block_re` would match the FIRST closing tag and ignore any subsequent
closing tags. This commit fixes this behaviour by iterating over all the lines in a
given text, manually tallying up the number of opening/closing tags, and then hashing
the relevant block.
---
lib/markdown2.py | 44 +++++++++++++++++++++-
test/tm-cases/hash_nested_html_blocks.html | 7 ++++
test/tm-cases/hash_nested_html_blocks.opts | 1 +
test/tm-cases/hash_nested_html_blocks.tags | 1 +
test/tm-cases/hash_nested_html_blocks.text | 5 +++
5 files changed, 56 insertions(+), 2 deletions(-)
create mode 100644 test/tm-cases/hash_nested_html_blocks.html
create mode 100644 test/tm-cases/hash_nested_html_blocks.opts
create mode 100644 test/tm-cases/hash_nested_html_blocks.tags
create mode 100644 test/tm-cases/hash_nested_html_blocks.text
diff --git a/lib/markdown2.py b/lib/markdown2.py
index e450dca5..fe5c88ab 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -741,7 +741,11 @@ def _detab(self, text):
_html_markdown_attr_re = re.compile(
r'''\s+markdown=("1"|'1')''')
def _hash_html_block_sub(self, match, raw=False):
- html = match.group(1)
+ if isinstance(match, str):
+ html = match
+ else:
+ html = match.group(1)
+
if raw and self.safe_mode:
html = self._sanitize_html(html)
elif 'markdown-in-html' in self.extras and 'markdown=' in html:
@@ -792,7 +796,7 @@ def _hash_html_blocks(self, text, raw=False):
# the inner nested divs must be indented.
# We need to do this before the next, more liberal match, because the next
# match will start at the first `<div>` and stop at the first `</div>`.
- text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
+ text = self._strict_tag_block_sub(text, self._block_tags_a, hash_html_block_sub)
# Now match more liberally, simply from `\n<tag>` to `</tag>\n`
text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
@@ -871,6 +875,42 @@ def _hash_html_blocks(self, text, raw=False):
return text
+ def _strict_tag_block_sub(self, text, html_tags_re, callback):
+ tag_count = 0
+ current_tag = html_tags_re
+ block = ''
+ result = ''
+
+ for chunk in text.splitlines(True):
+ is_markup = re.match(r'^(?(%s)\b>?)' % current_tag, chunk)
+ block += chunk
+
+ if is_markup:
+ if is_markup.group(2) == 'pre':
+ is_markup = None
+ else:
+ if chunk.startswith(''):
+ tag_count -= 1
+ else:
+ # if close tag is in same line
+ if '%s>' % is_markup.group(2) in chunk[is_markup.end():]:
+ # we must ignore these
+ is_markup = None
+ else:
+ tag_count += 1
+ current_tag = is_markup.group(2)
+
+ if tag_count == 0:
+ if is_markup:
+ block = callback(block.rstrip('\n')) # remove trailing newline
+ current_tag = html_tags_re
+ result += block
+ block = ''
+
+ result += block
+
+ return result
+
def _strip_link_definitions(self, text):
# Strips link definitions from text, stores the URLs and titles in
# hash references.
diff --git a/test/tm-cases/hash_nested_html_blocks.html b/test/tm-cases/hash_nested_html_blocks.html
new file mode 100644
index 00000000..1e7bc4cb
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.html
@@ -0,0 +1,7 @@
+
diff --git a/test/tm-cases/hash_nested_html_blocks.opts b/test/tm-cases/hash_nested_html_blocks.opts
new file mode 100644
index 00000000..9e6bd79a
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.opts
@@ -0,0 +1 @@
+{"extras": ["fenced-code-blocks", "pygments"]}
\ No newline at end of file
diff --git a/test/tm-cases/hash_nested_html_blocks.tags b/test/tm-cases/hash_nested_html_blocks.tags
new file mode 100644
index 00000000..2c03fb5d
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.tags
@@ -0,0 +1 @@
+extra fenced-code-blocks pygments
diff --git a/test/tm-cases/hash_nested_html_blocks.text b/test/tm-cases/hash_nested_html_blocks.text
new file mode 100644
index 00000000..f5ad8bd1
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.text
@@ -0,0 +1,5 @@
+
+```python
+x = 1
+```
+
\ No newline at end of file
From 240dd359db5b370651093098b14caaf2d8bd7654 Mon Sep 17 00:00:00 2001
From: Crozzers
Date: Wed, 26 Oct 2022 21:51:20 +0100
Subject: [PATCH 2/3] Fix tests not passing due to list items incorrectly
wrapped in `<p>` tags
---
test/tm-cases/sublist-para.html | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test/tm-cases/sublist-para.html b/test/tm-cases/sublist-para.html
index a2f551b1..484e67e4 100644
--- a/test/tm-cases/sublist-para.html
+++ b/test/tm-cases/sublist-para.html
@@ -11,7 +11,7 @@
Add Komodo chrome (XUL, JavaScript, CSS, DTDs).
-What this means is that work on and add significant functionality...
+What this means is that work on and add significant functionality...
Komodo uses the same extension mechanisms as Firefox...
Komodo builds and runs on Windows, Linux and ...
-
+
From 1ab08d0e5e7222782082c51489bc1eaa58e54f39 Mon Sep 17 00:00:00 2001
From: Crozzers
Date: Wed, 26 Oct 2022 22:04:16 +0100
Subject: [PATCH 3/3] Fix `_strict_tag_block_sub` skipping all `<pre>` blocks
---
lib/markdown2.py | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/lib/markdown2.py b/lib/markdown2.py
index fe5c88ab..b61d03a9 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -882,23 +882,20 @@ def _strict_tag_block_sub(self, text, html_tags_re, callback):
result = ''
for chunk in text.splitlines(True):
- is_markup = re.match(r'^(?(%s)\b>?)' % current_tag, chunk)
+ is_markup = re.match(r'^(?:(?=))?(?(%s)\b>?)' % current_tag, chunk)
block += chunk
if is_markup:
- if is_markup.group(2) == 'pre':
- is_markup = None
+ if chunk.startswith(''):
+ tag_count -= 1
else:
- if chunk.startswith(''):
- tag_count -= 1
+ # if close tag is in same line
+ if '%s>' % is_markup.group(2) in chunk[is_markup.end():]:
+ # we must ignore these
+ is_markup = None
else:
- # if close tag is in same line
- if '%s>' % is_markup.group(2) in chunk[is_markup.end():]:
- # we must ignore these
- is_markup = None
- else:
- tag_count += 1
- current_tag = is_markup.group(2)
+ tag_count += 1
+ current_tag = is_markup.group(2)
if tag_count == 0:
if is_markup: