From 0b1038318c015a63846527e66e8c65f4efcb3c98 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 22 Oct 2022 18:19:41 +0100 Subject: [PATCH 1/3] Fix hashing nested HTML blocks Nested HTML blocks of the same type (eg: nested div's) would be hashed incorrectly as the `_strict_block_tag_re` would match the FIRST closing tag and ignore any subsequent closing tags. This commit fixes this behaviour by iterating over all the lines in a given text, manually tallying up the number of opening/closing tags and then hashing the relevant block --- lib/markdown2.py | 44 +++++++++++++++++++++- test/tm-cases/hash_nested_html_blocks.html | 7 ++++ test/tm-cases/hash_nested_html_blocks.opts | 1 + test/tm-cases/hash_nested_html_blocks.tags | 1 + test/tm-cases/hash_nested_html_blocks.text | 5 +++ 5 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 test/tm-cases/hash_nested_html_blocks.html create mode 100644 test/tm-cases/hash_nested_html_blocks.opts create mode 100644 test/tm-cases/hash_nested_html_blocks.tags create mode 100644 test/tm-cases/hash_nested_html_blocks.text diff --git a/lib/markdown2.py b/lib/markdown2.py index e450dca5..fe5c88ab 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -741,7 +741,11 @@ def _detab(self, text): _html_markdown_attr_re = re.compile( r'''\s+markdown=("1"|'1')''') def _hash_html_block_sub(self, match, raw=False): - html = match.group(1) + if isinstance(match, str): + html = match + else: + html = match.group(1) + if raw and self.safe_mode: html = self._sanitize_html(html) elif 'markdown-in-html' in self.extras and 'markdown=' in html: @@ -792,7 +796,7 @@ def _hash_html_blocks(self, text, raw=False): # the inner nested divs must be indented. # We need to do this before the next, more liberal match, because the next # match will start at the first `
<div>` and stop at the first `</div>
`. - text = self._strict_tag_block_re.sub(hash_html_block_sub, text) + text = self._strict_tag_block_sub(text, self._block_tags_a, hash_html_block_sub) # Now match more liberally, simply from `\n` to `\n` text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) @@ -871,6 +875,42 @@ def _hash_html_blocks(self, text, raw=False): return text + def _strict_tag_block_sub(self, text, html_tags_re, callback): + tag_count = 0 + current_tag = html_tags_re + block = '' + result = '' + + for chunk in text.splitlines(True): + is_markup = re.match(r'^(?)' % current_tag, chunk) + block += chunk + + if is_markup: + if is_markup.group(2) == 'pre': + is_markup = None + else: + if chunk.startswith('' % is_markup.group(2) in chunk[is_markup.end():]: + # we must ignore these + is_markup = None + else: + tag_count += 1 + current_tag = is_markup.group(2) + + if tag_count == 0: + if is_markup: + block = callback(block.rstrip('\n')) # remove trailing newline + current_tag = html_tags_re + result += block + block = '' + + result += block + + return result + def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. diff --git a/test/tm-cases/hash_nested_html_blocks.html b/test/tm-cases/hash_nested_html_blocks.html new file mode 100644 index 00000000..1e7bc4cb --- /dev/null +++ b/test/tm-cases/hash_nested_html_blocks.html @@ -0,0 +1,7 @@ +
+
+
x = 1
+
+
+ +
diff --git a/test/tm-cases/hash_nested_html_blocks.opts b/test/tm-cases/hash_nested_html_blocks.opts new file mode 100644 index 00000000..9e6bd79a --- /dev/null +++ b/test/tm-cases/hash_nested_html_blocks.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks", "pygments"]} \ No newline at end of file diff --git a/test/tm-cases/hash_nested_html_blocks.tags b/test/tm-cases/hash_nested_html_blocks.tags new file mode 100644 index 00000000..2c03fb5d --- /dev/null +++ b/test/tm-cases/hash_nested_html_blocks.tags @@ -0,0 +1 @@ +extra fenced-code-blocks pygments diff --git a/test/tm-cases/hash_nested_html_blocks.text b/test/tm-cases/hash_nested_html_blocks.text new file mode 100644 index 00000000..f5ad8bd1 --- /dev/null +++ b/test/tm-cases/hash_nested_html_blocks.text @@ -0,0 +1,5 @@ +
+```python +x = 1 +``` +
\ No newline at end of file From 240dd359db5b370651093098b14caaf2d8bd7654 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 26 Oct 2022 21:51:20 +0100 Subject: [PATCH 2/3] Fix tests not passing due to list items incorrectly wrapped in `

` tags --- test/tm-cases/sublist-para.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/tm-cases/sublist-para.html b/test/tm-cases/sublist-para.html index a2f551b1..484e67e4 100644 --- a/test/tm-cases/sublist-para.html +++ b/test/tm-cases/sublist-para.html @@ -11,7 +11,7 @@

  • Add Komodo chrome (XUL, JavaScript, CSS, DTDs).
  • -

    What this means is that work on and add significant functionality...

    +

    What this means is that work on and add significant functionality...

  • Komodo uses the same extension mechanisms as Firefox...

  • Komodo builds and runs on Windows, Linux and ...

  • -

    + From 1ab08d0e5e7222782082c51489bc1eaa58e54f39 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 26 Oct 2022 22:04:16 +0100 Subject: [PATCH 3/3] Fix `_strict_tag_block_sub` skipping all `
    <pre>` blocks
    
    ---
     lib/markdown2.py | 21 +++++++++------------
     1 file changed, 9 insertions(+), 12 deletions(-)
    
    diff --git a/lib/markdown2.py b/lib/markdown2.py
    index fe5c88ab..b61d03a9 100755
    --- a/lib/markdown2.py
    +++ b/lib/markdown2.py
    @@ -882,23 +882,20 @@ def _strict_tag_block_sub(self, text, html_tags_re, callback):
             result = ''
     
             for chunk in text.splitlines(True):
    -            is_markup = re.match(r'^(</?(%s)\b>?)' % current_tag, chunk)
    +            is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
                 block += chunk
     
                 if is_markup:
    -                if is_markup.group(2) == 'pre':
    -                    is_markup = None
    +                if chunk.startswith('</'):
    +                    tag_count -= 1
                     else:
    -                    if chunk.startswith('</'):
    -                        tag_count -= 1
    +                    # if close tag is in same line
    +                    if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
    +                        # we must ignore these
    +                        is_markup = None
                         else:
    -                        # if close tag is in same line
    -                        if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
    -                            # we must ignore these
    -                            is_markup = None
    -                        else:
    -                            tag_count += 1
    -                            current_tag = is_markup.group(2)
    +                        tag_count += 1
    +                        current_tag = is_markup.group(2)
     
                 if tag_count == 0:
                     if is_markup: