From 0b1038318c015a63846527e66e8c65f4efcb3c98 Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sat, 22 Oct 2022 18:19:41 +0100
Subject: [PATCH 1/3] Fix hashing nested HTML blocks

Nested HTML blocks of the same type (e.g. nested divs) would be hashed incorrectly
as the `_strict_tag_block_re` would match the FIRST closing tag and ignore any subsequent
closing tags. This commit fixes this behaviour by iterating over all the lines in a
given text, manually tallying up the number of opening/closing tags and then hashing
the relevant block.
---
 lib/markdown2.py                           | 44 +++++++++++++++++++++-
 test/tm-cases/hash_nested_html_blocks.html |  7 ++++
 test/tm-cases/hash_nested_html_blocks.opts |  1 +
 test/tm-cases/hash_nested_html_blocks.tags |  1 +
 test/tm-cases/hash_nested_html_blocks.text |  5 +++
 5 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 test/tm-cases/hash_nested_html_blocks.html
 create mode 100644 test/tm-cases/hash_nested_html_blocks.opts
 create mode 100644 test/tm-cases/hash_nested_html_blocks.tags
 create mode 100644 test/tm-cases/hash_nested_html_blocks.text
diff --git a/lib/markdown2.py b/lib/markdown2.py
index e450dca5..fe5c88ab 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -741,7 +741,11 @@ def _detab(self, text):
     _html_markdown_attr_re = re.compile(
         r'''\s+markdown=("1"|'1')''')
     def _hash_html_block_sub(self, match, raw=False):
-        html = match.group(1)
+        if isinstance(match, str):
+            html = match
+        else:
+            html = match.group(1)
+
         if raw and self.safe_mode:
             html = self._sanitize_html(html)
         elif 'markdown-in-html' in self.extras and 'markdown=' in html:
@@ -792,7 +796,7 @@ def _hash_html_blocks(self, text, raw=False):
         # the inner nested divs must be indented.
         # We need to do this before the next, more liberal match, because the next
         # match will start at the first `<div>` and stop at the first `</div>`.
-        text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
+        text = self._strict_tag_block_sub(text, self._block_tags_a, hash_html_block_sub)
 
         # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
         text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
@@ -871,6 +875,42 @@ def _hash_html_blocks(self, text, raw=False):
 
         return text
 
+    def _strict_tag_block_sub(self, text, html_tags_re, callback):
+        tag_count = 0
+        current_tag = html_tags_re
+        block = ''
+        result = ''
+
+        for chunk in text.splitlines(True):
+            is_markup = re.match(r'^(</?(%s)\b>?)' % current_tag, chunk)
+            block += chunk
+
+            if is_markup:
+                if is_markup.group(2) == 'pre':
+                    is_markup = None
+                else:
+                    if chunk.startswith('</'):
+                        tag_count -= 1
+                    else:
+                        # if close tag is in same line
+                        if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
+                            # we must ignore these
+                            is_markup = None
+                        else:
+                            tag_count += 1
+                            current_tag = is_markup.group(2)
+
+            if tag_count == 0:
+                if is_markup:
+                    block = callback(block.rstrip('\n'))  # remove trailing newline
+                current_tag = html_tags_re
+                result += block
+                block = ''
+
+        result += block
+
+        return result
+
     def _strip_link_definitions(self, text):
         # Strips link definitions from text, stores the URLs and titles in
         # hash references.
diff --git a/test/tm-cases/hash_nested_html_blocks.html b/test/tm-cases/hash_nested_html_blocks.html
new file mode 100644
index 00000000..1e7bc4cb
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.html
@@ -0,0 +1,7 @@
+<div class="enclosing">
+<div class="codehilite">
+<pre><span></span><code><span class="n">x</span> <span class="o">=</span> <span class="mi">1</span>
+</code></pre>
+</div>
+
+</div>
diff --git a/test/tm-cases/hash_nested_html_blocks.opts b/test/tm-cases/hash_nested_html_blocks.opts
new file mode 100644
index 00000000..9e6bd79a
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.opts
@@ -0,0 +1 @@
+{"extras": ["fenced-code-blocks", "pygments"]}
\ No newline at end of file
diff --git a/test/tm-cases/hash_nested_html_blocks.tags b/test/tm-cases/hash_nested_html_blocks.tags
new file mode 100644
index 00000000..2c03fb5d
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.tags
@@ -0,0 +1 @@
+extra fenced-code-blocks pygments
diff --git a/test/tm-cases/hash_nested_html_blocks.text b/test/tm-cases/hash_nested_html_blocks.text
new file mode 100644
index 00000000..f5ad8bd1
--- /dev/null
+++ b/test/tm-cases/hash_nested_html_blocks.text
@@ -0,0 +1,5 @@
+<div class="enclosing">
+```python
+x = 1
+```
+</div>
\ No newline at end of file

From 240dd359db5b370651093098b14caaf2d8bd7654 Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Wed, 26 Oct 2022 21:51:20 +0100
Subject: [PATCH 2/3] Fix tests not passing due to list items incorrectly
 wrapped in `<p>` tags

---
 test/tm-cases/sublist-para.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/tm-cases/sublist-para.html b/test/tm-cases/sublist-para.html
index a2f551b1..484e67e4 100644
--- a/test/tm-cases/sublist-para.html
+++ b/test/tm-cases/sublist-para.html
@@ -11,7 +11,7 @@
 <li>Add Komodo chrome (XUL, JavaScript, CSS, DTDs).</li>
 </ul>
 
-<p><p>What this means is that work on and add significant functionality...</p></li>
+<p>What this means is that work on and add significant functionality...</p></li>
 <li><p>Komodo uses the same extension mechanisms as Firefox...</p></li>
 <li><p>Komodo builds and runs on Windows, Linux and ...</p></li>
-</ul></p>
+</ul>

From 1ab08d0e5e7222782082c51489bc1eaa58e54f39 Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Wed, 26 Oct 2022 22:04:16 +0100
Subject: [PATCH 3/3] Fix `_strict_tag_block_sub` skipping all `<pre>` blocks

---
 lib/markdown2.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index fe5c88ab..b61d03a9 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -882,23 +882,20 @@ def _strict_tag_block_sub(self, text, html_tags_re, callback):
         result = ''
 
         for chunk in text.splitlines(True):
-            is_markup = re.match(r'^(</?(%s)\b>?)' % current_tag, chunk)
+            is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
             block += chunk
 
             if is_markup:
-                if is_markup.group(2) == 'pre':
-                    is_markup = None
+                if chunk.startswith('</'):
+                    tag_count -= 1
                 else:
-                    if chunk.startswith('</'):
-                        tag_count -= 1
+                    # if close tag is in same line
+                    if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
+                        # we must ignore these
+                        is_markup = None
                     else:
-                        # if close tag is in same line
-                        if '</%s>' % is_markup.group(2) in chunk[is_markup.end():]:
-                            # we must ignore these
-                            is_markup = None
-                        else:
-                            tag_count += 1
-                            current_tag = is_markup.group(2)
+                        tag_count += 1
+                        current_tag = is_markup.group(2)
 
             if tag_count == 0:
                 if is_markup: