From 713f026c86aa582d37921cebc0c0eca6d1de77e4 Mon Sep 17 00:00:00 2001 From: mohamorui <95970687+mohamorui@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:11:14 +0800 Subject: [PATCH] =?UTF-8?q?Fix=20infinite=20loop=20when=20parsing=20H4?= =?UTF-8?q?=E2=80=93H6=20headings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse_md() had handlers only for H1–H3. Any line starting with ####, ##### or ###### fell into the paragraph collector, which immediately broke on lines starting with '#', leaving `i` unchanged. The outer `while i < len(lines)` then re-processed the same line forever (100% CPU, no output), which broke conversion of documents using sub-sub-section headings. - Extend the section handler to match H3–H6 (`#{3,6}\s+`). Deeper levels render with the H3 style so their content is preserved instead of stalling the parser. - Add a defensive `i += 1` in the paragraph branch so future unmatched lines (e.g. `#!shebang` or a bare `#`) cannot stall the loop again. - Add tests/07-deep-headings.md as a regression guard covering H3–H6, a table between deep headings, and deep headings with `&` and inline code. --- lovstudio-any2pdf/scripts/md2pdf.py | 12 ++++++-- tests/07-deep-headings.md | 44 +++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 tests/07-deep-headings.md diff --git a/lovstudio-any2pdf/scripts/md2pdf.py b/lovstudio-any2pdf/scripts/md2pdf.py index a593154..a62eadf 100644 --- a/lovstudio-any2pdf/scripts/md2pdf.py +++ b/lovstudio-any2pdf/scripts/md2pdf.py @@ -1085,10 +1085,12 @@ def parse_md(self, md): toc.append(('chapter', title, cm.key)) i += 1; continue - # H3 = Section - if stripped.startswith('### '): + # H3–H6 = Section. Deeper levels fall back to the H3 style so their + # content is still rendered (ReportLab has no built-in H4–H6 styles here). + m_h = re.match(r'^(#{3,6})\s+(.+)$', stripped) + if m_h: story.append(Spacer(1, 3*mm)) - story.append(Paragraph(md_inline(stripped[4:].strip(), ah), ST['h3'])) + story.append(Paragraph(md_inline(m_h.group(2).strip(), ah), ST['h3'])) story.append(Spacer(1, 1*mm)) i += 1; continue @@ -1134,6 +1136,10 @@ def parse_md(self, md): else: merged += ' ' + pl story.append(Paragraph(md_inline(merged, ah), ST['body'])) + else: + # Defensive: current line matched no handler above (e.g. a stray + # marker we don't parse). Advance to prevent an infinite loop. + i += 1 continue return story, toc diff --git a/tests/07-deep-headings.md b/tests/07-deep-headings.md new file mode 100644 index 0000000..c3c02f2 --- /dev/null +++ b/tests/07-deep-headings.md @@ -0,0 +1,44 @@ +--- +title: "Deep Headings Regression Test" +subtitle: "H4/H5/H6 must not hang the parser" +author: "any2pdf" +version: "test" +--- + +# Part + +Body paragraph before the nested headings. + +## Chapter + +### Section (H3 — already supported) + +Body text under H3. + +#### Subsection (H4 — previously hung the parser) + +Body text under H4. The parse_md loop used to never advance `i` when it saw a +deeper heading like this, because H4–H6 had no handler and the paragraph +collector broke on lines starting with `#`. The result was 100% CPU forever. + +##### Deeper subsection (H5) + +Body text under H5 with some `inline code` and **bold**. + +###### Deepest (H6) + +Body text under H6. + +#### Another H4 right after a table + +| Col A | Col B | Col C | +|-------|-------|-------| +| a1 | b1 | c1 | +| a2 | b2 | c2 | + +Body paragraph after the table. This used to never render because the H4 above +would spin forever. + +#### H4 with & ampersand and `${VAR}/path.md` + +Inline code inside deep heading context should not break anything.