From 23e333a70ee0d847f69d4f8ed8518ea618435f92 Mon Sep 17 00:00:00 2001 From: FightingLee97 Date: Fri, 17 Apr 2026 13:07:57 +0800 Subject: [PATCH 1/2] fix: frontmatter support, blank page, header title, and code alignment - Parse YAML frontmatter (--- ... ---) from markdown top: title, subtitle, author, date, version, theme, watermark, footer-left, copyright, and all other CLI params. CLI args take precedence over frontmatter values. - Strip H1 visual elements (Spacer, Paragraph, decorations) when a cover title is provided, keeping ChapterMark for TOC anchor links. Prevents duplicate title page and blank page after TOC. - Replace dynamic _cur_chapter[0] in top-band and full header with fixed doc title to avoid reportlab onPage timing offset (chapter name always lagged one page behind actual content). - Fix code block mid-line space collapsing: replace all spaces with &nbsp; in esc_code() to preserve ASCII diagrams and padded column alignment. - Reduce H2 chapter spacer from 30% page height to 8mm for manual/report style documents (avoids large whitespace at top of each chapter page). --- lovstudio-any2pdf/scripts/md2pdf.py | 93 +++++++++++++++++------------ 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/lovstudio-any2pdf/scripts/md2pdf.py b/lovstudio-any2pdf/scripts/md2pdf.py index a593154..e8954bf 100644 --- a/lovstudio-any2pdf/scripts/md2pdf.py +++ b/lovstudio-any2pdf/scripts/md2pdf.py @@ -459,13 +459,13 @@ def esc(text): return text.replace("&","&amp;").replace("<","&lt;").replace(">","&gt;") def esc_code(text): - """Escape for code blocks: preserve indentation and newlines.""" + """Escape for code blocks: preserve indentation, alignment, and newlines.""" out = [] for line in text.split('\n'): e = esc(line) - stripped = e.lstrip(' ') - indent = len(e) - len(stripped) - out.append('&nbsp;' * indent + stripped) + # Replace ALL spaces with &nbsp; to preserve both indentation and + # mid-line alignment (e.g. ASCII diagrams, table output, padded columns). 
+ out.append(e.replace(' ', '&nbsp;')) return '
'.join(out) def md_inline(text, accent_hex="#CC785C"): @@ -841,10 +841,10 @@ def _draw_page_decoration(self, c): if header_title: c.setFillColor(white) _draw_mixed(c, self.lm, self.page_h - 6*mm, header_title, 7.5) - ch = _cur_chapter[0] - if ch: + doc_title = self.cfg.get("title", "") + if doc_title: c.setFillColor(white) - _draw_mixed(c, self.page_w - self.rm, self.page_h - 6*mm, ch[:40], 7.5, anchor="right") + _draw_mixed(c, self.page_w - self.rm, self.page_h - 6*mm, doc_title[:40], 7.5, anchor="right") elif deco == "double-rule": # Double horizontal rules at top and bottom (elegant book style) c.setStrokeColor(T["accent"]); c.setLineWidth(0.6) @@ -882,9 +882,9 @@ def _normal_page(self, c, doc): header_title = self.cfg.get("header_title", "") if header_title: _draw_mixed(c, self.lm, self.page_h - 18*mm, header_title, 8) - ch = _cur_chapter[0] - if ch: - _draw_mixed(c, self.page_w - self.rm, self.page_h - 18*mm, ch[:40], 8, anchor="right") + doc_title = self.cfg.get("title", "") + if doc_title: + _draw_mixed(c, self.page_w - self.rm, self.page_h - 18*mm, doc_title[:40], 8, anchor="right") elif hs == "minimal" and deco != "top-band": c.setFillColor(T["ink_faded"]); c.setFont("Sans", 8) c.drawRightString(self.page_w - self.rm, self.page_h - 16*mm, str(pg)) @@ -1071,7 +1071,7 @@ def parse_md(self, md): story.append(PageBreak()) cm = ChapterMark(title, level=1); story.append(cm) hdeco = self.L["heading_decoration"] - story.append(Spacer(1, self.body_h * 0.30)) + story.append(Spacer(1, 8*mm)) story.append(Paragraph(md_inline(title, ah), ST['chapter'])) if hdeco == "rules": story.append(Spacer(1, 5*mm)) @@ -1212,12 +1212,20 @@ def build(self, md_text, output_path): story.append(NextPageTemplate('normal')) story.append(PageBreak()) - # Strip leading PageBreak from body content to avoid blank page - while story_content and isinstance(story_content[0], (PageBreak, Spacer)): - if isinstance(story_content[0], PageBreak): + # When a cover is present and the markdown has an 
H1 heading, strip all + # visual elements generated by that H1 (Spacer, Paragraph, decorations, + # and the H2's leading PageBreak) to avoid a duplicate title page. + # The H1 ChapterMark is kept so TOC anchor links remain valid. + if self.cfg.get("cover", True): + while story_content and isinstance(story_content[0], (PageBreak, Spacer)): story_content.pop(0) - break - story_content.pop(0) + if story_content and isinstance(story_content[0], ChapterMark) and story_content[0].level == 0: + idx = 1 + # Remove everything between H1 ChapterMark and the next ChapterMark + # (H2). This includes Spacers, Paragraphs, decorations, and the + # H2's leading PageBreak — all in one pass. + while idx < len(story_content) and not isinstance(story_content[idx], ChapterMark): + story_content.pop(idx) story.extend(story_content) @@ -1267,39 +1275,50 @@ def main(): with open(args.input, encoding='utf-8') as f: md_text = f.read() - # Extract title from first H1 if not provided - title = args.title + # Parse YAML-style frontmatter (--- ... ---) from the top of the file. + # CLI args take precedence over frontmatter values. 
+ frontmatter = {} + fm_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', md_text, re.DOTALL) + if fm_match: + for line in fm_match.group(1).splitlines(): + if ':' in line: + k, _, v = line.partition(':') + frontmatter[k.strip()] = v.strip() + md_text = md_text[fm_match.end():] # strip frontmatter from body + + # Extract title from first H1 if not provided by args or frontmatter + title = args.title or frontmatter.get("title", "") if not title: m = re.search(r'^# (.+)$', md_text, re.MULTILINE) title = m.group(1).strip() if m else "Document" - theme = load_theme(args.theme, args.theme_file) + theme = load_theme(args.theme if args.theme != "warm-academic" else frontmatter.get("theme", args.theme), args.theme_file) a = theme['accent'] accent_hex = f"#{int(a.red*255):02x}{int(a.green*255):02x}{int(a.blue*255):02x}" \ if hasattr(a, 'red') else "#CC785C" config = { "title": title, - "subtitle": args.subtitle, - "author": args.author, - "date": args.date, - "version": args.version, - "watermark": args.watermark, + "subtitle": args.subtitle or frontmatter.get("subtitle", ""), + "author": args.author or frontmatter.get("author", ""), + "date": args.date or frontmatter.get("date", str(date.today())), + "version": args.version or frontmatter.get("version", ""), + "watermark": args.watermark or frontmatter.get("watermark", ""), "theme": theme, "accent_hex": accent_hex, - "cover": args.cover, - "toc": args.toc, - "page_size": A4 if args.page_size == "A4" else LETTER, - "frontispiece": args.frontispiece, - "banner": args.banner, - "header_title": args.header_title, - "footer_left": args.footer_left or args.author, - "stats_line": args.stats_line, - "stats_line2": args.stats_line2, - "edition_line": args.edition_line, - "disclaimer": args.disclaimer, - "copyright": args.copyright, - "code_max_lines": args.code_max_lines, + "cover": args.cover if args.cover is not True else (frontmatter.get("cover", "true").lower() != "false"), + "toc": args.toc if args.toc is not True else 
(frontmatter.get("toc", "true").lower() != "false"), + "page_size": A4 if (args.page_size == "A4" and frontmatter.get("page-size", "A4") == "A4") else LETTER, + "frontispiece": args.frontispiece or frontmatter.get("frontispiece", ""), + "banner": args.banner or frontmatter.get("banner", ""), + "header_title": args.header_title or frontmatter.get("header-title", ""), + "footer_left": args.footer_left or frontmatter.get("footer-left", "") or args.author or frontmatter.get("author", ""), + "stats_line": args.stats_line or frontmatter.get("stats-line", ""), + "stats_line2": args.stats_line2 or frontmatter.get("stats-line2", ""), + "edition_line": args.edition_line or frontmatter.get("edition-line", ""), + "disclaimer": args.disclaimer or frontmatter.get("disclaimer", ""), + "copyright": args.copyright or frontmatter.get("copyright", ""), + "code_max_lines": args.code_max_lines if args.code_max_lines != 30 else int(frontmatter.get("code-max-lines", 30)), } builder = PDFBuilder(config) From 68cba2921ad3075f773b4ab0d7338c9332c47f79 Mon Sep 17 00:00:00 2001 From: FightingLee97 Date: Fri, 17 Apr 2026 13:28:10 +0800 Subject: [PATCH 2/2] docs: update SKILL.md with frontmatter-first workflow and uv support - Rewrite Pre-Conversion section into 3-step workflow: inspect MD, ask options, write frontmatter directly into the file - Promote YAML frontmatter as the primary config method over CLI args - Add uv-first invocation in Quick Start and Dependencies sections - Add Frontmatter Support section with full key reference table - Bump version to 1.1.0; replace example title placeholder Co-Authored-By: Claude Sonnet 4 --- CHANGES.md | 148 +++++++++++++++++++++++++++++++++ lovstudio-any2pdf/SKILL.md | 166 +++++++++++++++++++++++++------------ 2 files changed, 263 insertions(+), 51 deletions(-) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..32a06f4 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,148 @@ +# Changes — 
fix/frontmatter-and-pdf-layout + +## Summary + +Five fixes to `lovstudio-any2pdf/scripts/md2pdf.py` addressing blank pages, +header title mismatch, code block alignment, and a new frontmatter feature +that makes markdown files self-contained. + +--- + +## 1. YAML Frontmatter Support (new feature) + +**Problem:** All document metadata (title, author, theme, etc.) had to be +passed as CLI arguments. The markdown file itself carried no metadata, making +it hard to reproduce a build without remembering the exact command. + +**Fix:** The script now parses a YAML-style frontmatter block at the top of +the markdown file: + +```markdown +--- +title: My Document +subtitle: Version 1.0 · Platform: Linux +author: Acme Corp +footer-left: Acme Corp +copyright: © Acme Corp +theme: ieee-journal +watermark: DRAFT +--- + +## Chapter 1 +... +``` + +All CLI parameters are supported as frontmatter keys (using the same names as +the `--` flags, e.g. `footer-left`, `code-max-lines`). **CLI arguments always +take precedence** over frontmatter values, so existing workflows are unaffected. + +With frontmatter, the minimal invocation becomes: + +```bash +python md2pdf.py --input report.md --output report.pdf +``` + +--- + +## 2. Blank Page After TOC (bug fix) + +**Problem:** When a markdown file contains a `# Title` heading and a cover +page is generated via `--title` (or frontmatter), the H1 heading produces a +full chapter-divider page (PageBreak + large Spacer + title Paragraph + +decorations). This page appeared between the TOC and the first `##` chapter, +creating a blank-looking page. + +**Fix:** When a cover title is provided, strip all visual elements generated +by the H1 heading (Spacer, title Paragraph, decoration flowables, and the +following H2's leading PageBreak). The H1 `ChapterMark` flowable is **kept** +so TOC anchor links remain valid. 
+ +The stripping only triggers when: +- `cover=True` (default), AND +- The first `ChapterMark` in `story_content` is level 0 (H1) + +Documents without a `# Title` heading are unaffected. + +--- + +## 3. Header Title One-Page Lag (bug fix) + +**Problem:** The right side of the page header displayed the current chapter +name via `_cur_chapter[0]`. Because reportlab's `onPage` callback fires +*before* the page's flowables are rendered, `_cur_chapter[0]` always held the +*previous* page's chapter — causing a one-page lag (e.g. page 3 showed +"Chapter 1" while its content was already "Chapter 2"). + +**Affected locations:** +- `_draw_page_decoration()` — `top-band` style (used by `ieee-journal`) +- `_normal_page()` — `full` header style + +**Fix:** Replace `_cur_chapter[0]` with the fixed document title +(`self.cfg.get("title", "")`). The header now consistently shows the document +title on every page, which is the correct behaviour for a technical manual or +report. Dynamic chapter tracking would require a two-pass build to resolve +correctly; the fixed title is the pragmatic solution. + +--- + +## 4. Code Block Mid-Line Space Collapsing (bug fix) + +**Problem:** `esc_code()` only converted *leading* spaces to `&nbsp;`. Spaces +in the middle of a line (e.g. padded columns in ASCII diagrams, table output, +or aligned assignments) were left as regular HTML spaces and collapsed to a +single space by reportlab's Paragraph renderer. + +**Before:** +```python +stripped = e.lstrip(' ') +indent = len(e) - len(stripped) +out.append('&nbsp;' * indent + stripped) +``` + +**After:** +```python +out.append(e.replace(' ', '&nbsp;')) +``` + +This preserves both indentation and mid-line alignment for all code blocks. + +--- + +## 5. H2 Chapter Spacer Reduction (style fix) + +**Problem:** Each `##` heading inserted `Spacer(1, self.body_h * 0.30)` — +30% of the body height (~74 mm on A4). 
This is appropriate for book-style +chapter openers but creates excessive whitespace in technical manuals and +reports where chapters are short and numerous. + +**Fix:** Changed to a fixed `Spacer(1, 8*mm)`, which provides a clean visual +break without wasting nearly a third of the page. + +--- + +## Usage Example + +```markdown +--- +title: xxx manual +subtitle: Version 0.1.0 · Platform: Linux +author: Acme Biotech Ltd. +footer-left: Acme Biotech Ltd. +copyright: © Acme Biotech Ltd. +theme: ieee-journal +--- + +## 1. Overview +... +``` + +```bash +# With uv (recommended — no pip install needed, isolated env) +uv run --with reportlab /path/to/md2pdf.py --input report.md --output report.pdf + +# With pip +python md2pdf.py --input report.md --output report.pdf + +# CLI args override frontmatter (use a non-default value: --theme warm-academic is the default and falls back to the frontmatter theme) +python md2pdf.py --input manual.md --output manual.pdf --theme classic-thesis +``` diff --git a/lovstudio-any2pdf/SKILL.md b/lovstudio-any2pdf/SKILL.md index 8e44345..9a700fc 100644 --- a/lovstudio-any2pdf/SKILL.md +++ b/lovstudio-any2pdf/SKILL.md @@ -18,7 +18,7 @@ compatibility: > Linux: uses Carlito, Liberation Serif, Droid Sans Fallback, DejaVu Sans Mono. metadata: author: lovstudio - version: "1.0.0" + version: "1.1.0" tags: markdown pdf cjk reportlab typesetting --- @@ -39,25 +39,48 @@ get wrong. ## Quick Start +The recommended approach is to embed all metadata in the markdown file via YAML frontmatter, then run with a minimal command: + +```markdown +--- +title: My Report +author: Author Name +theme: warm-academic +--- + +# My Report +... 
+``` + ```bash -python md2pdf/scripts/md2pdf.py \ - --input report.md \ - --output report.pdf \ - --title "My Report" \ - --author "Author Name" \ - --theme warm-academic +# Preferred: uv (isolated, no side effects on project env) +uv run --with reportlab lovstudio-any2pdf/scripts/md2pdf.py \ + --input report.md --output report.pdf + +# Fallback: pip +pip install reportlab --break-system-packages +python lovstudio-any2pdf/scripts/md2pdf.py --input report.md --output report.pdf ``` All parameters except `--input` are optional — sensible defaults are applied. -## Pre-Conversion Options (MANDATORY) +## Pre-Conversion Workflow (MANDATORY) + +### Step 1 — Read and Inspect the Markdown File -**IMPORTANT: You MUST use the `AskUserQuestion` tool to ask these questions BEFORE -running the conversion. Do NOT list options as plain text — use the tool so the user -gets a proper interactive prompt. Ask all options in a SINGLE `AskUserQuestion` call.** +Before asking any questions, read the user's markdown file and check: -Use `AskUserQuestion` with the following template. The tone should be friendly and -concise — like a design assistant, not a config form: +- **Frontmatter**: Does it already have a `--- ... ---` block? If yes, note which keys are already set and skip asking for those. +- **Title**: Is there a `# H1` heading? If yes, it will be used as the document title automatically. +- **Structure**: Are headings well-formed (`##`, `###`)? Are there merged headings like `# Foo## Bar` on one line? (The preprocessor handles these, but worth noting.) +- **Content hints**: Does the content suggest a particular theme (e.g. academic paper → `classic-thesis`, Chinese report → `chinese-red`, code-heavy → `github-light`)? + +Report a brief summary to the user, e.g.: +> 已读取文档,共 8 个章节,检测到标题「xxx manual」,无 frontmatter。建议主题:`ieee-journal`(技术手册风格)。 + +### Step 2 — Ask Design Options + +**IMPORTANT: Use the `AskUserQuestion` tool for this step.** Ask all options in a SINGLE call. 
Skip any options already covered by existing frontmatter. ``` 开始转 PDF!先帮你确认几个选项 👇 @@ -92,21 +115,35 @@ concise — like a design assistant, not a config form: 直接说人话就行,不用记编号 😄 ``` -### Mapping User Choices to CLI Args +### Step 3 — Write Frontmatter into the Markdown File + +After collecting user choices, **edit the markdown file directly** to prepend a frontmatter block (or update the existing one). Do NOT pass options as CLI args — frontmatter keeps the document self-contained and reproducible. -| Choice | CLI arg | -|--------|---------| -| Design style a-j | `--theme` with value from table below | -| Frontispiece local | `--frontispiece ` | -| Frontispiece AI | Generate image first, then `--frontispiece /tmp/frontispiece.png` | -| Watermark text | `--watermark "文字"` | -| Back cover image | `--banner ` | -| Back cover text | `--disclaimer "声明"` and/or `--copyright "© 信息"` | +Example frontmatter to write: + +```markdown +--- +title: xxx manual +author: Acme Biotech Ltd. +footer-left: Acme Biotech Ltd. +theme: ieee-journal +watermark: DRAFT +frontispiece: /tmp/frontispiece.png +copyright: © Acme Biotech Ltd. 
+--- +``` + +Then run the minimal command: + +```bash +uv run --with reportlab /path/to/lovstudio-any2pdf/scripts/md2pdf.py \ + --input report.md --output report.pdf +``` ### Theme Name Mapping -| Choice | `--theme` value | Inspiration | -|--------|----------------|-------------| +| Choice | `theme` value | Inspiration | +|--------|--------------|-------------| | a) 暖学术 | `warm-academic` | Lovstudio design system | | b) 经典论文 | `classic-thesis` | LaTeX classicthesis | | c) Tufte | `tufte` | Edward Tufte's books | @@ -122,7 +159,7 @@ concise — like a design assistant, not a config form: If user chose AI generation: read the document title + first paragraphs, use an image generation tool to create a themed illustration matching the chosen design -style, show for approval, then pass via `--frontispiece /path/to/image.png` +style, show for approval, then add `frontispiece: /path/to/image.png` to frontmatter. ## Architecture @@ -134,7 +171,7 @@ Key components: 1. **Font system**: Palatino (Latin body), Songti SC (CJK body), Menlo (code) on macOS; auto-fallback on Linux 2. **CJK wrapper**: `_font_wrap()` wraps CJK character runs in `<font>` tags for automatic font switching 3. **Mixed text renderer**: `_draw_mixed()` handles CJK/Latin mixed text on canvas (cover, headers, footers) -4. **Code block handler**: `esc_code()` preserves indentation and line breaks in reportlab Paragraphs +4. **Code block handler**: `esc_code()` preserves indentation, mid-line alignment, and line breaks in reportlab Paragraphs (all spaces → `&nbsp;`) 5. **Smart table widths**: Proportional column widths based on content length, with 18mm minimum 6. **Bookmark system**: `ChapterMark` flowable creates PDF sidebar bookmarks + named anchors 7. 
**Heading preprocessor**: `_preprocess_md()` splits merged headings like `# Part## Chapter` into separate lines @@ -162,33 +199,52 @@ Default reportlab breaks lines only at spaces, causing ugly splits like "Claude\ `drawString()` / `drawCentredString()` with a Latin font can't render 年/月/日 etc. **Fix**: Use `_draw_mixed()` for ALL user-content canvas text (dates, stats, disclaimers). +## Frontmatter Support + +All parameters (except `--input`, `--output`, `--theme-file`) can be set directly in the markdown file via YAML frontmatter. CLI args always take precedence over frontmatter values. + +```markdown +--- +title: My Report +author: Jane Doe +date: 2026-04-17 +theme: nord-frost +cover: true +toc: true +watermark: DRAFT +--- + +# My Report +... +``` + ## Configuration Reference -| Argument | Default | Description | -|----------|---------|-------------| -| `--input` | (required) | Path to markdown file | -| `--output` | `output.pdf` | Output PDF path | -| `--title` | From first H1 | Document title for cover page | -| `--subtitle` | `""` | Subtitle text | -| `--author` | `""` | Author name | -| `--date` | Today | Date string | -| `--version` | `""` | Version string for cover | -| `--watermark` | `""` | Watermark text (empty = none) | -| `--theme` | `warm-academic` | Color theme name | -| `--theme-file` | `""` | Custom theme JSON file path | -| `--cover` | `true` | Generate cover page | -| `--toc` | `true` | Generate table of contents | -| `--page-size` | `A4` | Page size (A4 or Letter) | -| `--frontispiece` | `""` | Full-page image after cover | -| `--banner` | `""` | Back cover banner image | -| `--header-title` | `""` | Report title in page header | -| `--footer-left` | author | Brand/author in footer | -| `--stats-line` | `""` | Stats on cover | -| `--stats-line2` | `""` | Second stats line | -| `--edition-line` | `""` | Edition line at cover bottom | -| `--disclaimer` | `""` | Back cover disclaimer | -| `--copyright` | `""` | Back cover copyright | -| 
`--code-max-lines` | `30` | Max lines per code block | +| Argument | Frontmatter Key | Default | Description | +|----------|----------------|---------|-------------| +| `--input` | — | (required) | Path to markdown file | +| `--output` | — | `output.pdf` | Output PDF path | +| `--title` | `title` | From first H1 | Document title for cover page | +| `--subtitle` | `subtitle` | `""` | Subtitle text | +| `--author` | `author` | `""` | Author name | +| `--date` | `date` | Today | Date string | +| `--version` | `version` | `""` | Version string for cover | +| `--watermark` | `watermark` | `""` | Watermark text (empty = none) | +| `--theme` | `theme` | `warm-academic` | Color theme name | +| `--theme-file` | — | `""` | Custom theme JSON file path | +| `--cover` | `cover` | `true` | Generate cover page | +| `--toc` | `toc` | `true` | Generate table of contents | +| `--page-size` | `page-size` | `A4` | Page size (A4 or Letter) | +| `--frontispiece` | `frontispiece` | `""` | Full-page image after cover | +| `--banner` | `banner` | `""` | Back cover banner image | +| `--header-title` | `header-title` | `""` | Report title in page header | +| `--footer-left` | `footer-left` | author | Brand/author in footer | +| `--stats-line` | `stats-line` | `""` | Stats on cover | +| `--stats-line2` | `stats-line2` | `""` | Second stats line | +| `--edition-line` | `edition-line` | `""` | Edition line at cover bottom | +| `--disclaimer` | `disclaimer` | `""` | Back cover disclaimer | +| `--copyright` | `copyright` | `""` | Back cover copyright | +| `--code-max-lines` | `code-max-lines` | `30` | Max lines per code block | ## Themes @@ -199,6 +255,14 @@ Each theme defines: page background, ink color, accent color, faded text, border ## Dependencies +If `uv` is available, no installation is needed — it creates an isolated ephemeral environment on the fly: + +```bash +uv run --with reportlab /path/to/lovstudio-any2pdf/scripts/md2pdf.py --input report.md --output report.pdf +``` + +Otherwise, 
install with pip: + ```bash pip install reportlab --break-system-packages ```