From 5ecd444ce5ff03109b0a962d73f46dceb31ffff9 Mon Sep 17 00:00:00 2001 From: fangshuyu-768 Date: Mon, 20 Apr 2026 22:03:33 +0800 Subject: [PATCH] test(doc): harden markdown_fix pipeline with invariant tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 5 invariant-level tests on top of #469's transforms: - TestFixExportedMarkdownIdempotent — f(f(x)) == f(x) across rich fixtures (kitchen sink, CJK, nested containers). Protects the core round-trip promise from future transform interactions that rewrite their own output. - TestFixExportedMarkdownPreservesFencedCodeByteForByte — packs every pipeline-touching shape into a fence and asserts byte-identical output. Code samples must never be silently rewritten by a formatting pass. - TestFixExportedMarkdownPreservesCRLF — CRLF input preserves line endings AND still triggers transforms. Windows-authored markdown should not be silently LF-normalized. - TestFixExportedMarkdownTransformInteractions — composition regressions: nested-list + trailing-space bold, text→list transition, callout containing list with emphasis, heading vs paragraph bold. - TestNormalizeNestedListIndentationDocumentedSkips — locks in the deliberate no-op branches (odd-space indent, blank-line loose-list sibling, 4-space indented code block, parentless two-space) as an explicit spec so future heuristic tweaks surface in the test diff. All transforms, fixtures, and expectations are derived from the head of PR #469. No production code changes. --- shortcuts/doc/markdown_fix_hardening_test.go | 287 +++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 shortcuts/doc/markdown_fix_hardening_test.go diff --git a/shortcuts/doc/markdown_fix_hardening_test.go b/shortcuts/doc/markdown_fix_hardening_test.go new file mode 100644 index 000000000..36264f876 --- /dev/null +++ b/shortcuts/doc/markdown_fix_hardening_test.go @@ -0,0 +1,287 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package doc + +import ( + "strings" + "testing" +) + +// TestFixExportedMarkdownIdempotent asserts the core promise of the exported +// markdown pipeline: applying the fixes twice produces the same result as +// applying them once. Round-trip formatting relies on this invariant, so any +// transform that keeps rewriting its own output would break fetch → edit → +// update → fetch stability. +func TestFixExportedMarkdownIdempotent(t *testing.T) { + fixtures := map[string]string{ + "kitchen sink": strings.Join([]string{ + "# **Title**", + "paragraph one", + "paragraph two", + "**bold ** and * italic*", + "", + "> q1", + "> q2", + "", + "1. parent", + " 1. child", + " 1. grandchild", + "", + "", + "callout body line 1", + "callout body line 2", + "", + "", + "some text", + "---", + "", + "```go", + "// code content with markdown-like shapes must survive as-is", + "**foo **", + "* hello*", + " 1. nested", + "> q", + "---", + "```", + "", + }, "\n"), + + "cjk content": strings.Join([]string{ + "# **测试标题**", + "段落一", + "段落二", + "**有用性 ** and * 关键 *", + "", + "1. 父项", + " 1. 子项", + "", + }, "\n"), + + "nested containers": strings.Join([]string{ + "", + "line a", + "line b", + "", + "", + "", + "quoted 1", + "quoted 2", + "", + "", + }, "\n"), + } + + for name, fixture := range fixtures { + t.Run(name, func(t *testing.T) { + once := fixExportedMarkdown(fixture) + twice := fixExportedMarkdown(once) + if once != twice { + t.Errorf("fixExportedMarkdown is not idempotent for %q\nfirst pass:\n%s\nsecond pass:\n%s", + name, once, twice) + } + }) + } +} + +// TestFixExportedMarkdownPreservesFencedCodeByteForByte packs a fenced code +// block with content that every individual transform in the pipeline would +// normally rewrite, and asserts the fence content comes out byte-for-byte +// identical. This is the pipeline's strongest invariant — users' code samples +// must never be silently modified by a formatting pass. +func TestFixExportedMarkdownPreservesFencedCodeByteForByte(t *testing.T) { + // Every line below is something at least one transform would touch if it + // appeared outside a fence. None of it must change. + dangerous := strings.Join([]string{ + "**foo **", // fixBoldSpacing — trailing space bold + "* hello*", // fixBoldSpacing — leading space italic + "# **heading**", // fixBoldSpacing — redundant heading bold + "para1", // fixTopLevelSoftbreaks — adjacent paragraphs + "para2", + "> q1", // fixBlockquoteHardBreaks — blockquote pair + "> q2", + "some text", // fixSetextAmbiguity — text before --- + "---", + " 1. nested", // normalizeNestedListIndentation + ``, // fixCalloutEmoji — emoji alias + }, "\n") + + // Wrap the dangerous content in a triple-backtick fence and surround with + // content so the pipeline has adjacent regions to potentially touch. + input := "before\n\n```\n" + dangerous + "\n```\n\nafter\n" + + got := fixExportedMarkdown(input) + + // Extract the fence content from the output and compare to the input fence + // content byte-for-byte. + gotFence, ok := extractFirstFenceContent(got) + if !ok { + t.Fatalf("fixExportedMarkdown output lost its fenced code block:\n%s", got) + } + if gotFence != dangerous { + t.Errorf("fenced code content was modified\nwant (bytes): %q\ngot (bytes): %q", + dangerous, gotFence) + } +} + +// extractFirstFenceContent returns the inner text of the first triple-backtick +// fenced code block it finds, or ("", false) if none is present. +func extractFirstFenceContent(md string) (string, bool) { + const fence = "```" + open := strings.Index(md, fence) + if open < 0 { + return "", false + } + // Skip the fence marker and its info-string line. + rest := md[open+len(fence):] + lineEnd := strings.Index(rest, "\n") + if lineEnd < 0 { + return "", false + } + rest = rest[lineEnd+1:] + close := strings.Index(rest, "\n"+fence) + if close < 0 { + return "", false + } + return rest[:close], true +} + +// TestFixExportedMarkdownPreservesCRLF feeds CRLF-terminated markdown (Windows +// line endings) through the pipeline and asserts that line endings are +// preserved AND the emphasis/heading transforms still apply — neither +// silently-LF-normalized nor passed through unchanged. +func TestFixExportedMarkdownPreservesCRLF(t *testing.T) { + lf := "# **Title**\nparagraph one\nparagraph two\n**bold **\n" + crlf := strings.ReplaceAll(lf, "\n", "\r\n") + + got := fixExportedMarkdown(crlf) + + // Transforms must still fire: heading bold stripped, trailing-space bold trimmed. + if strings.Contains(got, "**Title**") { + t.Errorf("heading bold not stripped on CRLF input:\n%q", got) + } + if strings.Contains(got, "**bold **") { + t.Errorf("trailing-space bold not fixed on CRLF input:\n%q", got) + } + // CRLF line endings must survive — we don't want to silently normalize a + // Windows author's document to LF. + if !strings.Contains(got, "\r\n") { + t.Errorf("CRLF line endings were normalized away:\n%q", got) + } +} + +// TestFixExportedMarkdownTransformInteractions covers shapes where more than +// one transform fires on the same input. Each transform is individually tested +// elsewhere; these cases guard against composition regressions. +func TestFixExportedMarkdownTransformInteractions(t *testing.T) { + tests := []struct { + name string + input string + wantContains []string // substrings that must be present after fixes + wantAbsent []string // substrings that must be absent after fixes + }{ + { + name: "nested list item with trailing-space bold", + input: "1. parent\n 1. **child **\n", + wantContains: []string{ + "\t1.", // nested indent converted to tab + "**child**", // trailing space trimmed + }, + wantAbsent: []string{ + " 1.", // original two-space indent gone + "**child **", // original trailing space gone + }, + }, + { + name: "paragraph followed by list", + input: "paragraph\n- item a\n- item b\n", + wantContains: []string{ + "paragraph\n\n- item a", // blank line inserted at text-to-list transition + }, + wantAbsent: []string{ + "\n\n\n", // no triple newline + }, + }, + { + name: "callout containing list with emphasis", + input: "\n- **item **\n- another\n\n", + wantContains: []string{ + "**item**", // trailing-space bold fixed inside callout + }, + wantAbsent: []string{ + "**item **", + }, + }, + { + name: "heading followed by paragraph with bold", + input: "# **Title**\nbody **text **\n", + wantContains: []string{ + "# Title", // heading bold stripped + "body **text**", // paragraph bold trimmed, not stripped + }, + wantAbsent: []string{ + "# **Title**", + "body **text **", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := fixExportedMarkdown(tt.input) + for _, want := range tt.wantContains { + if !strings.Contains(got, want) { + t.Errorf("want substring %q not found in output:\n%s", want, got) + } + } + for _, unwanted := range tt.wantAbsent { + if strings.Contains(got, unwanted) { + t.Errorf("unwanted substring %q still present in output:\n%s", unwanted, got) + } + } + }) + } +} + +// TestNormalizeNestedListIndentationDocumentedSkips locks in the deliberate +// "do nothing" branches of normalizeNestedListIndentation. Each case below is +// a shape the function intentionally does not rewrite; if a future change to +// the heuristic flips one of these, we want the regression to be visible in +// the test diff rather than silently changing user documents. +func TestNormalizeNestedListIndentationDocumentedSkips(t *testing.T) { + tests := []struct { + name string + input string + // want is identical to input — we are asserting "no change". + }{ + { + name: "three-space indent (odd) under list item stays unchanged", + input: "1. parent\n 1. child", + }, + { + name: "five-space indent (odd) under list item stays unchanged", + input: "- parent\n - deep", + }, + { + name: "two-space indent without a parent list item stays unchanged", + input: "plain paragraph\n - not nested", + }, + { + name: "blank-line-separated loose-list sibling stays unchanged", + input: "1. a\n\n 1. b", + }, + { + name: "four-space indented code block under list item stays unchanged", + input: "- parent\n\n 1. code sample", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeNestedListIndentation(tt.input) + if got != tt.input { + t.Errorf("normalizeNestedListIndentation unexpectedly rewrote documented-skip input\ninput: %q\ngot: %q", tt.input, got) + } + }) + } +}