From aecaa6945aea5d040886d140ab661515f47caa93 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 31 Jul 2025 11:33:05 +0100 Subject: [PATCH 1/2] Document markdown tokenization --- src/wrap.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/wrap.rs b/src/wrap.rs index 7d331bd3..f812e540 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -186,14 +186,16 @@ fn tokenize_inline(text: &str) -> Vec { tokens } -/// Split the input string into [`Token`]s by analysing whitespace and -/// backtick delimiters. +/// Tokenize the input string by splitting on whitespace and backtick +/// delimiters. /// -/// The tokenizer groups consecutive whitespace into a single -/// [`Token::Text`] and recognises backtick sequences as inline code spans. -/// When a run of backticks is encountered the parser searches forward for an -/// identical delimiter, allowing nested backticks when the span uses a longer -/// fence. Unmatched delimiter sequences are treated as literal text. +/// Consecutive whitespace characters are emitted as a single [`Token::Text`]. +/// Runs of backticks denote code spans; the parser searches ahead for a +/// matching delimiter of the same length, allowing nested backticks when the +/// outer span uses more characters. If no matching delimiter is found, the +/// backticks are treated as literal text. The function employs a small +/// state machine to track fenced blocks, ensuring that lines within fences are +/// passed through unchanged. 
/// /// ```rust,ignore /// use mdtablefix::wrap::{Token, tokenize_markdown}; From d66ae8d137d532a657a4274491889d9701b39f3d Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 31 Jul 2025 18:42:00 +0100 Subject: [PATCH 2/2] Clarify markdown tokenization --- src/wrap.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/wrap.rs b/src/wrap.rs index f812e540..d203cd7e 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -186,16 +186,16 @@ fn tokenize_inline(text: &str) -> Vec { tokens } -/// Tokenize the input string by splitting on whitespace and backtick -/// delimiters. +/// Tokenize a Markdown snippet using backtick-delimited code spans. /// -/// Consecutive whitespace characters are emitted as a single [`Token::Text`]. -/// Runs of backticks denote code spans; the parser searches ahead for a -/// matching delimiter of the same length, allowing nested backticks when the -/// outer span uses more characters. If no matching delimiter is found, the -/// backticks are treated as literal text. The function employs a small -/// state machine to track fenced blocks, ensuring that lines within fences are -/// passed through unchanged. +/// The function scans the input line by line. Lines matching [`FENCE_RE`] +/// produce [`Token::Fence`] tokens and toggle fenced mode. Lines inside a +/// fence are yielded verbatim. Outside fenced regions, the scanner searches for +/// backtick sequences. Text before a backtick becomes [`Token::Text`]. When a +/// matching sequence of equal length follows, the enclosed portion forms a +/// [`Token::Code`] span. If no closing sequence is found, the backticks and the +/// remaining text are returned as [`Token::Text`]. Whitespace is preserved +/// exactly as it appears. /// /// ```rust,ignore /// use mdtablefix::wrap::{Token, tokenize_markdown};