From 1540b17e606cec75a4898cebf3d72ec28a204e63 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 15:31:42 +0100 Subject: [PATCH 1/2] Add cargo-binstall release metadata Declare the Linux GNU `cargo-binstall` package metadata in `Cargo.toml` and teach the release workflow to build matching `.tar.gz` archives for `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`. Document the new installation path in the README and release process notes so the published assets and the user-facing instructions stay aligned. --- .github/workflows/release.yml | 26 +++++++++++++++++++++----- Cargo.toml | 8 ++++++++ README.md | 7 +++++++ docs/release-process.md | 22 ++++++++++++++-------- 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index da399ade..71f950df 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,10 +18,12 @@ jobs: arch: x86_64 target: x86_64-unknown-linux-gnu ext: "" + cargo_binstall_archive: true - os: linux arch: aarch64 target: aarch64-unknown-linux-gnu ext: "" + cargo_binstall_archive: true # - os: windows # arch: x86_64 # target: x86_64-pc-windows-msvc @@ -42,6 +44,7 @@ jobs: arch: x86_64 target: x86_64-unknown-freebsd ext: "" + cargo_binstall_archive: false # - os: freebsd # arch: aarch64 # target: aarch64-unknown-freebsd @@ -104,11 +107,24 @@ jobs: run: cross +stable build --release --target ${{ matrix.target }} - name: Prepare artifact run: | - mkdir -p artifacts/${{ matrix.os }}-${{ matrix.arch }} - cp target/${{ matrix.target }}/release/${{ env.REPO_NAME }}${{ matrix.ext }} \ - artifacts/${{ matrix.os }}-${{ matrix.arch }}/${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }} - sha256sum artifacts/${{ matrix.os }}-${{ matrix.arch }}/${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }} > \ - artifacts/${{ matrix.os }}-${{ matrix.arch }}/${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }}.sha256 + set -euo pipefail + version="${GITHUB_REF_NAME#v}" + artifact_dir="artifacts/${{ matrix.os }}-${{ matrix.arch }}" + binary_path="target/${{ matrix.target }}/release/${{ env.REPO_NAME }}${{ matrix.ext }}" + binary_name="${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }}" + + mkdir -p "${artifact_dir}" + cp "${binary_path}" "${artifact_dir}/${binary_name}" + sha256sum "${artifact_dir}/${binary_name}" > \ + "${artifact_dir}/${binary_name}.sha256" + + if [ "${{ matrix.cargo_binstall_archive }}" = "true" ]; then + archive_name="${{ env.REPO_NAME }}-${version}-${{ matrix.target }}.tar.gz" + tar -C "target/${{ matrix.target }}/release" -czf \ + "${artifact_dir}/${archive_name}" "${{ env.REPO_NAME }}${{ matrix.ext }}" + sha256sum "${artifact_dir}/${archive_name}" > \ + "${artifact_dir}/${archive_name}.sha256" + fi - name: Upload release artifact uses: actions/upload-artifact@v4 with: diff --git a/Cargo.toml b/Cargo.toml index a27fc9bb..acb11865 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,10 +4,18 @@ version = "0.3.1" edition = "2024" rust-version = "1.89" license = "ISC" +repository = "https://github.com/leynos/mdtablefix" description = """ `mdtablefix` unb0rks and reflows Markdown tables so that each column has a uniform width. 
When \
the `--wrap` option is used, it also wraps paragraphs and list items to 80 columns."""
+[package.metadata.binstall]
+
+[package.metadata.binstall.overrides.'cfg(all(target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64"), target_env = "gnu"))']
+pkg-url = "{ repo }/releases/download/v{ version }/{ name }-{ version }-{ target }.tar.gz"
+bin-dir = "{ bin }{ binary-ext }"
+pkg-fmt = "tgz"
+
 [dependencies]
 anyhow = "1"
 clap = { version = "4", features = ["derive"] }
diff --git a/README.md b/README.md
index 1f932fb5..23ad0f67 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,13 @@ Install via Cargo:
 
 cargo install mdtablefix
 ```
 
+On Linux `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`, install
+the prebuilt release archive via `cargo-binstall`:
+
+```bash
+cargo binstall mdtablefix
+```
+
 Or clone the repository and build from source:
 
 ```bash
diff --git a/docs/release-process.md b/docs/release-process.md
index c0f137c3..4d4cfc17 100644
--- a/docs/release-process.md
+++ b/docs/release-process.md
@@ -1,7 +1,8 @@
 # Release Process
 
 This project publishes prebuilt binaries for multiple operating systems and
-architectures.
+architectures. It also publishes `cargo-binstall` archives for the supported
+Linux release targets.
 
 The project targets the stable Rust `1.89.0` toolchain, as specified in
 `rust-toolchain.toml`.
@@ -14,10 +15,7 @@ The GitHub Actions workflow `.github/workflows/release.yml` builds and uploads
 binaries for:
 
 - Linux (x86_64 and aarch64)
-- FreeBSD (x86_64 and aarch64)
-- macOS (x86_64 and aarch64)
-- Windows (x86_64 and aarch64)
-- OpenBSD (x86_64 and aarch64)
+- FreeBSD (x86_64)
 
 Releases start from tags named `v<major>.<minor>.<patch>`. The workflow
 checks that the tag's version, without the leading `v`, matches the `Cargo.toml`
@@ -26,6 +24,12 @@ that the tag's version, without the leading `v`, matches the `Cargo.toml`
 
 Each binary is named using the pattern `mdtablefix-<os>-<arch>` with an
 `.exe` suffix on Windows.
 
+For Linux `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`, the
+workflow also produces `cargo-binstall` archives named
+`mdtablefix-<version>-<target>.tar.gz`. Each archive contains the `mdtablefix`
+binary at the archive root, matching the `Cargo.toml`
+`[package.metadata.binstall]` configuration.
+
 Binaries are uploaded as soon as they are built, so they are available from the
 workflow run while other targets build.
 
@@ -33,13 +37,15 @@
 
 The `release.yml` workflow defines a matrix of operating system and
 architecture combinations. Each entry includes the target triple used by
-`cross` and a filename extension for Windows. During the build job, `cross`
-compiles a release binary for every matrix row.
+`cross` and whether the target also needs a `cargo-binstall` archive. During
+the build job, `cross` compiles a release binary for every matrix row.
 `cross` is installed from a specific git tag to avoid unexpected behaviour
 from its main branch.
 
 Each binary is placed in an `artifacts/<os>-<arch>` directory using the naming
 pattern `mdtablefix-<os>-<arch>[.exe]`. An SHA-256 checksum is
-written alongside each binary for download verification.
+written alongside each binary for download verification. The Linux
+`cargo-binstall` targets additionally produce
+`mdtablefix-<version>-<target>.tar.gz` plus a matching SHA-256 checksum.
 
 After every build completes, the artefact is uploaded so that the GitHub
 Actions interface provides it immediately.
Once the matrix has finished, the From 3d51aed524df22ac6fce4d69ab1f5136af4c8e40 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Apr 2026 15:31:51 +0100 Subject: [PATCH 2/2] Refactor helpers to satisfy Clippy Break up the remaining high-nesting and high-arity helpers across wrapping, footnote, HTML, and list processing so `make lint` passes without suppressing the configured warnings. Reshape the affected tests and shared test helpers alongside the production refactors so the new helper boundaries remain covered while preserving existing behaviour. --- src/code_emphasis.rs | 166 ++++++++++++++++---------- src/fences.rs | 35 ++++-- src/footnotes/inline.rs | 37 +++--- src/footnotes/parsing.rs | 2 +- src/footnotes/renumber.rs | 216 ++++++++++++++++++++++------------ src/frontmatter.rs | 140 ++++++---------------- src/html.rs | 104 ++++++++-------- src/lists.rs | 62 ++++++---- src/process.rs | 126 ++++++++++---------- src/table.rs | 22 +--- src/wrap.rs | 181 +++++++++++++--------------- src/wrap/fence.rs | 13 +- src/wrap/inline.rs | 39 +++--- src/wrap/line_buffer.rs | 34 +++--- src/wrap/paragraph.rs | 153 ++++++++++++++---------- src/wrap/tests.rs | 23 +++- src/wrap/tokenize/mod.rs | 70 +++++++---- src/wrap/tokenize/scanning.rs | 44 +++---- tests/common/mod.rs | 52 ++++---- 19 files changed, 829 insertions(+), 690 deletions(-) diff --git a/src/code_emphasis.rs b/src/code_emphasis.rs index f64610f8..b5acd194 100644 --- a/src/code_emphasis.rs +++ b/src/code_emphasis.rs @@ -10,6 +10,8 @@ //! transformation should run before wrapping and footnote conversion so marker //! adjacency is evaluated on the raw input. +use std::{iter::Peekable, vec::IntoIter}; + use crate::{ textproc::process_text, wrap::{Token, tokenize_markdown}, @@ -49,6 +51,103 @@ fn push_code(code: &str, out: &mut String) { out.push_str(&fence); } +fn has_code_emphasis_adjacent(source: &str) -> bool { + source.contains("`*") || source.contains("`_") || source.contains("*`") || source.contains("_`") +} + +fn handle_text_token<'a>( + raw: &'a str, + next: Option<&Token<'a>>, + out: &mut String, + pending: &mut &'a str, +) { + if !next.is_some_and(|token| matches!(token, Token::Code { .. 
})) { + out.push_str(raw); + return; + } + + let (lead, body, trail) = split_marks(raw); + if body.is_empty() && trail.is_empty() { + *pending = lead; + return; + } + + out.push_str(lead); + out.push_str(body); + *pending = trail; +} + +fn try_fold_matching_emphasis<'a>( + tokens: &mut Peekable>>, + pending: &mut &'a str, + code: &str, + out: &mut String, +) -> bool { + let Some(Token::Text(next)) = tokens.peek() else { + return false; + }; + let (lead, mid, trail) = split_marks(next); + if *pending == lead && mid.is_empty() && trail.is_empty() { + out.push_str(pending); + push_code(code, out); + out.push_str(lead); + *pending = ""; + tokens.next(); + return true; + } + false +} + +fn consume_code_affixes<'a>( + tokens: &mut Peekable>>, + pending: &mut &'a str, +) -> (&'a str, &'a str, bool) { + let mut prefix = std::mem::take(pending); + let mut suffix = ""; + let mut modified = !prefix.is_empty(); + + let Some(Token::Text(next)) = tokens.peek_mut() else { + return (prefix, suffix, modified); + }; + + let (lead, mid, _) = split_marks(next); + if lead.is_empty() { + return (prefix, suffix, modified); + } + + modified = true; + if prefix.is_empty() { + prefix = lead; + } else if mid.is_empty() { + suffix = lead; + } else { + prefix = ""; + } + *next = &next[lead.len()..]; + (prefix, suffix, modified) +} + +fn handle_code_token<'a>( + tokens: &mut Peekable>>, + code_token: (&'a str, &'a str), + out: &mut String, + pending: &mut &'a str, +) { + let (raw, code) = code_token; + if !pending.is_empty() && try_fold_matching_emphasis(tokens, pending, code, out) { + return; + } + + let (prefix, suffix, modified) = consume_code_affixes(tokens, pending); + out.push_str(prefix); + if modified { + push_code(code, out); + } else { + out.push_str(raw); + } + out.push_str(suffix); +} + /// Merge contiguous code and emphasis spans. /// /// Groups of emphasis markers and inline code with no separating spaces are @@ -75,11 +174,7 @@ pub fn fix_code_emphasis(lines: &[String]) -> Vec { return vec![String::new(); lines.len()]; } let source = lines.join("\n"); - if !source.contains("`*") - && !source.contains("`_") - && !source.contains("*`") - && !source.contains("_`") - { + if !has_code_emphasis_adjacent(&source) { return lines.to_vec(); } let mut tokens = tokenize_markdown(&source).into_iter().peekable(); @@ -87,66 +182,9 @@ pub fn fix_code_emphasis(lines: &[String]) -> Vec { let mut pending = ""; while let Some(token) = tokens.next() { match token { - Token::Text(raw) => { - if tokens - .peek() - .is_some_and(|t| matches!(t, Token::Code { .. })) - { - let (lead, body, trail) = split_marks(raw); - if body.is_empty() && trail.is_empty() { - pending = lead; - } else { - out.push_str(lead); - out.push_str(body); - pending = trail; - } - } else { - out.push_str(raw); - } - } + Token::Text(raw) => handle_text_token(raw, tokens.peek(), &mut out, &mut pending), Token::Code { raw, code, .. 
} => { - if !pending.is_empty() - && let Some(Token::Text(next)) = tokens.peek() - { - let (lead, mid, trail) = split_marks(next); - if mid.is_empty() && trail.is_empty() && lead == pending { - out.push_str(pending); - push_code(code, &mut out); - out.push_str(lead); - pending = ""; - tokens.next(); - continue; - } - } - let mut prefix = pending; - let mut suffix = ""; - let mut modified = !pending.is_empty(); - pending = ""; - if let Some(Token::Text(next)) = tokens.peek_mut() { - let (lead, mid, _) = split_marks(next); - if !lead.is_empty() { - modified = true; - if prefix.is_empty() { - prefix = lead; - } else if mid.is_empty() { - suffix = lead; - } else { - prefix = ""; - } - *next = &next[lead.len()..]; - } - } - if !prefix.is_empty() { - out.push_str(prefix); - } - if modified { - push_code(code, &mut out); - } else { - out.push_str(raw); - } - if !suffix.is_empty() { - out.push_str(suffix); - } + handle_code_token(&mut tokens, (raw, code), &mut out, &mut pending); } Token::Fence(f) => out.push_str(f), Token::Newline => out.push('\n'), diff --git a/src/fences.rs b/src/fences.rs index 26176023..0644120f 100644 --- a/src/fences.rs +++ b/src/fences.rs @@ -120,6 +120,24 @@ fn attach_specifier_to_fence(fence_line: &str, specifier: &str, spec_indent: &st format!("{final_indent}```{specifier}") } +fn orphan_specifier_target(lines: &[String], start: usize) -> Option { + let mut index = start; + while index < lines.len() && lines[index].trim().is_empty() { + index += 1; + } + if index >= lines.len() || FENCE_RE.captures(&lines[index]).is_none() { + return None; + } + Some(index) +} + +fn orphan_specifier_target_without_language(lines: &[String], start: usize) -> Option { + let target = orphan_specifier_target(lines, start)?; + let cap = FENCE_RE.captures(&lines[target])?; + let lang = cap.get(3).map_or("", |m| m.as_str()); + is_null_lang(lang).then_some(target) +} + /// Attach orphaned language specifiers to opening fences. 
/// /// After compressing fences, a language may appear on its own line directly @@ -156,19 +174,10 @@ pub fn attach_orphan_specifiers(lines: &[String]) -> Vec { let (spec, indent) = normalize_specifier(line); if ORPHAN_LANG_RE.is_match(&spec) && out.last().is_none_or(|l: &String| l.trim().is_empty()) { - let mut j = i + 1; - while j < lines.len() && lines[j].trim().is_empty() { - j += 1; - } - if j < lines.len() - && let Some(cap) = FENCE_RE.captures(&lines[j]) - { - let lang = cap.get(3).map_or("", |m| m.as_str()); - if is_null_lang(lang) { - out.push(attach_specifier_to_fence(&lines[j], &spec, &indent)); - i = j + 1; - continue; - } + if let Some(target) = orphan_specifier_target_without_language(lines, i + 1) { + out.push(attach_specifier_to_fence(&lines[target], &spec, &indent)); + i = target + 1; + continue; } out.push(line.clone()); i += 1; diff --git a/src/footnotes/inline.rs b/src/footnotes/inline.rs index 82744eb0..2cd75e35 100644 --- a/src/footnotes/inline.rs +++ b/src/footnotes/inline.rs @@ -28,28 +28,37 @@ static ATX_HEADING_RE: LazyLock = lazy_regex!( "atx heading prefix", ); +#[derive(Clone, Copy)] +struct InlineFootnote<'a> { + pre: &'a str, + punc: &'a str, + style: &'a str, + num: &'a str, + boundary: &'a str, +} + #[inline] -fn capture_parts<'a>(caps: &'a Captures<'a>) -> (&'a str, &'a str, &'a str, &'a str, &'a str) { - ( - &caps["pre"], - &caps["punc"], - &caps["style"], - &caps["num"], - &caps["boundary"], - ) +fn capture_parts<'a>(caps: &'a Captures<'a>) -> InlineFootnote<'a> { + InlineFootnote { + pre: &caps["pre"], + punc: &caps["punc"], + style: &caps["style"], + num: &caps["num"], + boundary: &caps["boundary"], + } } #[inline] -fn build_footnote(pre: &str, punc: &str, style: &str, num: &str, boundary: &str) -> String { - format!("{pre}{punc}{style}[^{num}]{boundary}") +fn build_footnote(parts: InlineFootnote<'_>) -> String { + format!( + "{}{}{}[^{}]{}", + parts.pre, parts.punc, parts.style, parts.num, parts.boundary + ) } /// Convert inline numeric references into Markdown footnote syntax. 
pub(super) fn convert_inline(text: &str) -> String { - let out = INLINE_FN_RE.replace_all(text, |caps: &Captures| { - let (pre, punc, style, num, boundary) = capture_parts(caps); - build_footnote(pre, punc, style, num, boundary) - }); + let out = INLINE_FN_RE.replace_all(text, |caps: &Captures| build_footnote(capture_parts(caps))); COLON_FN_RE .replace_all(&out, |caps: &Captures| { let pre = &caps["pre"]; diff --git a/src/footnotes/parsing.rs b/src/footnotes/parsing.rs index 109379df..88a2007d 100644 --- a/src/footnotes/parsing.rs +++ b/src/footnotes/parsing.rs @@ -14,7 +14,7 @@ pub(super) static DEF_RE: LazyLock = lazy_regex!( "footnote definition pattern should compile", ); -#[derive(Clone)] +#[derive(Clone, Copy)] pub(super) struct DefinitionParts<'a> { pub(super) prefix: &'a str, pub(super) number: usize, diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index 9d7655b6..203c4d59 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -99,27 +99,36 @@ fn collect_reference_mapping(lines: &[String]) -> HashMap { } for token in tokenize_markdown(line) { if let Token::Text(text) = token { - for caps in FOOTNOTE_REF_RE.captures_iter(text) { - let Some(mat) = caps.get(0) else { - continue; - }; - if is_definition_like(text, &mat) { - continue; - } - if let Ok(number) = caps["num"].parse::() { - if mapping.contains_key(&number) { - continue; - } - mapping.insert(number, next); - next += 1; - } - } + collect_reference_mapping_from_text(text, &mut mapping, &mut next); } } } mapping } +fn collect_reference_mapping_from_text( + text: &str, + mapping: &mut HashMap, + next: &mut usize, +) { + for caps in FOOTNOTE_REF_RE.captures_iter(text) { + let Some(mat) = caps.get(0) else { + continue; + }; + if is_definition_like(text, &mat) { + continue; + } + let Ok(number) = caps["num"].parse::() else { + continue; + }; + if mapping.contains_key(&number) { + continue; + } + mapping.insert(number, *next); + *next += 1; + } +} + #[derive(Clone)] struct DefinitionLine { index: usize, @@ -270,6 +279,18 @@ struct DefinitionUpdates { is_definition_line: Vec, } +struct DefinitionScanContext<'a> { + mapping: &'a mut HashMap, + next_number: &'a mut usize, + numeric_list_range: Option<(usize, usize)>, + skip_numeric_conversion: bool, +} + +struct DefinitionAccumulator { + definitions: Vec, + is_definition_line: Vec, +} + fn assign_new_number( mapping: &mut HashMap, number: usize, @@ -296,22 +317,58 @@ fn should_convert_numeric_line( numeric_range.is_some_and(|(start, end)| index >= start && index < end) } -fn collect_definition_updates( - lines: &[String], +fn definition_line_from_parts( + index: usize, + parts: super::parsing::DefinitionParts<'_>, mapping: &mut HashMap, -) -> DefinitionUpdates { - let mut next_number = mapping.values().copied().max().unwrap_or(0) + 1; - let mut definitions = Vec::new(); - let mut is_definition_line = vec![false; lines.len()]; - let mut numeric_candidates: Vec = Vec::new(); - let numeric_list_range = footnote_block_range(lines); - let skip_numeric_conversion = numeric_list_range - .as_ref() - .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); + next_number: &mut usize, +) -> DefinitionLine { + let new_number = assign_new_number(mapping, parts.number, next_number); + let rewritten_rest = rewrite_tokens(parts.rest, mapping); + let mut line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); + line.push_str(parts.prefix); + write!(&mut line, "[^{new_number}]:").expect("write to string cannot fail"); + 
line.push_str(&rewritten_rest); + DefinitionLine { + index, + new_number, + line, + } +} + +fn numeric_candidate_from_line(line: &str, index: usize) -> Option { + let caps = FOOTNOTE_LINE_RE.captures(line)?; + let number = caps["num"].parse::().ok()?; + let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); + let rest = caps.name("rest").map_or("", |m| m.as_str()).to_string(); + let num_match = caps + .name("num") + .expect("numeric list capture missing number"); + let rest_match = caps + .name("rest") + .expect("numeric list capture missing rest"); + let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); + Some(NumericCandidate { + index, + number, + indent, + whitespace, + rest, + }) +} +fn collect_scan_updates( + lines: &[String], + ctx: &mut DefinitionScanContext<'_>, +) -> (DefinitionAccumulator, Vec) { + let mut acc = DefinitionAccumulator { + definitions: Vec::new(), + is_definition_line: vec![false; lines.len()], + }; + let mut numeric_candidates = Vec::new(); let mut in_fence = false; - for (idx, line) in lines.iter().enumerate() { + for (index, line) in lines.iter().enumerate() { if is_fence_line(line) { in_fence = !in_fence; continue; @@ -321,67 +378,76 @@ fn collect_definition_updates( } if let Some(parts) = parse_definition(line) { - let new_number = assign_new_number(mapping, parts.number, &mut next_number); - let rewritten_rest = rewrite_tokens(parts.rest, mapping); - let mut new_line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); - new_line.push_str(parts.prefix); - write!(&mut new_line, "[^{new_number}]:").expect("write to string cannot fail"); - new_line.push_str(&rewritten_rest); - definitions.push(DefinitionLine { - index: idx, - new_number, - line: new_line, - }); - is_definition_line[idx] = true; - } else if should_convert_numeric_line(idx, numeric_list_range, skip_numeric_conversion) - && let Some(caps) = FOOTNOTE_LINE_RE.captures(line) + acc.definitions.push(definition_line_from_parts( + index, + parts, + ctx.mapping, + ctx.next_number, + )); + acc.is_definition_line[index] = true; + continue; + } + + if !should_convert_numeric_line(index, ctx.numeric_list_range, ctx.skip_numeric_conversion) { - if mapping.is_empty() && definitions.is_empty() { - continue; - } - let Ok(number) = caps["num"].parse::() else { - continue; - }; - let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); - let rest = caps.name("rest").map_or("", |m| m.as_str()).to_string(); - let num_match = caps - .name("num") - .expect("numeric list capture missing number"); - let rest_match = caps - .name("rest") - .expect("numeric list capture missing rest"); - let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); - numeric_candidates.push(NumericCandidate { - index: idx, - number, - indent, - whitespace, - rest, - }); + continue; + } + if ctx.mapping.is_empty() && acc.definitions.is_empty() { + continue; + } + if let Some(candidate) = numeric_candidate_from_line(line, index) { + numeric_candidates.push(candidate); } } + (acc, numeric_candidates) +} + +fn finalize_numeric_candidates( + numeric_candidates: Vec, + ctx: &mut DefinitionScanContext<'_>, + acc: &mut DefinitionAccumulator, +) { for candidate in numeric_candidates.into_iter().rev() { - let new_number = assign_new_number(mapping, candidate.number, &mut next_number); - let rewritten_rest = rewrite_tokens(&candidate.rest, mapping); - let mut new_line = String::with_capacity( + let new_number = assign_new_number(ctx.mapping, candidate.number, 
ctx.next_number); + let rewritten_rest = rewrite_tokens(&candidate.rest, ctx.mapping); + let mut line = String::with_capacity( candidate.indent.len() + candidate.whitespace.len() + rewritten_rest.len() + 8, ); - new_line.push_str(&candidate.indent); - write!(&mut new_line, "[^{new_number}]:").expect("write to string cannot fail"); - new_line.push_str(&candidate.whitespace); - new_line.push_str(&rewritten_rest); - definitions.push(DefinitionLine { + line.push_str(&candidate.indent); + write!(&mut line, "[^{new_number}]:").expect("write to string cannot fail"); + line.push_str(&candidate.whitespace); + line.push_str(&rewritten_rest); + acc.definitions.push(DefinitionLine { index: candidate.index, new_number, - line: new_line, + line, }); - is_definition_line[candidate.index] = true; + acc.is_definition_line[candidate.index] = true; } +} + +fn collect_definition_updates( + lines: &[String], + mapping: &mut HashMap, +) -> DefinitionUpdates { + let mut next_number = mapping.values().copied().max().unwrap_or(0) + 1; + let numeric_list_range = footnote_block_range(lines); + let skip_numeric_conversion = numeric_list_range + .as_ref() + .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); + let mut ctx = DefinitionScanContext { + mapping, + next_number: &mut next_number, + numeric_list_range, + skip_numeric_conversion, + }; + let (mut acc, numeric_candidates) = collect_scan_updates(lines, &mut ctx); + finalize_numeric_candidates(numeric_candidates, &mut ctx, &mut acc); DefinitionUpdates { - definitions, - is_definition_line, + definitions: acc.definitions, + is_definition_line: acc.is_definition_line, } } diff --git a/src/frontmatter.rs b/src/frontmatter.rs index 12cb5c87..9c316cc0 100644 --- a/src/frontmatter.rs +++ b/src/frontmatter.rs @@ -69,49 +69,35 @@ mod tests { /// Helper to convert `&[&str]` → `Vec`. fn s(v: &[&str]) -> Vec { v.iter().copied().map(str::to_string).collect() } + struct PrefixEmptyCase { + lines: Vec, + body_is_empty: bool, + check_body_equality: bool, + } + + struct FrontmatterSplitCase { + lines: Vec, + prefix_len: usize, + body_len: usize, + prefix_spot_checks: Vec<(usize, &'static str)>, + body_spot_check: Option<&'static str>, + } + /// Cases where `prefix` is empty (no frontmatter detected). 
#[rstest] - #[case::empty_input_returns_empty_slices( - s(&[]), - true, // body_is_empty - false // check_body_equality - )] - #[case::no_frontmatter_returns_empty_prefix( - s(&["# Heading", "Some text"]), - false, - true // check body == input lines - )] - #[case::unmatched_opener_treated_as_body( - s(&["---", "Some text", "More text"]), - false, - false - )] - #[case::indented_opener_not_recognized( - s(&[" ---", "title: Example", " ---"]), - false, - false - )] - #[case::later_dash_block_not_frontmatter( - s(&["# Heading", "", "---", "Not frontmatter", "---"]), - false, - false - )] - #[case::indented_closer_not_recognized( - s(&["---", "title: Example", " --- ", "# Heading"]), - false, - false - )] - fn prefix_empty_cases( - #[case] lines: Vec, - #[case] body_is_empty: bool, - #[case] check_body_equality: bool, - ) { - let (prefix, body) = split_leading_yaml_frontmatter(&lines); + #[case::empty_input_returns_empty_slices(PrefixEmptyCase { lines: s(&[]), body_is_empty: true, check_body_equality: false })] + #[case::no_frontmatter_returns_empty_prefix(PrefixEmptyCase { lines: s(&["# Heading", "Some text"]), body_is_empty: false, check_body_equality: true })] + #[case::unmatched_opener_treated_as_body(PrefixEmptyCase { lines: s(&["---", "Some text", "More text"]), body_is_empty: false, check_body_equality: false })] + #[case::indented_opener_not_recognized(PrefixEmptyCase { lines: s(&[" ---", "title: Example", " ---"]), body_is_empty: false, check_body_equality: false })] + #[case::later_dash_block_not_frontmatter(PrefixEmptyCase { lines: s(&["# Heading", "", "---", "Not frontmatter", "---"]), body_is_empty: false, check_body_equality: false })] + #[case::indented_closer_not_recognized(PrefixEmptyCase { lines: s(&["---", "title: Example", " --- ", "# Heading"]), body_is_empty: false, check_body_equality: false })] + fn prefix_empty_cases(#[case] case: PrefixEmptyCase) { + let (prefix, body) = split_leading_yaml_frontmatter(&case.lines); assert!(prefix.is_empty()); - if body_is_empty { + if case.body_is_empty { assert!(body.is_empty()); - } else if check_body_equality { - assert_eq!(body, &lines); + } else if case.check_body_equality { + assert_eq!(body, &case.lines); } else { assert!(!body.is_empty()); } @@ -119,72 +105,20 @@ mod tests { /// Cases where frontmatter is detected (non-empty `prefix`). 
#[rstest] - #[case::detects_frontmatter_with_triple_dash_closer( - s(&["---", "title: Example", "author: Test", "---", "# Heading", "Body text"]), - 4, // prefix_len - 2, // body_len - Some((0, "---")), - Some((3, "---")), - Some("# Heading") - )] - #[case::detects_frontmatter_with_triple_dot_closer( - s(&["---", "title: Example", "...", "# Heading"]), - 3, - 1, - Some((2, "...")), - None, - Some("# Heading") - )] - #[case::frontmatter_with_empty_body( - s(&["---", "title: Example", "---"]), - 3, - 0, - None, - None, - None - )] - #[case::frontmatter_only_no_body( - s(&["---", "---"]), - 2, - 0, - Some((1, "---")), - None, - None - )] - #[case::trailing_whitespace_on_closer_is_trimmed( - s(&["---", "title: Example", "--- ", "# Heading"]), - 3, - 1, - None, - None, - None - )] - #[case::multiline_yaml_values_preserved( - s(&["---", "description: |", " This is a multi-line", " YAML value", "---", "# Content"]), - 5, - 1, - None, - None, - Some("# Content") - )] - fn frontmatter_split_cases( - #[case] lines: Vec, - #[case] prefix_len: usize, - #[case] body_len: usize, - #[case] prefix_spot_check: Option<(usize, &str)>, - #[case] prefix_spot_check_2: Option<(usize, &str)>, - #[case] body_spot_check: Option<&str>, - ) { - let (prefix, body) = split_leading_yaml_frontmatter(&lines); - assert_eq!(prefix.len(), prefix_len); - assert_eq!(body.len(), body_len); - if let Some((idx, expected)) = prefix_spot_check { - assert_eq!(prefix[idx], expected); - } - if let Some((idx, expected)) = prefix_spot_check_2 { + #[case::detects_frontmatter_with_triple_dash_closer(FrontmatterSplitCase { lines: s(&["---", "title: Example", "author: Test", "---", "# Heading", "Body text"]), prefix_len: 4, body_len: 2, prefix_spot_checks: vec![(0, "---"), (3, "---")], body_spot_check: Some("# Heading") })] + #[case::detects_frontmatter_with_triple_dot_closer(FrontmatterSplitCase { lines: s(&["---", "title: Example", "...", "# Heading"]), prefix_len: 3, body_len: 1, prefix_spot_checks: vec![(2, "...")], body_spot_check: Some("# Heading") })] + #[case::frontmatter_with_empty_body(FrontmatterSplitCase { lines: s(&["---", "title: Example", "---"]), prefix_len: 3, body_len: 0, prefix_spot_checks: vec![], body_spot_check: None })] + #[case::frontmatter_only_no_body(FrontmatterSplitCase { lines: s(&["---", "---"]), prefix_len: 2, body_len: 0, prefix_spot_checks: vec![(1, "---")], body_spot_check: None })] + #[case::trailing_whitespace_on_closer_is_trimmed(FrontmatterSplitCase { lines: s(&["---", "title: Example", "--- ", "# Heading"]), prefix_len: 3, body_len: 1, prefix_spot_checks: vec![], body_spot_check: None })] + #[case::multiline_yaml_values_preserved(FrontmatterSplitCase { lines: s(&["---", "description: |", " This is a multi-line", " YAML value", "---", "# Content"]), prefix_len: 5, body_len: 1, prefix_spot_checks: vec![], body_spot_check: Some("# Content") })] + fn frontmatter_split_cases(#[case] case: FrontmatterSplitCase) { + let (prefix, body) = split_leading_yaml_frontmatter(&case.lines); + assert_eq!(prefix.len(), case.prefix_len); + assert_eq!(body.len(), case.body_len); + for (idx, expected) in case.prefix_spot_checks { assert_eq!(prefix[idx], expected); } - if let Some(expected) = body_spot_check { + if let Some(expected) = case.body_spot_check { assert_eq!(body[0], expected); } } diff --git a/src/html.rs b/src/html.rs index a3179eb8..b8c6e326 100644 --- a/src/html.rs +++ b/src/html.rs @@ -52,15 +52,7 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { match &handle.data { 
NodeData::Text { contents } => {
             for ch in contents.borrow().chars() {
-                if ch.is_whitespace() {
-                    *last_space = true;
-                } else {
-                    if *last_space && !out.is_empty() {
-                        out.push(' ');
-                    }
-                    out.push(ch);
-                    *last_space = false;
-                }
+                push_collapsed_text_char(ch, out, last_space);
             }
         }
         NodeData::Element { name, .. } => {
@@ -80,6 +72,18 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) {
     }
 }
 
+fn push_collapsed_text_char(ch: char, out: &mut String, last_space: &mut bool) {
+    if ch.is_whitespace() {
+        *last_space = true;
+        return;
+    }
+    if *last_space && !out.is_empty() {
+        out.push(' ');
+    }
+    out.push(ch);
+    *last_space = false;
+}
+
 /// Returns `true` if `handle` is an HTML element with the given tag name.
 fn is_element(handle: &Handle, tag: &str) -> bool {
     if let NodeData::Element { name, .. } = &handle.data {
@@ -204,26 +208,43 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec<String> {
     out
 }
 
-/// Appends HTML table lines, tracking `<table>` depth and converting them to Markdown when closed.
-///
-/// Tracks the nesting depth of `<table>
` tags, appending each line to the buffer. When all opened -/// tables are closed (depth reaches zero), converts the buffered HTML table lines to Markdown and -/// appends them to the output vector. Resets the buffer and updates the HTML state accordingly. -fn push_html_line( - line: &str, - buf: &mut Vec, - depth: &mut usize, - in_html: &mut bool, - out: &mut Vec, -) { +fn append_html_table_line(line: &str, buf: &mut Vec, depth: &mut usize) { buf.push(line.to_string()); *depth += TABLE_START_RE.find_iter(line).count(); if TABLE_END_RE.is_match(line) { *depth = depth.saturating_sub(TABLE_END_RE.find_iter(line).count()); - if *depth == 0 { - out.extend(html_table_to_markdown(buf)); - buf.clear(); - *in_html = false; + } +} + +fn flush_completed_html_table(buf: &mut Vec, depth: usize, out: &mut Vec) -> bool { + if depth != 0 { + return false; + } + out.extend(table_lines_to_markdown(buf)); + buf.clear(); + true +} + +#[derive(Default)] +struct HtmlTableState { + buf: Vec, + depth: usize, + in_html: bool, +} + +impl HtmlTableState { + fn flush_raw(&mut self, out: &mut Vec) { + if !self.buf.is_empty() { + out.append(&mut self.buf); + } + self.depth = 0; + self.in_html = false; + } + + fn push_html_line(&mut self, line: &str, out: &mut Vec) { + append_html_table_line(line, &mut self.buf, &mut self.depth); + if flush_completed_html_table(&mut self.buf, self.depth, out) { + self.in_html = false; } } } @@ -258,15 +279,8 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { for line in lines { if depth > 0 || TABLE_START_RE.is_match(line.trim_start()) { - buf.push(line.clone()); - depth += TABLE_START_RE.find_iter(line).count(); - if TABLE_END_RE.is_match(line) { - depth = depth.saturating_sub(TABLE_END_RE.find_iter(line).count()); - if depth == 0 { - out.extend(table_lines_to_markdown(&buf)); - buf.clear(); - } - } + append_html_table_line(line, &mut buf, &mut depth); + let _ = flush_completed_html_table(&mut buf, depth, &mut out); continue; } @@ -306,17 +320,13 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { /// ``` pub fn convert_html_tables(lines: &[String]) -> Vec { let mut out = Vec::new(); - let mut buf = Vec::new(); - let mut depth = 0usize; - let mut in_html = false; + let mut html_state = HtmlTableState::default(); let mut in_code = false; for line in lines { if is_fence(line).is_some() { - if in_html { - out.append(&mut buf); - in_html = false; - depth = 0; + if html_state.in_html { + html_state.flush_raw(&mut out); } in_code = !in_code; out.push(line.clone()); @@ -328,22 +338,22 @@ pub fn convert_html_tables(lines: &[String]) -> Vec { continue; } - if in_html { - push_html_line(line, &mut buf, &mut depth, &mut in_html, &mut out); + if html_state.in_html { + html_state.push_html_line(line, &mut out); continue; } if TABLE_START_RE.is_match(line.trim_start()) { - in_html = true; - push_html_line(line, &mut buf, &mut depth, &mut in_html, &mut out); + html_state.in_html = true; + html_state.push_html_line(line, &mut out); continue; } out.push(line.clone()); } - if !buf.is_empty() { - out.extend(buf); + if !html_state.buf.is_empty() { + out.extend(html_state.buf); } out diff --git a/src/lists.rs b/src/lists.rs index 1431a8a0..4fdd5748 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -61,21 +61,32 @@ fn is_plain_paragraph_line(line: &str) -> bool { ) } -fn handle_paragraph_restart( - indent: usize, - line: &str, - prev_blank: bool, - indent_stack: &mut Vec, - counters: &mut HashMap, -) -> bool { - let inclusive = prev_blank - && indent_stack - .last() - 
.is_some_and(|&d| indent <= d && is_plain_paragraph_line(line)); - if inclusive { - prune_deeper(indent, true, indent_stack, counters); +struct ListState { + indent_stack: Vec, + counters: HashMap, +} + +impl ListState { + fn prune_deeper(&mut self, indent: usize, inclusive: bool) { + prune_deeper( + indent, + inclusive, + &mut self.indent_stack, + &mut self.counters, + ); + } + + fn handle_paragraph_restart(&mut self, indent: usize, line: &str, prev_blank: bool) -> bool { + let inclusive = prev_blank + && self + .indent_stack + .last() + .is_some_and(|&depth| indent <= depth && is_plain_paragraph_line(line)); + if inclusive { + self.prune_deeper(indent, true); + } + inclusive } - inclusive } /// Renumber ordered Markdown list items across the given lines. @@ -86,8 +97,10 @@ fn handle_paragraph_restart( #[must_use] pub fn renumber_lists(lines: &[String]) -> Vec { let mut out = Vec::with_capacity(lines.len()); - let mut indent_stack: Vec = Vec::new(); - let mut counters: HashMap = HashMap::new(); + let mut state = ListState { + indent_stack: Vec::new(), + counters: HashMap::new(), + }; // Track fenced code blocks consistently across list processing. let mut fences = FenceTracker::default(); #[allow(clippy::unnecessary_map_or)] @@ -110,11 +123,11 @@ pub fn renumber_lists(lines: &[String]) -> Vec { continue; } if let Some((indent, indent_str, sep, rest)) = parse_numbered(line) { - prune_deeper(indent, false, &mut indent_stack, &mut counters); - if indent_stack.last().is_none_or(|&d| d < indent) { - indent_stack.push(indent); + state.prune_deeper(indent, false); + if state.indent_stack.last().is_none_or(|&d| d < indent) { + state.indent_stack.push(indent); } - let num = counters.entry(indent).or_insert(1); + let num = state.counters.entry(indent).or_insert(1); let current = *num; *num += 1; out.push(format!("{indent_str}{current}.{sep}{rest}")); @@ -128,16 +141,15 @@ pub fn renumber_lists(lines: &[String]) -> Vec { let indent_str = &line[..indent_end]; let indent = indent_len(indent_str); if HEADING_RE.is_match(line) || THEMATIC_BREAK_RE.is_match(line.trim_end()) { - indent_stack.clear(); - counters.clear(); + state.indent_stack.clear(); + state.counters.clear(); out.push(line.clone()); prev_blank = false; continue; } - let did_inclusive = - handle_paragraph_restart(indent, line, prev_blank, &mut indent_stack, &mut counters); + let did_inclusive = state.handle_paragraph_restart(indent, line, prev_blank); if !did_inclusive { - prune_deeper(indent, false, &mut indent_stack, &mut counters); + state.prune_deeper(indent, false); } out.push(line.clone()); prev_blank = false; diff --git a/src/process.rs b/src/process.rs index 1dd34a05..2664a1cd 100644 --- a/src/process.rs +++ b/src/process.rs @@ -53,72 +53,67 @@ pub struct Options { } /// Flushes buffered lines to `out`, formatting as a table when required. -fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { - if buf.is_empty() { - return; - } - if *in_table { - out.extend(reflow_table(buf)); - buf.clear(); - } else { - out.extend(std::mem::take(buf)); - } - *in_table = false; +struct ProcessBuffer { + out: Vec, + buf: Vec, + in_table: bool, } -/// Detects fence lines and toggles code mode, flushing buffered content. 
-fn handle_fence_line( - line: &str, - buf: &mut Vec, - in_table: &mut bool, - out: &mut Vec, - fences: &mut FenceTracker, -) -> bool { - if !fences.observe(line) { - return false; +impl ProcessBuffer { + fn flush(&mut self) { + if self.buf.is_empty() { + return; + } + if self.in_table { + self.out.extend(reflow_table(&self.buf)); + self.buf.clear(); + } else { + self.out.extend(std::mem::take(&mut self.buf)); + } + self.in_table = false; } - flush_buffer(buf, in_table, out); - out.push(line.to_string()); - true -} + fn push_verbatim(&mut self, line: &str) { + self.flush(); + self.out.push(line.to_string()); + } -/// Buffers table lines, returning `true` when a line was consumed. -fn handle_table_line( - line: &str, - buf: &mut Vec, - in_table: &mut bool, - out: &mut Vec, -) -> bool { - let trimmed = line.trim_start(); + fn handle_fence_line(&mut self, line: &str, fences: &mut FenceTracker) -> bool { + if !fences.observe(line) { + return false; + } - if trimmed.starts_with('|') { - *in_table = true; - buf.push(line.to_string()); - return true; + self.push_verbatim(line); + true } - if line.trim().is_empty() { - if *in_table { - flush_buffer(buf, in_table, out); + + fn handle_table_line(&mut self, line: &str) -> bool { + if line.trim_start().starts_with('|') { + self.in_table = true; + self.buf.push(line.to_string()); + return true; } - return false; - } - if *in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) { - buf.push(line.to_string()); - return true; - } - if *in_table { - if classify_block(line).is_some() { - // Flush when a new Markdown block (heading, list, quote, footnote, directive, - // or digit-prefixed text) begins so wrapping and table detection stay aligned. - flush_buffer(buf, in_table, out); + if line.trim().is_empty() { + if self.in_table { + self.flush(); + } return false; } - // Plain paragraphs also end the table so the caller can reprocess them for wrapping. - flush_buffer(buf, in_table, out); - return false; + if self.in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) { + self.buf.push(line.to_string()); + return true; + } + if self.in_table { + if classify_block(line).is_some() { + // Flush when a new Markdown block begins so wrapping and table + // detection stay aligned. + self.flush(); + return false; + } + self.flush(); + } + false } - false } /// Processes a stream of Markdown lines using the provided [`Options`]. @@ -157,32 +152,35 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { let pre = convert_html_tables(&lines); - let mut out = Vec::new(); - let mut buf = Vec::new(); + let mut state = ProcessBuffer { + out: Vec::new(), + buf: Vec::new(), + in_table: false, + }; // Track fences so subsequent logic respects shared semantics. 
let mut fence_tracker = FenceTracker::default(); - let mut in_table = false; for line in &pre { - if handle_fence_line(line, &mut buf, &mut in_table, &mut out, &mut fence_tracker) { + if state.handle_fence_line(line, &mut fence_tracker) { continue; } if fence_tracker.in_fence() { - out.push(line.clone()); + state.out.push(line.clone()); continue; } - if handle_table_line(line, &mut buf, &mut in_table, &mut out) { + if state.handle_table_line(line) { continue; } - flush_buffer(&mut buf, &mut in_table, &mut out); - out.push(line.clone()); + state.flush(); + state.out.push(line.clone()); } - flush_buffer(&mut buf, &mut in_table, &mut out); + state.flush(); + let mut out = state.out; if opts.headings { out = crate::headings::convert_setext_headings(&out); } diff --git a/src/table.rs b/src/table.rs index 7e0fc700..66d8c779 100644 --- a/src/table.rs +++ b/src/table.rs @@ -144,16 +144,10 @@ fn parse_and_validate(trimmed: &[String], sep_line: Option<&String>) -> Option
<ParsedTable> {
-fn calculate_and_format(
-    cleaned: &[Vec<String>
], - output_rows: &[Vec], - sep_cells: Option>, - max_cols: usize, - indent: &str, -) -> Vec { - let widths = crate::reflow::calculate_widths(cleaned, max_cols); - let out = crate::reflow::format_rows(output_rows, &widths, indent); - crate::reflow::insert_separator(out, sep_cells, &widths, indent) +fn calculate_and_format(parsed: &ParsedTable, indent: &str) -> Vec { + let widths = crate::reflow::calculate_widths(&parsed.cleaned, parsed.max_cols); + let out = crate::reflow::format_rows(&parsed.output_rows, &widths, indent); + crate::reflow::insert_separator(out, parsed.sep_cells.clone(), &widths, indent) } /// Reflow a Markdown table so columns align uniformly. @@ -188,13 +182,7 @@ pub fn reflow_table(lines: &[String]) -> Vec { return lines.to_vec(); }; - calculate_and_format( - &parsed.cleaned, - &parsed.output_rows, - parsed.sep_cells, - parsed.max_cols, - &indent, - ) + calculate_and_format(&parsed, &indent) } #[cfg(test)] diff --git a/src/wrap.rs b/src/wrap.rs index fbad449b..4499eb62 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -8,6 +8,8 @@ //! The [`Token`] enum and [`tokenize_markdown`] function are public so callers //! can perform custom token-based processing. +use std::borrow::Cow; + mod block; mod fence; mod inline; @@ -17,7 +19,7 @@ mod tokenize; use block::{BLOCKQUOTE_RE, BULLET_RE, FOOTNOTE_RE}; pub(crate) use block::{BlockKind, classify_block}; pub use fence::{FenceTracker, is_fence}; -use paragraph::{flush_paragraph, handle_prefix_line}; +use paragraph::{ParagraphState, ParagraphWriter, PrefixLine}; /// Token emitted by the `tokenize::segment_inline` parser and used by /// higher-level wrappers. /// @@ -46,6 +48,74 @@ fn is_indented_code_line(line: &str) -> bool { indent_width >= 4 && line.chars().any(|c| !c.is_whitespace()) } +fn is_table_or_separator(line: &str) -> bool { + line.trim_start().starts_with('|') || crate::table::SEP_RE.is_match(line.trim()) +} + +fn is_passthrough_block(line: &str) -> bool { + is_table_or_separator(line) + || matches!( + classify_block(line), + Some(BlockKind::Heading | BlockKind::MarkdownlintDirective) + ) + || line.trim().is_empty() + || is_indented_code_line(line) +} + +fn prefix_line(line: &str) -> Option> { + if let Some(cap) = BULLET_RE.captures(line) { + let prefix = cap.get(1).expect("bullet regex capture").as_str(); + let rest = cap.get(2).expect("bullet regex remainder capture").as_str(); + return Some(PrefixLine { + prefix: Cow::Borrowed(prefix), + rest, + repeat_prefix: false, + }); + } + + if let Some(cap) = FOOTNOTE_RE.captures(line) { + let prefix = cap.get(1).expect("footnote prefix capture").as_str(); + let marker = cap.get(2).expect("footnote marker capture").as_str(); + let rest = cap + .get(3) + .expect("footnote regex remainder capture") + .as_str(); + return Some(PrefixLine { + prefix: Cow::Owned(format!("{prefix}{marker}")), + rest, + repeat_prefix: false, + }); + } + + BLOCKQUOTE_RE.captures(line).map(|cap| PrefixLine { + prefix: Cow::Borrowed(cap.get(1).expect("blockquote prefix capture").as_str()), + rest: cap + .get(2) + .expect("blockquote regex remainder capture") + .as_str(), + repeat_prefix: true, + }) +} + +fn line_break_parts(line: &str) -> (String, bool) { + let trimmed_end = line.trim_end(); + let text_without_html_breaks = trimmed_end + .trim_end_matches("
") + .trim_end_matches("
") + .trim_end_matches("
"); + + let is_trailing_spaces = line.ends_with(" "); + let is_html_br = trimmed_end != text_without_html_breaks; + let backslash_count = trimmed_end.chars().rev().take_while(|&c| c == '\\').count(); + let is_backslash_escape = backslash_count % 2 == 1; + let hard_break = is_trailing_spaces || is_html_br || is_backslash_escape; + let text = text_without_html_breaks + .trim_start() + .trim_end_matches(' ') + .to_string(); + (text, hard_break) +} + /// Wrap text lines to the given width. /// /// # Panics @@ -53,122 +123,37 @@ fn is_indented_code_line(line: &str) -> bool { #[must_use] pub fn wrap_text(lines: &[String], width: usize) -> Vec { let mut out = Vec::new(); - let mut buf: Vec<(String, bool)> = Vec::new(); - let mut indent = String::new(); + let mut state = ParagraphState::default(); + let mut writer = ParagraphWriter::new(&mut out, width); // Track fenced code blocks so wrapping honours shared fence semantics. let mut fence_tracker = FenceTracker::default(); for line in lines { - if fence::handle_fence_line( - &mut out, - &mut buf, - &mut indent, - width, - line, - &mut fence_tracker, - ) { + if fence::handle_fence_line(line, &mut writer, &mut state, &mut fence_tracker) { continue; } if fence_tracker.in_fence() { - out.push(line.clone()); + writer.push_verbatim(&mut state, line); continue; } - if line.trim_start().starts_with('|') || crate::table::SEP_RE.is_match(line.trim()) { - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(line.clone()); + if is_passthrough_block(line) { + writer.push_verbatim(&mut state, line); continue; } - if matches!( - classify_block(line), - Some(BlockKind::Heading | BlockKind::MarkdownlintDirective) - ) { - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(line.clone()); + if let Some(prefix_line) = prefix_line(line) { + writer.handle_prefix_line(&mut state, &prefix_line); continue; } - if line.trim().is_empty() { - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(String::new()); - continue; - } - - if let Some(cap) = BULLET_RE.captures(line) { - let prefix = cap.get(1).expect("bullet regex capture").as_str(); - let rest = cap.get(2).expect("bullet regex remainder capture").as_str(); - handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, false); - continue; - } - - if let Some(cap) = FOOTNOTE_RE.captures(line) { - let prefix = format!("{}{}", &cap[1], &cap[2]); - let rest = cap - .get(3) - .expect("footnote regex remainder capture") - .as_str(); - handle_prefix_line(&mut out, &mut buf, &mut indent, width, &prefix, rest, false); - continue; - } - - if let Some(cap) = BLOCKQUOTE_RE.captures(line) { - let prefix = cap.get(1).expect("blockquote prefix capture").as_str(); - let rest = cap - .get(2) - .expect("blockquote regex remainder capture") - .as_str(); - handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, true); - continue; - } - - if is_indented_code_line(line) { - // Preserve indented code blocks verbatim so wrapping does not merge them into - // paragraphs. - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(line.clone()); - continue; - } - - if buf.is_empty() { - indent = line.chars().take_while(|c| c.is_whitespace()).collect(); - } - let trimmed_end = line.trim_end(); - let text_without_html_breaks = trimmed_end - .trim_end_matches("
") - .trim_end_matches("
") - .trim_end_matches("
"); - - let is_trailing_spaces = line.ends_with(" "); - let is_html_br = trimmed_end != text_without_html_breaks; - let backslash_count = line - .trim_end() - .chars() - .rev() - .take_while(|&c| c == '\\') - .count(); - let is_backslash_escape = backslash_count % 2 == 1; - - let hard_break = is_trailing_spaces || is_html_br || is_backslash_escape; - - let text = text_without_html_breaks - .trim_start() - .trim_end_matches(' ') - .to_string(); - - buf.push((text, hard_break)); + state.note_indent(line); + let (text, hard_break) = line_break_parts(line); + state.push(text, hard_break); } - flush_paragraph(&mut out, &buf, &indent, width); + writer.flush_paragraph(&mut state); out } diff --git a/src/wrap/fence.rs b/src/wrap/fence.rs index 19d543d7..1eaea4f4 100644 --- a/src/wrap/fence.rs +++ b/src/wrap/fence.rs @@ -2,6 +2,8 @@ use regex::Regex; +use super::paragraph::{ParagraphState, ParagraphWriter}; + pub(super) static FENCE_RE: std::sync::LazyLock = // Capture: indent, fence run of 3+ backticks/tilde, and the full info string (incl. leading // spaces) @@ -41,21 +43,16 @@ pub fn is_fence(line: &str) -> Option<(&str, &str, &str)> { /// /// Returns `true` if the line was processed as a fence. pub(crate) fn handle_fence_line( - out: &mut Vec, - buf: &mut Vec<(String, bool)>, - indent: &mut String, - width: usize, line: &str, + writer: &mut ParagraphWriter<'_>, + state: &mut ParagraphState, tracker: &mut FenceTracker, ) -> bool { if !tracker.observe(line) { return false; } - super::flush_paragraph(out, buf, indent, width); - buf.clear(); - indent.clear(); - out.push(line.to_string()); + writer.push_verbatim(state, line); true } diff --git a/src/wrap/inline.rs b/src/wrap/inline.rs index 78f261a2..8b8027fd 100644 --- a/src/wrap/inline.rs +++ b/src/wrap/inline.rs @@ -4,9 +4,14 @@ //! inline code, links, and trailing punctuation without reimplementing the //! grouping logic in multiple places. 
+use std::ops::Range; + use unicode_width::UnicodeWidthStr; -use super::{line_buffer::LineBuffer, tokenize}; +use super::{ + line_buffer::{LineBuffer, SplitContext}, + tokenize, +}; #[derive(Copy, Clone, PartialEq, Eq)] enum SpanKind { @@ -167,24 +172,23 @@ pub(super) fn attach_punctuation_to_previous_line( fn push_span_with_carry( buffer: &mut LineBuffer, tokens: &[String], - start: usize, - end: usize, + span: Range, carried_whitespace: &mut String, ) { - if start >= end { + if span.start >= span.end { return; } if carried_whitespace.is_empty() { - buffer.push_span(tokens, start, end); + buffer.push_span(tokens, span.start, span.end); return; } let mut first_token = std::mem::take(carried_whitespace); - first_token.push_str(tokens[start].as_str()); + first_token.push_str(tokens[span.start].as_str()); buffer.push_token(first_token.as_str()); - if start + 1 < end { - buffer.push_span(tokens, start + 1, end); + if span.start + 1 < span.end { + buffer.push_span(tokens, span.start + 1, span.end); } } @@ -201,12 +205,13 @@ pub(super) fn wrap_preserving_code(text: &str, width: usize) -> Vec { while i < tokens.len() { let (group_end, group_width) = determine_token_span(&tokens, i); - let span_is_whitespace = tokens[i..group_end] + let span = i..group_end; + let span_is_whitespace = tokens[span.clone()] .iter() .all(|tok| is_whitespace_token(tok)); if span_is_whitespace && !carried_whitespace.is_empty() && group_end != tokens.len() { - for tok in &tokens[i..group_end] { + for tok in &tokens[span.clone()] { carried_whitespace.push_str(tok); } i = group_end; @@ -220,31 +225,35 @@ pub(super) fn wrap_preserving_code(text: &str, width: usize) -> Vec { } if buffer.width() + group_width <= width { - push_span_with_carry(&mut buffer, &tokens, i, group_end, &mut carried_whitespace); + push_span_with_carry(&mut buffer, &tokens, span.clone(), &mut carried_whitespace); i = group_end; continue; } - if buffer.split_with_span(&mut lines, &tokens, i, group_end, width) { + let mut split = SplitContext { + lines: &mut lines, + width, + }; + if buffer.split_with_span(&mut split, &tokens, span.clone()) { i = group_end; continue; } - if buffer.flush_trailing_whitespace(&mut lines, &tokens, i, group_end) { + if buffer.flush_trailing_whitespace(&mut lines, &tokens, span.clone()) { i = group_end; continue; } buffer.flush_into(&mut lines); if span_is_whitespace { - for tok in &tokens[i..group_end] { + for tok in &tokens[span] { carried_whitespace.push_str(tok); } i = group_end; continue; } - push_span_with_carry(&mut buffer, &tokens, i, group_end, &mut carried_whitespace); + push_span_with_carry(&mut buffer, &tokens, i..group_end, &mut carried_whitespace); i = group_end; } diff --git a/src/wrap/line_buffer.rs b/src/wrap/line_buffer.rs index feff9c55..0eafa3b2 100644 --- a/src/wrap/line_buffer.rs +++ b/src/wrap/line_buffer.rs @@ -3,6 +3,8 @@ //! This module encapsulates the mutable state required to accumulate tokens into //! wrapped lines while reusing allocations between iterations. 
+use std::ops::Range;
+
 use unicode_width::UnicodeWidthStr;
 
 #[derive(Default)]
@@ -12,6 +14,11 @@ pub(crate) struct LineBuffer {
     last_split: Option<usize>,
 }
 
+pub(crate) struct SplitContext<'a> {
+    pub(crate) lines: &'a mut Vec<String>,
+    pub(crate) width: usize,
+}
+
 impl LineBuffer {
     pub(crate) fn new() -> Self { Self::default() }
 
@@ -60,11 +67,9 @@ impl LineBuffer {
 
     pub(crate) fn split_with_span(
         &mut self,
-        lines: &mut Vec<String>,
+        ctx: &mut SplitContext<'_>,
         tokens: &[String],
-        start: usize,
-        end: usize,
-        width: usize,
+        span: Range<usize>,
     ) -> bool {
         let Some(pos) = self.last_split else {
             return false;
@@ -89,24 +94,26 @@
         };
 
         if let Some((start_idx, end_idx)) = head_bounds {
-            lines.push(self.text[start_idx..end_idx].to_owned());
+            ctx.lines.push(self.text[start_idx..end_idx].to_owned());
         }
 
         self.text.drain(..trimmed_tail_start);
-        for tok in &tokens[start..end] {
+        for tok in &tokens[span.clone()] {
             self.text.push_str(tok);
         }
         self.width = UnicodeWidthStr::width(self.text.as_str());
-        if end > start && tokens[end - 1].chars().all(char::is_whitespace) && !self.text.is_empty()
+        if span.end > span.start
+            && tokens[span.end - 1].chars().all(char::is_whitespace)
+            && !self.text.is_empty()
         {
             self.last_split = Some(self.text.len());
         } else {
             self.last_split = None;
         }
 
-        if self.width > width {
-            lines.push(self.text.trim_end().to_string());
+        if self.width > ctx.width {
+            ctx.lines.push(self.text.trim_end().to_string());
             self.text.clear();
             self.width = 0;
             self.last_split = None;
@@ -119,13 +126,12 @@
         &mut self,
         lines: &mut Vec<String>,
         tokens: &[String],
-        start: usize,
-        end: usize,
+        span: Range<usize>,
     ) -> bool {
-        if end != tokens.len() {
+        if span.end != tokens.len() {
             return false;
         }
-        if !tokens[start..end]
+        if !tokens[span.clone()]
             .iter()
             .all(|tok| tok.chars().all(char::is_whitespace))
         {
@@ -137,7 +143,7 @@
             return true;
         }
 
-        for tok in &tokens[start..end] {
+        for tok in &tokens[span] {
             self.text.push_str(tok);
         }
         lines.push(std::mem::take(&mut self.text));
diff --git a/src/wrap/paragraph.rs b/src/wrap/paragraph.rs
index 79e52a17..358c946f 100644
--- a/src/wrap/paragraph.rs
+++ b/src/wrap/paragraph.rs
@@ -3,82 +3,117 @@
 //! These helpers keep paragraph logic focused on buffer management while
 //! deferring inline wrapping to `inline::wrap_preserving_code`.
+use std::borrow::Cow; + use unicode_width::UnicodeWidthStr; use super::inline::wrap_preserving_code; -fn append_wrapped_with_prefix( - out: &mut Vec, - prefix: &str, - text: &str, - width: usize, - repeat_prefix: bool, -) { - let prefix_width = UnicodeWidthStr::width(prefix); - let available = width.saturating_sub(prefix_width).max(1); - let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); - let indent_width = UnicodeWidthStr::width(indent_str.as_str()); - let wrapped_indent = if repeat_prefix { - prefix.to_string() - } else { - format!("{}{}", indent_str, " ".repeat(prefix_width - indent_width)) - }; - - let lines = wrap_preserving_code(text, available); - if lines.is_empty() { - out.push(prefix.to_string()); - return; +pub(super) struct PrefixLine<'a> { + pub(super) prefix: Cow<'a, str>, + pub(super) rest: &'a str, + pub(super) repeat_prefix: bool, +} + +#[derive(Default)] +pub(super) struct ParagraphState { + buf: Vec<(String, bool)>, + indent: String, +} + +impl ParagraphState { + pub(super) fn clear(&mut self) { + self.buf.clear(); + self.indent.clear(); } - for (i, line) in lines.iter().enumerate() { - if i == 0 { - out.push(format!("{prefix}{line}")); - } else { - out.push(format!("{wrapped_indent}{line}")); + pub(super) fn note_indent(&mut self, line: &str) { + if self.buf.is_empty() { + self.indent = line.chars().take_while(|c| c.is_whitespace()).collect(); } } + + pub(super) fn push(&mut self, text: String, hard_break: bool) { + self.buf.push((text, hard_break)); + } } -pub(super) fn flush_paragraph( - out: &mut Vec, - buf: &[(String, bool)], - indent: &str, +pub(super) struct ParagraphWriter<'a> { + out: &'a mut Vec, width: usize, -) { - if buf.is_empty() { - return; +} + +impl<'a> ParagraphWriter<'a> { + pub(super) fn new(out: &'a mut Vec, width: usize) -> Self { Self { out, width } } + + fn append_wrapped_with_prefix(&mut self, line: &PrefixLine<'_>) { + let prefix = line.prefix.as_ref(); + let prefix_width = UnicodeWidthStr::width(prefix); + let available = self.width.saturating_sub(prefix_width).max(1); + let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); + let indent_width = UnicodeWidthStr::width(indent_str.as_str()); + let wrapped_indent = if line.repeat_prefix { + prefix.to_string() + } else { + format!("{}{}", indent_str, " ".repeat(prefix_width - indent_width)) + }; + + let lines = wrap_preserving_code(line.rest, available); + if lines.is_empty() { + self.out.push(prefix.to_string()); + return; + } + + for (index, wrapped_line) in lines.iter().enumerate() { + if index == 0 { + self.out.push(format!("{prefix}{wrapped_line}")); + } else { + self.out.push(format!("{wrapped_indent}{wrapped_line}")); + } + } } - let mut segment = String::new(); - for (text, hard_break) in buf { - if !segment.is_empty() { - segment.push(' '); + + pub(super) fn flush_paragraph(&mut self, state: &mut ParagraphState) { + if state.buf.is_empty() { + return; } - segment.push_str(text); - if *hard_break { - for line in wrap_preserving_code(&segment, width - indent.len()) { - out.push(format!("{indent}{line}")); + + let mut segment = String::new(); + for (text, hard_break) in &state.buf { + if !segment.is_empty() { + segment.push(' '); + } + segment.push_str(text); + if *hard_break { + self.push_wrapped_segment(&state.indent, &segment); + segment.clear(); } - segment.clear(); } + + if !segment.is_empty() { + self.push_wrapped_segment(&state.indent, &segment); + } + + state.clear(); } - if !segment.is_empty() { - for line in 
+    fn push_wrapped_segment(&mut self, indent: &str, segment: &str) {
+        for line in wrap_preserving_code(segment, self.width - indent.len()) {
+            self.out.push(format!("{indent}{line}"));
         }
     }
-}
 
-pub(super) fn handle_prefix_line(
-    out: &mut Vec<String>,
-    buf: &mut Vec<(String, bool)>,
-    indent: &mut String,
-    width: usize,
-    prefix: &str,
-    rest: &str,
-    repeat_prefix: bool,
-) {
-    flush_paragraph(out, buf, indent, width);
-    buf.clear();
-    indent.clear();
-    append_wrapped_with_prefix(out, prefix, rest, width, repeat_prefix);
+    pub(super) fn push_verbatim(&mut self, state: &mut ParagraphState, line: &str) {
+        self.flush_paragraph(state);
+        self.out.push(line.to_string());
+    }
+
+    pub(super) fn handle_prefix_line(
+        &mut self,
+        state: &mut ParagraphState,
+        prefix_line: &PrefixLine<'_>,
+    ) {
+        self.flush_paragraph(state);
+        self.append_wrapped_with_prefix(prefix_line);
+    }
 }
diff --git a/src/wrap/tests.rs b/src/wrap/tests.rs
index aba72934..6e244eae 100644
--- a/src/wrap/tests.rs
+++ b/src/wrap/tests.rs
@@ -7,7 +7,7 @@ use rstest::rstest;
 
 use super::{
     inline::{attach_punctuation_to_previous_line, determine_token_span, wrap_preserving_code},
-    line_buffer::LineBuffer,
+    line_buffer::{LineBuffer, SplitContext},
     tokenize::segment_inline,
 };
 use crate::wrap::{BlockKind, classify_block, wrap_text};
@@ -123,7 +123,11 @@ fn line_buffer_split_preserves_multi_space_lines() {
     buffer.push_span(&tokens, 0, 2);
 
     let mut lines = Vec::new();
-    assert!(buffer.split_with_span(&mut lines, &tokens, 2, 4, 8));
+    let mut split = SplitContext {
+        lines: &mut lines,
+        width: 8,
+    };
+    assert!(buffer.split_with_span(&mut split, &tokens, 2..4));
     assert_eq!(lines, vec!["alpha ".to_string()]);
     assert_eq!(buffer.text(), "beta ");
     assert_eq!(
@@ -139,7 +143,11 @@ fn line_buffer_split_trims_single_trailing_space() {
     buffer.push_span(&tokens, 0, 2);
 
     let mut lines = Vec::new();
-    assert!(buffer.split_with_span(&mut lines, &tokens, 2, 3, 5));
+    let mut split = SplitContext {
+        lines: &mut lines,
+        width: 5,
+    };
+    assert!(buffer.split_with_span(&mut split, &tokens, 2..3));
     assert_eq!(lines, vec!["alpha".to_string()]);
     assert_eq!(buffer.text(), "beta");
     assert_eq!(
@@ -160,7 +168,11 @@ fn line_buffer_split_tracks_multiple_whitespace_tokens() {
     buffer.push_span(&tokens, 0, 3);
 
     let mut lines = Vec::new();
-    assert!(buffer.split_with_span(&mut lines, &tokens, 3, 4, 4));
+    let mut split = SplitContext {
+        lines: &mut lines,
+        width: 4,
+    };
+    assert!(buffer.split_with_span(&mut split, &tokens, 3..4));
     assert_eq!(lines, vec!["foo ".to_string()]);
     assert_eq!(buffer.text(), "bar");
 }
@@ -176,8 +188,7 @@ fn line_buffer_trailing_whitespace_flushes_line() {
     assert!(buffer.flush_trailing_whitespace(
         &mut lines,
         &whitespace_tokens,
-        0,
-        whitespace_tokens.len(),
+        0..whitespace_tokens.len()
     ));
     assert_eq!(lines, vec!["foo ".to_string()]);
     assert!(buffer.text().is_empty());
diff --git a/src/wrap/tokenize/mod.rs b/src/wrap/tokenize/mod.rs
index c7501861..7b0e0625 100644
--- a/src/wrap/tokenize/mod.rs
+++ b/src/wrap/tokenize/mod.rs
@@ -71,13 +71,11 @@ pub(super) fn segment_inline(text: &str) -> Vec<String> {
             i = scan_while(text, i, char::is_whitespace);
             tokens.push(collect_range(text, start, i));
             continue;
-        } else if ch == '`' {
+        }
+
+        if ch == '`' {
             if has_odd_backslash_escape_bytes(bytes, i) {
-                if let Some(last) = tokens.last_mut() {
-                    last.push('`');
-                } else {
-                    tokens.push(String::from("`"));
-                }
+                append_escaped_backtick(&mut tokens);
                 i += ch.len_utf8();
                 continue;
             }
@@ -99,31 +97,51 @@ pub(super) fn segment_inline(text: &str) -> Vec<String> {
                 tokens.push(collect_range(text, punct_start, new_i));
             }
             i = new_i;
-        } else {
-            let start = i;
-            while i < text.len() {
-                let Some(current) = text[i..].chars().next() else {
-                    break;
-                };
-                if current.is_whitespace() || current == '`' {
-                    break;
-                }
-                let current_escaped = has_odd_backslash_escape_bytes(bytes, i);
-                if current == '[' {
-                    if !current_escaped && !bracket_follows_escaped_bang(bytes, i) {
-                        break;
-                    }
-                } else if looks_like_image_start(text, i, current) && !current_escaped {
-                    break;
-                }
-                i += current.len_utf8();
-            }
-            tokens.push(collect_range(text, start, i));
+            continue;
         }
+
+        let start = i;
+        i = scan_plain_text_end(text, bytes, i);
+        tokens.push(collect_range(text, start, i));
     }
     tokens
 }
 
+fn append_escaped_backtick(tokens: &mut Vec<String>) {
+    if let Some(last) = tokens.last_mut() {
+        last.push('`');
+    } else {
+        tokens.push(String::from("`"));
+    }
+}
+
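+/// Scan from `index` to the end of one plain-text token, honouring
+/// backslash escapes and stopping at whitespace, backticks, and
+/// link or image openers.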
+fn scan_plain_text_end(text: &str, bytes: &[u8], mut index: usize) -> usize {
+    while index < text.len() {
+        let Some(current) = text[index..].chars().next() else {
+            break;
+        };
+        if current.is_whitespace() || current == '`' {
+            break;
+        }
+
+        let current_escaped = has_odd_backslash_escape_bytes(bytes, index);
+        if should_stop_plain_text(text, bytes, index, (current, current_escaped)) {
+            break;
+        }
+
+        index += current.len_utf8();
+    }
+    index
+}
+
+fn should_stop_plain_text(text: &str, bytes: &[u8], index: usize, current: (char, bool)) -> bool {
+    let (ch, is_escaped) = current;
+    if ch == '[' {
+        return !is_escaped && !bracket_follows_escaped_bang(bytes, index);
+    }
+    looks_like_image_start(text, index, ch) && !is_escaped
+}
+
 fn next_token(line: &str, offset: usize) -> Option<(Token<'_>, usize)> {
     if offset >= line.len() {
         return None;
diff --git a/src/wrap/tokenize/scanning.rs b/src/wrap/tokenize/scanning.rs
index ef0edfdb..76efad2a 100644
--- a/src/wrap/tokenize/scanning.rs
+++ b/src/wrap/tokenize/scanning.rs
@@ -82,31 +82,33 @@ mod tests {
 
     use super::*;
 
+    /// One table-driven case covering `scan_while` and `collect_range`.
+    struct ScanCollectCase {
+        text: &'static str,
+        start: usize,
+        predicate: Option<fn(char) -> bool>,
+        end: Option<usize>,
+        expected_idx: Option<usize>,
+        expected_str: Option<&'static str>,
+    }
+
     #[rstest]
-    #[case::alpha_prefix("abc123", 0, Some(char::is_alphabetic as fn(char) -> bool), None, Some(3), None)]
-    #[case::numeric_suffix("abc123", 3, Some(char::is_numeric as fn(char) -> bool), None, Some("abc123".len()), None)]
-    #[case::multibyte_scan("åßç123", 0, Some(char::is_alphabetic as fn(char) -> bool), None, Some("åßç123".find('1').unwrap_or("åßç123".len())), Some("åßç"))]
-    #[case::collect_first_two("αβγδε", 0, None, Some("αβ".len()), None, Some("αβ"))]
-    #[case::collect_middle("αβγδε", "αβ".len(), None, Some("αβ".len() + "γδ".len()), None, Some("γδ"))]
-    fn scan_and_collect_cases(
-        #[case] text: &str,
-        #[case] start: usize,
-        #[case] predicate: Option<fn(char) -> bool>,
-        #[case] end: Option<usize>,
-        #[case] expected_idx: Option<usize>,
-        #[case] expected_str: Option<&str>,
-    ) {
-        if let Some(pred) = predicate {
-            let idx = scan_while(text, start, pred);
-            if let Some(expected) = expected_idx {
+    #[case::alpha_prefix(ScanCollectCase { text: "abc123", start: 0, predicate: Some(char::is_alphabetic as fn(char) -> bool), end: None, expected_idx: Some(3), expected_str: None })]
+    #[case::numeric_suffix(ScanCollectCase { text: "abc123", start: 3, predicate: Some(char::is_numeric as fn(char) -> bool), end: None, expected_idx: Some("abc123".len()), expected_str: None })]
+    #[case::multibyte_scan(ScanCollectCase { text: "åßç123", start: 0, predicate: Some(char::is_alphabetic as fn(char) -> bool), end: None, expected_idx: Some("åßç123".find('1').unwrap_or("åßç123".len())), expected_str: Some("åßç") })]
+    #[case::collect_first_two(ScanCollectCase { text: "αβγδε", start: 0, predicate: None, end: Some("αβ".len()), expected_idx: None, expected_str: Some("αβ") })]
+    #[case::collect_middle(ScanCollectCase { text: "αβγδε", start: "αβ".len(), predicate: None, end: Some("αβ".len() + "γδ".len()), expected_idx: None, expected_str: Some("γδ") })]
+    fn scan_and_collect_cases(#[case] case: ScanCollectCase) {
+        if let Some(pred) = case.predicate {
+            let idx = scan_while(case.text, case.start, pred);
+            if let Some(expected) = case.expected_idx {
                 assert_eq!(idx, expected);
             }
-            if let Some(expected_slice) = expected_str {
-                assert_eq!(&text[..idx], expected_slice);
+            if let Some(expected_slice) = case.expected_str {
+                assert_eq!(&case.text[..idx], expected_slice);
             }
-        } else if let Some(end_idx) = end {
-            let collected = collect_range(text, start, end_idx);
-            if let Some(expected_slice) = expected_str {
+        } else if let Some(end_idx) = case.end {
+            let collected = collect_range(case.text, case.start, end_idx);
+            if let Some(expected_slice) = case.expected_str {
                 assert_eq!(collected, expected_slice);
             }
         } else {
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index 663f68f0..62016b97 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -52,31 +52,43 @@ pub fn assert_wrapped_list_item(output: &[String], prefix: &str, expected: usize
     let mut open: Option<usize> = None;
 
     for line in output {
-        let chars: Vec<char> = line.chars().collect();
-        let mut i = 0;
-        while i < chars.len() {
-            if chars[i] == '`' {
-                let mut len = 0;
-                while i < chars.len() && chars[i] == '`' {
-                    len += 1;
-                    i += 1;
-                }
-                if let Some(open_len) = open {
-                    if open_len == len {
-                        open = None;
-                    }
-                } else {
-                    open = Some(len);
-                }
-            } else {
-                i += 1;
-            }
-        }
+        scan_code_spans(line, &mut open);
         assert!(open.is_none(), "code span split across lines");
     }
 
     assert!(open.is_none(), "unclosed code span");
}
 
+/// Update `open` with each backtick run found in `line`.
+fn scan_code_spans(line: &str, open: &mut Option<usize>) {
+    let chars: Vec<char> = line.chars().collect();
+    let mut i = 0;
+    while i < chars.len() {
+        if chars[i] != '`' {
+            i += 1;
+            continue;
+        }
+
+        let len = count_backticks(&chars, &mut i);
+        toggle_code_span(open, len);
+    }
+}
+
+/// Count the backtick run starting at `index`, advancing it past the run.
+fn count_backticks(chars: &[char], index: &mut usize) -> usize {
+    let mut len = 0;
+    while *index < chars.len() && chars[*index] == '`' {
+        len += 1;
+        *index += 1;
+    }
+    len
+}
+
+/// Open a span on the first run; close it only on a run of matching length.
+fn toggle_code_span(open: &mut Option<usize>, len: usize) {
+    match *open {
+        // A run matching the opening length closes the span.
+        Some(open_len) if open_len == len => *open = None,
+        // Inside an open span, a run of a different length is literal
+        // content, so the tracked opening length must be preserved.
+        Some(_) => {}
+        None => *open = Some(len),
+    }
+}
+
 /// Assert that every line in a blockquote starts with the given prefix and is at most 80
 /// characters.
 ///