diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index da399ade..71f950df 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,10 +18,12 @@ jobs: arch: x86_64 target: x86_64-unknown-linux-gnu ext: "" + cargo_binstall_archive: true - os: linux arch: aarch64 target: aarch64-unknown-linux-gnu ext: "" + cargo_binstall_archive: true # - os: windows # arch: x86_64 # target: x86_64-pc-windows-msvc @@ -42,6 +44,7 @@ jobs: arch: x86_64 target: x86_64-unknown-freebsd ext: "" + cargo_binstall_archive: false # - os: freebsd # arch: aarch64 # target: aarch64-unknown-freebsd @@ -104,11 +107,24 @@ jobs: run: cross +stable build --release --target ${{ matrix.target }} - name: Prepare artifact run: | - mkdir -p artifacts/${{ matrix.os }}-${{ matrix.arch }} - cp target/${{ matrix.target }}/release/${{ env.REPO_NAME }}${{ matrix.ext }} \ - artifacts/${{ matrix.os }}-${{ matrix.arch }}/${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }} - sha256sum artifacts/${{ matrix.os }}-${{ matrix.arch }}/${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }} > \ - artifacts/${{ matrix.os }}-${{ matrix.arch }}/${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }}.sha256 + set -euo pipefail + version="${GITHUB_REF_NAME#v}" + artifact_dir="artifacts/${{ matrix.os }}-${{ matrix.arch }}" + binary_path="target/${{ matrix.target }}/release/${{ env.REPO_NAME }}${{ matrix.ext }}" + binary_name="${{ env.REPO_NAME }}-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.ext }}" + + mkdir -p "${artifact_dir}" + cp "${binary_path}" "${artifact_dir}/${binary_name}" + sha256sum "${artifact_dir}/${binary_name}" > \ + "${artifact_dir}/${binary_name}.sha256" + + if [ "${{ matrix.cargo_binstall_archive }}" = "true" ]; then + archive_name="${{ env.REPO_NAME }}-${version}-${{ matrix.target }}.tar.gz" + tar -C "target/${{ matrix.target }}/release" -czf \ + "${artifact_dir}/${archive_name}" "${{ env.REPO_NAME }}${{ matrix.ext }}" + sha256sum "${artifact_dir}/${archive_name}" > \ + "${artifact_dir}/${archive_name}.sha256" + fi - name: Upload release artifact uses: actions/upload-artifact@v4 with: diff --git a/Cargo.toml b/Cargo.toml index a27fc9bb..acb11865 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,10 +4,18 @@ version = "0.3.1" edition = "2024" rust-version = "1.89" license = "ISC" +repository = "https://github.com/leynos/mdtablefix" description = """ `mdtablefix` unb0rks and reflows Markdown tables so that each column has a uniform width. 
When \ the `--wrap` option is used, it also wraps paragraphs and list items to 80 columns.""" +[package.metadata.binstall] + +[package.metadata.binstall.overrides.'cfg(all(target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64"), target_env = "gnu"))'] +pkg-url = "{ repo }/releases/download/v{ version }/{ name }-{ version }-{ target }.tar.gz" +bin-dir = "{ bin }{ binary-ext }" +pkg-fmt = "tgz" + [dependencies] anyhow = "1" clap = { version = "4", features = ["derive"] } diff --git a/README.md b/README.md index 1f932fb5..23ad0f67 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,13 @@ Install via Cargo: cargo install mdtablefix ``` +On Linux `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`, install +the prebuilt release archive via `cargo-binstall`: + +```bash +cargo binstall mdtablefix +``` + Or clone the repository and build from source: ```bash diff --git a/docs/release-process.md b/docs/release-process.md index c0f137c3..4d4cfc17 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -1,7 +1,8 @@ # Release Process This project publishes prebuilt binaries for multiple operating systems and -architectures. +architectures. It also publishes `cargo-binstall` archives for the supported +Linux release targets. The project targets the stable Rust `1.89.0` toolchain, as specified in `rust-toolchain.toml`. @@ -14,10 +15,7 @@ The GitHub Actions workflow `.github/workflows/release.yml` builds and uploads binaries for: - Linux (x86_64 and aarch64) -- FreeBSD (x86_64 and aarch64) -- macOS (x86_64 and aarch64) -- Windows (x86_64 and aarch64) -- OpenBSD (x86_64 and aarch64) +- FreeBSD (x86_64) Releases start from tags named `v<major>.<minor>.<patch>`. The workflow checks that the tag's version, without the leading `v`, matches the `Cargo.toml` @@ -26,6 +24,12 @@ that the tag's version, without the leading `v`, matches the `Cargo.toml` Each binary is named using the pattern `mdtablefix-<os>-<arch>` with an `.exe` suffix on Windows. +For Linux `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`, the +workflow also produces `cargo-binstall` archives named +`mdtablefix-<version>-<target>.tar.gz`. Each archive contains the `mdtablefix` +binary at the archive root, matching the `Cargo.toml` +`[package.metadata.binstall]` configuration. + Binaries are uploaded as soon as they are built, so they are available from the workflow run while other targets build. @@ -33,13 +37,15 @@ workflow run while other targets build. The `release.yml` workflow defines a matrix of operating system and architecture combinations. Each entry includes the target triple used by -`cross` and a filename extension for Windows. During the build job, `cross` -compiles a release binary for every matrix row. +`cross` and whether the target also needs a `cargo-binstall` archive. During +the build job, `cross` compiles a release binary for every matrix row. `cross` is installed from a specific git tag to avoid unexpected behaviour from its main branch. Each binary is placed in an `artifacts/<os>-<arch>` directory using the naming pattern `mdtablefix-<os>-<arch>[.exe]`. An SHA-256 checksum is -written alongside each binary for download verification. +written alongside each binary for download verification. The Linux +`cargo-binstall` targets additionally produce +`mdtablefix-<version>-<target>.tar.gz` plus a matching SHA-256 checksum. After every build completes, the artefact is uploaded so that the GitHub Actions interface provides it immediately.
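As an illustration of how the `pkg-url` template above resolves, the sketch below fetches the archive `cargo-binstall` would request and inspects it by hand. It is not part of the workflow: the `v0.3.1` tag, the attached `.sha256` asset, and the `x86_64-unknown-linux-gnu` target are assumptions for the example; substitute the real release version and target.

```bash
#!/usr/bin/env bash
set -euo pipefail

# Values assumed for illustration; adjust to the release being checked.
version="0.3.1"
target="x86_64-unknown-linux-gnu"
archive="mdtablefix-${version}-${target}.tar.gz"
base="https://github.com/leynos/mdtablefix/releases/download/v${version}"

# Fetch the archive and its published checksum file.
curl -fsSLO "${base}/${archive}"
curl -fsSLO "${base}/${archive}.sha256"

# Compare the locally computed digest with the published one.
sha256sum "${archive}"
cat "${archive}.sha256"

# List the contents: the binary should sit at the archive root,
# as the release documentation describes.
tar -tzf "${archive}"
```

If the listing shows a single `mdtablefix` entry at the archive root, `cargo binstall mdtablefix` can place it directly using the `bin-dir = "{ bin }{ binary-ext }"` mapping from the metadata above.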
Once the matrix has finished, the diff --git a/src/code_emphasis.rs b/src/code_emphasis.rs index f64610f8..b5acd194 100644 --- a/src/code_emphasis.rs +++ b/src/code_emphasis.rs @@ -10,6 +10,8 @@ //! transformation should run before wrapping and footnote conversion so marker //! adjacency is evaluated on the raw input. +use std::{iter::Peekable, vec::IntoIter}; + use crate::{ textproc::process_text, wrap::{Token, tokenize_markdown}, @@ -49,6 +51,103 @@ fn push_code(code: &str, out: &mut String) { out.push_str(&fence); } +fn has_code_emphasis_adjacent(source: &str) -> bool { + source.contains("`*") || source.contains("`_") || source.contains("*`") || source.contains("_`") +} + +fn handle_text_token<'a>( + raw: &'a str, + next: Option<&Token<'a>>, + out: &mut String, + pending: &mut &'a str, +) { + if !next.is_some_and(|token| matches!(token, Token::Code { .. })) { + out.push_str(raw); + return; + } + + let (lead, body, trail) = split_marks(raw); + if body.is_empty() && trail.is_empty() { + *pending = lead; + return; + } + + out.push_str(lead); + out.push_str(body); + *pending = trail; +} + +fn try_fold_matching_emphasis<'a>( + tokens: &mut Peekable>>, + pending: &mut &'a str, + code: &str, + out: &mut String, +) -> bool { + let Some(Token::Text(next)) = tokens.peek() else { + return false; + }; + let (lead, mid, trail) = split_marks(next); + if *pending == lead && mid.is_empty() && trail.is_empty() { + out.push_str(pending); + push_code(code, out); + out.push_str(lead); + *pending = ""; + tokens.next(); + return true; + } + false +} + +fn consume_code_affixes<'a>( + tokens: &mut Peekable>>, + pending: &mut &'a str, +) -> (&'a str, &'a str, bool) { + let mut prefix = std::mem::take(pending); + let mut suffix = ""; + let mut modified = !prefix.is_empty(); + + let Some(Token::Text(next)) = tokens.peek_mut() else { + return (prefix, suffix, modified); + }; + + let (lead, mid, _) = split_marks(next); + if lead.is_empty() { + return (prefix, suffix, modified); + } + + modified = true; + if prefix.is_empty() { + prefix = lead; + } else if mid.is_empty() { + suffix = lead; + } else { + prefix = ""; + } + *next = &next[lead.len()..]; + (prefix, suffix, modified) +} + +fn handle_code_token<'a>( + tokens: &mut Peekable>>, + code_token: (&'a str, &'a str), + out: &mut String, + pending: &mut &'a str, +) { + let (raw, code) = code_token; + if !pending.is_empty() && try_fold_matching_emphasis(tokens, pending, code, out) { + return; + } + + let (prefix, suffix, modified) = consume_code_affixes(tokens, pending); + out.push_str(prefix); + if modified { + push_code(code, out); + } else { + out.push_str(raw); + } + out.push_str(suffix); +} + /// Merge contiguous code and emphasis spans. /// /// Groups of emphasis markers and inline code with no separating spaces are @@ -75,11 +174,7 @@ pub fn fix_code_emphasis(lines: &[String]) -> Vec { return vec![String::new(); lines.len()]; } let source = lines.join("\n"); - if !source.contains("`*") - && !source.contains("`_") - && !source.contains("*`") - && !source.contains("_`") - { + if !has_code_emphasis_adjacent(&source) { return lines.to_vec(); } let mut tokens = tokenize_markdown(&source).into_iter().peekable(); @@ -87,66 +182,9 @@ pub fn fix_code_emphasis(lines: &[String]) -> Vec { let mut pending = ""; while let Some(token) = tokens.next() { match token { - Token::Text(raw) => { - if tokens - .peek() - .is_some_and(|t| matches!(t, Token::Code { .. 
})) - { - let (lead, body, trail) = split_marks(raw); - if body.is_empty() && trail.is_empty() { - pending = lead; - } else { - out.push_str(lead); - out.push_str(body); - pending = trail; - } - } else { - out.push_str(raw); - } - } + Token::Text(raw) => handle_text_token(raw, tokens.peek(), &mut out, &mut pending), Token::Code { raw, code, .. } => { - if !pending.is_empty() - && let Some(Token::Text(next)) = tokens.peek() - { - let (lead, mid, trail) = split_marks(next); - if mid.is_empty() && trail.is_empty() && lead == pending { - out.push_str(pending); - push_code(code, &mut out); - out.push_str(lead); - pending = ""; - tokens.next(); - continue; - } - } - let mut prefix = pending; - let mut suffix = ""; - let mut modified = !pending.is_empty(); - pending = ""; - if let Some(Token::Text(next)) = tokens.peek_mut() { - let (lead, mid, _) = split_marks(next); - if !lead.is_empty() { - modified = true; - if prefix.is_empty() { - prefix = lead; - } else if mid.is_empty() { - suffix = lead; - } else { - prefix = ""; - } - *next = &next[lead.len()..]; - } - } - if !prefix.is_empty() { - out.push_str(prefix); - } - if modified { - push_code(code, &mut out); - } else { - out.push_str(raw); - } - if !suffix.is_empty() { - out.push_str(suffix); - } + handle_code_token(&mut tokens, (raw, code), &mut out, &mut pending); } Token::Fence(f) => out.push_str(f), Token::Newline => out.push('\n'), diff --git a/src/fences.rs b/src/fences.rs index 26176023..0644120f 100644 --- a/src/fences.rs +++ b/src/fences.rs @@ -120,6 +120,24 @@ fn attach_specifier_to_fence(fence_line: &str, specifier: &str, spec_indent: &st format!("{final_indent}```{specifier}") } +fn orphan_specifier_target(lines: &[String], start: usize) -> Option { + let mut index = start; + while index < lines.len() && lines[index].trim().is_empty() { + index += 1; + } + if index >= lines.len() || FENCE_RE.captures(&lines[index]).is_none() { + return None; + } + Some(index) +} + +fn orphan_specifier_target_without_language(lines: &[String], start: usize) -> Option { + let target = orphan_specifier_target(lines, start)?; + let cap = FENCE_RE.captures(&lines[target])?; + let lang = cap.get(3).map_or("", |m| m.as_str()); + is_null_lang(lang).then_some(target) +} + /// Attach orphaned language specifiers to opening fences. 
/// /// After compressing fences, a language may appear on its own line directly @@ -156,19 +174,10 @@ pub fn attach_orphan_specifiers(lines: &[String]) -> Vec { let (spec, indent) = normalize_specifier(line); if ORPHAN_LANG_RE.is_match(&spec) && out.last().is_none_or(|l: &String| l.trim().is_empty()) { - let mut j = i + 1; - while j < lines.len() && lines[j].trim().is_empty() { - j += 1; - } - if j < lines.len() - && let Some(cap) = FENCE_RE.captures(&lines[j]) - { - let lang = cap.get(3).map_or("", |m| m.as_str()); - if is_null_lang(lang) { - out.push(attach_specifier_to_fence(&lines[j], &spec, &indent)); - i = j + 1; - continue; - } + if let Some(target) = orphan_specifier_target_without_language(lines, i + 1) { + out.push(attach_specifier_to_fence(&lines[target], &spec, &indent)); + i = target + 1; + continue; } out.push(line.clone()); i += 1; diff --git a/src/footnotes/inline.rs b/src/footnotes/inline.rs index 82744eb0..2cd75e35 100644 --- a/src/footnotes/inline.rs +++ b/src/footnotes/inline.rs @@ -28,28 +28,37 @@ static ATX_HEADING_RE: LazyLock = lazy_regex!( "atx heading prefix", ); +#[derive(Clone, Copy)] +struct InlineFootnote<'a> { + pre: &'a str, + punc: &'a str, + style: &'a str, + num: &'a str, + boundary: &'a str, +} + #[inline] -fn capture_parts<'a>(caps: &'a Captures<'a>) -> (&'a str, &'a str, &'a str, &'a str, &'a str) { - ( - &caps["pre"], - &caps["punc"], - &caps["style"], - &caps["num"], - &caps["boundary"], - ) +fn capture_parts<'a>(caps: &'a Captures<'a>) -> InlineFootnote<'a> { + InlineFootnote { + pre: &caps["pre"], + punc: &caps["punc"], + style: &caps["style"], + num: &caps["num"], + boundary: &caps["boundary"], + } } #[inline] -fn build_footnote(pre: &str, punc: &str, style: &str, num: &str, boundary: &str) -> String { - format!("{pre}{punc}{style}[^{num}]{boundary}") +fn build_footnote(parts: InlineFootnote<'_>) -> String { + format!( + "{}{}{}[^{}]{}", + parts.pre, parts.punc, parts.style, parts.num, parts.boundary + ) } /// Convert inline numeric references into Markdown footnote syntax. 
pub(super) fn convert_inline(text: &str) -> String { - let out = INLINE_FN_RE.replace_all(text, |caps: &Captures| { - let (pre, punc, style, num, boundary) = capture_parts(caps); - build_footnote(pre, punc, style, num, boundary) - }); + let out = INLINE_FN_RE.replace_all(text, |caps: &Captures| build_footnote(capture_parts(caps))); COLON_FN_RE .replace_all(&out, |caps: &Captures| { let pre = &caps["pre"]; diff --git a/src/footnotes/parsing.rs b/src/footnotes/parsing.rs index 109379df..88a2007d 100644 --- a/src/footnotes/parsing.rs +++ b/src/footnotes/parsing.rs @@ -14,7 +14,7 @@ pub(super) static DEF_RE: LazyLock = lazy_regex!( "footnote definition pattern should compile", ); -#[derive(Clone)] +#[derive(Clone, Copy)] pub(super) struct DefinitionParts<'a> { pub(super) prefix: &'a str, pub(super) number: usize, diff --git a/src/footnotes/renumber.rs b/src/footnotes/renumber.rs index 9d7655b6..203c4d59 100644 --- a/src/footnotes/renumber.rs +++ b/src/footnotes/renumber.rs @@ -99,27 +99,36 @@ fn collect_reference_mapping(lines: &[String]) -> HashMap { } for token in tokenize_markdown(line) { if let Token::Text(text) = token { - for caps in FOOTNOTE_REF_RE.captures_iter(text) { - let Some(mat) = caps.get(0) else { - continue; - }; - if is_definition_like(text, &mat) { - continue; - } - if let Ok(number) = caps["num"].parse::() { - if mapping.contains_key(&number) { - continue; - } - mapping.insert(number, next); - next += 1; - } - } + collect_reference_mapping_from_text(text, &mut mapping, &mut next); } } } mapping } +fn collect_reference_mapping_from_text( + text: &str, + mapping: &mut HashMap, + next: &mut usize, +) { + for caps in FOOTNOTE_REF_RE.captures_iter(text) { + let Some(mat) = caps.get(0) else { + continue; + }; + if is_definition_like(text, &mat) { + continue; + } + let Ok(number) = caps["num"].parse::() else { + continue; + }; + if mapping.contains_key(&number) { + continue; + } + mapping.insert(number, *next); + *next += 1; + } +} + #[derive(Clone)] struct DefinitionLine { index: usize, @@ -270,6 +279,18 @@ struct DefinitionUpdates { is_definition_line: Vec, } +struct DefinitionScanContext<'a> { + mapping: &'a mut HashMap, + next_number: &'a mut usize, + numeric_list_range: Option<(usize, usize)>, + skip_numeric_conversion: bool, +} + +struct DefinitionAccumulator { + definitions: Vec, + is_definition_line: Vec, +} + fn assign_new_number( mapping: &mut HashMap, number: usize, @@ -296,22 +317,58 @@ fn should_convert_numeric_line( numeric_range.is_some_and(|(start, end)| index >= start && index < end) } -fn collect_definition_updates( - lines: &[String], +fn definition_line_from_parts( + index: usize, + parts: super::parsing::DefinitionParts<'_>, mapping: &mut HashMap, -) -> DefinitionUpdates { - let mut next_number = mapping.values().copied().max().unwrap_or(0) + 1; - let mut definitions = Vec::new(); - let mut is_definition_line = vec![false; lines.len()]; - let mut numeric_candidates: Vec = Vec::new(); - let numeric_list_range = footnote_block_range(lines); - let skip_numeric_conversion = numeric_list_range - .as_ref() - .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); + next_number: &mut usize, +) -> DefinitionLine { + let new_number = assign_new_number(mapping, parts.number, next_number); + let rewritten_rest = rewrite_tokens(parts.rest, mapping); + let mut line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); + line.push_str(parts.prefix); + write!(&mut line, "[^{new_number}]:").expect("write to string cannot fail"); + 
line.push_str(&rewritten_rest); + DefinitionLine { + index, + new_number, + line, + } +} + +fn numeric_candidate_from_line(line: &str, index: usize) -> Option { + let caps = FOOTNOTE_LINE_RE.captures(line)?; + let number = caps["num"].parse::().ok()?; + let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); + let rest = caps.name("rest").map_or("", |m| m.as_str()).to_string(); + let num_match = caps + .name("num") + .expect("numeric list capture missing number"); + let rest_match = caps + .name("rest") + .expect("numeric list capture missing rest"); + let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); + Some(NumericCandidate { + index, + number, + indent, + whitespace, + rest, + }) +} +fn collect_scan_updates( + lines: &[String], + ctx: &mut DefinitionScanContext<'_>, +) -> (DefinitionAccumulator, Vec) { + let mut acc = DefinitionAccumulator { + definitions: Vec::new(), + is_definition_line: vec![false; lines.len()], + }; + let mut numeric_candidates = Vec::new(); let mut in_fence = false; - for (idx, line) in lines.iter().enumerate() { + for (index, line) in lines.iter().enumerate() { if is_fence_line(line) { in_fence = !in_fence; continue; @@ -321,67 +378,76 @@ fn collect_definition_updates( } if let Some(parts) = parse_definition(line) { - let new_number = assign_new_number(mapping, parts.number, &mut next_number); - let rewritten_rest = rewrite_tokens(parts.rest, mapping); - let mut new_line = String::with_capacity(parts.prefix.len() + rewritten_rest.len() + 8); - new_line.push_str(parts.prefix); - write!(&mut new_line, "[^{new_number}]:").expect("write to string cannot fail"); - new_line.push_str(&rewritten_rest); - definitions.push(DefinitionLine { - index: idx, - new_number, - line: new_line, - }); - is_definition_line[idx] = true; - } else if should_convert_numeric_line(idx, numeric_list_range, skip_numeric_conversion) - && let Some(caps) = FOOTNOTE_LINE_RE.captures(line) + acc.definitions.push(definition_line_from_parts( + index, + parts, + ctx.mapping, + ctx.next_number, + )); + acc.is_definition_line[index] = true; + continue; + } + + if !should_convert_numeric_line(index, ctx.numeric_list_range, ctx.skip_numeric_conversion) { - if mapping.is_empty() && definitions.is_empty() { - continue; - } - let Ok(number) = caps["num"].parse::() else { - continue; - }; - let indent = caps.name("indent").map_or("", |m| m.as_str()).to_string(); - let rest = caps.name("rest").map_or("", |m| m.as_str()).to_string(); - let num_match = caps - .name("num") - .expect("numeric list capture missing number"); - let rest_match = caps - .name("rest") - .expect("numeric list capture missing rest"); - let whitespace = line[num_match.end() + 1..rest_match.start()].to_string(); - numeric_candidates.push(NumericCandidate { - index: idx, - number, - indent, - whitespace, - rest, - }); + continue; + } + if ctx.mapping.is_empty() && acc.definitions.is_empty() { + continue; + } + if let Some(candidate) = numeric_candidate_from_line(line, index) { + numeric_candidates.push(candidate); } } + (acc, numeric_candidates) +} + +fn finalize_numeric_candidates( + numeric_candidates: Vec, + ctx: &mut DefinitionScanContext<'_>, + acc: &mut DefinitionAccumulator, +) { for candidate in numeric_candidates.into_iter().rev() { - let new_number = assign_new_number(mapping, candidate.number, &mut next_number); - let rewritten_rest = rewrite_tokens(&candidate.rest, mapping); - let mut new_line = String::with_capacity( + let new_number = assign_new_number(ctx.mapping, candidate.number, 
ctx.next_number); + let rewritten_rest = rewrite_tokens(&candidate.rest, ctx.mapping); + let mut line = String::with_capacity( candidate.indent.len() + candidate.whitespace.len() + rewritten_rest.len() + 8, ); - new_line.push_str(&candidate.indent); - write!(&mut new_line, "[^{new_number}]:").expect("write to string cannot fail"); - new_line.push_str(&candidate.whitespace); - new_line.push_str(&rewritten_rest); - definitions.push(DefinitionLine { + line.push_str(&candidate.indent); + write!(&mut line, "[^{new_number}]:").expect("write to string cannot fail"); + line.push_str(&candidate.whitespace); + line.push_str(&rewritten_rest); + acc.definitions.push(DefinitionLine { index: candidate.index, new_number, - line: new_line, + line, }); - is_definition_line[candidate.index] = true; + acc.is_definition_line[candidate.index] = true; } +} + +fn collect_definition_updates( + lines: &[String], + mapping: &mut HashMap, +) -> DefinitionUpdates { + let mut next_number = mapping.values().copied().max().unwrap_or(0) + 1; + let numeric_list_range = footnote_block_range(lines); + let skip_numeric_conversion = numeric_list_range + .as_ref() + .is_some_and(|(start, _)| has_existing_footnote_block(lines, *start)); + let mut ctx = DefinitionScanContext { + mapping, + next_number: &mut next_number, + numeric_list_range, + skip_numeric_conversion, + }; + let (mut acc, numeric_candidates) = collect_scan_updates(lines, &mut ctx); + finalize_numeric_candidates(numeric_candidates, &mut ctx, &mut acc); DefinitionUpdates { - definitions, - is_definition_line, + definitions: acc.definitions, + is_definition_line: acc.is_definition_line, } } diff --git a/src/frontmatter.rs b/src/frontmatter.rs index 12cb5c87..9c316cc0 100644 --- a/src/frontmatter.rs +++ b/src/frontmatter.rs @@ -69,49 +69,35 @@ mod tests { /// Helper to convert `&[&str]` → `Vec`. fn s(v: &[&str]) -> Vec { v.iter().copied().map(str::to_string).collect() } + struct PrefixEmptyCase { + lines: Vec, + body_is_empty: bool, + check_body_equality: bool, + } + + struct FrontmatterSplitCase { + lines: Vec, + prefix_len: usize, + body_len: usize, + prefix_spot_checks: Vec<(usize, &'static str)>, + body_spot_check: Option<&'static str>, + } + /// Cases where `prefix` is empty (no frontmatter detected). 
#[rstest] - #[case::empty_input_returns_empty_slices( - s(&[]), - true, // body_is_empty - false // check_body_equality - )] - #[case::no_frontmatter_returns_empty_prefix( - s(&["# Heading", "Some text"]), - false, - true // check body == input lines - )] - #[case::unmatched_opener_treated_as_body( - s(&["---", "Some text", "More text"]), - false, - false - )] - #[case::indented_opener_not_recognized( - s(&[" ---", "title: Example", " ---"]), - false, - false - )] - #[case::later_dash_block_not_frontmatter( - s(&["# Heading", "", "---", "Not frontmatter", "---"]), - false, - false - )] - #[case::indented_closer_not_recognized( - s(&["---", "title: Example", " --- ", "# Heading"]), - false, - false - )] - fn prefix_empty_cases( - #[case] lines: Vec, - #[case] body_is_empty: bool, - #[case] check_body_equality: bool, - ) { - let (prefix, body) = split_leading_yaml_frontmatter(&lines); + #[case::empty_input_returns_empty_slices(PrefixEmptyCase { lines: s(&[]), body_is_empty: true, check_body_equality: false })] + #[case::no_frontmatter_returns_empty_prefix(PrefixEmptyCase { lines: s(&["# Heading", "Some text"]), body_is_empty: false, check_body_equality: true })] + #[case::unmatched_opener_treated_as_body(PrefixEmptyCase { lines: s(&["---", "Some text", "More text"]), body_is_empty: false, check_body_equality: false })] + #[case::indented_opener_not_recognized(PrefixEmptyCase { lines: s(&[" ---", "title: Example", " ---"]), body_is_empty: false, check_body_equality: false })] + #[case::later_dash_block_not_frontmatter(PrefixEmptyCase { lines: s(&["# Heading", "", "---", "Not frontmatter", "---"]), body_is_empty: false, check_body_equality: false })] + #[case::indented_closer_not_recognized(PrefixEmptyCase { lines: s(&["---", "title: Example", " --- ", "# Heading"]), body_is_empty: false, check_body_equality: false })] + fn prefix_empty_cases(#[case] case: PrefixEmptyCase) { + let (prefix, body) = split_leading_yaml_frontmatter(&case.lines); assert!(prefix.is_empty()); - if body_is_empty { + if case.body_is_empty { assert!(body.is_empty()); - } else if check_body_equality { - assert_eq!(body, &lines); + } else if case.check_body_equality { + assert_eq!(body, &case.lines); } else { assert!(!body.is_empty()); } @@ -119,72 +105,20 @@ mod tests { /// Cases where frontmatter is detected (non-empty `prefix`). 
#[rstest] - #[case::detects_frontmatter_with_triple_dash_closer( - s(&["---", "title: Example", "author: Test", "---", "# Heading", "Body text"]), - 4, // prefix_len - 2, // body_len - Some((0, "---")), - Some((3, "---")), - Some("# Heading") - )] - #[case::detects_frontmatter_with_triple_dot_closer( - s(&["---", "title: Example", "...", "# Heading"]), - 3, - 1, - Some((2, "...")), - None, - Some("# Heading") - )] - #[case::frontmatter_with_empty_body( - s(&["---", "title: Example", "---"]), - 3, - 0, - None, - None, - None - )] - #[case::frontmatter_only_no_body( - s(&["---", "---"]), - 2, - 0, - Some((1, "---")), - None, - None - )] - #[case::trailing_whitespace_on_closer_is_trimmed( - s(&["---", "title: Example", "--- ", "# Heading"]), - 3, - 1, - None, - None, - None - )] - #[case::multiline_yaml_values_preserved( - s(&["---", "description: |", " This is a multi-line", " YAML value", "---", "# Content"]), - 5, - 1, - None, - None, - Some("# Content") - )] - fn frontmatter_split_cases( - #[case] lines: Vec, - #[case] prefix_len: usize, - #[case] body_len: usize, - #[case] prefix_spot_check: Option<(usize, &str)>, - #[case] prefix_spot_check_2: Option<(usize, &str)>, - #[case] body_spot_check: Option<&str>, - ) { - let (prefix, body) = split_leading_yaml_frontmatter(&lines); - assert_eq!(prefix.len(), prefix_len); - assert_eq!(body.len(), body_len); - if let Some((idx, expected)) = prefix_spot_check { - assert_eq!(prefix[idx], expected); - } - if let Some((idx, expected)) = prefix_spot_check_2 { + #[case::detects_frontmatter_with_triple_dash_closer(FrontmatterSplitCase { lines: s(&["---", "title: Example", "author: Test", "---", "# Heading", "Body text"]), prefix_len: 4, body_len: 2, prefix_spot_checks: vec![(0, "---"), (3, "---")], body_spot_check: Some("# Heading") })] + #[case::detects_frontmatter_with_triple_dot_closer(FrontmatterSplitCase { lines: s(&["---", "title: Example", "...", "# Heading"]), prefix_len: 3, body_len: 1, prefix_spot_checks: vec![(2, "...")], body_spot_check: Some("# Heading") })] + #[case::frontmatter_with_empty_body(FrontmatterSplitCase { lines: s(&["---", "title: Example", "---"]), prefix_len: 3, body_len: 0, prefix_spot_checks: vec![], body_spot_check: None })] + #[case::frontmatter_only_no_body(FrontmatterSplitCase { lines: s(&["---", "---"]), prefix_len: 2, body_len: 0, prefix_spot_checks: vec![(1, "---")], body_spot_check: None })] + #[case::trailing_whitespace_on_closer_is_trimmed(FrontmatterSplitCase { lines: s(&["---", "title: Example", "--- ", "# Heading"]), prefix_len: 3, body_len: 1, prefix_spot_checks: vec![], body_spot_check: None })] + #[case::multiline_yaml_values_preserved(FrontmatterSplitCase { lines: s(&["---", "description: |", " This is a multi-line", " YAML value", "---", "# Content"]), prefix_len: 5, body_len: 1, prefix_spot_checks: vec![], body_spot_check: Some("# Content") })] + fn frontmatter_split_cases(#[case] case: FrontmatterSplitCase) { + let (prefix, body) = split_leading_yaml_frontmatter(&case.lines); + assert_eq!(prefix.len(), case.prefix_len); + assert_eq!(body.len(), case.body_len); + for (idx, expected) in case.prefix_spot_checks { assert_eq!(prefix[idx], expected); } - if let Some(expected) = body_spot_check { + if let Some(expected) = case.body_spot_check { assert_eq!(body[0], expected); } } diff --git a/src/html.rs b/src/html.rs index a3179eb8..b8c6e326 100644 --- a/src/html.rs +++ b/src/html.rs @@ -52,15 +52,7 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { match &handle.data { 
NodeData::Text { contents } => { for ch in contents.borrow().chars() { - if ch.is_whitespace() { - *last_space = true; - } else { - if *last_space && !out.is_empty() { - out.push(' '); - } - out.push(ch); - *last_space = false; - } + push_collapsed_text_char(ch, out, last_space); } } NodeData::Element { name, .. } => { @@ -80,6 +72,18 @@ fn collect_text(handle: &Handle, out: &mut String, last_space: &mut bool) { } } +fn push_collapsed_text_char(ch: char, out: &mut String, last_space: &mut bool) { + if ch.is_whitespace() { + *last_space = true; + return; + } + if *last_space && !out.is_empty() { + out.push(' '); + } + out.push(ch); + *last_space = false; +} + /// Returns `true` if `handle` is an HTML element with the given tag name. fn is_element(handle: &Handle, tag: &str) -> bool { if let NodeData::Element { name, .. } = &handle.data { @@ -204,26 +208,43 @@ fn table_lines_to_markdown(lines: &[String]) -> Vec { out } -/// Appends HTML table lines, tracking `` depth and converting them to Markdown when closed. -/// -/// Tracks the nesting depth of `
` tags, appending each line to the buffer. When all opened -/// tables are closed (depth reaches zero), converts the buffered HTML table lines to Markdown and -/// appends them to the output vector. Resets the buffer and updates the HTML state accordingly. -fn push_html_line( - line: &str, - buf: &mut Vec, - depth: &mut usize, - in_html: &mut bool, - out: &mut Vec, -) { +fn append_html_table_line(line: &str, buf: &mut Vec, depth: &mut usize) { buf.push(line.to_string()); *depth += TABLE_START_RE.find_iter(line).count(); if TABLE_END_RE.is_match(line) { *depth = depth.saturating_sub(TABLE_END_RE.find_iter(line).count()); - if *depth == 0 { - out.extend(html_table_to_markdown(buf)); - buf.clear(); - *in_html = false; + } +} + +fn flush_completed_html_table(buf: &mut Vec, depth: usize, out: &mut Vec) -> bool { + if depth != 0 { + return false; + } + out.extend(table_lines_to_markdown(buf)); + buf.clear(); + true +} + +#[derive(Default)] +struct HtmlTableState { + buf: Vec, + depth: usize, + in_html: bool, +} + +impl HtmlTableState { + fn flush_raw(&mut self, out: &mut Vec) { + if !self.buf.is_empty() { + out.append(&mut self.buf); + } + self.depth = 0; + self.in_html = false; + } + + fn push_html_line(&mut self, line: &str, out: &mut Vec) { + append_html_table_line(line, &mut self.buf, &mut self.depth); + if flush_completed_html_table(&mut self.buf, self.depth, out) { + self.in_html = false; } } } @@ -258,15 +279,8 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { for line in lines { if depth > 0 || TABLE_START_RE.is_match(line.trim_start()) { - buf.push(line.clone()); - depth += TABLE_START_RE.find_iter(line).count(); - if TABLE_END_RE.is_match(line) { - depth = depth.saturating_sub(TABLE_END_RE.find_iter(line).count()); - if depth == 0 { - out.extend(table_lines_to_markdown(&buf)); - buf.clear(); - } - } + append_html_table_line(line, &mut buf, &mut depth); + let _ = flush_completed_html_table(&mut buf, depth, &mut out); continue; } @@ -306,17 +320,13 @@ pub(crate) fn html_table_to_markdown(lines: &[String]) -> Vec { /// ``` pub fn convert_html_tables(lines: &[String]) -> Vec { let mut out = Vec::new(); - let mut buf = Vec::new(); - let mut depth = 0usize; - let mut in_html = false; + let mut html_state = HtmlTableState::default(); let mut in_code = false; for line in lines { if is_fence(line).is_some() { - if in_html { - out.append(&mut buf); - in_html = false; - depth = 0; + if html_state.in_html { + html_state.flush_raw(&mut out); } in_code = !in_code; out.push(line.clone()); @@ -328,22 +338,22 @@ pub fn convert_html_tables(lines: &[String]) -> Vec { continue; } - if in_html { - push_html_line(line, &mut buf, &mut depth, &mut in_html, &mut out); + if html_state.in_html { + html_state.push_html_line(line, &mut out); continue; } if TABLE_START_RE.is_match(line.trim_start()) { - in_html = true; - push_html_line(line, &mut buf, &mut depth, &mut in_html, &mut out); + html_state.in_html = true; + html_state.push_html_line(line, &mut out); continue; } out.push(line.clone()); } - if !buf.is_empty() { - out.extend(buf); + if !html_state.buf.is_empty() { + out.extend(html_state.buf); } out diff --git a/src/lists.rs b/src/lists.rs index 1431a8a0..4fdd5748 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -61,21 +61,32 @@ fn is_plain_paragraph_line(line: &str) -> bool { ) } -fn handle_paragraph_restart( - indent: usize, - line: &str, - prev_blank: bool, - indent_stack: &mut Vec, - counters: &mut HashMap, -) -> bool { - let inclusive = prev_blank - && indent_stack - .last() - 
.is_some_and(|&d| indent <= d && is_plain_paragraph_line(line)); - if inclusive { - prune_deeper(indent, true, indent_stack, counters); +struct ListState { + indent_stack: Vec, + counters: HashMap, +} + +impl ListState { + fn prune_deeper(&mut self, indent: usize, inclusive: bool) { + prune_deeper( + indent, + inclusive, + &mut self.indent_stack, + &mut self.counters, + ); + } + + fn handle_paragraph_restart(&mut self, indent: usize, line: &str, prev_blank: bool) -> bool { + let inclusive = prev_blank + && self + .indent_stack + .last() + .is_some_and(|&depth| indent <= depth && is_plain_paragraph_line(line)); + if inclusive { + self.prune_deeper(indent, true); + } + inclusive } - inclusive } /// Renumber ordered Markdown list items across the given lines. @@ -86,8 +97,10 @@ fn handle_paragraph_restart( #[must_use] pub fn renumber_lists(lines: &[String]) -> Vec { let mut out = Vec::with_capacity(lines.len()); - let mut indent_stack: Vec = Vec::new(); - let mut counters: HashMap = HashMap::new(); + let mut state = ListState { + indent_stack: Vec::new(), + counters: HashMap::new(), + }; // Track fenced code blocks consistently across list processing. let mut fences = FenceTracker::default(); #[allow(clippy::unnecessary_map_or)] @@ -110,11 +123,11 @@ pub fn renumber_lists(lines: &[String]) -> Vec { continue; } if let Some((indent, indent_str, sep, rest)) = parse_numbered(line) { - prune_deeper(indent, false, &mut indent_stack, &mut counters); - if indent_stack.last().is_none_or(|&d| d < indent) { - indent_stack.push(indent); + state.prune_deeper(indent, false); + if state.indent_stack.last().is_none_or(|&d| d < indent) { + state.indent_stack.push(indent); } - let num = counters.entry(indent).or_insert(1); + let num = state.counters.entry(indent).or_insert(1); let current = *num; *num += 1; out.push(format!("{indent_str}{current}.{sep}{rest}")); @@ -128,16 +141,15 @@ pub fn renumber_lists(lines: &[String]) -> Vec { let indent_str = &line[..indent_end]; let indent = indent_len(indent_str); if HEADING_RE.is_match(line) || THEMATIC_BREAK_RE.is_match(line.trim_end()) { - indent_stack.clear(); - counters.clear(); + state.indent_stack.clear(); + state.counters.clear(); out.push(line.clone()); prev_blank = false; continue; } - let did_inclusive = - handle_paragraph_restart(indent, line, prev_blank, &mut indent_stack, &mut counters); + let did_inclusive = state.handle_paragraph_restart(indent, line, prev_blank); if !did_inclusive { - prune_deeper(indent, false, &mut indent_stack, &mut counters); + state.prune_deeper(indent, false); } out.push(line.clone()); prev_blank = false; diff --git a/src/process.rs b/src/process.rs index 1dd34a05..2664a1cd 100644 --- a/src/process.rs +++ b/src/process.rs @@ -53,72 +53,67 @@ pub struct Options { } /// Flushes buffered lines to `out`, formatting as a table when required. -fn flush_buffer(buf: &mut Vec, in_table: &mut bool, out: &mut Vec) { - if buf.is_empty() { - return; - } - if *in_table { - out.extend(reflow_table(buf)); - buf.clear(); - } else { - out.extend(std::mem::take(buf)); - } - *in_table = false; +struct ProcessBuffer { + out: Vec, + buf: Vec, + in_table: bool, } -/// Detects fence lines and toggles code mode, flushing buffered content. 
-fn handle_fence_line( - line: &str, - buf: &mut Vec, - in_table: &mut bool, - out: &mut Vec, - fences: &mut FenceTracker, -) -> bool { - if !fences.observe(line) { - return false; +impl ProcessBuffer { + fn flush(&mut self) { + if self.buf.is_empty() { + return; + } + if self.in_table { + self.out.extend(reflow_table(&self.buf)); + self.buf.clear(); + } else { + self.out.extend(std::mem::take(&mut self.buf)); + } + self.in_table = false; } - flush_buffer(buf, in_table, out); - out.push(line.to_string()); - true -} + fn push_verbatim(&mut self, line: &str) { + self.flush(); + self.out.push(line.to_string()); + } -/// Buffers table lines, returning `true` when a line was consumed. -fn handle_table_line( - line: &str, - buf: &mut Vec, - in_table: &mut bool, - out: &mut Vec, -) -> bool { - let trimmed = line.trim_start(); + fn handle_fence_line(&mut self, line: &str, fences: &mut FenceTracker) -> bool { + if !fences.observe(line) { + return false; + } - if trimmed.starts_with('|') { - *in_table = true; - buf.push(line.to_string()); - return true; + self.push_verbatim(line); + true } - if line.trim().is_empty() { - if *in_table { - flush_buffer(buf, in_table, out); + + fn handle_table_line(&mut self, line: &str) -> bool { + if line.trim_start().starts_with('|') { + self.in_table = true; + self.buf.push(line.to_string()); + return true; } - return false; - } - if *in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) { - buf.push(line.to_string()); - return true; - } - if *in_table { - if classify_block(line).is_some() { - // Flush when a new Markdown block (heading, list, quote, footnote, directive, - // or digit-prefixed text) begins so wrapping and table detection stay aligned. - flush_buffer(buf, in_table, out); + if line.trim().is_empty() { + if self.in_table { + self.flush(); + } return false; } - // Plain paragraphs also end the table so the caller can reprocess them for wrapping. - flush_buffer(buf, in_table, out); - return false; + if self.in_table && (line.contains('|') || crate::table::SEP_RE.is_match(line.trim())) { + self.buf.push(line.to_string()); + return true; + } + if self.in_table { + if classify_block(line).is_some() { + // Flush when a new Markdown block begins so wrapping and table + // detection stay aligned. + self.flush(); + return false; + } + self.flush(); + } + false } - false } /// Processes a stream of Markdown lines using the provided [`Options`]. @@ -157,32 +152,35 @@ pub fn process_stream_inner(lines: &[String], opts: Options) -> Vec { let pre = convert_html_tables(&lines); - let mut out = Vec::new(); - let mut buf = Vec::new(); + let mut state = ProcessBuffer { + out: Vec::new(), + buf: Vec::new(), + in_table: false, + }; // Track fences so subsequent logic respects shared semantics. 
let mut fence_tracker = FenceTracker::default(); - let mut in_table = false; for line in &pre { - if handle_fence_line(line, &mut buf, &mut in_table, &mut out, &mut fence_tracker) { + if state.handle_fence_line(line, &mut fence_tracker) { continue; } if fence_tracker.in_fence() { - out.push(line.clone()); + state.out.push(line.clone()); continue; } - if handle_table_line(line, &mut buf, &mut in_table, &mut out) { + if state.handle_table_line(line) { continue; } - flush_buffer(&mut buf, &mut in_table, &mut out); - out.push(line.clone()); + state.flush(); + state.out.push(line.clone()); } - flush_buffer(&mut buf, &mut in_table, &mut out); + state.flush(); + let mut out = state.out; if opts.headings { out = crate::headings::convert_setext_headings(&out); } diff --git a/src/table.rs b/src/table.rs index 7e0fc700..66d8c779 100644 --- a/src/table.rs +++ b/src/table.rs @@ -144,16 +144,10 @@ fn parse_and_validate(trimmed: &[String], sep_line: Option<&String>) -> Option

], - output_rows: &[Vec], - sep_cells: Option>, - max_cols: usize, - indent: &str, -) -> Vec { - let widths = crate::reflow::calculate_widths(cleaned, max_cols); - let out = crate::reflow::format_rows(output_rows, &widths, indent); - crate::reflow::insert_separator(out, sep_cells, &widths, indent) +fn calculate_and_format(parsed: &ParsedTable, indent: &str) -> Vec { + let widths = crate::reflow::calculate_widths(&parsed.cleaned, parsed.max_cols); + let out = crate::reflow::format_rows(&parsed.output_rows, &widths, indent); + crate::reflow::insert_separator(out, parsed.sep_cells.clone(), &widths, indent) } /// Reflow a Markdown table so columns align uniformly. @@ -188,13 +182,7 @@ pub fn reflow_table(lines: &[String]) -> Vec { return lines.to_vec(); }; - calculate_and_format( - &parsed.cleaned, - &parsed.output_rows, - parsed.sep_cells, - parsed.max_cols, - &indent, - ) + calculate_and_format(&parsed, &indent) } #[cfg(test)] diff --git a/src/wrap.rs b/src/wrap.rs index fbad449b..4499eb62 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -8,6 +8,8 @@ //! The [`Token`] enum and [`tokenize_markdown`] function are public so callers //! can perform custom token-based processing. +use std::borrow::Cow; + mod block; mod fence; mod inline; @@ -17,7 +19,7 @@ mod tokenize; use block::{BLOCKQUOTE_RE, BULLET_RE, FOOTNOTE_RE}; pub(crate) use block::{BlockKind, classify_block}; pub use fence::{FenceTracker, is_fence}; -use paragraph::{flush_paragraph, handle_prefix_line}; +use paragraph::{ParagraphState, ParagraphWriter, PrefixLine}; /// Token emitted by the `tokenize::segment_inline` parser and used by /// higher-level wrappers. /// @@ -46,6 +48,74 @@ fn is_indented_code_line(line: &str) -> bool { indent_width >= 4 && line.chars().any(|c| !c.is_whitespace()) } +fn is_table_or_separator(line: &str) -> bool { + line.trim_start().starts_with('|') || crate::table::SEP_RE.is_match(line.trim()) +} + +fn is_passthrough_block(line: &str) -> bool { + is_table_or_separator(line) + || matches!( + classify_block(line), + Some(BlockKind::Heading | BlockKind::MarkdownlintDirective) + ) + || line.trim().is_empty() + || is_indented_code_line(line) +} + +fn prefix_line(line: &str) -> Option> { + if let Some(cap) = BULLET_RE.captures(line) { + let prefix = cap.get(1).expect("bullet regex capture").as_str(); + let rest = cap.get(2).expect("bullet regex remainder capture").as_str(); + return Some(PrefixLine { + prefix: Cow::Borrowed(prefix), + rest, + repeat_prefix: false, + }); + } + + if let Some(cap) = FOOTNOTE_RE.captures(line) { + let prefix = cap.get(1).expect("footnote prefix capture").as_str(); + let marker = cap.get(2).expect("footnote marker capture").as_str(); + let rest = cap + .get(3) + .expect("footnote regex remainder capture") + .as_str(); + return Some(PrefixLine { + prefix: Cow::Owned(format!("{prefix}{marker}")), + rest, + repeat_prefix: false, + }); + } + + BLOCKQUOTE_RE.captures(line).map(|cap| PrefixLine { + prefix: Cow::Borrowed(cap.get(1).expect("blockquote prefix capture").as_str()), + rest: cap + .get(2) + .expect("blockquote regex remainder capture") + .as_str(), + repeat_prefix: true, + }) +} + +fn line_break_parts(line: &str) -> (String, bool) { + let trimmed_end = line.trim_end(); + let text_without_html_breaks = trimmed_end + .trim_end_matches("
") + .trim_end_matches("
") + .trim_end_matches("
"); + + let is_trailing_spaces = line.ends_with(" "); + let is_html_br = trimmed_end != text_without_html_breaks; + let backslash_count = trimmed_end.chars().rev().take_while(|&c| c == '\\').count(); + let is_backslash_escape = backslash_count % 2 == 1; + let hard_break = is_trailing_spaces || is_html_br || is_backslash_escape; + let text = text_without_html_breaks + .trim_start() + .trim_end_matches(' ') + .to_string(); + (text, hard_break) +} + /// Wrap text lines to the given width. /// /// # Panics @@ -53,122 +123,37 @@ fn is_indented_code_line(line: &str) -> bool { #[must_use] pub fn wrap_text(lines: &[String], width: usize) -> Vec { let mut out = Vec::new(); - let mut buf: Vec<(String, bool)> = Vec::new(); - let mut indent = String::new(); + let mut state = ParagraphState::default(); + let mut writer = ParagraphWriter::new(&mut out, width); // Track fenced code blocks so wrapping honours shared fence semantics. let mut fence_tracker = FenceTracker::default(); for line in lines { - if fence::handle_fence_line( - &mut out, - &mut buf, - &mut indent, - width, - line, - &mut fence_tracker, - ) { + if fence::handle_fence_line(line, &mut writer, &mut state, &mut fence_tracker) { continue; } if fence_tracker.in_fence() { - out.push(line.clone()); + writer.push_verbatim(&mut state, line); continue; } - if line.trim_start().starts_with('|') || crate::table::SEP_RE.is_match(line.trim()) { - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(line.clone()); + if is_passthrough_block(line) { + writer.push_verbatim(&mut state, line); continue; } - if matches!( - classify_block(line), - Some(BlockKind::Heading | BlockKind::MarkdownlintDirective) - ) { - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(line.clone()); + if let Some(prefix_line) = prefix_line(line) { + writer.handle_prefix_line(&mut state, &prefix_line); continue; } - if line.trim().is_empty() { - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(String::new()); - continue; - } - - if let Some(cap) = BULLET_RE.captures(line) { - let prefix = cap.get(1).expect("bullet regex capture").as_str(); - let rest = cap.get(2).expect("bullet regex remainder capture").as_str(); - handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, false); - continue; - } - - if let Some(cap) = FOOTNOTE_RE.captures(line) { - let prefix = format!("{}{}", &cap[1], &cap[2]); - let rest = cap - .get(3) - .expect("footnote regex remainder capture") - .as_str(); - handle_prefix_line(&mut out, &mut buf, &mut indent, width, &prefix, rest, false); - continue; - } - - if let Some(cap) = BLOCKQUOTE_RE.captures(line) { - let prefix = cap.get(1).expect("blockquote prefix capture").as_str(); - let rest = cap - .get(2) - .expect("blockquote regex remainder capture") - .as_str(); - handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, true); - continue; - } - - if is_indented_code_line(line) { - // Preserve indented code blocks verbatim so wrapping does not merge them into - // paragraphs. - flush_paragraph(&mut out, &buf, &indent, width); - buf.clear(); - indent.clear(); - out.push(line.clone()); - continue; - } - - if buf.is_empty() { - indent = line.chars().take_while(|c| c.is_whitespace()).collect(); - } - let trimmed_end = line.trim_end(); - let text_without_html_breaks = trimmed_end - .trim_end_matches("
") - .trim_end_matches("
") - .trim_end_matches("
"); - - let is_trailing_spaces = line.ends_with(" "); - let is_html_br = trimmed_end != text_without_html_breaks; - let backslash_count = line - .trim_end() - .chars() - .rev() - .take_while(|&c| c == '\\') - .count(); - let is_backslash_escape = backslash_count % 2 == 1; - - let hard_break = is_trailing_spaces || is_html_br || is_backslash_escape; - - let text = text_without_html_breaks - .trim_start() - .trim_end_matches(' ') - .to_string(); - - buf.push((text, hard_break)); + state.note_indent(line); + let (text, hard_break) = line_break_parts(line); + state.push(text, hard_break); } - flush_paragraph(&mut out, &buf, &indent, width); + writer.flush_paragraph(&mut state); out } diff --git a/src/wrap/fence.rs b/src/wrap/fence.rs index 19d543d7..1eaea4f4 100644 --- a/src/wrap/fence.rs +++ b/src/wrap/fence.rs @@ -2,6 +2,8 @@ use regex::Regex; +use super::paragraph::{ParagraphState, ParagraphWriter}; + pub(super) static FENCE_RE: std::sync::LazyLock = // Capture: indent, fence run of 3+ backticks/tilde, and the full info string (incl. leading // spaces) @@ -41,21 +43,16 @@ pub fn is_fence(line: &str) -> Option<(&str, &str, &str)> { /// /// Returns `true` if the line was processed as a fence. pub(crate) fn handle_fence_line( - out: &mut Vec, - buf: &mut Vec<(String, bool)>, - indent: &mut String, - width: usize, line: &str, + writer: &mut ParagraphWriter<'_>, + state: &mut ParagraphState, tracker: &mut FenceTracker, ) -> bool { if !tracker.observe(line) { return false; } - super::flush_paragraph(out, buf, indent, width); - buf.clear(); - indent.clear(); - out.push(line.to_string()); + writer.push_verbatim(state, line); true } diff --git a/src/wrap/inline.rs b/src/wrap/inline.rs index 78f261a2..8b8027fd 100644 --- a/src/wrap/inline.rs +++ b/src/wrap/inline.rs @@ -4,9 +4,14 @@ //! inline code, links, and trailing punctuation without reimplementing the //! grouping logic in multiple places. 
+use std::ops::Range; + use unicode_width::UnicodeWidthStr; -use super::{line_buffer::LineBuffer, tokenize}; +use super::{ + line_buffer::{LineBuffer, SplitContext}, + tokenize, +}; #[derive(Copy, Clone, PartialEq, Eq)] enum SpanKind { @@ -167,24 +172,23 @@ pub(super) fn attach_punctuation_to_previous_line( fn push_span_with_carry( buffer: &mut LineBuffer, tokens: &[String], - start: usize, - end: usize, + span: Range, carried_whitespace: &mut String, ) { - if start >= end { + if span.start >= span.end { return; } if carried_whitespace.is_empty() { - buffer.push_span(tokens, start, end); + buffer.push_span(tokens, span.start, span.end); return; } let mut first_token = std::mem::take(carried_whitespace); - first_token.push_str(tokens[start].as_str()); + first_token.push_str(tokens[span.start].as_str()); buffer.push_token(first_token.as_str()); - if start + 1 < end { - buffer.push_span(tokens, start + 1, end); + if span.start + 1 < span.end { + buffer.push_span(tokens, span.start + 1, span.end); } } @@ -201,12 +205,13 @@ pub(super) fn wrap_preserving_code(text: &str, width: usize) -> Vec { while i < tokens.len() { let (group_end, group_width) = determine_token_span(&tokens, i); - let span_is_whitespace = tokens[i..group_end] + let span = i..group_end; + let span_is_whitespace = tokens[span.clone()] .iter() .all(|tok| is_whitespace_token(tok)); if span_is_whitespace && !carried_whitespace.is_empty() && group_end != tokens.len() { - for tok in &tokens[i..group_end] { + for tok in &tokens[span.clone()] { carried_whitespace.push_str(tok); } i = group_end; @@ -220,31 +225,35 @@ pub(super) fn wrap_preserving_code(text: &str, width: usize) -> Vec { } if buffer.width() + group_width <= width { - push_span_with_carry(&mut buffer, &tokens, i, group_end, &mut carried_whitespace); + push_span_with_carry(&mut buffer, &tokens, span.clone(), &mut carried_whitespace); i = group_end; continue; } - if buffer.split_with_span(&mut lines, &tokens, i, group_end, width) { + let mut split = SplitContext { + lines: &mut lines, + width, + }; + if buffer.split_with_span(&mut split, &tokens, span.clone()) { i = group_end; continue; } - if buffer.flush_trailing_whitespace(&mut lines, &tokens, i, group_end) { + if buffer.flush_trailing_whitespace(&mut lines, &tokens, span.clone()) { i = group_end; continue; } buffer.flush_into(&mut lines); if span_is_whitespace { - for tok in &tokens[i..group_end] { + for tok in &tokens[span] { carried_whitespace.push_str(tok); } i = group_end; continue; } - push_span_with_carry(&mut buffer, &tokens, i, group_end, &mut carried_whitespace); + push_span_with_carry(&mut buffer, &tokens, i..group_end, &mut carried_whitespace); i = group_end; } diff --git a/src/wrap/line_buffer.rs b/src/wrap/line_buffer.rs index feff9c55..0eafa3b2 100644 --- a/src/wrap/line_buffer.rs +++ b/src/wrap/line_buffer.rs @@ -3,6 +3,8 @@ //! This module encapsulates the mutable state required to accumulate tokens into //! wrapped lines while reusing allocations between iterations. 
+use std::ops::Range; + use unicode_width::UnicodeWidthStr; #[derive(Default)] @@ -12,6 +14,11 @@ pub(crate) struct LineBuffer { last_split: Option, } +pub(crate) struct SplitContext<'a> { + pub(crate) lines: &'a mut Vec, + pub(crate) width: usize, +} + impl LineBuffer { pub(crate) fn new() -> Self { Self::default() } @@ -60,11 +67,9 @@ impl LineBuffer { pub(crate) fn split_with_span( &mut self, - lines: &mut Vec, + ctx: &mut SplitContext<'_>, tokens: &[String], - start: usize, - end: usize, - width: usize, + span: Range, ) -> bool { let Some(pos) = self.last_split else { return false; @@ -89,24 +94,26 @@ impl LineBuffer { }; if let Some((start_idx, end_idx)) = head_bounds { - lines.push(self.text[start_idx..end_idx].to_owned()); + ctx.lines.push(self.text[start_idx..end_idx].to_owned()); } self.text.drain(..trimmed_tail_start); - for tok in &tokens[start..end] { + for tok in &tokens[span.clone()] { self.text.push_str(tok); } self.width = UnicodeWidthStr::width(self.text.as_str()); - if end > start && tokens[end - 1].chars().all(char::is_whitespace) && !self.text.is_empty() + if span.end > span.start + && tokens[span.end - 1].chars().all(char::is_whitespace) + && !self.text.is_empty() { self.last_split = Some(self.text.len()); } else { self.last_split = None; } - if self.width > width { - lines.push(self.text.trim_end().to_string()); + if self.width > ctx.width { + ctx.lines.push(self.text.trim_end().to_string()); self.text.clear(); self.width = 0; self.last_split = None; @@ -119,13 +126,12 @@ impl LineBuffer { &mut self, lines: &mut Vec, tokens: &[String], - start: usize, - end: usize, + span: Range, ) -> bool { - if end != tokens.len() { + if span.end != tokens.len() { return false; } - if !tokens[start..end] + if !tokens[span.clone()] .iter() .all(|tok| tok.chars().all(char::is_whitespace)) { @@ -137,7 +143,7 @@ impl LineBuffer { return true; } - for tok in &tokens[start..end] { + for tok in &tokens[span] { self.text.push_str(tok); } lines.push(std::mem::take(&mut self.text)); diff --git a/src/wrap/paragraph.rs b/src/wrap/paragraph.rs index 79e52a17..358c946f 100644 --- a/src/wrap/paragraph.rs +++ b/src/wrap/paragraph.rs @@ -3,82 +3,117 @@ //! These helpers keep paragraph logic focused on buffer management while //! deferring inline wrapping to `inline::wrap_preserving_code`. 
+use std::borrow::Cow; + use unicode_width::UnicodeWidthStr; use super::inline::wrap_preserving_code; -fn append_wrapped_with_prefix( - out: &mut Vec, - prefix: &str, - text: &str, - width: usize, - repeat_prefix: bool, -) { - let prefix_width = UnicodeWidthStr::width(prefix); - let available = width.saturating_sub(prefix_width).max(1); - let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); - let indent_width = UnicodeWidthStr::width(indent_str.as_str()); - let wrapped_indent = if repeat_prefix { - prefix.to_string() - } else { - format!("{}{}", indent_str, " ".repeat(prefix_width - indent_width)) - }; - - let lines = wrap_preserving_code(text, available); - if lines.is_empty() { - out.push(prefix.to_string()); - return; +pub(super) struct PrefixLine<'a> { + pub(super) prefix: Cow<'a, str>, + pub(super) rest: &'a str, + pub(super) repeat_prefix: bool, +} + +#[derive(Default)] +pub(super) struct ParagraphState { + buf: Vec<(String, bool)>, + indent: String, +} + +impl ParagraphState { + pub(super) fn clear(&mut self) { + self.buf.clear(); + self.indent.clear(); } - for (i, line) in lines.iter().enumerate() { - if i == 0 { - out.push(format!("{prefix}{line}")); - } else { - out.push(format!("{wrapped_indent}{line}")); + pub(super) fn note_indent(&mut self, line: &str) { + if self.buf.is_empty() { + self.indent = line.chars().take_while(|c| c.is_whitespace()).collect(); } } + + pub(super) fn push(&mut self, text: String, hard_break: bool) { + self.buf.push((text, hard_break)); + } } -pub(super) fn flush_paragraph( - out: &mut Vec, - buf: &[(String, bool)], - indent: &str, +pub(super) struct ParagraphWriter<'a> { + out: &'a mut Vec, width: usize, -) { - if buf.is_empty() { - return; +} + +impl<'a> ParagraphWriter<'a> { + pub(super) fn new(out: &'a mut Vec, width: usize) -> Self { Self { out, width } } + + fn append_wrapped_with_prefix(&mut self, line: &PrefixLine<'_>) { + let prefix = line.prefix.as_ref(); + let prefix_width = UnicodeWidthStr::width(prefix); + let available = self.width.saturating_sub(prefix_width).max(1); + let indent_str: String = prefix.chars().take_while(|c| c.is_whitespace()).collect(); + let indent_width = UnicodeWidthStr::width(indent_str.as_str()); + let wrapped_indent = if line.repeat_prefix { + prefix.to_string() + } else { + format!("{}{}", indent_str, " ".repeat(prefix_width - indent_width)) + }; + + let lines = wrap_preserving_code(line.rest, available); + if lines.is_empty() { + self.out.push(prefix.to_string()); + return; + } + + for (index, wrapped_line) in lines.iter().enumerate() { + if index == 0 { + self.out.push(format!("{prefix}{wrapped_line}")); + } else { + self.out.push(format!("{wrapped_indent}{wrapped_line}")); + } + } } - let mut segment = String::new(); - for (text, hard_break) in buf { - if !segment.is_empty() { - segment.push(' '); + + pub(super) fn flush_paragraph(&mut self, state: &mut ParagraphState) { + if state.buf.is_empty() { + return; } - segment.push_str(text); - if *hard_break { - for line in wrap_preserving_code(&segment, width - indent.len()) { - out.push(format!("{indent}{line}")); + + let mut segment = String::new(); + for (text, hard_break) in &state.buf { + if !segment.is_empty() { + segment.push(' '); + } + segment.push_str(text); + if *hard_break { + self.push_wrapped_segment(&state.indent, &segment); + segment.clear(); } - segment.clear(); } + + if !segment.is_empty() { + self.push_wrapped_segment(&state.indent, &segment); + } + + state.clear(); } - if !segment.is_empty() { - for line in 
-            out.push(format!("{indent}{line}"));
+
+    fn push_wrapped_segment(&mut self, indent: &str, segment: &str) {
+        for line in wrap_preserving_code(segment, self.width - indent.len()) {
+            self.out.push(format!("{indent}{line}"));
         }
     }
-}
 
-pub(super) fn handle_prefix_line(
-    out: &mut Vec<String>,
-    buf: &mut Vec<(String, bool)>,
-    indent: &mut String,
-    width: usize,
-    prefix: &str,
-    rest: &str,
-    repeat_prefix: bool,
-) {
-    flush_paragraph(out, buf, indent, width);
-    buf.clear();
-    indent.clear();
-    append_wrapped_with_prefix(out, prefix, rest, width, repeat_prefix);
+    pub(super) fn push_verbatim(&mut self, state: &mut ParagraphState, line: &str) {
+        self.flush_paragraph(state);
+        self.out.push(line.to_string());
+    }
+
+    pub(super) fn handle_prefix_line(
+        &mut self,
+        state: &mut ParagraphState,
+        prefix_line: &PrefixLine<'_>,
+    ) {
+        self.flush_paragraph(state);
+        self.append_wrapped_with_prefix(prefix_line);
+    }
 }
diff --git a/src/wrap/tests.rs b/src/wrap/tests.rs
index aba72934..6e244eae 100644
--- a/src/wrap/tests.rs
+++ b/src/wrap/tests.rs
@@ -7,7 +7,7 @@ use rstest::rstest;
 
 use super::{
     inline::{attach_punctuation_to_previous_line, determine_token_span, wrap_preserving_code},
-    line_buffer::LineBuffer,
+    line_buffer::{LineBuffer, SplitContext},
     tokenize::segment_inline,
 };
 use crate::wrap::{BlockKind, classify_block, wrap_text};
@@ -123,7 +123,11 @@ fn line_buffer_split_preserves_multi_space_lines() {
     buffer.push_span(&tokens, 0, 2);
 
     let mut lines = Vec::new();
-    assert!(buffer.split_with_span(&mut lines, &tokens, 2, 4, 8));
+    let mut split = SplitContext {
+        lines: &mut lines,
+        width: 8,
+    };
+    assert!(buffer.split_with_span(&mut split, &tokens, 2..4));
     assert_eq!(lines, vec!["alpha ".to_string()]);
     assert_eq!(buffer.text(), "beta ");
     assert_eq!(
@@ -139,7 +143,11 @@ fn line_buffer_split_trims_single_trailing_space() {
     buffer.push_span(&tokens, 0, 2);
 
     let mut lines = Vec::new();
-    assert!(buffer.split_with_span(&mut lines, &tokens, 2, 3, 5));
+    let mut split = SplitContext {
+        lines: &mut lines,
+        width: 5,
+    };
+    assert!(buffer.split_with_span(&mut split, &tokens, 2..3));
     assert_eq!(lines, vec!["alpha".to_string()]);
     assert_eq!(buffer.text(), "beta");
     assert_eq!(
@@ -160,7 +168,11 @@ fn line_buffer_split_tracks_multiple_whitespace_tokens() {
     buffer.push_span(&tokens, 0, 3);
 
     let mut lines = Vec::new();
-    assert!(buffer.split_with_span(&mut lines, &tokens, 3, 4, 4));
+    let mut split = SplitContext {
+        lines: &mut lines,
+        width: 4,
+    };
+    assert!(buffer.split_with_span(&mut split, &tokens, 3..4));
     assert_eq!(lines, vec!["foo ".to_string()]);
     assert_eq!(buffer.text(), "bar");
 }
@@ -176,8 +188,7 @@ fn line_buffer_trailing_whitespace_flushes_line() {
     assert!(buffer.flush_trailing_whitespace(
         &mut lines,
         &whitespace_tokens,
-        0,
-        whitespace_tokens.len(),
+        0..whitespace_tokens.len()
     ));
     assert_eq!(lines, vec!["foo ".to_string()]);
     assert!(buffer.text().is_empty());
diff --git a/src/wrap/tokenize/mod.rs b/src/wrap/tokenize/mod.rs
index c7501861..7b0e0625 100644
--- a/src/wrap/tokenize/mod.rs
+++ b/src/wrap/tokenize/mod.rs
@@ -71,13 +71,11 @@ pub(super) fn segment_inline(text: &str) -> Vec<String> {
             i = scan_while(text, i, char::is_whitespace);
             tokens.push(collect_range(text, start, i));
             continue;
-        } else if ch == '`' {
+        }
+
+        if ch == '`' {
             if has_odd_backslash_escape_bytes(bytes, i) {
-                if let Some(last) = tokens.last_mut() {
-                    last.push('`');
-                } else {
-                    tokens.push(String::from("`"));
-                }
+                append_escaped_backtick(&mut tokens);
                 i += ch.len_utf8();
                 continue;
             }
@@ -99,31 +97,51 @@
                 tokens.push(collect_range(text, punct_start, new_i));
             }
             i = new_i;
-        } else {
-            let start = i;
-            while i < text.len() {
-                let Some(current) = text[i..].chars().next() else {
-                    break;
-                };
-                if current.is_whitespace() || current == '`' {
-                    break;
-                }
-                let current_escaped = has_odd_backslash_escape_bytes(bytes, i);
-                if current == '[' {
-                    if !current_escaped && !bracket_follows_escaped_bang(bytes, i) {
-                        break;
-                    }
-                } else if looks_like_image_start(text, i, current) && !current_escaped {
-                    break;
-                }
-                i += current.len_utf8();
-            }
-            tokens.push(collect_range(text, start, i));
+            continue;
         }
+
+        let start = i;
+        i = scan_plain_text_end(text, bytes, i);
+        tokens.push(collect_range(text, start, i));
     }
     tokens
 }
 
+fn append_escaped_backtick(tokens: &mut Vec<String>) {
+    if let Some(last) = tokens.last_mut() {
+        last.push('`');
+    } else {
+        tokens.push(String::from("`"));
+    }
+}
+
+fn scan_plain_text_end(text: &str, bytes: &[u8], mut index: usize) -> usize {
+    while index < text.len() {
+        let Some(current) = text[index..].chars().next() else {
+            break;
+        };
+        if current.is_whitespace() || current == '`' {
+            break;
+        }
+
+        let current_escaped = has_odd_backslash_escape_bytes(bytes, index);
+        if should_stop_plain_text(text, bytes, index, (current, current_escaped)) {
+            break;
+        }
+
+        index += current.len_utf8();
+    }
+    index
+}
+
+fn should_stop_plain_text(text: &str, bytes: &[u8], index: usize, current: (char, bool)) -> bool {
+    let (ch, is_escaped) = current;
+    if ch == '[' {
+        return !is_escaped && !bracket_follows_escaped_bang(bytes, index);
+    }
+    looks_like_image_start(text, index, ch) && !is_escaped
+}
+
 fn next_token(line: &str, offset: usize) -> Option<(Token<'_>, usize)> {
     if offset >= line.len() {
         return None;
diff --git a/src/wrap/tokenize/scanning.rs b/src/wrap/tokenize/scanning.rs
index ef0edfdb..76efad2a 100644
--- a/src/wrap/tokenize/scanning.rs
+++ b/src/wrap/tokenize/scanning.rs
@@ -82,31 +82,33 @@ mod tests {
     use super::*;
 
+    struct ScanCollectCase {
+        text: &'static str,
+        start: usize,
+        predicate: Option<fn(char) -> bool>,
+        end: Option<usize>,
+        expected_idx: Option<usize>,
+        expected_str: Option<&'static str>,
+    }
+
     #[rstest]
-    #[case::alpha_prefix("abc123", 0, Some(char::is_alphabetic as fn(char) -> bool), None, Some(3), None)]
-    #[case::numeric_suffix("abc123", 3, Some(char::is_numeric as fn(char) -> bool), None, Some("abc123".len()), None)]
-    #[case::multibyte_scan("åßç123", 0, Some(char::is_alphabetic as fn(char) -> bool), None, Some("åßç123".find('1').unwrap_or("åßç123".len())), Some("åßç"))]
-    #[case::collect_first_two("αβγδε", 0, None, Some("αβ".len()), None, Some("αβ"))]
-    #[case::collect_middle("αβγδε", "αβ".len(), None, Some("αβ".len() + "γδ".len()), None, Some("γδ"))]
-    fn scan_and_collect_cases(
-        #[case] text: &str,
-        #[case] start: usize,
-        #[case] predicate: Option<fn(char) -> bool>,
-        #[case] end: Option<usize>,
-        #[case] expected_idx: Option<usize>,
-        #[case] expected_str: Option<&str>,
-    ) {
-        if let Some(pred) = predicate {
-            let idx = scan_while(text, start, pred);
-            if let Some(expected) = expected_idx {
+    #[case::alpha_prefix(ScanCollectCase { text: "abc123", start: 0, predicate: Some(char::is_alphabetic as fn(char) -> bool), end: None, expected_idx: Some(3), expected_str: None })]
+    #[case::numeric_suffix(ScanCollectCase { text: "abc123", start: 3, predicate: Some(char::is_numeric as fn(char) -> bool), end: None, expected_idx: Some("abc123".len()), expected_str: None })]
+    #[case::multibyte_scan(ScanCollectCase { text: "åßç123", start: 0, predicate: Some(char::is_alphabetic as fn(char) -> bool), end: None, expected_idx: Some("åßç123".find('1').unwrap_or("åßç123".len())), expected_str: Some("åßç") })]
"åßç123", start: 0, predicate: Some(char::is_alphabetic as fn(char) -> bool), end: None, expected_idx: Some("åßç123".find('1').unwrap_or("åßç123".len())), expected_str: Some("åßç") })] + #[case::collect_first_two(ScanCollectCase { text: "αβγδε", start: 0, predicate: None, end: Some("αβ".len()), expected_idx: None, expected_str: Some("αβ") })] + #[case::collect_middle(ScanCollectCase { text: "αβγδε", start: "αβ".len(), predicate: None, end: Some("αβ".len() + "γδ".len()), expected_idx: None, expected_str: Some("γδ") })] + fn scan_and_collect_cases(#[case] case: ScanCollectCase) { + if let Some(pred) = case.predicate { + let idx = scan_while(case.text, case.start, pred); + if let Some(expected) = case.expected_idx { assert_eq!(idx, expected); } - if let Some(expected_slice) = expected_str { - assert_eq!(&text[..idx], expected_slice); + if let Some(expected_slice) = case.expected_str { + assert_eq!(&case.text[..idx], expected_slice); } - } else if let Some(end_idx) = end { - let collected = collect_range(text, start, end_idx); - if let Some(expected_slice) = expected_str { + } else if let Some(end_idx) = case.end { + let collected = collect_range(case.text, case.start, end_idx); + if let Some(expected_slice) = case.expected_str { assert_eq!(collected, expected_slice); } } else { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 663f68f0..62016b97 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -52,31 +52,43 @@ pub fn assert_wrapped_list_item(output: &[String], prefix: &str, expected: usize let mut open: Option = None; for line in output { - let chars: Vec = line.chars().collect(); - let mut i = 0; - while i < chars.len() { - if chars[i] == '`' { - let mut len = 0; - while i < chars.len() && chars[i] == '`' { - len += 1; - i += 1; - } - if let Some(open_len) = open { - if open_len == len { - open = None; - } - } else { - open = Some(len); - } - } else { - i += 1; - } - } + scan_code_spans(line, &mut open); assert!(open.is_none(), "code span split across lines"); } assert!(open.is_none(), "unclosed code span"); } +fn scan_code_spans(line: &str, open: &mut Option) { + let chars: Vec = line.chars().collect(); + let mut i = 0; + while i < chars.len() { + if chars[i] != '`' { + i += 1; + continue; + } + + let len = count_backticks(&chars, &mut i); + toggle_code_span(open, len); + } +} + +fn count_backticks(chars: &[char], index: &mut usize) -> usize { + let mut len = 0; + while *index < chars.len() && chars[*index] == '`' { + len += 1; + *index += 1; + } + len +} + +fn toggle_code_span(open: &mut Option, len: usize) { + if open.is_some_and(|open_len| open_len == len) { + *open = None; + } else { + *open = Some(len); + } +} + /// Assert that every line in a blockquote starts with the given prefix and is at most 80 /// characters. ///