Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ fn is_bold_tag(tag: &str) -> bool {

/// Returns `true` if `handle` contains a `<b>` or `<strong>` descendant.
fn contains_strong(handle: &Handle) -> bool {
if let NodeData::Element { name, .. } = &handle.data {
if is_bold_tag(name.local.as_ref()) {
return true;
}
if let NodeData::Element { name, .. } = &handle.data
&& is_bold_tag(name.local.as_ref())
{
return true;
}
let children = handle.children.borrow();
children.iter().any(contains_strong)
Expand Down
100 changes: 40 additions & 60 deletions src/wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! The [`Token`] enum and [`tokenize_markdown`] function are public so callers
//! can perform custom token-based processing.

use regex::{Captures, Regex};
use regex::Regex;

mod tokenize;
/// Token emitted by [`tokenize::segment_inline`] and used by higher-level wrappers.
Expand All @@ -23,14 +23,20 @@ pub use tokenize::tokenize_markdown;
static FENCE_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").unwrap());
Comment on lines 23 to 24
Copy link
Copy Markdown
Contributor

@coderabbitai coderabbitai Bot Aug 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Remove duplicate fence detection code.

The file has duplicate fence detection logic. Remove the local FENCE_RE and is_fence function, and use the one from the tokenize module instead.

Apply this diff:

-static FENCE_RE: std::sync::LazyLock<Regex> =
-    std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").unwrap());

 // ... later in file ...

-#[doc(hidden)]
-pub fn is_fence(line: &str) -> bool { FENCE_RE.is_match(line) }
+#[doc(hidden)]
+pub fn is_fence(line: &str) -> bool { crate::tokenize::is_fence(line) }

Also applies to: 213-213

🤖 Prompt for AI Agents
In src/wrap.rs around lines 23-24 and line 213, remove the local static FENCE_RE
and the is_fence function that uses it. Instead, update the is_fence function to
call and return the result of crate::tokenize::is_fence(line). This eliminates
duplicate fence detection logic by reusing the implementation from the tokenize
module.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@coderabbitai new GitHub issue, please

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

@leynos I'll create a new GitHub issue for the duplicate fence detection code.


🏁 Script executed:

#!/bin/bash
gh issue create \
  --title "Remove duplicate fence detection code in src/wrap.rs" \
  --body "## Issue Description

There is duplicate fence detection logic in \`src/wrap.rs\` that should be consolidated to use the implementation from the \`tokenize\` module.

## Current Problem

The file \`src/wrap.rs\` contains:
- A local \`FENCE_RE\` static regex
- A local \`is_fence\` function that uses this regex

This duplicates the fence detection logic already implemented in the \`tokenize\` module.

## Proposed Solution

Remove the local \`FENCE_RE\` static and update the \`is_fence\` function to call \`crate::tokenize::is_fence(line)\` instead.

## References

- PR: https://github.com/leynos/mdtablefix/pull/147
- Comment: https://github.com/leynos/mdtablefix/pull/147#discussion_r2249003063
- Requested by: @leynos

## Code Location

- File: \`src/wrap.rs\`
- Lines: ~23-24 and ~213" \
  --assignee leynos

Length of output: 927


@leynos I've successfully created GitHub issue #177 for removing the duplicate fence detection code in src/wrap.rs.

Issue URL: #177

The issue has been assigned to you and includes:

  • A detailed description of the duplicate code problem
  • The proposed solution to consolidate the fence detection logic
  • References to this PR and comment thread
  • The specific file and line locations affected

🧬 Code Graph Analysis Results

Relevant snippets from src/wrap/tokenize.rs lines 71-120:

/// Splits inline Markdown text into wrap-safe tokens.
///
/// Whitespace runs, inline code spans (balanced backtick fences), links and
/// images, and plain words each become one token, so higher-level wrapping
/// never breaks inside a code span or a link.
pub(super) fn segment_inline(text: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    // Work on a char vector so multi-byte characters index cleanly.
    let chars: Vec<char> = text.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        let c = chars[i];
        if c.is_whitespace() {
            // Group consecutive whitespace into a single token.
            let start = i;
            i = scan_while(&chars, i, char::is_whitespace);
            tokens.push(collect_range(&chars, start, i));
        } else if c == '`' {
            // Opening backtick run; its length is the fence size that the
            // closing run must match exactly.
            let start = i;
            let fence_end = scan_while(&chars, i, |ch| ch == '`');
            let fence_len = fence_end - start;
            i = fence_end;

            // Scan forward for a backtick run of exactly `fence_len`.
            let mut end = i;
            while end < chars.len() {
                let j = scan_while(&chars, end, |ch| ch == '`');
                if j - end == fence_len {
                    end = j;
                    break;
                }
                end += 1;
            }

            if end >= chars.len() {
                // No closing fence: emit the dangling backticks alone and
                // continue scanning immediately after them.
                tokens.push(collect_range(&chars, start, start + fence_len));
                i = start + fence_len;
            } else {
                // Balanced span: keep fences and contents as one token.
                tokens.push(collect_range(&chars, start, end));
                i = end;
            }
        } else if c == '[' || (c == '!' && i + 1 < chars.len() && chars[i + 1] == '[') {
            // Link `[...]` or image `![...]`: parsed as one unit, with any
            // trailing punctuation emitted as a separate token.
            let (tok, mut new_i) = parse_link_or_image(&chars, i);
            tokens.push(tok);
            let punct_start = new_i;
            new_i = scan_while(&chars, new_i, is_trailing_punctuation);
            if new_i > punct_start {
                tokens.push(collect_range(&chars, punct_start, new_i));
            }
            i = new_i;
        } else {
            // Plain word: runs until whitespace or a backtick.
            let start = i;
            i = scan_while(&chars, i, |ch| !ch.is_whitespace() && ch != '`');
            tokens.push(collect_range(&chars, start, i));
        }
    }
    tokens
}
  • This function segments a markdown inline text into tokens, preserving whitespace, inline code spans (backticks), links/images, and punctuation.
  • Returns a vector of string tokens.
  • Uses helper functions scan_while, collect_range, and parse_link_or_image (not shown).
  • Handles inline code spans by matching balanced backticks.
  • Recognizes links/images starting with [ or ![.
  • Whitespace sequences are grouped as single tokens.

Relevant snippet from src/wrap/tokenize.rs lines 158-192:

/// Tokenizes a Markdown `source` string into a flat stream of [`Token`]s.
///
/// Fence delimiter lines and every line inside an open fence are emitted
/// verbatim as [`Token::Fence`]; all other lines are tokenized inline. A
/// [`Token::Newline`] separates consecutive lines but never follows the
/// final line.
pub fn tokenize_markdown(source: &str) -> Vec<Token<'_>> {
    if source.is_empty() {
        return Vec::new();
    }

    let lines: Vec<&str> = source.split('\n').collect();
    let last = lines.len() - 1;
    let mut tokens = Vec::new();
    let mut inside_fence = false;

    for (idx, line) in lines.iter().enumerate() {
        let delimiter = super::is_fence(line);
        if delimiter || inside_fence {
            // Delimiter lines toggle the fence state; body lines within an
            // open fence pass through untokenized.
            tokens.push(Token::Fence(line));
            if delimiter {
                inside_fence = !inside_fence;
            }
        } else {
            tokenize_inline(line, &mut |tok| tokens.push(tok));
        }
        if idx != last {
            tokens.push(Token::Newline);
        }
    }
    tokens
}
  • Tokenizes a markdown source string into a vector of Token enums.
  • Handles fenced code blocks by toggling in_fence state and emitting Token::Fence.
  • For non-fenced lines, calls tokenize_inline to tokenize inline elements.
  • Inserts Token::Newline tokens between lines except after the last line.
  • Uses super::is_fence to detect fenced code block delimiters.

These two snippets are directly relevant because src/wrap.rs publicly re-exports tokenize::Token and tokenize::tokenize_markdown, and internally uses tokenize::segment_inline in the wrap_preserving_code function to split text into tokens for wrapping while preserving inline code spans and links.

No other snippets from the provided files appear directly relevant to understanding or addressing the contents of src/wrap.rs.


static BULLET_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)").unwrap());
static BULLET_RE: std::sync::LazyLock<Regex> = lazy_regex!(
r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)",
"bullet pattern regex should compile",
);

static FOOTNOTE_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^(\s*)(\[\^[^]]+\]:\s*)(.*)$").unwrap());
static FOOTNOTE_RE: std::sync::LazyLock<Regex> = lazy_regex!(
r"^(\s*)(\[\^[^]]+\]:\s*)(.*)$",
"footnote pattern regex should compile",
);

static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:>\s*)+)(.*)$").unwrap());
static BLOCKQUOTE_RE: std::sync::LazyLock<Regex> = lazy_regex!(
r"^(\s*(?:>\s*)+)(.*)$",
"blockquote pattern regex should compile",
);

/// Matches `markdownlint` comment directives.
///
Expand All @@ -48,42 +54,6 @@ static MARKDOWNLINT_DIRECTIVE_RE: std::sync::LazyLock<Regex> = std::sync::LazyLo
.expect("valid markdownlint regex")
});

/// Table-driven dispatch for line prefixes (bullets, footnotes, blockquotes)
/// that require special handling when wrapping.
struct PrefixHandler {
    // Regex whose captures split the line into prefix and remainder.
    re: &'static std::sync::LazyLock<Regex>,
    // Forwarded to `handle_prefix_line` to flag blockquote handling.
    is_bq: bool,
    // Builds the prefix string from the regex captures.
    build_prefix: fn(&Captures) -> String,
    // Index of the capture group holding the text after the prefix.
    rest_group: usize,
}

impl PrefixHandler {
    // Bullet prefix is the whole marker-plus-indent capture (group 1).
    fn build_bullet_prefix(cap: &Captures) -> String { cap[1].to_string() }

    // Footnote prefix joins the leading indent (group 1) with the
    // `[^id]:` marker (group 2).
    fn build_footnote_prefix(cap: &Captures) -> String { format!("{}{}", &cap[1], &cap[2]) }

    // Blockquote prefix is the run of `>` markers (group 1).
    fn build_blockquote_prefix(cap: &Captures) -> String { cap[1].to_string() }
}

// Handlers are tried in order; the first matching regex wins.
static HANDLERS: &[PrefixHandler] = &[
    PrefixHandler {
        re: &BULLET_RE,
        is_bq: false,
        build_prefix: PrefixHandler::build_bullet_prefix,
        rest_group: 2,
    },
    PrefixHandler {
        re: &FOOTNOTE_RE,
        is_bq: false,
        build_prefix: PrefixHandler::build_footnote_prefix,
        rest_group: 3,
    },
    PrefixHandler {
        re: &BLOCKQUOTE_RE,
        is_bq: true,
        build_prefix: PrefixHandler::build_blockquote_prefix,
        rest_group: 2,
    },
];

fn wrap_preserving_code(text: &str, width: usize) -> Vec<String> {
use unicode_width::UnicodeWidthStr;

Expand Down Expand Up @@ -283,8 +253,8 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
let mut indent = String::new();
let mut in_code = false;

'line_loop: for line in lines {
if FENCE_RE.is_match(line) {
for line in lines {
if is_fence(line) {
flush_paragraph(&mut out, &buf, &indent, width);
buf.clear();
indent.clear();
Expand Down Expand Up @@ -330,21 +300,31 @@ pub fn wrap_text(lines: &[String], width: usize) -> Vec<String> {
continue;
Comment thread
leynos marked this conversation as resolved.
}

for handler in HANDLERS {
if let Some(cap) = handler.re.captures(line) {
let prefix = (handler.build_prefix)(&cap);
let rest = cap.get(handler.rest_group).unwrap().as_str();
handle_prefix_line(
&mut out,
&mut buf,
&mut indent,
width,
&prefix,
rest,
handler.is_bq,
);
continue 'line_loop;
}
if let Some(cap) = BULLET_RE.captures(line) {
let prefix = cap.get(1).expect("bullet regex capture").as_str();
let rest = cap.get(2).expect("bullet regex remainder capture").as_str();
handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, false);
continue;
}

if let Some(cap) = FOOTNOTE_RE.captures(line) {
let prefix = format!("{}{}", &cap[1], &cap[2]);
let rest = cap
.get(3)
.expect("footnote regex remainder capture")
.as_str();
handle_prefix_line(&mut out, &mut buf, &mut indent, width, &prefix, rest, false);
continue;
}

if let Some(cap) = BLOCKQUOTE_RE.captures(line) {
let prefix = cap.get(1).expect("blockquote prefix capture").as_str();
let rest = cap
.get(2)
.expect("blockquote regex remainder capture")
.as_str();
handle_prefix_line(&mut out, &mut buf, &mut indent, width, prefix, rest, true);
continue;
}

if buf.is_empty() {
Expand Down
2 changes: 1 addition & 1 deletion tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ macro_rules! lines_vec {
///
/// Example:
/// ```
/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Remove trailing whitespace from documentation example.

The added trailing space serves no functional purpose and may trigger linting warnings.

-/// let input: Vec<String> = include_lines!("data/bold_header_input.txt"); 
+/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
/// let input: Vec<String> = include_lines!("data/bold_header_input.txt");
🤖 Prompt for AI Agents
In tests/common/mod.rs at line 22, remove the trailing whitespace from the
documentation example line containing
include_lines!("data/bold_header_input.txt"); to prevent linting warnings and
keep the code clean.

/// ```
#[expect(unused_macros, reason = "macros are optional helpers across modules")]
macro_rules! include_lines {
Expand Down
106 changes: 106 additions & 0 deletions tests/wrap_unit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
use mdtablefix::wrap::wrap_text;

#[test]
fn wrap_text_preserves_hyphenated_words() {
    // A hyphenated compound must move to the next line whole rather than
    // being split at the hyphen.
    let input = [String::from("A word that is very-long-word indeed")];
    let expected = ["A word that is", "very-long-word", "indeed"];
    assert_eq!(wrap_text(&input, 20), expected);
}

#[test]
fn wrap_text_does_not_insert_spaces_in_hyphenated_words() {
    // Regression check: "elit-sed" must survive wrapping without a space
    // appearing around its hyphen.
    let text = concat!(
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt ",
        "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur ",
        "volutpat."
    );
    let wrapped = wrap_text(&[text.to_string()], 80);
    let expected = [
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt",
        "elit-sed fermentum congue. Vivamus dictum nulla sed consectetur volutpat.",
    ];
    assert_eq!(wrapped, expected);
}

#[test]
fn wrap_text_preserves_code_spans() {
    // Inline code spans must never be broken across wrapped lines.
    let text = concat!(
        "with their own escaping rules. On Windows, scripts default to ",
        "`powershell -Command` unless the manifest's `interpreter` field ",
        "overrides the setting."
    );
    let wrapped = wrap_text(&[text.to_string()], 60);
    let expected = [
        "with their own escaping rules. On Windows, scripts default",
        "to `powershell -Command` unless the manifest's",
        "`interpreter` field overrides the setting.",
    ];
    assert_eq!(wrapped, expected);
}

#[test]
fn wrap_text_multiple_code_spans() {
    // Two code spans on one line must each stay intact after wrapping.
    let input = [String::from("combine `foo bar` and `baz qux` in one line")];
    let expected = ["combine `foo bar` and", "`baz qux` in one line"];
    assert_eq!(wrap_text(&input, 25), expected);
}

#[test]
fn wrap_text_nested_backticks() {
    // A double-backtick fence quoting a single backtick span is one
    // indivisible token.
    let input = [String::from("Use `` `code` `` to quote backticks")];
    let expected = ["Use `` `code` `` to", "quote backticks"];
    assert_eq!(wrap_text(&input, 20), expected);
}

#[test]
fn wrap_text_unmatched_backticks() {
    // An unclosed backtick is treated as part of an ordinary word, so the
    // line still wraps at whitespace.
    let input = [String::from("This has a `dangling code span.")];
    let expected = ["This has a", "`dangling code span."];
    assert_eq!(wrap_text(&input, 20), expected);
}

#[test]
fn wrap_text_preserves_links() {
    // A Markdown link target must survive re-wrapping exactly once and
    // without being split across lines.
    let input: Vec<String> = [
        "`falcon-pachinko` is an extension library for the",
        "[Falcon](https://falcon.readthedocs.io) web framework. It adds a structured",
        "approach to asynchronous WebSocket routing and background worker integration.",
    ]
    .iter()
    .map(|s| (*s).to_string())
    .collect();
    let wrapped = wrap_text(&input, 80);
    assert_eq!(wrapped.join("\n").matches("https://").count(), 1);
    assert!(
        wrapped
            .iter()
            .any(|l| l.contains("https://falcon.readthedocs.io"))
    );
}
Loading