From 0e0bd9a48fb9dfc1034b168f8fb4c9657008e4ca Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 30 Jul 2025 00:03:09 +0100 Subject: [PATCH] Add lazy_regex macro and clarify regex error messages --- AGENTS.md | 32 ++++++++++---------- README.md | 2 +- src/ellipsis.rs | 5 ++-- src/footnotes.rs | 17 ++++++----- src/html.rs | 9 ++++-- src/lib.rs | 7 +++++ src/wrap.rs | 76 +++++++++++++++++++++++++++++++++--------------- 7 files changed, 97 insertions(+), 51 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d6350ace..06f65d12 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,8 +9,8 @@ - **Clarity over cleverness.** Be concise, but favour explicit over terse or obscure idioms. Prefer code that's easy to follow. - **Use functions and composition.** Avoid repetition by extracting reusable - logic. Prefer generators or comprehensions, and declarative code to imperative - repetition when readable. + logic. Prefer generators or comprehensions, and declarative code to + imperative repetition when readable. - **Small, meaningful functions.** Functions must be small, clear in purpose, single responsibility, and obey command/query segregation. - **Clear commit messages.** Commit messages should be descriptive, explaining @@ -25,12 +25,14 @@ ("-ize" / "-yse" / "-our") spelling and grammar, with the exception of references to external APIs. - **Illustrate with clear examples.** Function documentation must include clear - examples demonstrating the usage and outcome of the function. Test documentation - should omit examples where the example serves only to reiterate the test logic. -- **Keep file size managable.** No single code file may be longer than 400 lines. + examples demonstrating the usage and outcome of the function. Test + documentation should omit examples where the example serves only to reiterate + the test logic. +- **Keep file size managable.** No single code file may be longer than 400 + lines. 
Long switch statements or dispatch tables should be broken up by feature and - constituents colocated with targets. Large blocks of test data should be moved - to external data files. + constituents colocated with targets. Large blocks of test data should be + moved to external data files. ## Documentation Maintenance @@ -42,8 +44,8 @@ relevant file(s) in the `docs/` directory to reflect the latest state. **Ensure the documentation remains accurate and current.** - Documentation must use en-GB-oxendict ("-ize" / "-yse" / "-our") spelling - and grammar. (EXCEPTION: the naming of the "LICENSE" file, which - is to be left unchanged for community consistency.) + and grammar. (EXCEPTION: the naming of the "LICENSE" file, which is to be + left unchanged for community consistency.) ## Change Quality & Committing @@ -153,19 +155,19 @@ project: specified in `Cargo.toml` must use SemVer-compatible caret requirements (e.g., `some-crate = "1.2.3"`). This is Cargo's default and allows for safe, non-breaking updates to minor and patch versions while preventing breaking - changes from new major versions. This approach is critical for ensuring - build stability and reproducibility. + changes from new major versions. This approach is critical for ensuring build + stability and reproducibility. - **Prohibit unstable version specifiers.** The use of wildcard (`*`) or - open-ended inequality (`>=`) version requirements is strictly forbidden - as they introduce unacceptable risk and unpredictability. Tilde requirements + open-ended inequality (`>=`) version requirements is strictly forbidden as + they introduce unacceptable risk and unpredictability. Tilde requirements (`~`) should only be used where a dependency must be locked to patch-level updates for a specific, documented reason. ### Error Handling - **Prefer semantic error enums**. Derive `std::error::Error` (via the - `thiserror` crate) for any condition the caller might inspect, retry, or - map to an HTTP status. 
+ `thiserror` crate) for any condition the caller might inspect, retry, or map + to an HTTP status. - **Use an *opaque* error only at the app boundary**. Use `eyre::Report` for human-readable logs; these should not be exposed in public APIs. - **Never export the opaque type from a library**. Convert to domain enums at diff --git a/README.md b/README.md index a7027606..4934a7c9 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ alongside regular Markdown tables. See [HTML table support for more details](docs/architecture.md#html-table-support-in-mdtablefix) -. + . ## Module structure diff --git a/src/ellipsis.rs b/src/ellipsis.rs index 2f51565b..1b84f0b1 100644 --- a/src/ellipsis.rs +++ b/src/ellipsis.rs @@ -5,12 +5,13 @@ //! complete triple remain. Fenced code blocks and inline code spans are left //! untouched. +use std::sync::LazyLock; + use regex::Regex; use crate::wrap::{Token, tokenize_markdown}; -static DOT_RE: std::sync::LazyLock<Regex> = - std::sync::LazyLock::new(|| Regex::new(r"\.{3,}").unwrap()); +static DOT_RE: LazyLock<Regex> = lazy_regex!(r"\.{3,}", "ellipsis pattern regex should compile"); /// Replace `...` with `…` outside code spans and fences. #[must_use] diff --git a/src/footnotes.rs b/src/footnotes.rs index cf494fbd..24b5491a 100644 --- a/src/footnotes.rs +++ b/src/footnotes.rs @@ -4,16 +4,19 @@ //! footnote links and rewrites the trailing numeric list into a footnote //! block. Only the final contiguous list of footnotes is processed. +use std::sync::LazyLock; + use regex::{Captures, Regex}; -static INLINE_FN_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| { - Regex::new(r"(?P
^|[^0-9])(?P[.!?);:])(?P