From b004c3e8f17239379bd60c7d646dc4bfdee2e806 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 1 Aug 2025 11:01:41 +0100 Subject: [PATCH 1/3] Add wrap+renumber regression test --- src/wrap.rs | 7 +- .../wrap_renumber_regression_expected.txt | 71 +++++++++++++++++++ tests/data/wrap_renumber_regression_input.txt | 30 ++++++++ tests/wrap_renumber.rs | 17 +++++ 4 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 tests/data/wrap_renumber_regression_expected.txt create mode 100644 tests/data/wrap_renumber_regression_input.txt create mode 100644 tests/wrap_renumber.rs diff --git a/src/wrap.rs b/src/wrap.rs index b26688ee..8dc92dea 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -8,11 +8,12 @@ use regex::{Captures, Regex}; mod tokenize; -/// Token emitted by [`tokenize::segment_inline`] and used by higher-level wrappers. +/// Token emitted by [`tokenize::segment_inline`] and used by higher-level +/// wrappers. /// -/// Re-export this so callers of [`crate::textproc`] can implement custom +/// Re-export these so callers of [`crate::textproc`] can implement custom /// transformations without depending on internal modules. -pub use tokenize::Token; +pub use tokenize::{Token, tokenize_markdown}; static FENCE_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").unwrap()); diff --git a/tests/data/wrap_renumber_regression_expected.txt b/tests/data/wrap_renumber_regression_expected.txt new file mode 100644 index 00000000..c7cf2eda --- /dev/null +++ b/tests/data/wrap_renumber_regression_expected.txt @@ -0,0 +1,71 @@ +1. People following @[vee.cool](http://vee.cool) — Bluesky, accessed on July + 20, 2025, +2. Canop/termimad: A library to display rich (Markdown) snippets and texts in a + rust terminal application - GitHub, accessed on July 20, 2025, + +3. Termimad: use Markdown to display rich text in a terminal application - Rust + Users Forum, accessed on July 20, 2025, + +4. 
termimad - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + +5. The Hitchhiker's Guide to E2E Testing | by Tally Barak - Medium, accessed on + July 20, 2025, + +6. How to Write Tests - The Rust Programming Language - Rust Documentation, + accessed on July 20, 2025, + +7. termimad - [crates.io](http://crates.io): Rust Package Registry, accessed on + July 20, 2025, +8. assert_cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + +9. assert_cmd - [crates.io](http://crates.io): Rust Package Registry, accessed + on July 20, 2025, +10. assert-rs/assert_cmd - Command - GitHub, accessed on July 20, 2025, + +11. campbellC/third-wheel: A rust implementation of a man-in-the-middle proxy + for whatever - GitHub, accessed on July 20, 2025, + +12. Overview | Insta Snapshots, accessed on July 20, 2025, + +13. insta - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + +14. Insta Snapshots, accessed on July 20, 2025, +15. Testing - Command Line Applications in Rust, accessed on July 20, 2025, + +16. Test Organization - The Rust Programming Language, accessed on July 20, + 2025, +17. insta - [crates.io](http://crates.io): Rust Package Registry, accessed on + July 20, 2025, +18. third-wheel - [crates.io](http://crates.io): Rust Package Registry, + accessed on July 20, 2025, +19. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + +20. Should unit tests really be put in the same file as the source? - Rust + Users Forum, accessed on July 20, 2025, + +21. Skeleton And Principles For A Maintainable Test Suite | Luca Palmieri, + accessed on July 20, 2025, + +22. Command in assert_cmd::cmd - Rust - [Docs.rs](http://Docs.rs), accessed on + July 20, 2025, + +23. How I test Rust command-line apps with assert_cmd - alexwlchan, accessed on + July 20, 2025, + +24. assert_cmd for n00bs : r/rust - Reddit, accessed on July 20, 2025, + +25. Snapshot Testing - Rust Project Primer, accessed on July 20, 2025, + +26. 
Snapshot testing - Advanced Rust testing - Rust Exercises, accessed on July + 20, 2025, + +27. insta - Rust, accessed on July 20, 2025, + +28. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + +29. Complete Guide To Testing Code In Rust | Zero To Mastery, accessed on July + 20, 2025, + +30. Ultimate Guide to Testing and Debugging Rust Code | 2024 - Rapid + Innovation, accessed on July 20, 2025, + diff --git a/tests/data/wrap_renumber_regression_input.txt b/tests/data/wrap_renumber_regression_input.txt new file mode 100644 index 00000000..131361fd --- /dev/null +++ b/tests/data/wrap_renumber_regression_input.txt @@ -0,0 +1,30 @@ +1. People following @[vee.cool](http://vee.cool) — Bluesky, accessed on July 20, 2025, +2. Canop/termimad: A library to display rich (Markdown) snippets and texts in a rust terminal application - GitHub, accessed on July 20, 2025, +3. Termimad: use Markdown to display rich text in a terminal application - Rust Users Forum, accessed on July 20, 2025, +4. termimad - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, +5. The Hitchhiker's Guide to E2E Testing | by Tally Barak - Medium, accessed on July 20, 2025, +6. How to Write Tests - The Rust Programming Language - Rust Documentation, accessed on July 20, 2025, +7. termimad - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, +8. assert_cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, +9. assert_cmd - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, +10. assert-rs/assert_cmd - Command - GitHub, accessed on July 20, 2025, +11. campbellC/third-wheel: A rust implementation of a man-in-the-middle proxy for whatever - GitHub, accessed on July 20, 2025, +12. Overview | Insta Snapshots, accessed on July 20, 2025, +13. insta - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, +14. Insta Snapshots, accessed on July 20, 2025, +15. 
Testing - Command Line Applications in Rust, accessed on July 20, 2025, +16. Test Organization - The Rust Programming Language, accessed on July 20, 2025, +17. insta - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, +18. third-wheel - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, +19. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, +20. Should unit tests really be put in the same file as the source? - Rust Users Forum, accessed on July 20, 2025, +21. Skeleton And Principles For A Maintainable Test Suite | Luca Palmieri, accessed on July 20, 2025, +22. Command in assert_cmd::cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, +23. How I test Rust command-line apps with assert_cmd - alexwlchan, accessed on July 20, 2025, +24. assert_cmd for n00bs : r/rust - Reddit, accessed on July 20, 2025, +25. Snapshot Testing - Rust Project Primer, accessed on July 20, 2025, +26. Snapshot testing - Advanced Rust testing - Rust Exercises, accessed on July 20, 2025, +27. insta - Rust, accessed on July 20, 2025, +28. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, +29. Complete Guide To Testing Code In Rust | Zero To Mastery, accessed on July 20, 2025, +30. Ultimate Guide to Testing and Debugging Rust Code | 2024 - Rapid Innovation, accessed on July 20, 2025, diff --git a/tests/wrap_renumber.rs b/tests/wrap_renumber.rs new file mode 100644 index 00000000..ae7c0ad6 --- /dev/null +++ b/tests/wrap_renumber.rs @@ -0,0 +1,17 @@ +//! Regression test for combined wrapping and renumbering. 
+ +use mdtablefix::{process_stream, renumber_lists}; + +#[macro_use] +mod prelude; + +#[test] +fn wrap_then_renumber_preserves_order() { + let input: Vec<String> = include_lines!("data/wrap_renumber_regression_input.txt"); + let expected: Vec<String> = include_lines!("data/wrap_renumber_regression_expected.txt"); + + let mut out = process_stream(&input); + out = renumber_lists(&out); + + assert_eq!(out, expected); +} From 7329f01f1b8171b67036dfa8ee4edb5aa7f2c8a5 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 1 Aug 2025 11:32:08 +0100 Subject: [PATCH 2/3] Add blank lines in wrap renumber fixtures --- docs/architecture.md | 8 +++-- src/lib.rs | 2 +- .../wrap_renumber_regression_expected.txt | 29 +++++++++++++++++++ tests/data/wrap_renumber_regression_input.txt | 29 +++++++++++++++++++ 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index d4f52a9b..c861bed1 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -268,9 +268,11 @@ The `lib` module re-exports the public API from the other modules. The references. The `textproc` module contains shared token-processing helpers used by both the `ellipsis` and `footnotes` modules. Tokenization is handled by `wrap::tokenize_markdown`, replacing the small state machine that previously -resided in `process_tokens`. The `process` module provides streaming helpers -that combine the lower-level functions. The `io` module handles filesystem -operations, delegating the text processing to `process`. +resided in `process_tokens`. Both `Token` and `tokenize_markdown` are +re-exported by the crate root for use by downstream tools. The `process` module +provides streaming helpers that combine the lower-level functions. The `io` +module handles filesystem operations, delegating the text processing to +`process`. The helper `html_table_to_markdown` is retained for backward compatibility but is deprecated. New code should call `convert_html_tables` instead. 
diff --git a/src/lib.rs b/src/lib.rs index 3edae610..47353966 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,4 +48,4 @@ pub use io::{rewrite, rewrite_no_wrap}; pub use lists::renumber_lists; pub use process::{Options, process_stream, process_stream_no_wrap, process_stream_opts}; pub use table::{reflow_table, split_cells}; -pub use wrap::{is_fence, wrap_text}; +pub use wrap::{Token, is_fence, tokenize_markdown, wrap_text}; diff --git a/tests/data/wrap_renumber_regression_expected.txt b/tests/data/wrap_renumber_regression_expected.txt index c7cf2eda..52329fbf 100644 --- a/tests/data/wrap_renumber_regression_expected.txt +++ b/tests/data/wrap_renumber_regression_expected.txt @@ -1,71 +1,100 @@ 1. People following @[vee.cool](http://vee.cool) — Bluesky, accessed on July 20, 2025, + 2. Canop/termimad: A library to display rich (Markdown) snippets and texts in a rust terminal application - GitHub, accessed on July 20, 2025, + 3. Termimad: use Markdown to display rich text in a terminal application - Rust Users Forum, accessed on July 20, 2025, + 4. termimad - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 5. The Hitchhiker's Guide to E2E Testing | by Tally Barak - Medium, accessed on July 20, 2025, + 6. How to Write Tests - The Rust Programming Language - Rust Documentation, accessed on July 20, 2025, + 7. termimad - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 8. assert_cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 9. assert_cmd - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 10. assert-rs/assert_cmd - Command - GitHub, accessed on July 20, 2025, + 11. campbellC/third-wheel: A rust implementation of a man-in-the-middle proxy for whatever - GitHub, accessed on July 20, 2025, + 12. Overview | Insta Snapshots, accessed on July 20, 2025, + 13. insta - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 14. 
Insta Snapshots, accessed on July 20, 2025, + 15. Testing - Command Line Applications in Rust, accessed on July 20, 2025, + 16. Test Organization - The Rust Programming Language, accessed on July 20, 2025, + 17. insta - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 18. third-wheel - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 19. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 20. Should unit tests really be put in the same file as the source? - Rust Users Forum, accessed on July 20, 2025, + 21. Skeleton And Principles For A Maintainable Test Suite | Luca Palmieri, accessed on July 20, 2025, + 22. Command in assert_cmd::cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 23. How I test Rust command-line apps with assert_cmd - alexwlchan, accessed on July 20, 2025, + 24. assert_cmd for n00bs : r/rust - Reddit, accessed on July 20, 2025, + 25. Snapshot Testing - Rust Project Primer, accessed on July 20, 2025, + 26. Snapshot testing - Advanced Rust testing - Rust Exercises, accessed on July 20, 2025, + 27. insta - Rust, accessed on July 20, 2025, + 28. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 29. Complete Guide To Testing Code In Rust | Zero To Mastery, accessed on July 20, 2025, + 30. Ultimate Guide to Testing and Debugging Rust Code | 2024 - Rapid Innovation, accessed on July 20, 2025, diff --git a/tests/data/wrap_renumber_regression_input.txt b/tests/data/wrap_renumber_regression_input.txt index 131361fd..3fd744a6 100644 --- a/tests/data/wrap_renumber_regression_input.txt +++ b/tests/data/wrap_renumber_regression_input.txt @@ -1,30 +1,59 @@ 1. People following @[vee.cool](http://vee.cool) — Bluesky, accessed on July 20, 2025, + 2. Canop/termimad: A library to display rich (Markdown) snippets and texts in a rust terminal application - GitHub, accessed on July 20, 2025, + 3. 
Termimad: use Markdown to display rich text in a terminal application - Rust Users Forum, accessed on July 20, 2025, + 4. termimad - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 5. The Hitchhiker's Guide to E2E Testing | by Tally Barak - Medium, accessed on July 20, 2025, + 6. How to Write Tests - The Rust Programming Language - Rust Documentation, accessed on July 20, 2025, + 7. termimad - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 8. assert_cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 9. assert_cmd - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 10. assert-rs/assert_cmd - Command - GitHub, accessed on July 20, 2025, + 11. campbellC/third-wheel: A rust implementation of a man-in-the-middle proxy for whatever - GitHub, accessed on July 20, 2025, + 12. Overview | Insta Snapshots, accessed on July 20, 2025, + 13. insta - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 14. Insta Snapshots, accessed on July 20, 2025, + 15. Testing - Command Line Applications in Rust, accessed on July 20, 2025, + 16. Test Organization - The Rust Programming Language, accessed on July 20, 2025, + 17. insta - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 18. third-wheel - [crates.io](http://crates.io): Rust Package Registry, accessed on July 20, 2025, + 19. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 20. Should unit tests really be put in the same file as the source? - Rust Users Forum, accessed on July 20, 2025, + 21. Skeleton And Principles For A Maintainable Test Suite | Luca Palmieri, accessed on July 20, 2025, + 22. Command in assert_cmd::cmd - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 23. How I test Rust command-line apps with assert_cmd - alexwlchan, accessed on July 20, 2025, + 24. assert_cmd for n00bs : r/rust - Reddit, accessed on July 20, 2025, + 25. 
Snapshot Testing - Rust Project Primer, accessed on July 20, 2025, + 26. Snapshot testing - Advanced Rust testing - Rust Exercises, accessed on July 20, 2025, + 27. insta - Rust, accessed on July 20, 2025, + 28. tempfile - Rust - [Docs.rs](http://Docs.rs), accessed on July 20, 2025, + 29. Complete Guide To Testing Code In Rust | Zero To Mastery, accessed on July 20, 2025, + 30. Ultimate Guide to Testing and Debugging Rust Code | 2024 - Rapid Innovation, accessed on July 20, 2025, From 8d71afb700c8604d8ca8046a3e80a69ae7a098c5 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 1 Aug 2025 12:19:54 +0100 Subject: [PATCH 3/3] Use expect for fence regex --- src/wrap.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wrap.rs b/src/wrap.rs index 8dc92dea..cd01d31d 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -16,7 +16,7 @@ mod tokenize; pub use tokenize::{Token, tokenize_markdown}; static FENCE_RE: std::sync::LazyLock<Regex> = - std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").unwrap()); + std::sync::LazyLock::new(|| Regex::new(r"^\s*(```|~~~).*").expect("valid fence regex")); static BULLET_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new(r"^(\s*(?:[-*+]|\d+[.)])\s+)(.*)").unwrap());