diff --git a/Cargo.lock b/Cargo.lock index b29d29fd..0c640cf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -158,6 +158,31 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "difflib" version = "0.4.0" @@ -170,6 +195,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "errno" version = "0.3.13" @@ -406,6 +437,7 @@ dependencies = [ "libc", "markup5ever_rcdom", "once_cell", + "rayon", "regex", "rstest", "tempfile", @@ -581,6 +613,26 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + 
"either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.13" diff --git a/Cargo.toml b/Cargo.toml index e025ec42..dad1e79b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ anyhow = "1" clap = { version = "4", features = ["derive"] } regex = "1" once_cell = "1" +rayon = "^1.0" html5ever = "0.27" markup5ever_rcdom = "0.3" unicode-width = ">=0.1, <0.2" diff --git a/README.md b/README.md index 85cc9a2e..82d28942 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ cargo install --path . ## Command-line usage - ```bash mdtablefix [--wrap] [--renumber] [--breaks] [--ellipsis] [--fences] [--footnotes] [--in-place] [FILE...] ``` @@ -111,8 +110,8 @@ A brief intermission for pizza. ## Library usage -The crate exposes helper functions for embedding the table-reflow logic in -Rust projects: +The crate exposes helper functions for embedding the table-reflow logic in Rust +projects: ```rust use mdtablefix::{process_stream_opts, rewrite, Options}; @@ -159,8 +158,9 @@ For an overview of how the crate's internal modules relate to each other, see ## Testing -The test suite is structured using the `rstest` crate. See [Rust testing with -rstest fixtures](docs/rust-testing-with-rstest-fixtures.md) for details. +The test suite is structured using the `rstest` crate. See +[Rust testing with rstest fixtures](docs/rust-testing-with-rstest-fixtures.md) +for details. ## License diff --git a/docs/parallel-processing-roadmap.md b/docs/parallel-processing-roadmap.md index 4ab98234..4a19c9b1 100644 --- a/docs/parallel-processing-roadmap.md +++ b/docs/parallel-processing-roadmap.md @@ -4,15 +4,15 @@ The command-line tool currently processes input files sequentially. 
The steps below outline the work required to allow concurrent processing while preserving serial output order. -- [ ] **Adopt `rayon` for concurrency** +- [x] **Adopt `rayon` for concurrency** - Use `rayon` thread pools to spawn work for each file path. - Ensure the approach integrates cleanly with existing modules. -- [ ] **Add chosen crate to `Cargo.toml`** +- [x] **Add chosen crate to `Cargo.toml`** - Pin an explicit version and document the decision in `docs/`. -- [ ] **Refactor `main.rs` to launch parallel tasks** +- [x] **Refactor `main.rs` to launch parallel tasks** - Spawn a worker for each file path using the concurrency crate. - - Maintain a list of handles so outputs can be gathered in order. -- [ ] **Collect results sequentially** + - Maintain a list of handles, so outputs can be gathered in order. +- [x] **Collect results sequentially** - Await or join handles in the same order the files were supplied. - Print each processed file or error message before moving to the next. - [ ] **Extend tests for parallel execution** diff --git a/docs/rayon-concurrency.md b/docs/rayon-concurrency.md new file mode 100644 index 00000000..7412f609 --- /dev/null +++ b/docs/rayon-concurrency.md @@ -0,0 +1,7 @@ +# Concurrency with `rayon` + +`mdtablefix` uses the `rayon` crate to process multiple files concurrently. +`rayon` provides a work-stealing thread pool and simple parallel iterators. The +tool relies on Rayon’s global thread pool so that no manual setup is required. +The dependency is specified as `^1.0` in `Cargo.toml` to track stable API +changes within the same major release. diff --git a/src/main.rs b/src/main.rs index fa2eabe4..42266576 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,9 @@ +//! Command-line interface for the mdtablefix tool. +//! +//! This module provides the main entry point and CLI parsing for fixing +//! markdown table formatting. It supports concurrent processing of multiple +//! files using Rayon for improved performance. 
+ use std::{ borrow::Cow, fs, @@ -7,6 +13,7 @@ use std::{ use clap::Parser; use mdtablefix::{Options, format_breaks, process_stream_opts, renumber_lists}; +use rayon::prelude::*; #[derive(Parser)] #[command(about = "Reflow broken markdown tables")] @@ -67,11 +74,17 @@ fn process_lines(lines: &[String], opts: FormatOpts) -> Vec<String> { out } -fn rewrite_path(path: &Path, opts: FormatOpts) -> std::io::Result<()> { +fn handle_file(path: &Path, in_place: bool, opts: FormatOpts) -> anyhow::Result<Option<String>> { let content = fs::read_to_string(path)?; let lines: Vec<String> = content.lines().map(str::to_string).collect(); - let fixed = process_lines(&lines, opts); - fs::write(path, fixed.join("\n") + "\n") + let fixed = process_lines(&lines, opts).join("\n"); + + if in_place { + fs::write(path, format!("{fixed}\n"))?; + Ok(None) + } else { + Ok(Some(fixed)) + } } /// Entry point for the command-line tool that reflows broken markdown tables. @@ -109,14 +122,22 @@ fn main() -> anyhow::Result<()> { return Ok(()); } - for path in cli.files { - if cli.in_place { - rewrite_path(&path, cli.opts)?; - } else { - let content = fs::read_to_string(&path)?; - let lines: Vec<String> = content.lines().map(str::to_string).collect(); - let fixed = process_lines(&lines, cli.opts); - println!("{}", fixed.join("\n")); + if cli.in_place { + cli.files + .par_iter() + .try_for_each(|p| handle_file(p, true, cli.opts).map(|_| ()))?; + } else { + let outputs: Vec<String> = cli + .files + .par_iter() + .map(|p| handle_file(p, false, cli.opts)) + .collect::<anyhow::Result<Vec<_>>>()? + .into_iter() + .flatten() + .collect(); + + for out in outputs { + println!("{out}"); } }