diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..2adadd58 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,63 @@ +name: Benchmarks + +# spell-checker:ignore codspeed dtolnay Swatinem sccache + +on: + pull_request: + push: + branches: + - '*' + +permissions: + contents: read # to fetch code (actions/checkout) + +# End the current execution if there is a new changeset in the PR. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +jobs: + benchmarks: + name: Run benchmarks (CodSpeed) + runs-on: ubuntu-latest + strategy: + matrix: + benchmark-target: + - { package: sed } + steps: + - uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Install system dependencies + shell: bash + run: | + sudo apt-get -y update + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 + + - name: Install cargo-codspeed + shell: bash + run: cargo install cargo-codspeed --locked + + - name: Build benchmarks for ${{ matrix.benchmark-target.package }} + shell: bash + run: | + echo "Building benchmarks for ${{ matrix.benchmark-target.package }}" + cargo codspeed build -p ${{ matrix.benchmark-target.package }} + + - name: Run benchmarks for ${{ matrix.benchmark-target.package }} + uses: CodSpeedHQ/action@v4 + env: + CODSPEED_LOG: debug + with: + mode: simulation + run: | + echo "Running benchmarks for ${{ matrix.benchmark-target.package }}" + cargo codspeed run -p ${{ matrix.benchmark-target.package }} > /dev/null + token: ${{ secrets.CODSPEED_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index cdfc4be8..4f72f82f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,6 +70,21 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "anyhow" +version = "1.0.100" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "assert_fs" version = "1.1.3" @@ -207,12 +222,88 @@ dependencies = [ "roff", ] +[[package]] +name = "codspeed" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b847e05a34be5c38f3f2a5052178a3bd32e6b5702f3ea775efde95c483a539" +dependencies = [ + "anyhow", + "cc", + "colored", + "getrandom 0.2.16", + "glob", + "libc", + "nix", + "serde", + "serde_json", + "statrs", +] + +[[package]] +name = "codspeed-divan-compat" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f0e9fe5eaa39995ec35e46407f7154346cc25bd1300c64c21636f3d00cb2cc" +dependencies = [ + "clap", + "codspeed", + "codspeed-divan-compat-macros", + "codspeed-divan-compat-walltime", + "regex", +] + +[[package]] +name = "codspeed-divan-compat-macros" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88c8babf2a40fd2206a2e030cf020d0d58144cd56e1dc408bfba02cdefb08b4f" +dependencies = [ + "divan-macros", + "itertools", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "codspeed-divan-compat-walltime" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f26092328e12a36704ffc552f379c6405dd94d3149970b79b22d371717c2aae" +dependencies = [ + "cfg-if", + "clap", + "codspeed", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + [[package]] name = "colorchoice" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -292,6 +383,17 @@ dependencies = [ "syn", ] +[[package]] +name = "divan-macros" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dns-lookup" version = "3.0.0" @@ -325,6 +427,18 @@ version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.12" @@ -406,6 +520,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.3.3" @@ -415,7 +540,7 @@ dependencies = [ "cfg-if", 
"libc", "r-efi", - "wasi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -448,6 +573,12 @@ dependencies = [ "walkdir", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "iana-time-zone" version = "0.1.63" @@ -488,6 +619,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "intl-memoizer" version = "0.5.3" @@ -513,6 +654,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -570,6 +720,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.178" @@ -803,6 +959,15 @@ dependencies = [ "yansi", ] +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -853,7 +1018,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom", + "getrandom 0.3.3", ] [[package]] @@ -879,6 +1044,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -925,6 +1096,12 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + [[package]] name = "same-file" version = "1.0.6" @@ -943,6 +1120,7 @@ dependencies = [ "clap", "clap_complete", "clap_mangen", + "codspeed-divan-compat", "ctor", "fancy-regex", "libc", @@ -973,24 +1151,47 @@ checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 
dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1025,6 +1226,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits", +] + [[package]] name = "strsim" version = "0.11.1" @@ -1063,7 +1274,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.3.3", "once_cell", "rustix", "windows-sys 0.60.2", @@ -1160,6 +1371,36 @@ dependencies = [ "zerovec", ] +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + [[package]] name = "type-map" version = "0.5.1" @@ -1218,6 +1459,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b5eddd390f3fdef74f104a948559e6de29203f60f8f563c8c9f528cd4c88ee78" dependencies = [ "clap", + "codspeed-divan-compat", "dns-lookup", "fluent", "fluent-bundle", @@ -1227,6 +1469,7 @@ dependencies = [ "nix", "os_display", "phf", + "tempfile", "thiserror", "time", "unic-langid", @@ -1272,6 +1515,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -1642,6 +1891,15 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" diff --git a/Cargo.toml b/Cargo.toml index 9b939314..6db28809 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ chrono = { version = "0.4.37", default-features = false, features = [ clap = { version = "4.4", features = ["wrap_help", "cargo"] } clap_complete = "4.5" clap_mangen = "0.2" +divan = { package = "codspeed-divan-compat", version = "4.0.5" } fancy-regex = "0.17.0" libc = "0.2.153" memchr = "2.7.4" @@ -75,12 +76,13 @@ uucore = { workspace = true } [dev-dependencies] chrono = { workspace = true } +divan = { workspace = true } libc = { workspace = true } pretty_assertions = "1" rand = { workspace = true } regex = { workspace = true } tempfile = { workspace = true } -uucore = { workspace = true, features = ["entries", "process", "signals"] } +uucore = { workspace = true, features = ["entries", "process", "signals", "benchmark"] } uutests = "0.5.0" [target.'cfg(unix)'.dev-dependencies] @@ -104,6 
+106,10 @@ name = "uudoc"
 path = "src/bin/uudoc.rs"
 required-features = ["uudoc"]
 
+[[bench]]
+name = "sed_bench"
+harness = false
+
 # The default release profile. It contains all optimizations, without
 # sacrificing debug info. With this profile (like in the standard
 # release profile), the debug info and the stack traces will still be available.
diff --git a/benches/sed_bench.rs b/benches/sed_bench.rs
new file mode 100644
index 00000000..58ac7bc6
--- /dev/null
+++ b/benches/sed_bench.rs
@@ -0,0 +1,155 @@
+// Benchmarks for the sed utility
+//
+// SPDX-License-Identifier: MIT
+//
+// This file is part of the uutils sed package.
+// It is licensed under the MIT License.
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use std::io::Write;
+
+use divan::{Bencher, black_box};
+use sed::sed::uumain;
+use uucore::benchmark::{create_test_file, run_util_function};
+
+/// Panic message for writes into an in-memory buffer.
+const VEC_WRITE: &str = "writing to a Vec<u8> cannot fail";
+
+/// Builds `line_count` synthetic access-log lines.
+///
+/// Only two octets of the client address vary between lines, so every line
+/// contains `Mozilla` and none contains `Chrome`. The benchmarks below use
+/// those two words as always-matching and never-matching patterns.
+fn access_log_data(line_count: usize) -> Vec<u8> {
+    let mut data: Vec<u8> = Vec::new();
+    for i in 0..line_count {
+        writeln!(
+            data,
+            "192.168.{}.{} - - [01/Jan/2024:00:00:00 +0000] \"GET /index.html HTTP/1.1\" 200 1234 \"-\" \"Mozilla/5.0\"",
+            (i / 256) % 256,
+            i % 256
+        )
+        .expect(VEC_WRITE);
+    }
+    data
+}
+
+/// Writes `data` to a file in a fresh temporary directory and benchmarks
+/// running sed with `script` on that file.
+///
+/// The input file is created before `bencher.bench` is called, so building
+/// and writing the data is not part of the benchmarked closure.
+fn bench_sed(bencher: Bencher, script: &str, data: &[u8]) {
+    let temp_dir = tempfile::tempdir().expect("failed to create temp dir");
+    let file_path = create_test_file(data, temp_dir.path());
+    let file_path_str = file_path
+        .to_str()
+        .expect("temporary file path is not valid UTF-8");
+
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[script, file_path_str]));
+    });
+}
+
+/// Benchmark no-op on short lines
+#[divan::bench]
+fn no_op_short(bencher: Bencher) {
+    let mut data: Vec<u8> = Vec::new();
+    for i in 0..10_000_000 {
+        writeln!(data, "{i}").expect(VEC_WRITE);
+    }
+    bench_sed(bencher, "", &data);
+}
+
+/// Benchmark access log no-op
+#[divan::bench]
+fn access_log_no_op(bencher: Bencher) {
+    bench_sed(bencher, "", &access_log_data(5_000_000));
+}
+
+/// Benchmark access log no substitution
+#[divan::bench]
+fn access_log_no_subst(bencher: Bencher) {
+    bench_sed(bencher, "s/Chrome/Chromium/", &access_log_data(1_000_000));
+}
+
+/// Benchmark access log substitution
+#[divan::bench]
+fn access_log_subst(bencher: Bencher) {
+    bench_sed(bencher, "s/Mozilla/Chromium/", &access_log_data(1_000_000));
+}
+
+/// Benchmark access log no deletion
+#[divan::bench]
+fn access_log_no_del(bencher: Bencher) {
+    bench_sed(bencher, "/Chrome/d", &access_log_data(1_000_000));
+}
+
+/// Benchmark access log full deletion
+#[divan::bench]
+fn access_log_all_del(bencher: Bencher) {
+    bench_sed(bencher, "/Mozilla/d", &access_log_data(1_000_000));
+}
+
+/// Benchmark transliteration
+#[divan::bench]
+fn access_log_translit(bencher: Bencher) {
+    bench_sed(bencher, "y/0123456789/9876543210/", &access_log_data(500_000));
+}
+
+/// Benchmark text append
+#[divan::bench]
+fn access_log_append(bencher: Bencher) {
+    bench_sed(bencher, "athe-line-ends-here", &access_log_data(2_000_000));
+}
+
+/// Benchmark remove carriage return
+#[divan::bench]
+fn remove_cr(bencher: Bencher) {
+    let mut data: Vec<u8> = Vec::new();
+    for i in 0..500_000 {
+        write!(data, "line {i} with windows endings\r\n")
+            .expect(VEC_WRITE);
+    }
+    bench_sed(bencher, "s/\r$//", &data);
+}
+
+/// Benchmark genomic data substitution
+#[divan::bench]
+fn genome_subst(bencher: Bencher) {
+    let mut data: Vec<u8> = Vec::new();
+    for i in 0..100_000 {
+        writeln!(
+            data,
+            "chr{}\t{}\t{}\t.\t{}\t.\t.",
+            1 + i % 22,
+            i * 100,
+            if i % 2 == 1 { "A" } else { "T" },
+            if i % 2 == 1 { "G" } else { "C" }
+        )
+        .expect(VEC_WRITE);
+    }
+    bench_sed(bencher, "/^#/d; s/\t\\./\tNA/g; s/\\.$/NA/", &data);
+}
+
+/// Benchmark number formatting
+#[divan::bench]
+fn number_fix(bencher: Bencher) {
+    let mut data: Vec<u8> = Vec::new();
+    for i in 0..100_000 {
+        let euros = i % 10000;
+        let cents = i % 100;
+        writeln!(data, "{}.{:03},{:02}", euros / 1000, euros % 1000, cents)
+            .expect(VEC_WRITE);
+    }
+    bench_sed(
+        bencher,
+        "s/\\([0-9]\\)\\.\\([0-9]\\)/\\1\\2/g;s/\\([0-9]\\),\\([0-9]\\)/\\1.\\2/g",
+        &data,
+    );
+}
+
+fn main() {
+    divan::main();
+}