From bfac7cf959abbc44347154e27e32900f54ebd370 Mon Sep 17 00:00:00 2001 From: Diomidis Spinellis Date: Mon, 28 Jul 2025 13:27:52 +0300 Subject: [PATCH] Optimize a, c, i text usage Avoid the cost of string copies through the use of a reference counted str. The benchmark results improve as follows: access-log-append aci-optimize is 1.07 times faster than rust-f2a2d868 --- src/uu/sed/src/command.rs | 5 ++--- src/uu/sed/src/compiler.rs | 24 ++++++++++++------------ src/uu/sed/src/processor.rs | 4 ++-- util/benchmark.sh | 3 +++ 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/uu/sed/src/command.rs b/src/uu/sed/src/command.rs index 35e8ad19..d477c346 100644 --- a/src/uu/sed/src/command.rs +++ b/src/uu/sed/src/command.rs @@ -14,7 +14,6 @@ use crate::named_writer::NamedWriter; use crate::script_char_provider::ScriptCharProvider; use crate::script_line_provider::ScriptLineProvider; -use std::borrow::Cow; use std::cell::RefCell; use std::collections::HashMap; use std::path::PathBuf; // For file descriptors and equivalent @@ -74,7 +73,7 @@ pub struct ProcessingContext { #[derive(Clone, Debug)] /// Elements that shall be appended at the end of each command processing cycle pub enum AppendElement { - Text(String), // The specified text string + Text(Rc), // The specified text string Path(PathBuf), // The contents of the specified file path } @@ -322,7 +321,7 @@ pub enum CommandData { NamedWriter(Rc>), // File output for 'w' Number(usize), // Number for 'l', 'q', 'Q' (GNU) Substitution(Box), // Substitute command 's' - Text(Cow<'static, str>), // Text for 'a', 'c', 'i' + Text(Rc), // Text for 'a', 'c', 'i' Transliteration(Box), // Transliteration command 'y' } diff --git a/src/uu/sed/src/compiler.rs b/src/uu/sed/src/compiler.rs index aea92ea6..f2aaee53 100644 --- a/src/uu/sed/src/compiler.rs +++ b/src/uu/sed/src/compiler.rs @@ -19,11 +19,11 @@ use crate::named_writer::NamedWriter; use crate::script_char_provider::ScriptCharProvider; use crate::script_line_provider::{ScriptLineProvider, ScriptValue}; -use std::borrow::Cow; use std::cell::RefCell; use std::mem; use std::path::PathBuf; use std::rc::Rc; + use terminal_size::{Width, terminal_size}; use uucore::error::{UResult, USimpleError}; @@ -1105,7 +1105,7 @@ fn compile_text_command_gnu( line.advance(); } } - cmd.data = CommandData::Text(Cow::Owned(text)); + cmd.data = CommandData::Text(Rc::from(text)); Ok(CommandHandling::Continue) } @@ -1151,7 +1151,7 @@ fn compile_text_command_posix( break; } } - cmd.data = CommandData::Text(Cow::Owned(text)); + cmd.data = CommandData::Text(Rc::from(text)); Ok(CommandHandling::Continue) } @@ -2541,7 +2541,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "line1\n"); + assert_eq!(text.to_string(), "line1\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2558,7 +2558,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "line1\n"); + assert_eq!(text.to_string(), "line1\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2574,7 +2574,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "there\n"); + assert_eq!(text.to_string(), "there\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2590,7 +2590,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "there\n"); + assert_eq!(text.to_string(), "there\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2606,7 +2606,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "\n"); + assert_eq!(text.to_string(), "\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2622,7 +2622,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, ""); + assert_eq!(text.to_string(), ""); } _ => panic!("Expected CommandData::Text"), } @@ -2638,7 +2638,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "tom\n"); + assert_eq!(text.to_string(), "tom\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2654,7 +2654,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, ">helll\x08o\nto\nall\x07\n"); + assert_eq!(text.to_string(), ">helll\x08o\nto\nall\x07\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2670,7 +2670,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text, "line1\nline2\n"); + assert_eq!(text.to_string(), "line1\nline2\n"); } _ => panic!("Expected CommandData::Text"), } diff --git a/src/uu/sed/src/processor.rs b/src/uu/sed/src/processor.rs index d79f10ae..6965b62d 100644 --- a/src/uu/sed/src/processor.rs +++ b/src/uu/sed/src/processor.rs @@ -355,7 +355,7 @@ fn flush_appends(output: &mut OutputBuffer, context: &mut ProcessingContext) -> for elem in &context.append_elements { match elem { AppendElement::Text(text) => { - output.write_str(text.clone())?; + output.write_str(&**text)?; } AppendElement::Path(path) => { output.copy_file(path)?; @@ -476,7 +476,7 @@ fn process_file( let text = extract_variant!(command, Text); context .append_elements - .push(AppendElement::Text(text.clone().into_owned())); + .push(AppendElement::Text(text.clone())); } 'b' => { // Branch to the specified label or end if none is given. diff --git a/util/benchmark.sh b/util/benchmark.sh index ebbb02aa..a3b6232b 100755 --- a/util/benchmark.sh +++ b/util/benchmark.sh @@ -81,6 +81,9 @@ bench_run access-log-translit "$PROG y/0123456789/9876543210/ access.log" # Multiple substitutions bench_run access-log-complex-sub "$PROG -f $SCRIPTS/http-log-redact.sed access.log" +# Text append +bench_run access-log-append "$PROG athe-line-ends-here access.log" + rm access.log # Remove \r