diff --git a/README.md b/README.md index 989d0167..ba183a5a 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,9 @@ cargo run --release * In addition to `\n`, other escape sequences (octal, hex, C) are supported in the strings of the `y` command. Under POSIX these yield undefined behavior. +* The `a`, `c`, and `i` commands do not require an initial backslash, + allow text to appear on the same line, and support escape sequences + in the specified text. * The substitution command replacement group `\0` is a synonym for &. * A `Q` command (optionally followed by an exit code) quits immediately. * The `q` command can be optionally followed by an exit code. diff --git a/src/uu/sed/src/compiler.rs b/src/uu/sed/src/compiler.rs index a0475768..aea92ea6 100644 --- a/src/uu/sed/src/compiler.rs +++ b/src/uu/sed/src/compiler.rs @@ -1023,15 +1023,101 @@ fn compile_number_command( /// Compile commands that take text as an argument. // Handles a, c, i +// According to POSIX, these commands expect \ followed by text. +// As a GNU extension the initial \ can be omitted, and from then on +// character escapes are honored. fn compile_text_command( lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, - _context: &mut ProcessingContext, + context: &mut ProcessingContext, ) -> UResult { line.advance(); // Skip the command character. line.eat_spaces(); // Skip any leading whitespace. + if context.posix { + compile_text_command_posix(lines, line, cmd, context) + } else { + compile_text_command_gnu(lines, line, cmd, context) + } +} + +/// Compile commands that take text as an argument (GNU syntax). +// Handles a, c, i; after the command and initial whitespace have been consumed. +// According to POSIX, these commands expect \ followed by text. +// As a GNU extension the initial \ can be omitted, and from then on +// character escapes are honored. 
+fn compile_text_command_gnu( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + _context: &mut ProcessingContext, +) -> UResult { + // True after a \ at the end of a line + let mut escaped_newline = false; + + // Skip optional \. + if !line.eol() && line.current() == '\\' { + line.advance(); + escaped_newline = line.eol(); + } + + // Gather replacement text. Stop on a non-escaped newline. + let mut text = String::new(); + 'text_content: loop { + if escaped_newline { + match lines.next_line()? { + None => { + break 'text_content; + } + Some(line_string) => { + *line = ScriptCharProvider::new(&line_string); + } + } + escaped_newline = false; + } + + // Non-escaped newline + if line.eol() { + text.push('\n'); + break 'text_content; + } + + if line.current() == '\\' { + line.advance(); + + if line.eol() { + escaped_newline = true; + text.push('\n'); + continue 'text_content; + } + + match parse_char_escape(line) { + Some(decoded) => text.push(decoded), + None => { + // Invalid escapes result in the escaped character. + text.push(line.current()); + line.advance(); + } + } + } else { + text.push(line.current()); + line.advance(); + } + } + cmd.data = CommandData::Text(Cow::Owned(text)); + Ok(CommandHandling::Continue) +} + +/// Compile commands that take text as an argument (POSIX syntax). +// Handles a, c, i; after the command and initial whitespace have been consumed. +// According to POSIX, these commands expect \ followed by text. 
+fn compile_text_command_posix( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + _context: &mut ProcessingContext, +) -> UResult { if line.eol() || line.current() != '\\' { return compilation_error( lines, @@ -2462,11 +2548,12 @@ mod tests { } #[test] - fn test_compile_spaces_single_line_text_command() { + fn test_compile_text_command_posix_spaces_single_line() { let mut chars = make_char_provider("a \\ "); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); let mut context = ProcessingContext::default(); + context.posix = true; compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { @@ -2477,6 +2564,102 @@ mod tests { } } + #[test] + fn test_compile_text_command_gnu_optional_backslash() { + let mut chars = make_char_provider("athere"); + let mut lines = make_line_provider(&["line1", "line2"]); + let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); + + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text, "there\n"); + } + _ => panic!("Expected CommandData::Text"), + } + } + + #[test] + fn test_compile_text_command_gnu_optional_backslash_spaces() { + let mut chars = make_char_provider("a \t there"); + let mut lines = make_line_provider(&["line1", "line2"]); + let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); + + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text, "there\n"); + } + _ => panic!("Expected CommandData::Text"), + } + } + + #[test] + fn test_compile_text_command_gnu_optional_backslash_eol_eof() { + let mut chars = make_char_provider("a"); + let mut lines = make_line_provider(&[]); + let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); + + 
compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text, "\n"); + } + _ => panic!("Expected CommandData::Text"), + } + } + + #[test] + fn test_compile_text_command_gnu_optional_backslash_escape_eof() { + let mut chars = make_char_provider("a\\"); + let mut lines = make_line_provider(&[]); + let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); + + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text, ""); + } + _ => panic!("Expected CommandData::Text"), + } + } + + #[test] + fn test_compile_text_command_gnu_no_first_escape() { + let mut chars = make_char_provider("a\\tom"); + let mut lines = make_line_provider(&[]); + let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); + + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text, "tom\n"); + } + _ => panic!("Expected CommandData::Text"), + } + } + + #[test] + fn test_compile_text_command_gnu_char_escapes() { + let mut chars = make_char_provider("i\\>\\h\\elll\\bo\\nto\\"); + let mut lines = make_line_provider(&["all\\a", ""]); + let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); + + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text, ">helll\x08o\nto\nall\x07\n"); + } + _ => panic!("Expected CommandData::Text"), + } + } + #[test] fn test_compile_two_line_text_command() { let mut chars = make_char_provider("a\\"); @@ -2494,11 +2677,12 @@ mod tests { } #[test] - fn test_compile_text_command_without_backslash() { + fn test_compile_text_command_posix_without_backslash() { let mut chars = make_char_provider("a"); let mut lines = make_line_provider(&["line1", 
"line2"]); let mut cmd = Command::default(); let mut context = ProcessingContext::default(); + context.posix = true; let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); assert!(result.is_err()); @@ -2507,11 +2691,12 @@ mod tests { } #[test] - fn test_compile_text_command_with_trailing_chars() { + fn test_compile_text_command_posix_with_trailing_chars() { let mut chars = make_char_provider("a \\ foo"); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); let mut context = ProcessingContext::default(); + context.posix = true; let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); assert!(result.is_err()); diff --git a/src/uu/sed/src/delimited_parser.rs b/src/uu/sed/src/delimited_parser.rs index 59fc16a9..9a77213f 100644 --- a/src/uu/sed/src/delimited_parser.rs +++ b/src/uu/sed/src/delimited_parser.rs @@ -91,6 +91,10 @@ pub fn parse_char_escape(line: &mut ScriptCharProvider) -> Option { line.advance(); Some('\x07') } + 'b' => { + line.advance(); + Some('\x08') + } 'f' => { line.advance(); Some('\x0c') diff --git a/src/uu/sed/src/sed.rs b/src/uu/sed/src/sed.rs index c7147127..dede22f7 100644 --- a/src/uu/sed/src/sed.rs +++ b/src/uu/sed/src/sed.rs @@ -92,7 +92,7 @@ pub fn uu_app() -> Command { arg!(-l --length "Specify the 'l' command line-wrap length.") .value_parser(clap::value_parser!(u32)), arg!(-n --quiet "Suppress automatic printing of pattern space.").aliases(["silent"]), - arg!(--posix "Disable all POSIX extensions."), + arg!(--posix "Disable non-POSIX extensions."), arg!(-s --separate "Consider files as separate rather than as a long stream."), arg!(--sandbox "Operate in a sandbox by disabling e/r/w commands."), arg!(-u --unbuffered "Load minimal input data and flush output buffers regularly."), diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index cfe26c51..5a8b0db7 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -129,6 
+129,23 @@ macro_rules! check_output { }; } +// Run ucmd twice to test POSIX conformance: Once where posix is "--posix" +// and once where it has the dummy value "--follow-symlinks". +// This shall be used to test commands that behave differently under POSIX. +macro_rules! check_output_posix { + ($name:ident, [$($args:expr),* $(,)?]) => { + #[test] + fn $name() { + for posix in ["--posix", "--follow-symlinks"] { + new_ucmd!() + .args(&[posix $(, $args)*]) // prepend posix, then add args + .succeeds() + .stdout_is_fixture(&format!("output/{}", stringify!($name))); + } + } + }; +} + //////////////////////////////////////////////////////////// // Individual command tests @@ -496,7 +513,10 @@ tb"#, //////////////////////////////////////////////////////////// // Text: a, c, i -check_output!( + +// Check both POSIX and GNU parsing routines. + +check_output_posix!( text_insert_quit, [ "-e", @@ -509,7 +529,7 @@ hello ] ); -check_output!( +check_output_posix!( text_insert_between_subst, [ "-n", @@ -525,7 +545,7 @@ s/^/after_i/p ] ); -check_output!( +check_output_posix!( text_append_between_subst, [ "-n", @@ -542,7 +562,7 @@ s/^/after_a/p ] ); -check_output!( +check_output_posix!( text_append_before_next, [ "-n", @@ -559,7 +579,7 @@ s/$/$/p ] ); -check_output!( +check_output_posix!( text_change_global, [ "-n", @@ -572,10 +592,9 @@ hello ] ); -check_output!( +check_output_posix!( text_change_line, [ - "-n", "-e", r#" 8c\ @@ -585,10 +604,9 @@ hello ] ); -check_output!( +check_output_posix!( text_change_range, [ - "-n", "-e", r#" 3,14c\ @@ -598,11 +616,9 @@ hello ] ); -// SunOS and GNU sed behave differently. We follow POSIX. -check_output!( +check_output_posix!( text_change_reverse_range, [ - "-n", "-e", r#" 8,3c\ @@ -615,7 +631,7 @@ hello check_output!(text_delete, ["d", LINES1]); // Check that the pattern space is deleted. -check_output!( +check_output_posix!( text_change_print, [ "-n", @@ -629,6 +645,15 @@ p ] ); +// GNU syntax extensions: +// Text can follow the initial \. 
+// Character escapes are supported. +// Invalid escapes result in the escaped character. +check_output!( + text_insert_gnu, + ["-e", "i\\>\\h\\elll\x08o\\nto\\\nall\\a", LINES1] +); + //////////////////////////////////////////////////////////// // r, w commands check_output!(read_ok, [format!("4r {LINES2}"), LINES1.to_string()]); diff --git a/tests/fixtures/sed/output/text_change_line b/tests/fixtures/sed/output/text_change_line index ce013625..4d1aeb6b 100644 --- a/tests/fixtures/sed/output/text_change_line +++ b/tests/fixtures/sed/output/text_change_line @@ -1 +1,14 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 hello +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/text_change_range b/tests/fixtures/sed/output/text_change_range index ce013625..760fdd2d 100644 --- a/tests/fixtures/sed/output/text_change_range +++ b/tests/fixtures/sed/output/text_change_range @@ -1 +1,3 @@ +l1_1 +l1_2 hello diff --git a/tests/fixtures/sed/output/text_change_reverse_range b/tests/fixtures/sed/output/text_change_reverse_range index ce013625..4d1aeb6b 100644 --- a/tests/fixtures/sed/output/text_change_reverse_range +++ b/tests/fixtures/sed/output/text_change_reverse_range @@ -1 +1,14 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 hello +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/text_insert_gnu b/tests/fixtures/sed/output/text_insert_gnu new file mode 100644 index 00000000..0090ec7d --- /dev/null +++ b/tests/fixtures/sed/output/text_insert_gnu @@ -0,0 +1,56 @@ +>helllo +to +all +l1_1 +>helllo +to +all +l1_2 +>helllo +to +all +l1_3 +>helllo +to +all +l1_4 +>helllo +to +all +l1_5 +>helllo +to +all +l1_6 +>helllo +to +all +l1_7 +>helllo +to +all +l1_8 +>helllo +to +all +l1_9 +>helllo +to +all +l1_10 +>helllo +to +all +l1_11 +>helllo +to +all +l1_12 +>helllo +to +all +l1_13 +>helllo +to +all +l1_14