From 7453e6d2d93ba2f4b953b8a881131ba0f4133751 Mon Sep 17 00:00:00 2001 From: Diomidis Spinellis Date: Tue, 22 Jul 2025 00:40:20 +0300 Subject: [PATCH 1/2] Refactor command specifications with handler fn --- src/uu/sed/src/compiler.rs | 371 +++++++++++++++++++------------------ 1 file changed, 194 insertions(+), 177 deletions(-) diff --git a/src/uu/sed/src/compiler.rs b/src/uu/sed/src/compiler.rs index 0574ecf1..1cfcf588 100644 --- a/src/uu/sed/src/compiler.rs +++ b/src/uu/sed/src/compiler.rs @@ -34,175 +34,215 @@ const DEFAULT_OUTPUT_WIDTH: usize = 60; // A global, immutable map of command properties, initialized on first access static CMD_MAP: LazyLock> = LazyLock::new(build_command_map); -// Types of command arguments recognized by the parser +/// The type of functions that compile individual commands +type CommandHandler = fn( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + context: &mut ProcessingContext, +) -> UResult<()>; + +// Special handling required by a particulare command #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum CommandArgs { - Empty, // d D g G h H n N p P x = \0 - Text, // a c i - NonSelect, // ! - BeginGroup, // { - EndGroup, // } - Label, // b t : - Number, // l q Q (GNU extension) - ReadFile, // r - WriteFile, // w - Substitute, // s - Transliterate, // y +enum CommandHandling { + NonSelect, // ! + EndGroup, // } + Other, // All other commands } // Command specification #[derive(Debug, Clone, Copy)] struct CommandSpec { - code: char, // Command letter used by sed - n_addr: usize, // Number of supported addresses - args: CommandArgs, // Type of command arguments + code: char, // Command letter used by sed + n_addr: usize, // Number of supported addresses + handler: CommandHandler, // Argument-specific command compilation handler + handling: CommandHandling, // Special type of command handling } // Build the command specification map (char -> CommandSpec) fn build_command_map() -> HashMap { let formats = [ + CommandSpec { + code: 'd', + n_addr: 2, + handler: compile_empty_command, + handling: CommandHandling::Other, + }, + CommandSpec { + code: ':', + n_addr: 0, + handler: compile_label_command, + handling: CommandHandling::Other, + }, CommandSpec { code: '{', n_addr: 2, - args: CommandArgs::BeginGroup, + handler: compile_block_command, + handling: CommandHandling::Other, }, CommandSpec { code: '}', n_addr: 0, - args: CommandArgs::EndGroup, + handler: compile_placeholder, + handling: CommandHandling::EndGroup, }, CommandSpec { code: 'a', n_addr: 1, - args: CommandArgs::Text, + handler: compile_text_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'b', n_addr: 2, - args: CommandArgs::Label, + handler: compile_label_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'c', n_addr: 2, - args: CommandArgs::Text, + handler: compile_text_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'd', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'D', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'g', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'G', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'h', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'H', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'i', n_addr: 1, - args: CommandArgs::Text, + handler: compile_text_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'l', n_addr: 2, - args: CommandArgs::Number, + handler: compile_number_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'n', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'N', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'p', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'P', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'q', n_addr: 1, - args: CommandArgs::Number, + handler: compile_number_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'Q', // GNU extension n_addr: 1, - args: CommandArgs::Number, + handler: compile_number_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'r', n_addr: 1, - args: CommandArgs::ReadFile, + handler: compile_read_file_command, + handling: CommandHandling::Other, }, CommandSpec { code: 's', n_addr: 2, - args: CommandArgs::Substitute, + handler: compile_subst_command, + handling: CommandHandling::Other, }, CommandSpec { code: 't', n_addr: 2, - args: CommandArgs::Label, + handler: compile_label_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'w', n_addr: 2, - args: CommandArgs::WriteFile, + handler: compile_write_file_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'x', n_addr: 2, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, CommandSpec { code: 'y', n_addr: 2, - args: CommandArgs::Transliterate, + handler: compile_trans_command, + handling: CommandHandling::Other, }, CommandSpec { code: '!', n_addr: 2, - args: CommandArgs::NonSelect, + handler: compile_placeholder, + handling: CommandHandling::NonSelect, }, CommandSpec { code: ':', n_addr: 0, - args: CommandArgs::Label, + handler: compile_label_command, + handling: CommandHandling::Other, }, CommandSpec { code: '=', n_addr: 1, - args: CommandArgs::Empty, + handler: compile_empty_command, + handling: CommandHandling::Other, }, ]; - formats.into_iter().map(|f| (f.code, f)).collect() } @@ -405,14 +445,14 @@ fn compile_sequence( let mut cmd_spec = get_cmd_spec(lines, line, n_addr)?; // The ! command shall be followed by another one - match cmd_spec.args { - CommandArgs::NonSelect => { + match cmd_spec.handling { + CommandHandling::NonSelect => { line.advance(); line.eat_spaces(); cmd.borrow_mut().non_select = true; cmd_spec = get_cmd_spec(lines, line, n_addr)?; } - CommandArgs::EndGroup => { + CommandHandling::EndGroup => { if context.parsed_block_nesting == 0 { return compilation_error(lines, line, "unexpected `}'"); } @@ -426,7 +466,12 @@ fn compile_sequence( _ => (), } - compile_command(lines, line, &mut cmd, cmd_spec, context)?; + // Compile the command according to its specification. + let mut mcmd = cmd.borrow_mut(); + mcmd.code = line.current(); + (cmd_spec.handler)(lines, line, &mut mcmd, context)?; + drop(mcmd); + if let Some(ref t) = tail { // there's already a tail: link it t.borrow_mut().next = Some(cmd.clone()); @@ -832,11 +877,12 @@ pub fn compile_replacement( } } +// Handles s fn compile_subst_command( lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, - context: &ProcessingContext, + context: &mut ProcessingContext, ) -> UResult<()> { line.advance(); // move past 's' @@ -889,10 +935,12 @@ fn compile_subst_command( parse_command_ending(lines, line, cmd) } +// Handles y fn compile_trans_command( lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, + _context: &mut ProcessingContext, ) -> UResult<()> { line.advance(); // move past 'y' @@ -1016,12 +1064,24 @@ pub fn compile_subst_flags( Ok(()) } +/// Placeholder for commands that are handled otherwise +// USed for ! } +fn compile_placeholder( + _lines: &mut ScriptLineProvider, + _line: &mut ScriptCharProvider, + _cmd: &mut Command, + _context: &mut ProcessingContext, +) -> UResult<()> { + Ok(()) +} + /// Compile a command that doesn't take any arguments // Handles d D g G h H l n N p P q x = fn compile_empty_command( - lines: &ScriptLineProvider, + lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, + _context: &mut ProcessingContext, ) -> UResult<()> { line.advance(); // Skip the command character line.eat_spaces(); // Skip any trailing whitespace @@ -1029,10 +1089,51 @@ fn compile_empty_command( parse_command_ending(lines, line, cmd) } +// Handles r +fn compile_read_file_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + _context: &mut ProcessingContext, +) -> UResult<()> { + let path = read_file_path(lines, line)?; + cmd.data = CommandData::Path(path); + Ok(()) +} + +// Handles w +fn compile_write_file_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + _context: &mut ProcessingContext, +) -> UResult<()> { + let location = ScriptLocation::at_position(lines, line); + let path = read_file_path(lines, line)?; + cmd.data = CommandData::NamedWriter(NamedWriter::new(path, location)?); + Ok(()) +} + +// Handles { +fn compile_block_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + context: &mut ProcessingContext, +) -> UResult<()> { + line.advance(); // move past '{' + context.parsed_block_nesting += 1; + let block_body = compile_sequence(lines, line, context)?; + cmd.data = CommandData::BranchTarget(block_body); + Ok(()) +} + +// Handles b, t, : fn compile_label_command( - lines: &ScriptLineProvider, + lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, + _context: &mut ProcessingContext, ) -> UResult<()> { /// Return true if `c` is in the POSIX portable filename character set. fn is_portable_filename_char(c: char) -> bool { @@ -1071,10 +1172,13 @@ fn output_width() -> usize { } } +/// Compile commands that take a number as an argument. +// Handles l q Q fn compile_number_command( - lines: &ScriptLineProvider, + lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, + _context: &mut ProcessingContext, ) -> UResult<()> { line.advance(); // Skip the command character line.eat_spaces(); // Skip any leading whitespace @@ -1098,10 +1202,13 @@ fn compile_number_command( parse_command_ending(lines, line, cmd) } +/// Compile commands that take text as an argument. +// Handles a, c, i fn compile_text_command( lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, + _context: &mut ProcessingContext, ) -> UResult<()> { line.advance(); // Skip the command character. @@ -1143,71 +1250,6 @@ fn compile_text_command( Ok(()) } -// Compile the specified command -fn compile_command( - lines: &mut ScriptLineProvider, - line: &mut ScriptCharProvider, - cmd: &mut Rc>, - cmd_spec: &'static CommandSpec, - context: &mut ProcessingContext, -) -> UResult<()> { - let mut cmd = cmd.borrow_mut(); - cmd.code = line.current(); - - match cmd_spec.args { - CommandArgs::BeginGroup => { - // { - line.advance(); // move past '{' - context.parsed_block_nesting += 1; - let block_body = compile_sequence(lines, line, context)?; - cmd.data = CommandData::BranchTarget(block_body); - } - CommandArgs::EndGroup => { // } - // Implemented at a higher level. - } - CommandArgs::Empty => { - // d D g G h H l n N p P q x = - return compile_empty_command(lines, line, &mut cmd); - } - CommandArgs::Label => { - // b t : - compile_label_command(lines, line, &mut cmd)?; - } - CommandArgs::NonSelect => { // ! - // Implemented at a higher level. - } - CommandArgs::Number => { - // l q Q - compile_number_command(lines, line, &mut cmd)?; - } - CommandArgs::Substitute => { - // s - return compile_subst_command(lines, line, &mut cmd, context); - } - CommandArgs::Transliterate => { - // y - return compile_trans_command(lines, line, &mut cmd); - } - CommandArgs::Text => { - // a c i - compile_text_command(lines, line, &mut cmd)?; - } - CommandArgs::ReadFile => { - // r - let path = read_file_path(lines, line)?; - cmd.data = CommandData::Path(path); - } - CommandArgs::WriteFile => { - // w - let location = ScriptLocation::at_position(lines, line); - let path = read_file_path(lines, line)?; - cmd.data = CommandData::NamedWriter(NamedWriter::new(path, location)?); - } - } - - Ok(()) -} - // Return the specification for the command letter at the current line position // checking for diverse errors. fn get_cmd_spec( @@ -1280,70 +1322,28 @@ mod tests { fn test_lookup_empty_command() { let cmd = lookup_command('d').unwrap(); assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::Empty); + assert_eq!(cmd.handling, CommandHandling::Other); } #[test] fn test_lookup_text_command() { let cmd = lookup_command('a').unwrap(); assert_eq!(cmd.n_addr, 1); - assert_eq!(cmd.args, CommandArgs::Text); + assert_eq!(cmd.handling, CommandHandling::Other); } #[test] fn test_lookup_nonselect_command() { let cmd = lookup_command('!').unwrap(); assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::NonSelect); - } - - #[test] - fn test_lookup_group_command() { - let cmd = lookup_command('{').unwrap(); - assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::BeginGroup); + assert_eq!(cmd.handling, CommandHandling::NonSelect); } #[test] fn test_lookup_endgroup_command() { let cmd = lookup_command('}').unwrap(); assert_eq!(cmd.n_addr, 0); - assert_eq!(cmd.args, CommandArgs::EndGroup); - } - - #[test] - fn test_lookup_label_command() { - let cmd = lookup_command(':').unwrap(); - assert_eq!(cmd.n_addr, 0); - assert_eq!(cmd.args, CommandArgs::Label); - } - - #[test] - fn test_lookup_readfile_command() { - let cmd = lookup_command('r').unwrap(); - assert_eq!(cmd.n_addr, 1); - assert_eq!(cmd.args, CommandArgs::ReadFile); - } - - #[test] - fn test_lookup_writefile_command() { - let cmd = lookup_command('w').unwrap(); - assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::WriteFile); - } - - #[test] - fn test_lookup_substitute_command() { - let cmd = lookup_command('s').unwrap(); - assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::Substitute); - } - - #[test] - fn test_lookup_translate_command() { - let cmd = lookup_command('y').unwrap(); - assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::Transliterate); + assert_eq!(cmd.handling, CommandHandling::EndGroup); } #[test] @@ -2093,8 +2093,10 @@ mod tests { fn test_compile_subst_invalid_delimiter_backslash() { let (mut lines, mut chars) = make_providers("s\\foo\\bar\\"); let mut cmd = Command::default(); + let mut context = ctx(); - let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + let err = + compile_subst_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap_err(); assert!( err.to_string() .contains("substitute pattern cannot be delimited") @@ -2105,8 +2107,10 @@ mod tests { fn test_compile_subst_extra_characters_at_end() { let (mut lines, mut chars) = make_providers("s/foo/bar/x"); let mut cmd = Command::default(); + let mut context = ctx(); - let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + let err = + compile_subst_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap_err(); assert!(err.to_string().contains("invalid substitute flag")); } @@ -2114,8 +2118,9 @@ mod tests { fn test_compile_subst_semicolon_indicates_continue() { let (mut lines, mut chars) = make_providers("s/foo/bar/;"); let mut cmd = Command::default(); + let mut context = ctx(); - compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap(); + compile_subst_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); if let CommandData::Substitution(subst) = &cmd.data { assert_eq!(subst.replacement.parts.len(), 1); @@ -2128,8 +2133,9 @@ mod tests { fn test_compile_subst_sets_command_data() { let (mut lines, mut chars) = make_providers("s/foo/bar/"); let mut cmd = Command::default(); + let mut context = ctx(); - compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap(); + compile_subst_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Substitution(subst) => { assert_eq!(subst.replacement.parts.len(), 1); @@ -2145,8 +2151,10 @@ mod tests { fn test_compile_subst_invalid_group_reference() { let (mut lines, mut chars) = make_providers(r"s/f(o)o/\2/"); let mut cmd = Command::default(); + let mut context = ctx(); - let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + let err = + compile_subst_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap_err(); assert!(err.to_string().contains("invalid reference \\2")); } @@ -2337,8 +2345,9 @@ mod tests { fn test_compile_label_command() { let (mut lines, mut chars) = make_providers(": foo"); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); - compile_label_command(&mut lines, &mut chars, &mut cmd).unwrap(); + compile_label_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Label(label) => { let name = label.clone().unwrap(); @@ -2352,9 +2361,11 @@ mod tests { fn test_compile_missing_label_command() { let (mut lines, mut chars) = make_providers(": ;"); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); cmd.code = ':'; - let err = compile_label_command(&mut lines, &mut chars, &mut cmd).unwrap_err(); + let err = + compile_label_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap_err(); assert!(err.to_string().contains("empty label")); } @@ -2362,9 +2373,10 @@ mod tests { fn test_compile_empty_label_command() { let (mut lines, mut chars) = make_providers("b ;"); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); cmd.code = 'b'; - compile_label_command(&mut lines, &mut chars, &mut cmd).unwrap(); + compile_label_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Label(label) => { assert!(label.is_none()); @@ -2560,8 +2572,9 @@ mod tests { let mut chars = make_char_provider("a\\"); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); - compile_text_command(&mut lines, &mut chars, &mut cmd).unwrap(); + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { assert_eq!(text, "line1\n"); @@ -2575,8 +2588,9 @@ mod tests { let mut chars = make_char_provider("a \\ "); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); - compile_text_command(&mut lines, &mut chars, &mut cmd).unwrap(); + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { assert_eq!(text, "line1\n"); @@ -2590,8 +2604,9 @@ mod tests { let mut chars = make_char_provider("a\\"); let mut lines = make_line_provider(&["line1\\", "line2"]); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); - compile_text_command(&mut lines, &mut chars, &mut cmd).unwrap(); + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { assert_eq!(text, "line1\nline2\n"); @@ -2605,8 +2620,9 @@ mod tests { let mut chars = make_char_provider("a"); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); - let result = compile_text_command(&mut lines, &mut chars, &mut cmd); + let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); assert!(result.is_err()); let err = result.unwrap_err().to_string(); assert!(err.contains("expects \\ followed by text")); @@ -2617,8 +2633,9 @@ mod tests { let mut chars = make_char_provider("a \\ foo"); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); + let mut context = ProcessingContext::default(); - let result = compile_text_command(&mut lines, &mut chars, &mut cmd); + let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); assert!(result.is_err()); let err = result.unwrap_err().to_string(); assert!(err.contains("extra characters after \\")); From 3fbceaca8fdfc1a1bc6547545eab0122b0566bcb Mon Sep 17 00:00:00 2001 From: Diomidis Spinellis Date: Tue, 22 Jul 2025 13:02:05 +0300 Subject: [PATCH 2/2] Refactor to remove command_handling field Have each command return its required handling. --- src/uu/sed/src/compiler.rs | 434 +++++++++++++------------------------ 1 file changed, 156 insertions(+), 278 deletions(-) diff --git a/src/uu/sed/src/compiler.rs b/src/uu/sed/src/compiler.rs index 1cfcf588..a0475768 100644 --- a/src/uu/sed/src/compiler.rs +++ b/src/uu/sed/src/compiler.rs @@ -21,18 +21,21 @@ use crate::script_line_provider::{ScriptLineProvider, ScriptValue}; use std::borrow::Cow; use std::cell::RefCell; -use std::collections::HashMap; use std::mem; use std::path::PathBuf; use std::rc::Rc; -use std::sync::LazyLock; use terminal_size::{Width, terminal_size}; use uucore::error::{UResult, USimpleError}; const DEFAULT_OUTPUT_WIDTH: usize = 60; -// A global, immutable map of command properties, initialized on first access -static CMD_MAP: LazyLock> = LazyLock::new(build_command_map); +// Handling required after processing a command +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CommandHandling { + GetNext, // Get next command and process that: ! + Return, // Return from the sequence parser: } + Continue, // Continue sequence parsing: all other commands +} /// The type of functions that compile individual commands type CommandHandler = fn( @@ -40,210 +43,13 @@ type CommandHandler = fn( line: &mut ScriptCharProvider, cmd: &mut Command, context: &mut ProcessingContext, -) -> UResult<()>; - -// Special handling required by a particulare command -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum CommandHandling { - NonSelect, // ! - EndGroup, // } - Other, // All other commands -} +) -> UResult; // Command specification #[derive(Debug, Clone, Copy)] struct CommandSpec { - code: char, // Command letter used by sed - n_addr: usize, // Number of supported addresses - handler: CommandHandler, // Argument-specific command compilation handler - handling: CommandHandling, // Special type of command handling -} - -// Build the command specification map (char -> CommandSpec) -fn build_command_map() -> HashMap { - let formats = [ - CommandSpec { - code: 'd', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: ':', - n_addr: 0, - handler: compile_label_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: '{', - n_addr: 2, - handler: compile_block_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: '}', - n_addr: 0, - handler: compile_placeholder, - handling: CommandHandling::EndGroup, - }, - CommandSpec { - code: 'a', - n_addr: 1, - handler: compile_text_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'b', - n_addr: 2, - handler: compile_label_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'c', - n_addr: 2, - handler: compile_text_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'd', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'D', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'g', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'G', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'h', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'H', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'i', - n_addr: 1, - handler: compile_text_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'l', - n_addr: 2, - handler: compile_number_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'n', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'N', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'p', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'P', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'q', - n_addr: 1, - handler: compile_number_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'Q', // GNU extension - n_addr: 1, - handler: compile_number_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'r', - n_addr: 1, - handler: compile_read_file_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 's', - n_addr: 2, - handler: compile_subst_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 't', - n_addr: 2, - handler: compile_label_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'w', - n_addr: 2, - handler: compile_write_file_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'x', - n_addr: 2, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: 'y', - n_addr: 2, - handler: compile_trans_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: '!', - n_addr: 2, - handler: compile_placeholder, - handling: CommandHandling::NonSelect, - }, - CommandSpec { - code: ':', - n_addr: 0, - handler: compile_label_command, - handling: CommandHandling::Other, - }, - CommandSpec { - code: '=', - n_addr: 1, - handler: compile_empty_command, - handling: CommandHandling::Other, - }, - ]; - formats.into_iter().map(|f| (f.code, f)).collect() + n_addr: usize, // Number of supported addresses + handler: CommandHandler, // Argument-specific command compilation handler } /// Compile the scripts into an executable data structure. @@ -442,35 +248,21 @@ fn compile_sequence( let mut cmd = Rc::new(RefCell::new(Command::at_position(lines, line))); let n_addr = compile_address_range(lines, line, &mut cmd, context)?; line.eat_spaces(); - let mut cmd_spec = get_cmd_spec(lines, line, n_addr)?; + let mut cmd_spec = get_verified_cmd_spec(lines, line, n_addr)?; - // The ! command shall be followed by another one - match cmd_spec.handling { - CommandHandling::NonSelect => { - line.advance(); - line.eat_spaces(); - cmd.borrow_mut().non_select = true; - cmd_spec = get_cmd_spec(lines, line, n_addr)?; - } - CommandHandling::EndGroup => { - if context.parsed_block_nesting == 0 { - return compilation_error(lines, line, "unexpected `}'"); - } - context.parsed_block_nesting -= 1; - line.advance(); - line.eat_spaces(); - let mut cmd_ref = cmd.borrow_mut(); - parse_command_ending(lines, line, &mut cmd_ref)?; - return Ok(head); + // Compile the command according to its specification. + let mut cmd_mut = cmd.borrow_mut(); + cmd_mut.code = line.current(); + match (cmd_spec.handler)(lines, line, &mut cmd_mut, context)? { + CommandHandling::GetNext => { + cmd_spec = get_verified_cmd_spec(lines, line, n_addr)?; + cmd_mut.code = line.current(); + (cmd_spec.handler)(lines, line, &mut cmd_mut, context)?; } - _ => (), + CommandHandling::Return => return Ok(head), + CommandHandling::Continue => (), } - - // Compile the command according to its specification. - let mut mcmd = cmd.borrow_mut(); - mcmd.code = line.current(); - (cmd_spec.handler)(lines, line, &mut mcmd, context)?; - drop(mcmd); + drop(cmd_mut); if let Some(ref t) = tail { // there's already a tail: link it @@ -883,7 +675,7 @@ fn compile_subst_command( line: &mut ScriptCharProvider, cmd: &mut Command, context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { line.advance(); // move past 's' let delimiter = line.current(); @@ -932,7 +724,8 @@ fn compile_subst_command( cmd.data = CommandData::Substitution(subst); - parse_command_ending(lines, line, cmd) + parse_command_ending(lines, line, cmd)?; + Ok(CommandHandling::Continue) } // Handles y @@ -941,7 +734,7 @@ fn compile_trans_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { line.advance(); // move past 'y' let delimiter = line.current(); @@ -967,7 +760,8 @@ fn compile_trans_command( cmd.data = CommandData::Transliteration(transliteration); line.advance(); // move past last delimiter - parse_command_ending(lines, line, cmd) + parse_command_ending(lines, line, cmd)?; + Ok(CommandHandling::Continue) } /// Parse the substitution command's optional flags @@ -1064,15 +858,37 @@ pub fn compile_subst_flags( Ok(()) } -/// Placeholder for commands that are handled otherwise -// USed for ! } -fn compile_placeholder( - _lines: &mut ScriptLineProvider, - _line: &mut ScriptCharProvider, - _cmd: &mut Command, +// Handles } +fn compile_end_group_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, + context: &mut ProcessingContext, +) -> UResult { + if context.parsed_block_nesting == 0 { + return compilation_error(lines, line, "unexpected `}'"); + } + context.parsed_block_nesting -= 1; + line.advance(); + line.eat_spaces(); + parse_command_ending(lines, line, cmd)?; + Ok(CommandHandling::Return) +} + +// Handles ! +fn compile_negation_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { - Ok(()) +) -> UResult { + line.advance(); + line.eat_spaces(); + if cmd.non_select { + return compilation_error(lines, line, "negation already applied"); + } + cmd.non_select = true; + Ok(CommandHandling::GetNext) } /// Compile a command that doesn't take any arguments @@ -1082,11 +898,12 @@ fn compile_empty_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { line.advance(); // Skip the command character line.eat_spaces(); // Skip any trailing whitespace - parse_command_ending(lines, line, cmd) + parse_command_ending(lines, line, cmd)?; + Ok(CommandHandling::Continue) } // Handles r @@ -1095,10 +912,10 @@ fn compile_read_file_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { let path = read_file_path(lines, line)?; cmd.data = CommandData::Path(path); - Ok(()) + Ok(CommandHandling::Continue) } // Handles w @@ -1107,11 +924,11 @@ fn compile_write_file_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { let location = ScriptLocation::at_position(lines, line); let path = read_file_path(lines, line)?; cmd.data = CommandData::NamedWriter(NamedWriter::new(path, location)?); - Ok(()) + Ok(CommandHandling::Continue) } // Handles { @@ -1120,12 +937,12 @@ fn compile_block_command( line: &mut ScriptCharProvider, cmd: &mut Command, context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { line.advance(); // move past '{' context.parsed_block_nesting += 1; let block_body = compile_sequence(lines, line, context)?; cmd.data = CommandData::BranchTarget(block_body); - Ok(()) + Ok(CommandHandling::Continue) } // Handles b, t, : @@ -1134,7 +951,7 @@ fn compile_label_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { /// Return true if `c` is in the POSIX portable filename character set. fn is_portable_filename_char(c: char) -> bool { c.is_ascii_alphanumeric() // A–Z, a–z, 0–9 @@ -1160,7 +977,8 @@ fn compile_label_command( } line.eat_spaces(); // Skip any trailing whitespace - parse_command_ending(lines, line, cmd) + parse_command_ending(lines, line, cmd)?; + Ok(CommandHandling::Continue) } /// Return the width of the command's terminal or a default. @@ -1179,7 +997,7 @@ fn compile_number_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { line.advance(); // Skip the command character line.eat_spaces(); // Skip any leading whitespace @@ -1199,7 +1017,8 @@ fn compile_number_command( } line.eat_spaces(); // Skip any trailing whitespace - parse_command_ending(lines, line, cmd) + parse_command_ending(lines, line, cmd)?; + Ok(CommandHandling::Continue) } /// Compile commands that take text as an argument. @@ -1209,7 +1028,7 @@ fn compile_text_command( line: &mut ScriptCharProvider, cmd: &mut Command, _context: &mut ProcessingContext, -) -> UResult<()> { +) -> UResult { line.advance(); // Skip the command character. line.eat_spaces(); // Skip any leading whitespace. @@ -1247,22 +1066,22 @@ fn compile_text_command( } } cmd.data = CommandData::Text(Cow::Owned(text)); - Ok(()) + Ok(CommandHandling::Continue) } // Return the specification for the command letter at the current line position // checking for diverse errors. -fn get_cmd_spec( +fn get_verified_cmd_spec( lines: &ScriptLineProvider, line: &ScriptCharProvider, n_addr: usize, -) -> UResult<&'static CommandSpec> { +) -> UResult { if line.eol() { return compilation_error(lines, line, "command expected"); } let ch = line.current(); - let opt_cmd_spec = lookup_command(ch); + let opt_cmd_spec = get_cmd_spec(ch); if opt_cmd_spec.is_none() { return compilation_error(lines, line, format!("invalid command code `{ch}'")); @@ -1283,9 +1102,72 @@ fn get_cmd_spec( Ok(cmd_spec) } -// Look up a command format by its command code. -fn lookup_command(cmd: char) -> Option<&'static CommandSpec> { - CMD_MAP.get(&cmd) +// Look up a command addresses and handler by its command code. +fn get_cmd_spec(cmd_code: char) -> Option { + match cmd_code { + '!' => Some(CommandSpec { + n_addr: 2, + handler: compile_negation_command, + }), + '=' => Some(CommandSpec { + n_addr: 1, + handler: compile_empty_command, + }), + ':' => Some(CommandSpec { + n_addr: 0, + handler: compile_label_command, + }), + '{' => Some(CommandSpec { + n_addr: 2, + handler: compile_block_command, + }), + '}' => Some(CommandSpec { + n_addr: 0, + handler: compile_end_group_command, + }), + 'a' | 'i' => Some(CommandSpec { + n_addr: 1, + handler: compile_text_command, + }), + 'b' | 't' => Some(CommandSpec { + n_addr: 2, + handler: compile_label_command, + }), + 'c' => Some(CommandSpec { + n_addr: 2, + handler: compile_text_command, + }), + 'd' | 'D' | 'g' | 'G' | 'h' | 'H' | 'n' | 'N' | 'p' | 'P' | 'x' => Some(CommandSpec { + n_addr: 2, + handler: compile_empty_command, + }), + 'l' => Some(CommandSpec { + n_addr: 2, + handler: compile_number_command, + }), + // Q is a GNU extension + 'q' | 'Q' => Some(CommandSpec { + n_addr: 1, + handler: compile_number_command, + }), + 'r' => Some(CommandSpec { + n_addr: 1, + handler: compile_read_file_command, + }), + 's' => Some(CommandSpec { + n_addr: 2, + handler: compile_subst_command, + }), + 'w' => Some(CommandSpec { + n_addr: 2, + handler: compile_write_file_command, + }), + 'y' => Some(CommandSpec { + n_addr: 2, + handler: compile_trans_command, + }), + _ => None, + } } #[cfg(test)] @@ -1317,38 +1199,34 @@ mod tests { ProcessingContext::default() } - // lookup_command + // get_cmd_spec #[test] fn test_lookup_empty_command() { - let cmd = lookup_command('d').unwrap(); + let cmd = get_cmd_spec('d').unwrap(); assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.handling, CommandHandling::Other); } #[test] fn test_lookup_text_command() { - let cmd = lookup_command('a').unwrap(); + let cmd = get_cmd_spec('a').unwrap(); assert_eq!(cmd.n_addr, 1); - assert_eq!(cmd.handling, CommandHandling::Other); } #[test] fn test_lookup_nonselect_command() { - let cmd = lookup_command('!').unwrap(); + let cmd = get_cmd_spec('!').unwrap(); assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.handling, CommandHandling::NonSelect); } #[test] fn test_lookup_endgroup_command() { - let cmd = lookup_command('}').unwrap(); + let cmd = get_cmd_spec('}').unwrap(); assert_eq!(cmd.n_addr, 0); - assert_eq!(cmd.handling, CommandHandling::EndGroup); } #[test] fn test_lookup_invalid_command() { - let result = lookup_command('Z'); + let result = get_cmd_spec('Z'); assert!(result.is_none()); } @@ -1395,12 +1273,12 @@ mod tests { assert_eq!(msg, "input.txt:3:1: error: invalid command 'x'"); } - // get_cmd_spec + // get_verified_cmd_spec #[test] fn test_missing_command_character() { let lines = ScriptLineProvider::with_active_state("test.sed", 1); let line = char_provider_from(""); - let result = get_cmd_spec(&lines, &line, 0); + let result = get_verified_cmd_spec(&lines, &line, 0); assert!(result.is_err()); let msg = result.unwrap_err().to_string(); @@ -1411,7 +1289,7 @@ mod tests { fn test_invalid_command_character() { let lines = ScriptLineProvider::with_active_state("script.sed", 2); let line = char_provider_from("@"); - let result = get_cmd_spec(&lines, &line, 0); + let result = get_verified_cmd_spec(&lines, &line, 0); assert!(result.is_err()); let msg = result.unwrap_err().to_string(); @@ -1422,7 +1300,7 @@ mod tests { fn test_too_many_addresses() { let lines = ScriptLineProvider::with_active_state("input.sed", 3); let line = char_provider_from("q"); // q takes one address - let result = get_cmd_spec(&lines, &line, 2); + let result = get_verified_cmd_spec(&lines, &line, 2); assert!(result.is_err()); let msg = result.unwrap_err().to_string(); @@ -1435,11 +1313,11 @@ mod tests { fn test_valid_command_spec() { let lines = ScriptLineProvider::with_active_state("input.sed", 4); let line = char_provider_from("a"); // valid command - let result = get_cmd_spec(&lines, &line, 1); + let result = get_verified_cmd_spec(&lines, &line, 1); assert!(result.is_ok()); let spec = result.unwrap(); - assert_eq!(spec.code, 'a'); + assert_eq!(spec.n_addr, 1); } // parse_number