diff --git a/src/uu/sed/src/command.rs b/src/uu/sed/src/command.rs index acaca06b..35e8ad19 100644 --- a/src/uu/sed/src/command.rs +++ b/src/uu/sed/src/command.rs @@ -8,15 +8,18 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use crate::error_handling::{ScriptLocation, runtime_error}; use crate::fast_regex::{Captures, Match, Regex}; use crate::named_writer::NamedWriter; +use crate::script_char_provider::ScriptCharProvider; +use crate::script_line_provider::ScriptLineProvider; use std::borrow::Cow; use std::cell::RefCell; use std::collections::HashMap; use std::path::PathBuf; // For file descriptors and equivalent use std::rc::Rc; -use uucore::error::{UResult, USimpleError}; +use uucore::error::UResult; #[derive(Debug, Default, Clone)] /// Compilation and processing options provided mostly through the @@ -38,6 +41,8 @@ pub struct ProcessingContext { pub null_data: bool, // Other context + /// Currently processed input file name (not script) in quoted form + pub input_name: String, /// Current input line number pub line_number: usize, /// True if this is the last address of a range @@ -80,13 +85,6 @@ pub struct StringSpace { pub has_newline: bool, // True if \n-terminated } -#[derive(Debug, PartialEq)] -/// The specification of a script: through a string or a file -pub enum ScriptValue { - StringVal(String), - PathVal(PathBuf), -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] /// Types of address specifications that precede commands pub enum AddressType { @@ -152,21 +150,21 @@ impl ReplacementTemplate { /// Apply the template to the given RE captures. /// Example: /// let result = regex.replace_all(input, |caps: &Captures| { - /// template.apply_captures(caps) }); + /// template.apply_captures(&command, caps) }); /// Returns an error if a backreference in the template was not matched by the RE. - pub fn apply_captures(&self, caps: &Captures) -> UResult { + pub fn apply_captures(&self, command: &Command, caps: &Captures) -> UResult { let mut result = String::new(); // Invalid group numbers may end here through (unkown at compile time) // reused REs. if self.max_group_number > caps.len() - 1 { - return Err(USimpleError::new( - 2, + return runtime_error( + &command.location, format!( - "invalid reference \\{} on `s' command's RHS", + "invalid reference \\{} on command's RHS", self.max_group_number ), - )); + ); } for part in &self.parts { @@ -285,6 +283,7 @@ pub struct Command { pub start_line: Option, // Start line number (or None) pub data: CommandData, // Command-specific data pub next: Option>>, // Pointer to next command + pub location: ScriptLocation, // Command's definition location } impl Default for Command { @@ -297,6 +296,17 @@ impl Default for Command { start_line: None, data: CommandData::None, next: None, + location: ScriptLocation::default(), + } + } +} + +impl Command { + /// Construct with position information from the given providers. + pub fn at_position(lines: &ScriptLineProvider, line: &ScriptCharProvider) -> Self { + Command { + location: ScriptLocation::at_position(lines, line), + ..Default::default() } } } @@ -306,15 +316,14 @@ impl Default for Command { /// After parsing, t, b Label elements are converted into BranchTarget ones. pub enum CommandData { None, - Block(Option>>), // Commands for '{' - BranchTarget(Option>>), // Commands for 'b', 't' - Label(Option), // Label name for 'b', 't', ':' - Path(PathBuf), // File path for 'r' - NamedWriter(Rc>), // File output for 'w' - Number(usize), // Number for 'l', 'q', 'Q' (GNU) - Substitution(Box), // Substitute command 's' - Text(Cow<'static, str>), // Text for 'a', 'c', 'i' - Transliteration(Box), // Transliteration command 'y' + BranchTarget(Option>>), // Commands for 'b', 't', '{' + Label(Option), // Label name for 'b', 't', ':' + Path(PathBuf), // File path for 'r' + NamedWriter(Rc>), // File output for 'w' + Number(usize), // Number for 'l', 'q', 'Q' (GNU) + Substitution(Box), // Substitute command 's' + Text(Cow<'static, str>), // Text for 'a', 'c', 'i' + Transliteration(Box), // Transliteration command 'y' } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -353,8 +362,9 @@ mod tests { let template = ReplacementTemplate::default(); let input = &mut IOChunk::from_str("foo"); let caps = caps_for("foo", input); + let cmd = Command::default(); - let result = template.apply_captures(&caps).unwrap(); + let result = template.apply_captures(&cmd, &caps).unwrap(); assert_eq!(result, ""); } @@ -364,8 +374,9 @@ mod tests { let template = ReplacementTemplate::new(vec![ReplacementPart::Literal("hello".into())]); let input = &mut IOChunk::from_str("abc"); let caps = caps_for("abc", input); + let cmd = Command::default(); - let result = template.apply_captures(&caps).unwrap(); + let result = template.apply_captures(&cmd, &caps).unwrap(); assert_eq!(result, "hello"); } @@ -378,8 +389,9 @@ mod tests { ]); let input = &mut IOChunk::from_str("foo42"); let caps = caps_for(r"foo\d+", input); + let cmd = Command::default(); - let result = template.apply_captures(&caps).unwrap(); + let result = template.apply_captures(&cmd, &caps).unwrap(); assert_eq!(result, "got: foo42"); } @@ -392,8 +404,9 @@ mod tests { ]); let input = &mut IOChunk::from_str("foo42"); let caps = caps_for(r"foo(\d+)", input); + let cmd = Command::default(); - let result = template.apply_captures(&caps).unwrap(); + let result = template.apply_captures(&cmd, &caps).unwrap(); assert_eq!(result, "number: 42"); } @@ -408,8 +421,9 @@ mod tests { ]); let input = &mut IOChunk::from_str("x:123"); let caps = caps_for(r"(\w+):(\d+)", input); + let cmd = Command::default(); - let result = template.apply_captures(&caps).unwrap(); + let result = template.apply_captures(&cmd, &caps).unwrap(); assert_eq!(result, "key: x, value: 123"); } @@ -424,8 +438,9 @@ mod tests { ]); let input = &mut IOChunk::from_str("x:123"); let caps = caps_for(r"(\w+):(\d+)", input); + let cmd = Command::default(); - let result = template.apply_captures(&caps); + let result = template.apply_captures(&cmd, &caps); assert!(result.is_err()); let msg = result.unwrap_err().to_string(); diff --git a/src/uu/sed/src/compiler.rs b/src/uu/sed/src/compiler.rs index 9246a49d..0574ecf1 100644 --- a/src/uu/sed/src/compiler.rs +++ b/src/uu/sed/src/compiler.rs @@ -10,15 +10,14 @@ use crate::command::{ Address, AddressType, AddressValue, Command, CommandData, ProcessingContext, ReplacementPart, - ReplacementTemplate, ScriptValue, Substitution, Transliteration, -}; -use crate::delimited_parser::{ - compilation_error, parse_char_escape, parse_regex, parse_transliteration, + ReplacementTemplate, Substitution, Transliteration, }; +use crate::delimited_parser::{parse_char_escape, parse_regex, parse_transliteration}; +use crate::error_handling::{ScriptLocation, compilation_error, semantic_error}; use crate::fast_regex::Regex; use crate::named_writer::NamedWriter; use crate::script_char_provider::ScriptCharProvider; -use crate::script_line_provider::ScriptLineProvider; +use crate::script_line_provider::{ScriptLineProvider, ScriptValue}; use std::borrow::Cow; use std::cell::RefCell; @@ -238,9 +237,9 @@ pub fn compile( } /// For every Command in the top-level `head` chain, look for -/// `CommandData::Block(Some(sub_head))`. Recursively patch -/// the sub-chain, then splice its tail back to the original -/// “next” pointer of the *parent* (falling back to its own +/// `CommandData::BranchTarget(Some(sub_head))` '{' commands. +/// Recursively patch the sub-chain, then splice its tail back to the +/// original “next” pointer of the *parent* (falling back to its own /// parent_next if its own next was `None`). fn patch_block_endings(head: Option>>) { fn patch_block_endings_to_parent( @@ -258,22 +257,24 @@ fn patch_block_endings(head: Option>>) { let splice_target = own_next.clone().or(parent_next.clone()); // If it has a sub-block, recurse and then patch its tail - if let CommandData::Block(Some(ref sub_head)) = cmd.data { - // 1) recurse into the sub-chain, passing along splice_target - patch_block_endings_to_parent(Some(sub_head.clone()), splice_target.clone()); - - // 2) find the tail of that sub-chain - let mut tail = sub_head.clone(); - loop { - let next_in_sub = tail.borrow().next.clone(); - match next_in_sub { - Some(n) => tail = n, - None => break, + if let CommandData::BranchTarget(Some(ref sub_head)) = cmd.data { + if cmd.code == '{' { + // 1) recurse into the sub-chain, passing splice_target + patch_block_endings_to_parent(Some(sub_head.clone()), splice_target.clone()); + + // 2) find the tail of that sub-chain + let mut tail = sub_head.clone(); + loop { + let next_in_sub = tail.borrow().next.clone(); + match next_in_sub { + Some(n) => tail = n, + None => break, + } } - } - // 3) splice the tail’s `.next` to splice_target - tail.borrow_mut().next = splice_target.clone(); + // 3) splice the tail’s `.next` to splice_target + tail.borrow_mut().next = splice_target.clone(); + } } // drop the borrow before moving on @@ -299,7 +300,7 @@ fn populate_label_map( // Extract any label to insert after borrow ends let maybe_label = match &cmd.data { - CommandData::Block(Some(sub_head)) => { + CommandData::BranchTarget(Some(sub_head)) => { populate_label_map(Some(sub_head.clone()), context)?; None } @@ -310,7 +311,7 @@ fn populate_label_map( if let Some(label) = maybe_label { if cmd.code == ':' { if context.label_to_command_map.contains_key(&label) { - return Err(USimpleError::new(2, format!("duplicate label `{label}'"))); + return semantic_error(&cmd.location, format!("duplicate label `{label}'")); } context.label_to_command_map.insert(label, rc_cmd.clone()); } @@ -332,7 +333,7 @@ fn resolve_branch_targets( let mut cmd = rc_cmd.borrow_mut(); // Recurse into blocks - if let CommandData::Block(Some(sub_head)) = &cmd.data { + if let CommandData::BranchTarget(Some(sub_head)) = &cmd.data { resolve_branch_targets(Some(sub_head.clone()), context)?; } @@ -349,7 +350,11 @@ fn resolve_branch_targets( .get(&label) .cloned() .ok_or_else(|| { - USimpleError::new(2, format!("undefined label `{label}'")) + semantic_error::<()>( + &cmd.location, + format!("undefined label `{label}'"), + ) + .unwrap_err() })?; CommandData::BranchTarget(Some(target)) } @@ -394,7 +399,7 @@ fn compile_sequence( continue; } - let mut cmd = Rc::new(RefCell::new(Command::default())); + let mut cmd = Rc::new(RefCell::new(Command::at_position(lines, line))); let n_addr = compile_address_range(lines, line, &mut cmd, context)?; line.eat_spaces(); let mut cmd_spec = get_cmd_spec(lines, line, n_addr)?; @@ -990,8 +995,9 @@ pub fn compile_subst_flags( } 'w' => { + let location = ScriptLocation::at_position(lines, line); let path = read_file_path(lines, line)?; - subst.write_file = Some(NamedWriter::new(path)?); + subst.write_file = Some(NamedWriter::new(path, location)?); return Ok(()); // 'w' is the last flag allowed } @@ -1154,7 +1160,7 @@ fn compile_command( line.advance(); // move past '{' context.parsed_block_nesting += 1; let block_body = compile_sequence(lines, line, context)?; - cmd.data = CommandData::Block(block_body); + cmd.data = CommandData::BranchTarget(block_body); } CommandArgs::EndGroup => { // } // Implemented at a higher level. @@ -1193,8 +1199,9 @@ fn compile_command( } CommandArgs::WriteFile => { // w + let location = ScriptLocation::at_position(lines, line); let path = read_file_path(lines, line)?; - cmd.data = CommandData::NamedWriter(NamedWriter::new(path)?); + cmd.data = CommandData::NamedWriter(NamedWriter::new(path, location)?); } } @@ -1368,7 +1375,7 @@ mod tests { let err = result.unwrap_err(); let msg = err.to_string(); - assert!(msg.contains("test.sed:42:4: error: unexpected token")); + assert!(msg.contains("test.sed:42:5: error: unexpected token")); } #[test] @@ -1385,7 +1392,7 @@ mod tests { let err = result.unwrap_err(); let msg = err.to_string(); - assert_eq!(msg, "input.txt:3:0: error: invalid command 'x'"); + assert_eq!(msg, "input.txt:3:1: error: invalid command 'x'"); } // get_cmd_spec @@ -1397,7 +1404,7 @@ mod tests { assert!(result.is_err()); let msg = result.unwrap_err().to_string(); - assert!(msg.contains("test.sed:1:0: error: command expected")); + assert!(msg.contains("test.sed:1:1: error: command expected")); } #[test] @@ -1408,7 +1415,7 @@ mod tests { assert!(result.is_err()); let msg = result.unwrap_err().to_string(); - assert!(msg.contains("script.sed:2:0: error: invalid command code `@'")); + assert!(msg.contains("script.sed:2:1: error: invalid command code `@'")); } #[test] @@ -1420,7 +1427,7 @@ mod tests { assert!(result.is_err()); let msg = result.unwrap_err().to_string(); assert!( - msg.contains("input.sed:3:0: error: command q expects up to 1 address(es), found 2") + msg.contains("input.sed:3:1: error: command q expects up to 1 address(es), found 2") ); } @@ -1854,9 +1861,37 @@ mod tests { }; assert_eq!(line, 1); + assert_eq!(cmd.location.line_number, 1); + assert_eq!(cmd.location.column_number, 1); + assert_eq!(cmd.location.input_name.as_ref(), "