diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06116bfb..057fce14 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,10 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: + - name: Set Git to use LF, even on Windows + run: | + git config --global core.autocrlf false + git config --global core.eol lf - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - run: cargo test --all @@ -39,6 +43,10 @@ jobs: - { os: macos-latest , features: macos } - { os: windows-latest , features: windows } steps: + - name: Set Git to use LF, even on Windows + run: | + git config --global core.autocrlf false + git config --global core.eol lf - uses: actions/checkout@v4 - name: Initialize workflow variables id: vars diff --git a/Cargo.lock b/Cargo.lock index 234660a2..03d0239a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,6 +76,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -299,6 +310,15 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "iana-time-zone" version = "0.1.63" @@ -369,6 +389,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.5.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + [[package]] name = "nix" version = "0.29.0" @@ -661,12 +690,14 @@ checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" name = "sed" version = "0.0.1" dependencies = [ + "atty", "chrono", "clap", "clap_complete", "clap_mangen", "ctor", "libc", + "memmap2", "phf", "phf_codegen", "pretty_assertions", @@ -857,7 +888,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" name = "uu_sed" version = "0.0.1" dependencies = [ + "atty", "clap", + "memmap2", "once_cell", "regex", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 3ae96336..3b94a00f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ feat_common_core = [ ] [workspace.dependencies] +atty = "0.2" bytesize = "2.0.0" chrono = { version = "0.4.37", default-features = false, features = [ "clock", @@ -37,6 +38,7 @@ clap = { version = "4.4", features = ["wrap_help", "cargo"] } clap_complete = "4.5" clap_mangen = "0.2" libc = "0.2.153" +memmap2 = "0.5" once_cell = "1.21" phf = "0.11.2" phf_codegen = "0.11.2" @@ -48,17 +50,19 @@ textwrap = { version = "0.16.1", features = ["terminal_size"] } uucore = "0.0.30" xattr = "1.3.1" + [dependencies] +atty = { workspace = true } clap = { workspace = true } clap_complete = { workspace = true } clap_mangen = { workspace = true } +ctor = "0.4.1" +memmap2.workspace = true phf = { workspace = true } +sed = { optional = true, version = "0.0.1", package = "uu_sed", path = "src/uu/sed" } sysinfo = { workspace = true } textwrap = { workspace = true } uucore = { workspace = true } - -ctor = "0.4.1" -sed = { optional = true, version = "0.0.1", package = "uu_sed", path = "src/uu/sed" } uutests = "0.0.30" [dev-dependencies] diff --git a/README.md b/README.md index 6be4a283..b50e8292 100644 --- a/README.md +++ b/README.md @@ -28,18 +28,41 @@ cd sed cargo build 
--release cargo run --release ``` -## Extensions -### GNU +## Extensions and incompatibilities +### Supported GNU extensions * Command-line arguments can be specified in long (`--`) form. * Spaces can precede a regular expression modifier. +* `I` can be used as a synonym for the `i` (case insensitive) substitution + flag. +* In addition to `\n`, other escape sequences (octal, hex, C) are supported + in the strings of the `y` command. + Under POSIX these yield undefined behavior. -### BSD and GNU +### Supported BSD and GNU extensions * The second address in a range can be specified as a relative address with +N. -### Other +### New extensions * Unicode characters can be specified in regular expression pattern, replacement and transliteration sequences using `\uXXXX` or `\UXXXXXXXX` sequences. +### Incompatibilities +* The input is assumed to be valid UTF-8 (this includes 7-bit ASCII). + If the input is in another code page, consider converting it to UTF-8 + in order to avoid errors on invalid UTF-8 sequences and for the correct + handling of regular expressions. + This _sed_ program can also handle arbitrary byte sequences if no part of the + input is treated as a string. +* The command will report an error and fail if duplicate labels are found + in the script. + This matches the BSD behavior. The GNU version accepts duplicate labels. +* The last line (`$`) address is interpreted as the last non-empty line of + the last file. If files specified in subsequent arguments until the last + one are empty, then the last line condition will never be triggered. + This behavior is consistent with the + [original implementation](https://github.com/dspinellis/unix-history-repo/blob/Research-V7/usr/src/cmd/sed/sed1.c#L665). +* Labels are parsed for alphanumeric characters. The BSD version parses them + until the end of the line, preventing `;` from being used as a separator.
+ ## License sed is licensed under the MIT License - see the `LICENSE` file for details diff --git a/src/uu/sed/Cargo.toml b/src/uu/sed/Cargo.toml index 2671125c..b593b615 100644 --- a/src/uu/sed/Cargo.toml +++ b/src/uu/sed/Cargo.toml @@ -13,10 +13,12 @@ categories = ["command-line-utilities"] [dependencies] +atty = { workspace = true } clap = { workspace = true } once_cell = { workspace = true } regex = { workspace = true } tempfile = { workspace = true } +memmap2 = { workspace = true } uucore = { workspace = true } [lib] diff --git a/src/uu/sed/src/command.rs b/src/uu/sed/src/command.rs index 53761d85..af673f19 100644 --- a/src/uu/sed/src/command.rs +++ b/src/uu/sed/src/command.rs @@ -11,19 +11,24 @@ // TODO: remove when compile is implemented #![allow(dead_code)] +use crate::named_writer::NamedWriter; + +use regex::Captures; use regex::Regex; +use std::cell::RefCell; use std::collections::HashMap; -use std::fs::File; use std::path::PathBuf; // For file descriptors and equivalent +use std::rc::Rc; +use uucore::error::UResult; -// Compilation and processing options provided mostly through the -// command-line interface -#[derive(Debug, Default)] -pub struct CliOptions { +#[derive(Debug, Default, Clone)] +/// Compilation and processing options provided mostly through the +/// command-line interface +pub struct ProcessingContext { // Command-line flags with corresponding names pub all_output_files: bool, pub debug: bool, - pub regexp_extended: bool, + pub regex_extended: bool, pub follow_symlinks: bool, pub in_place: bool, pub in_place_suffix: Option, @@ -34,19 +39,50 @@ pub struct CliOptions { pub sandbox: bool, pub unbuffered: bool, pub null_data: bool, + + // Other context + /// Current input line number + pub line_number: usize, + /// True if this is the last address of a range + pub last_address: bool, + /// True if the line read is the last line + pub last_line: bool, + /// True if the file is the last file of the ones specified + pub last_file: bool, + /// 
Stop processing further input. + pub stop_processing: bool, + /// Previously compiled RE, saved for reuse when specifying an empty RE + pub saved_regex: RefCell>, + /// Modification of input processing action + // This is required to avoid doubly borrowing the reader in the 'N' + // command. + pub input_action: Option, + /// Hold space + pub hold: StringSpace, + /// Nesting of { } at compile time + pub parsed_block_nesting: usize, + /// Command associated with each label + pub label_to_command_map: HashMap>>, + /// True if a substitution was made as specified in the t command + pub substitution_made: bool, +} + +#[derive(Clone, Debug, Default, PartialEq)] +/// A space mirroring IOChunk, but only with a String +pub struct StringSpace { + pub content: String, // Line content without newline + pub has_newline: bool, // True if \n-terminated } -// The specification of a script: through a string or a file #[derive(Debug, PartialEq)] +/// The specification of a script: through a string or a file pub enum ScriptValue { StringVal(String), PathVal(PathBuf), } -/* - * Types of address specifications - */ #[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Types of address specifications that precede commands pub enum AddressType { Re, // Line that matches regex Line, // Specific line @@ -54,10 +90,8 @@ pub enum AddressType { Last, // Last line } -/* - * Format of an address - */ #[derive(Debug)] +/// Format of an address pub struct Address { pub atype: AddressType, // Address type pub value: AddressValue, // Line number or regex @@ -69,42 +103,167 @@ pub enum AddressValue { Regex(Regex), } -/* - * Substitution command - */ #[derive(Debug)] +/// A single part of an RE replacement +pub enum ReplacementPart { + Literal(String), // Normal text + WholeMatch, // & + Group(u32), // \1 to \9 +} + +#[derive(Debug)] +/// All specified replacements for an RE +pub struct ReplacementTemplate { + pub parts: Vec, +} + +impl Default for ReplacementTemplate { + /// Create an empty template. 
+ fn default() -> Self { + ReplacementTemplate { parts: Vec::new() } + } +} + +impl ReplacementTemplate { + /// Apply the template to the given RE captures. + /// Example: + /// let result = regex.replace_all(input, |caps: &regex::Captures| { + /// template.apply(caps) }); + /// A group that did not take part in the match expands to an empty string; this body always returns `Ok`. + pub fn apply(&self, caps: &Captures) -> UResult { + let mut result = String::new(); + + for part in &self.parts { + match part { + ReplacementPart::Literal(s) => result.push_str(s), + + ReplacementPart::WholeMatch => { + result.push_str(caps.get(0).map_or("", |m| m.as_str())); + } + + ReplacementPart::Group(n) => { + // Compilation guarantees we only get valid group numbers + result.push_str( + caps.get((*n).try_into().unwrap()) + .map_or("", |m| m.as_str()), + ); + } + } + } + + Ok(result) + } + + /// Returns the highest capture group number referenced in this template. + pub fn max_group_number(&self) -> u32 { + self.parts + .iter() + .filter_map(|part| { + if let ReplacementPart::Group(n) = part { + Some(*n) + } else { + None + } + }) + .max() + .unwrap_or(0) + } +} + +#[derive(Debug)] +/// Substitution command pub struct Substitution { - pub occurrence: usize, // Which occurrence to substitute - pub print_flag: bool, // True if 'p' flag - pub ignore_case: bool, // True if 'I' flag - pub write_file: Option, // Path to file if 'w' flag is used - pub file_descriptor: Option, // Cached file descriptor - pub regex: Regex, // Regular expression - pub max_backref: u32, // Largest backreference - pub line_number: usize, // Line number - pub replacement: String, // Replacement text -} - -// Transliteration command (y) + pub occurrence: usize, // Which occurrence to substitute + pub print_flag: bool, // True if 'p' flag + pub ignore_case: bool, // True if 'I' flag + pub write_file: Option>>, // Writer to file if 'w' flag is used + pub regex: Regex, // Regular expression + pub line_number: usize, // Line number + pub
replacement: ReplacementTemplate, // Specified broken-down replacement +} + +impl Default for Substitution { + fn default() -> Self { + Substitution { + occurrence: 0, + print_flag: false, + ignore_case: false, + write_file: None, + regex: Regex::new("").unwrap(), // safe dummy regex + line_number: 0, + replacement: ReplacementTemplate::default(), + } + } +} + +/// The block of the first and most common Unicode characters: +/// ASCII, Latin Extended, Greek, Cyrillic, Coptic, Arabic, etc. +/// It comprises the code points U+0000 to U+07FF (at most two UTF-8 bytes). We use a fast lookup array for these. +const COMMON_UNICODE: usize = 2048; + #[derive(Debug)] +/// Transliteration command (y) pub struct Transliteration { - pub byte_table: [u8; 256], // Byte translation table - pub multi_map: HashMap, // Direct mapping from one char to another + fast: [char; COMMON_UNICODE], + slow: HashMap, +} + +impl Default for Transliteration { + /// Create a new Transliteration with identity mapping for the fast-path. + fn default() -> Self { + let mut fast = ['\0'; COMMON_UNICODE]; + for (i, slot) in fast.iter_mut().enumerate() { + *slot = char::from_u32(i as u32).unwrap_or('\0'); + } + Self { + fast, + slow: HashMap::new(), + } + } +} + +impl Transliteration { + /// Create through character mappings from `source` to `target`; characters are paired with `zip`, so any surplus in the longer string is ignored. + pub fn from_strings(source: &str, target: &str) -> Self { + let mut result = Self::default(); + for (from, to) in source.chars().zip(target.chars()) { + result.insert(from, to); + } + result + } + + /// Set a transliteration mapping from one character to another. + fn insert(&mut self, from: char, to: char) { + let cp = from as usize; + if cp < COMMON_UNICODE { + self.fast[cp] = to; + } else { + self.slow.insert(from, to); + } + } + + /// Look up a character transliteration. + pub fn lookup(&self, ch: char) -> char { + let cp = ch as usize; + if cp < COMMON_UNICODE { + self.fast[cp] + } else { + self.slow.get(&ch).copied().unwrap_or(ch) + } + } } -/* - * An internally compiled command.
- */ #[derive(Debug)] +/// An internally compiled command. pub struct Command { - pub code: char, // Command code - pub addr1: Option
, // Start address - pub addr2: Option
, // End address - pub non_select: bool, // True if '!' - pub start_line: Option, // Start line number (or None) - pub text: Option, // Text for ':', 'a', 'c', 'i', 'r', 'w' - pub data: CommandData, // Command-specific data - pub next: Option>, // Pointer to next command + pub code: char, // Command code + pub addr1: Option
, // Start address + pub addr2: Option
, // End address + pub non_select: bool, // True if '!' + pub start_line: Option, // Start line number (or None) + pub text: Option, // Text for ':', 'a', 'c', 'i', 'r', 'w' + pub data: CommandData, // Command-specific data + pub next: Option>>, // Pointer to next command } impl Default for Command { @@ -114,7 +273,7 @@ impl Default for Command { addr1: None, addr2: None, non_select: false, - start_line: Some(0), + start_line: None, text: None, data: CommandData::None, next: None, @@ -123,18 +282,20 @@ impl Default for Command { } #[derive(Debug)] +/// Command-specific data +/// After parsing, t, b Label elements are converted into BranchTarget ones. pub enum CommandData { None, - SubCommands(Vec), // Commands for 'b', 't', '{' - Substitution(Box), // Substitute command 's' + Block(Option>>), // Commands for '{' + BranchTarget(Option>>), // Commands for 'b', 't' + Label(Option), // Label name for 'b', 't', ':' + NamedWriter(Box), // File descriptor for 'w' + Substitution(Box), // Substitute command 's' Transliteration(Box), // Transliteration command 'y' - WriteFileDescriptor(File), // File descriptor for 'w' } -/* - * Structure containing things to append before a line is read - */ #[derive(Debug)] +/// Text to append before a line is read pub struct AppendBuffer { append_type: AppendType, content: String, @@ -146,22 +307,211 @@ pub enum AppendType { File, } -/* - * Special flag for space modifications - */ #[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Flag for space modifications pub enum SpaceFlag { Append, // Append to contents Replace, // Replace contents } -/* - * Structure for a processing space (process, hold, otherwise). 
- */ -#[derive(Debug)] -pub struct Space { - pub current: String, // Current space content - pub deleted: bool, // Whether content was deleted - pub append_newline: bool, // Whether originally terminated by \n - pub backup: String, // Backing memory +#[derive(Debug, Clone)] +/// Action to execute after reading a new input line +pub struct InputAction { + /// Next command to execute (rather than commands from start) + pub next_command: Option>>, + /// Data to prepend to the read contents + pub prepend: String, +} + +#[cfg(test)] +mod tests { + use super::*; + use regex::Regex; + + // Return the captures for the RE applied to the specified string + fn caps_for<'a>(re: &str, input: &'a str) -> regex::Captures<'a> { + Regex::new(re).unwrap().captures(input).unwrap() + } + + #[test] + // s/foo// + fn test_empty_template() { + let template = ReplacementTemplate::default(); + let caps = caps_for("foo", "foo"); + + let result = template.apply(&caps).unwrap(); + assert_eq!(result, ""); + } + + #[test] + // s/abc/hello/ + fn test_literal_only() { + let template = ReplacementTemplate { + parts: vec![ReplacementPart::Literal("hello".into())], + }; + let caps = caps_for("abc", "abc"); + + let result = template.apply(&caps).unwrap(); + assert_eq!(result, "hello"); + } + + #[test] + // s/foo\d+/got: &/ + fn test_whole_match() { + let template = ReplacementTemplate { + parts: vec![ + ReplacementPart::Literal("got: ".into()), + ReplacementPart::WholeMatch, + ], + }; + let caps = caps_for(r"foo\d+", "foo42"); + + let result = template.apply(&caps).unwrap(); + assert_eq!(result, "got: foo42"); + } + + #[test] + // s/foo(\d+)/number: \1/ + fn test_backreference() { + let template = ReplacementTemplate { + parts: vec![ + ReplacementPart::Literal("number: ".into()), + ReplacementPart::Group(1), + ], + }; + let caps = caps_for(r"foo(\d+)", "foo42"); + + let result = template.apply(&caps).unwrap(); + assert_eq!(result, "number: 42"); + } + + #[test] + // s/(\w+):(\d+)/key: \1, value: \2/ 
+ fn test_multiple_parts() { + let template = ReplacementTemplate { + parts: vec![ + ReplacementPart::Literal("key: ".into()), + ReplacementPart::Group(1), + ReplacementPart::Literal(", value: ".into()), + ReplacementPart::Group(2), + ], + }; + let caps = caps_for(r"(\w+):(\d+)", "x:123"); + + let result = template.apply(&caps).unwrap(); + assert_eq!(result, "key: x, value: 123"); + } + + // max_group_number + #[test] + fn test_max_group_number_with_groups() { + let template = ReplacementTemplate { + parts: vec![ + ReplacementPart::Literal("a".into()), + ReplacementPart::Group(2), + ReplacementPart::WholeMatch, + ReplacementPart::Group(5), + ReplacementPart::Literal("z".into()), + ], + }; + assert_eq!(template.max_group_number(), 5); + } + + #[test] + fn test_max_group_number_without_groups() { + let template = ReplacementTemplate { + parts: vec![ + ReplacementPart::Literal("no".into()), + ReplacementPart::WholeMatch, + ReplacementPart::Literal("groups".into()), + ], + }; + assert_eq!(template.max_group_number(), 0); + } + + // Transliteration + // Creation and internal functions + #[test] + fn test_identity_lookup_fast_path() { + let t = Transliteration::default(); + assert_eq!(t.lookup('A'), 'A'); + assert_eq!(t.lookup('z'), 'z'); + assert_eq!(t.lookup('\u{07FF}'), '\u{07FF}'); // highest 2-byte UTF-8 char + } + + #[test] + fn test_identity_lookup_slow_path() { + let t = Transliteration::default(); + assert_eq!(t.lookup('\u{0800}'), '\u{0800}'); // just outside fast path + assert_eq!(t.lookup('\u{1F600}'), '\u{1F600}'); // 😀 + } + + #[test] + fn test_insert_and_lookup_fast_path() { + let mut t = Transliteration::default(); + t.insert('a', 'α'); + t.insert('b', 'β'); + assert_eq!(t.lookup('a'), 'α'); + assert_eq!(t.lookup('b'), 'β'); + assert_eq!(t.lookup('c'), 'c'); // unchanged + } + + #[test] + fn test_insert_and_lookup_slow_path() { + let mut t = Transliteration::default(); + t.insert('🦀', 'c'); // U+1F980 Crab emoji -> 'c' + assert_eq!(t.lookup('🦀'), 'c'); + 
assert_eq!(t.lookup('🦁'), '🦁'); // unchanged + } + + #[test] + fn test_overwrite_mapping() { + let mut t = Transliteration::default(); + t.insert('x', '1'); + assert_eq!(t.lookup('x'), '1'); + t.insert('x', '2'); + assert_eq!(t.lookup('x'), '2'); + } + + #[test] + fn test_all_fast_path_mapped_to_space() { + let mut t = Transliteration::default(); + for cp in 0..COMMON_UNICODE { + if let Some(ch) = char::from_u32(cp as u32) { + t.insert(ch, ' '); + } + } + assert_eq!(t.lookup('A'), ' '); + assert_eq!(t.lookup('\u{07FF}'), ' '); + } + + #[test] // from_strings + fn test_basic_transliteration() { + let t = Transliteration::from_strings("abcδ", "1234"); + + assert_eq!(t.lookup('a'), '1'); + assert_eq!(t.lookup('b'), '2'); + assert_eq!(t.lookup('c'), '3'); + assert_eq!(t.lookup('δ'), '4'); + assert_eq!(t.lookup('e'), 'e'); // not mapped, fallback + } + + #[test] + fn test_unicode_slow_path() { + let source = "é漢🦀"; + let target = "e文c"; + let t = Transliteration::from_strings(source, target); + + assert_eq!(t.lookup('é'), 'e'); + assert_eq!(t.lookup('漢'), '文'); + assert_eq!(t.lookup('🦀'), 'c'); + assert_eq!(t.lookup('x'), 'x'); // fast fallback + assert_eq!(t.lookup('文'), '文'); // slow fallback + } + + #[test] + fn test_overwrite_fast_path() { + let t = Transliteration::from_strings("aa", "12"); + assert_eq!(t.lookup('a'), '2'); // last mapping wins + } } diff --git a/src/uu/sed/src/compiler.rs b/src/uu/sed/src/compiler.rs index 341a8d17..084eec8f 100644 --- a/src/uu/sed/src/compiler.rs +++ b/src/uu/sed/src/compiler.rs @@ -8,20 +8,24 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code.
-use crate::command::{Address, AddressType, AddressValue, CliOptions, Command, ScriptValue}; -use crate::delimited_parser::{compilation_error, parse_regex}; +use crate::command::{ + Address, AddressType, AddressValue, Command, CommandData, ProcessingContext, ReplacementPart, + ReplacementTemplate, ScriptValue, Substitution, Transliteration, +}; +use crate::delimited_parser::{ + compilation_error, parse_char_escape, parse_regex, parse_transliteration, +}; +use crate::named_writer::NamedWriter; use crate::script_char_provider::ScriptCharProvider; use crate::script_line_provider::ScriptLineProvider; use once_cell::sync::Lazy; use regex::Regex; use std::cell::RefCell; use std::collections::HashMap; -use uucore::error::UResult; - -thread_local! { - /// The previously saved RE. It is reused when specifying an empty one. - static SAVED_REGEX: RefCell> = const { RefCell::new(None) }; -} +use std::mem; +use std::path::PathBuf; +use std::rc::Rc; +use uucore::error::{UResult, USimpleError}; // A global, immutable map of command properties, initialized on first access static CMD_MAP: Lazy> = Lazy::new(build_command_map); @@ -29,18 +33,16 @@ static CMD_MAP: Lazy> = Lazy::new(build_command_map); // Types of command arguments recognized by the parser #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum CommandArgs { - Empty, // d D g G h H l n N p P q x = \0 - Text, // a c i - NonSelect, // ! - Group, // { - EndGroup, // } - Comment, // # - Branch, // b t - Label, // : - ReadFile, // r - WriteFile, // w - Substitute, // s - Translate, // y + Empty, // d D g G h H l n N p P q x = \0 + Text, // a c i + NonSelect, // ! 
+ BeginGroup, // { + EndGroup, // } + Label, // b t : + ReadFile, // r + WriteFile, // w + Substitute, // s + Transliterate, // y } // Command specification @@ -57,7 +59,7 @@ fn build_command_map() -> HashMap { CommandSpec { code: '{', n_addr: 2, - args: CommandArgs::Group, + args: CommandArgs::BeginGroup, }, CommandSpec { code: '}', @@ -72,7 +74,7 @@ fn build_command_map() -> HashMap { CommandSpec { code: 'b', n_addr: 2, - args: CommandArgs::Branch, + args: CommandArgs::Label, }, CommandSpec { code: 'c', @@ -157,7 +159,7 @@ fn build_command_map() -> HashMap { CommandSpec { code: 't', n_addr: 2, - args: CommandArgs::Branch, + args: CommandArgs::Label, }, CommandSpec { code: 'w', @@ -172,7 +174,7 @@ fn build_command_map() -> HashMap { CommandSpec { code: 'y', n_addr: 2, - args: CommandArgs::Translate, + args: CommandArgs::Transliterate, }, CommandSpec { code: '!', @@ -184,11 +186,6 @@ fn build_command_map() -> HashMap { n_addr: 0, args: CommandArgs::Label, }, - CommandSpec { - code: '#', - n_addr: 0, - args: CommandArgs::Comment, - }, CommandSpec { code: '=', n_addr: 1, @@ -199,77 +196,227 @@ fn build_command_map() -> HashMap { formats.into_iter().map(|f| (f.code, f)).collect() } -// How to continue after processing a command -#[derive(Debug)] -enum ContinueAction { - NextLine, - NextChar, -} - +/// Compile the scripts into an executable data structure. pub fn compile( scripts: Vec, - cli_options: &mut CliOptions, -) -> UResult>> { + context: &mut ProcessingContext, +) -> UResult>>> { let mut make_providers = ScriptLineProvider::new(scripts); - let result = compile_thread(&mut make_providers, cli_options)?; - // TODO: fix-up labels, check used labels, setup append & match structures + let mut empty_line = ScriptCharProvider::new(""); + let result = compile_sequence(&mut make_providers, &mut empty_line, context)?; + + // Link the ends of command blocks to their following commands. 
+ if context.parsed_block_nesting > 0 { + return Err(USimpleError::new(1, "unmatched `{'")); + } + patch_block_endings(result.clone()); + + // Link branch commands to the target label commands. + populate_label_map(result.clone(), context)?; + resolve_branch_targets(result.clone(), context)?; + + // Comment-out the following to show the compiled script. + #[cfg(any())] + dbg!(&result); + + // TODO: setup append & match structures Ok(result) } -// Compile provided scripts into a thread of commands -fn compile_thread( - lines: &mut ScriptLineProvider, - _cli_options: &mut CliOptions, -) -> UResult>> { - let mut head: Option> = None; - // A mutable reference to the place we’ll insert next - let mut next_p = &mut head; - - 'next_line: loop { - match lines.next_line().unwrap() { - None => { - // TODO: Error if stack isn't empty - return Ok(head); - } - Some(line_string) => { - let mut line = ScriptCharProvider::new(&line_string); - - // TODO: set cli_options.quiet for StringVal starting with #n - 'next_char: loop { - line.eat_spaces(); - if line.eol() || line.current() == '#' { - continue 'next_line; - } else if line.current() == ';' { - line.advance(); - continue 'next_char; +/// For every Command in the top-level `head` chain, look for +/// `CommandData::Block(Some(sub_head))`. Recursively patch +/// the sub-chain, then splice its tail back to the original +/// “next” pointer of the *parent* (falling back to its own +/// parent_next if its own next was `None`). 
+fn patch_block_endings(head: Option>>) { + fn patch_block_endings_to_parent( + mut cur: Option>>, + parent_next: Option>>, + ) { + while let Some(rc_cmd) = cur { + // Borrow mutably just long enough to inspect/rewire this node + let cmd = rc_cmd.borrow_mut(); + // Save this node’s own next pointer + let own_next = cmd.next.clone(); + // Decide what “splice target” to use: + // - if this node has its own_next, use that + // - otherwise, fall back to parent_next + let splice_target = own_next.clone().or(parent_next.clone()); + + // If it has a sub-block, recurse and then patch its tail + if let CommandData::Block(Some(ref sub_head)) = cmd.data { + // 1) recurse into the sub-chain, passing along splice_target + patch_block_endings_to_parent(Some(sub_head.clone()), splice_target.clone()); + + // 2) find the tail of that sub-chain + let mut tail = sub_head.clone(); + loop { + let next_in_sub = tail.borrow().next.clone(); + match next_in_sub { + Some(n) => tail = n, + None => break, } + } - let mut cmd = Box::new(Command::default()); - let n_addr = compile_address_range(lines, &mut line, &mut cmd)?; - let mut cmd_spec = get_cmd_spec(lines, &line, n_addr)?; + // 3) splice the tail’s `.next` to splice_target + tail.borrow_mut().next = splice_target.clone(); + } - // The ! 
command shall be followed by another one - if cmd_spec.args == CommandArgs::NonSelect { - line.advance(); - line.eat_spaces(); - cmd.non_select = true; - cmd_spec = get_cmd_spec(lines, &line, n_addr)?; - } + // drop the borrow before moving on + drop(cmd); - // Move cmd into next_p, transferring its ownership - let action = compile_command(lines, &mut line, &mut cmd, cmd_spec)?; + // advance to the next sibling in this level + cur = own_next; + } + } - *next_p = Some(cmd); - next_p = &mut next_p.as_mut().unwrap().next; + // top-level has no parent, so pass None + patch_block_endings_to_parent(head, None); +} - match action { - ContinueAction::NextLine => continue 'next_line, - ContinueAction::NextChar => continue 'next_char, - } +/// Populate the context's label map with references to associated commands. +fn populate_label_map( + mut cur: Option>>, + context: &mut ProcessingContext, +) -> UResult<()> { + while let Some(rc_cmd) = cur { + // Borrow mutably just long enough to inspect/rewire this node + let cmd = rc_cmd.borrow_mut(); + + // Extract any label to insert after borrow ends + let maybe_label = match &cmd.data { + CommandData::Block(Some(sub_head)) => { + populate_label_map(Some(sub_head.clone()), context)?; + None + } + CommandData::Label(Some(label)) => Some(label.clone()), + _ => None, + }; + + if let Some(label) = maybe_label { + if cmd.code == ':' { + if context.label_to_command_map.contains_key(&label) { + return Err(USimpleError::new(2, format!("duplicate label `{}'", label))); + } + context.label_to_command_map.insert(label, rc_cmd.clone()); + } + } + + cur = cmd.next.clone(); + } + Ok(()) +} + +/// Replace branch labels with references to the corresponding commands. +/// Raise an error on undefined labels. 
+fn resolve_branch_targets( + mut cur: Option>>, + context: &mut ProcessingContext, +) -> UResult<()> { + while let Some(rc_cmd) = cur { + // Borrow mutably just long enough to inspect/rewire this node + let mut cmd = rc_cmd.borrow_mut(); + + // Recurse into blocks + if let CommandData::Block(Some(sub_head)) = &cmd.data { + resolve_branch_targets(Some(sub_head.clone()), context)?; + } + + // Only for 't' or 'b' commands: + if matches!(cmd.code, 't' | 'b') { + // Take ownership of the current data + let old_data = mem::replace(&mut cmd.data, CommandData::None); + + // Build the replacement + let new_data = match old_data { + CommandData::Label(Some(label)) => { + let target = context + .label_to_command_map + .get(&label) + .cloned() + .ok_or_else(|| { + USimpleError::new(2, format!("undefined label `{}'", label)) + })?; + CommandData::BranchTarget(Some(target)) + } + CommandData::Label(None) => CommandData::BranchTarget(None), + other => other, // put back anything else unchanged + }; + + // Store it back + cmd.data = new_data; + } + + // Advance to the next sibling + cur = cmd.next.clone(); + } + Ok(()) +} + +/// Compile provided scripts into a sequence of commands. +fn compile_sequence( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + context: &mut ProcessingContext, +) -> UResult>>> { + let mut head: Option>> = None; + let mut tail: Option>> = None; + + loop { + line.eat_spaces(); + if line.eol() || line.current() == '#' { + // TODO: set context.quiet for StringVal starting with #n + match lines.next_line()? { + None => { + return Ok(head); + } + Some(line_string) => { + *line = ScriptCharProvider::new(&line_string); } } + continue; + } else if line.current() == ';' { + line.advance(); + continue; + } + + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(lines, line, &mut cmd, context)?; + line.eat_spaces(); + let mut cmd_spec = get_cmd_spec(lines, line, n_addr)?; + + // The ! 
command shall be followed by another one + match cmd_spec.args { + CommandArgs::NonSelect => { + line.advance(); + line.eat_spaces(); + cmd.borrow_mut().non_select = true; + cmd_spec = get_cmd_spec(lines, line, n_addr)?; + } + CommandArgs::EndGroup => { + if context.parsed_block_nesting == 0 { + return compilation_error(lines, line, "unexpected `}'"); + } + context.parsed_block_nesting -= 1; + line.advance(); + line.eat_spaces(); + let mut cmd_ref = cmd.borrow_mut(); + parse_command_ending(lines, line, &mut cmd_ref)?; + return Ok(head); + } + _ => (), + } + + compile_command(lines, line, &mut cmd, cmd_spec, context)?; + if let Some(ref t) = tail { + // there's already a tail: link it + t.borrow_mut().next = Some(cmd.clone()); + } else { + // first element: set head + head = Some(cmd.clone()); } + tail = Some(cmd); } } @@ -283,13 +430,15 @@ fn is_address_char(c: char) -> bool { fn compile_address_range( lines: &ScriptLineProvider, line: &mut ScriptCharProvider, - cmd: &mut Command, + cmd: &mut Rc>, + context: &ProcessingContext, ) -> UResult { let mut n_addr = 0; + let mut cmd = cmd.borrow_mut(); line.eat_spaces(); if !line.eol() && is_address_char(line.current()) { - if let Ok(addr1) = compile_address(lines, line) { + if let Ok(addr1) = compile_address(lines, line, context) { cmd.addr1 = Some(addr1); n_addr += 1; } @@ -300,7 +449,7 @@ fn compile_address_range( line.advance(); line.eat_spaces(); if !line.eol() { - if let Ok(addr2) = compile_address(lines, line) { + if let Ok(addr2) = compile_address(lines, line, context) { cmd.addr2 = Some(addr2); n_addr += 1; } @@ -311,7 +460,11 @@ fn compile_address_range( } /// Compile and return a single range address specification. -fn compile_address(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> UResult
{ +fn compile_address( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + context: &ProcessingContext, +) -> UResult
{ let mut icase = false; if line.eol() { @@ -337,7 +490,7 @@ fn compile_address(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> Ok(Address { atype: AddressType::Re, - value: AddressValue::Regex(compile_regex(lines, line, &re, icase)?), + value: AddressValue::Regex(compile_regex(lines, line, &re, context, icase)?), }) } '$' => { @@ -382,96 +535,502 @@ fn parse_number(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> UR .map_err(|msg| compilation_error::(lines, line, msg).unwrap_err()) } +/// Parse the end of a command, failing with an error on extra characters. +fn parse_command_ending( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, +) -> UResult<()> { + if !line.eol() && line.current() == ';' { + line.advance(); + return Ok(()); + } + + if !line.eol() { + return compilation_error( + lines, + line, + format!("extra characters at the end of the {} command", cmd.code), + ); + } + + Ok(()) +} + +/// Convert a primitive BRE pattern to a safe ERE-compatible pattern string. +/// - Replacces `\(` and `\)` with `(` and `)` +/// - Escapes ERE-only metacharacters: `+ ? { } | ( )` +/// - Leaves all other characters as-is +fn bre_to_ere(pattern: &str) -> String { + let mut result = String::with_capacity(pattern.len()); + let mut chars = pattern.chars().peekable(); + + while let Some(c) = chars.next() { + if c == '\\' { + match chars.peek() { + Some('(') => { + chars.next(); + result.push('('); // group start + } + Some(')') => { + chars.next(); + result.push(')'); // group end + } + Some(&next) => { + chars.next(); + result.push('\\'); + result.push(next); // preserve other escaped characters + } + None => { + result.push('\\'); // trailing backslash, keep it + } + } + } else { + match c { + '+' | '?' 
| '{' | '}' | '|' | '(' | ')' => { + result.push('\\'); // escape unsupported ERE metacharacters + result.push(c); + } + _ => result.push(c), + } + } + } + + result +} + /// Compile the provided regular expression string into a corresponding engine. fn compile_regex( lines: &ScriptLineProvider, line: &ScriptCharProvider, pattern: &str, + context: &ProcessingContext, icase: bool, ) -> UResult { if pattern.is_empty() { - SAVED_REGEX.with(|cell| { - if let Some(existing) = &*cell.borrow() { - Ok(existing.clone()) - } else { - compilation_error(lines, line, "no previously compiled regex available") - } - }) + let maybe_existing = context.saved_regex.borrow(); + if let Some(existing) = &*maybe_existing { + Ok(existing.clone()) + } else { + compilation_error(lines, line, "no previously compiled regex available") + } } else { + // Convert basic to extended regular expression if needed. + let ere_pattern = if context.regex_extended { + pattern + } else { + &bre_to_ere(pattern) + }; + + // Add case-insensitive modifier if needed. let full_pattern = if icase { - if pattern.is_empty() { + if ere_pattern.is_empty() { return compilation_error(lines, line, "cannot specify a modifier on an empty RE"); } - format!("(?i){}", pattern) + format!("(?i){}", ere_pattern) } else { - pattern.to_string() + ere_pattern.to_string() }; + // Compile into engine. let compiled = Regex::new(&full_pattern).map_err(|e| { compilation_error::(lines, line, format!("invalid regex '{}': {}", pattern, e)) .unwrap_err() })?; - SAVED_REGEX.with(|cell| { - *cell.borrow_mut() = Some(compiled.clone()); - }); + *context.saved_regex.borrow_mut() = Some(compiled.clone()); + Ok(compiled) } } -// Compile the specified command -fn compile_command( +/// Compile a regular expression replacement string. 
+pub fn compile_replacement( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, +) -> UResult { + let mut parts = Vec::new(); + let mut literal = String::new(); + + let delimiter = line.current(); + line.advance(); + + loop { + while !line.eol() { + match line.current() { + '\\' => { + line.advance(); + + // Line input_action + if line.eol() { + if let Some(next_line_string) = lines.next_line()? { + literal.push('\n'); + *line = ScriptCharProvider::new(&next_line_string); + continue; + } else { + return compilation_error( + lines, + line, + "unterminated substitute replacement (unexpected EOF)", + ); + } + } + + match line.current() { + // \1 - \9 + c @ '1'..='9' => { + let ref_num = c.to_digit(10).unwrap(); + + if !literal.is_empty() { + parts.push(ReplacementPart::Literal(std::mem::take(&mut literal))); + } + parts.push(ReplacementPart::Group(ref_num)); + line.advance(); + } + + // literal \ and & + '\\' | '&' => { + literal.push(line.current()); + line.advance(); + } + + // other escape sequences + _ => match parse_char_escape(line) { + Some(decoded) => literal.push(decoded), + None => { + literal.push('\\'); + literal.push(line.current()); + line.advance(); + } + }, + } + } + + '&' => { + if !literal.is_empty() { + parts.push(ReplacementPart::Literal(std::mem::take(&mut literal))); + } + parts.push(ReplacementPart::WholeMatch); + line.advance(); + } + + '\n' => { + return compilation_error( + lines, + line, + "unescaped newline inside substitute replacement", + ); + } + + c if c == delimiter => { + line.advance(); // skip closing delimiter + if !literal.is_empty() { + parts.push(ReplacementPart::Literal(literal)); + } + return Ok(ReplacementTemplate { parts }); + } + + c => { + literal.push(c); + line.advance(); + } + } + } + + // Fetch next line for continued replacement string + if let Some(next_line_string) = lines.next_line()? 
{ + *line = ScriptCharProvider::new(&next_line_string); + } else { + return compilation_error(lines, line, "unterminated substitute replacement"); + } + } +} + +fn compile_subst_command( lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, - cmd_spec: &'static CommandSpec, -) -> UResult { - cmd.code = line.current(); + context: &ProcessingContext, +) -> UResult<()> { + line.advance(); // move past 's' - match cmd_spec.args { - CommandArgs::Empty => { - // d D g G h H l n N p P q x = - line.advance(); - line.eat_spaces(); - if !line.eol() && line.current() == ';' { + let delimiter = line.current(); + if delimiter == '\0' || delimiter == '\\' { + return compilation_error( + lines, + line, + "substitute pattern cannot be delimited by newline or backslash", + ); + } + + let pattern = parse_regex(lines, line)?; + if pattern.is_empty() { + return compilation_error(lines, line, "unterminated substitute pattern"); + } + + let mut subst = Box::new(Substitution { + line_number: lines.get_line_number(), + ..Default::default() + }); + + subst.replacement = compile_replacement(lines, line)?; + compile_subst_flags(lines, line, &mut subst)?; + + // Compile regex with now known ignore_case flag. 
+ subst.regex = compile_regex(lines, line, &pattern, context, subst.ignore_case)?; + + let re_captures: u32 = subst + .regex + .captures_len() + .saturating_sub(1) + .try_into() + .unwrap(); + let max_group_number = subst.replacement.max_group_number(); + if max_group_number > re_captures { + return compilation_error( + lines, + line, + format!( + "group number \\{} is larger than the {} available RE groups", + max_group_number, re_captures + ), + ); + } + + cmd.data = CommandData::Substitution(subst); + + parse_command_ending(lines, line, cmd) +} + +fn compile_trans_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, +) -> UResult<()> { + line.advance(); // move past 'y' + + let delimiter = line.current(); + if delimiter == '\0' || delimiter == '\\' { + return compilation_error( + lines, + line, + "transliteration string cannot be delimited by newline or backslash", + ); + } + + let source = parse_transliteration(lines, line)?; + let target = parse_transliteration(lines, line)?; + if source.chars().count() != target.chars().count() { + return compilation_error( + lines, + line, + "transliteration strings are not the same length", + ); + } + + let transliteration = Box::new(Transliteration::from_strings(&source, &target)); + cmd.data = CommandData::Transliteration(transliteration); + + line.advance(); // move past last delimiter + parse_command_ending(lines, line, cmd) +} + +/// Parse the substitution command's optional flags +pub fn compile_subst_flags( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + subst: &mut Substitution, +) -> UResult<()> { + let mut seen_g_or_n = false; + + subst.occurrence = 1; // default + subst.print_flag = false; + subst.ignore_case = false; + subst.write_file = None; + + loop { + line.eat_spaces(); + if line.eol() { + break; + } + + match line.current() { + 'g' => { + if seen_g_or_n { + return compilation_error( + lines, + line, + "multiple 'g' or numeric flags in substitute 
command", + ); + } + seen_g_or_n = true; + subst.occurrence = 0; + line.advance(); + } + + 'p' => { + subst.print_flag = true; + line.advance(); + } + + 'i' | 'I' => { + subst.ignore_case = true; + line.advance(); + } + + _c @ '1'..='9' => { + if seen_g_or_n { + return compilation_error( + lines, + line, + "multiple 'g' or numeric flags in substitute command", + ); + } + + let mut number = 0usize; + while !line.eol() && line.current().is_ascii_digit() { + number = number + .checked_mul(10) + .and_then(|n| n.checked_add(line.current().to_digit(10).unwrap() as usize)) + .ok_or_else(|| { + compilation_error::<()>( + lines, + line, + "overflow in numeric substitute flag", + ) + .unwrap_err() + })?; + line.advance(); + } + + subst.occurrence = number; + seen_g_or_n = true; + } + + 'w' => { line.advance(); - // TODO: update link - return Ok(ContinueAction::NextChar); + line.eat_spaces(); + + let mut path = String::new(); + while !line.eol() && line.current() != ';' { + path.push(line.current()); + line.advance(); + } + + if path.is_empty() { + return compilation_error(lines, line, "missing filename after 'w' flag"); + } + + subst.write_file = Some(NamedWriter::new(PathBuf::from(path))?); + return Ok(()); // 'w' is the last flag allowed } - if !line.eol() { + + ';' | '\n' => break, + + other => { return compilation_error( lines, line, - format!("extra characters at the end of the {} command", cmd.code), + format!("invalid substitute flag: '{}'", other), ); } } - CommandArgs::NonSelect => { // ! 
- } - // TODO - CommandArgs::Text => { // a c i + } + + Ok(()) +} + +/// Compile a command that doesn't take any arguments +// Handles d D g G h H l n N p P q x = +fn compile_empty_command( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, +) -> UResult<()> { + line.advance(); // Skip the command character + line.eat_spaces(); // Skip any trailing whitespace + + parse_command_ending(lines, line, cmd) +} + +fn compile_label_command( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Command, +) -> UResult<()> { + line.advance(); // Skip the command character + line.eat_spaces(); // Skip any leading whitespace + + let mut label = String::new(); + while !line.eol() && line.current().is_ascii_alphanumeric() { + label.push(line.current()); + line.advance(); + } + + if label.is_empty() { + if cmd.code == ':' { + return compilation_error(lines, line, "empty label"); } - CommandArgs::Group => { // { + cmd.data = CommandData::Label(None); + } else { + cmd.data = CommandData::Label(Some(label)); + } + + line.eat_spaces(); // Skip any trailing whitespace + parse_command_ending(lines, line, cmd) +} + +// Compile the specified command +fn compile_command( + lines: &mut ScriptLineProvider, + line: &mut ScriptCharProvider, + cmd: &mut Rc>, + cmd_spec: &'static CommandSpec, + context: &mut ProcessingContext, +) -> UResult<()> { + let mut cmd = cmd.borrow_mut(); + cmd.code = line.current(); + + match cmd_spec.args { + CommandArgs::BeginGroup => { + // { + line.advance(); // move past '{' + context.parsed_block_nesting += 1; + let block_body = compile_sequence(lines, line, context)?; + cmd.data = CommandData::Block(block_body); } CommandArgs::EndGroup => { // } + // Implemented at a higher level. 
+ } + CommandArgs::Empty => { + // d D g G h H l n N p P q x = + return compile_empty_command(lines, line, &mut cmd); } - CommandArgs::Comment => { // # + CommandArgs::Label => { + // b t : + compile_label_command(lines, line, &mut cmd)?; } - CommandArgs::Branch => { // b t + CommandArgs::NonSelect => { // ! + // Implemented at a higher level. } - CommandArgs::Label => { // : + CommandArgs::Substitute => { + // s + return compile_subst_command(lines, line, &mut cmd, context); } - CommandArgs::ReadFile => { // r + CommandArgs::Transliterate => { + // y + return compile_trans_command(lines, line, &mut cmd); } - CommandArgs::WriteFile => { // w + // TODO + CommandArgs::Text => { // a c i } - CommandArgs::Substitute => { // s + CommandArgs::ReadFile => { // r } - CommandArgs::Translate => { // y + CommandArgs::WriteFile => { // w } } - Ok(ContinueAction::NextLine) + Ok(()) } // Return the specification for the command letter at the current line position @@ -489,7 +1048,7 @@ fn get_cmd_spec( let opt_cmd_spec = lookup_command(ch); if opt_cmd_spec.is_none() { - return compilation_error(lines, line, format!("invalid command code {}", ch)); + return compilation_error(lines, line, format!("invalid command code `{}'", ch)); } let cmd_spec = opt_cmd_spec.unwrap(); @@ -522,6 +1081,11 @@ mod tests { (lines, line) } + /// Return a default ProcessingContext for use in tests. 
+ pub fn ctx() -> ProcessingContext { + ProcessingContext::default() + } + // lookup_command #[test] fn test_lookup_empty_command() { @@ -548,7 +1112,7 @@ mod tests { fn test_lookup_group_command() { let cmd = lookup_command('{').unwrap(); assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::Group); + assert_eq!(cmd.args, CommandArgs::BeginGroup); } #[test] @@ -559,24 +1123,10 @@ mod tests { } #[test] - fn test_lookup_comment_command() { - let cmd = lookup_command('#').unwrap(); + fn test_lookup_label_command() { + let cmd = lookup_command(':').unwrap(); assert_eq!(cmd.n_addr, 0); - assert_eq!(cmd.args, CommandArgs::Comment); - } - - #[test] - fn test_lookup_branch_command() { - let cmd = lookup_command('b').unwrap(); - assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::Branch); - } - - #[test] - fn test_lookup_label_command() { - let cmd = lookup_command(':').unwrap(); - assert_eq!(cmd.n_addr, 0); - assert_eq!(cmd.args, CommandArgs::Label); + assert_eq!(cmd.args, CommandArgs::Label); } #[test] @@ -604,7 +1154,7 @@ mod tests { fn test_lookup_translate_command() { let cmd = lookup_command('y').unwrap(); assert_eq!(cmd.n_addr, 2); - assert_eq!(cmd.args, CommandArgs::Translate); + assert_eq!(cmd.args, CommandArgs::Transliterate); } #[test] @@ -676,7 +1226,7 @@ mod tests { assert!(result.is_err()); let msg = result.unwrap_err().to_string(); - assert!(msg.contains("script.sed:2:0: error: invalid command code @")); + assert!(msg.contains("script.sed:2:0: error: invalid command code `@'")); } #[test] @@ -725,7 +1275,7 @@ mod tests { #[test] fn test_compile_re_basic() { let (lines, chars) = dummy_providers(); - let regex = compile_regex(&lines, &chars, "abc", false).unwrap(); + let regex = compile_regex(&lines, &chars, "abc", &ctx(), false).unwrap(); assert!(regex.is_match("abc")); assert!(!regex.is_match("ABC")); } @@ -733,7 +1283,7 @@ mod tests { #[test] fn test_compile_re_case_insensitive() { let (lines, chars) = dummy_providers(); - let regex = 
compile_regex(&lines, &chars, "abc", true).unwrap(); + let regex = compile_regex(&lines, &chars, "abc", &ctx(), true).unwrap(); assert!(regex.is_match("abc")); assert!(regex.is_match("ABC")); assert!(regex.is_match("AbC")); @@ -741,33 +1291,29 @@ mod tests { #[test] fn test_compile_re_saved_and_reuse() { + let context = ctx(); // Save a regex let (lines1, chars1) = dummy_providers(); - let _ = compile_regex(&lines1, &chars1, "abc", false).unwrap(); + let _ = compile_regex(&lines1, &chars1, "abc", &context, false).unwrap(); // Now try to reuse it let (lines2, chars2) = dummy_providers(); - let reused = compile_regex(&lines2, &chars2, "", false).unwrap(); + let reused = compile_regex(&lines2, &chars2, "", &context, false).unwrap(); assert!(reused.is_match("abc")); } #[test] fn test_compile_re_empty_and_not_saved() { - // Clear saved regex - SAVED_REGEX.with(|cell| { - *cell.borrow_mut() = None; - }); - let (lines, chars) = dummy_providers(); - let result = compile_regex(&lines, &chars, "", false); + let result = compile_regex(&lines, &chars, "", &ctx(), false); assert!(result.is_err()); // Should fail because nothing was saved } #[test] fn test_compile_re_invalid() { let (lines, chars) = dummy_providers(); - let result = compile_regex(&lines, &chars, "a[d", false); + let result = compile_regex(&lines, &chars, "a[d", &ctx(), false); assert!(result.is_err()); // Should fail due to open bracketed expression } @@ -775,7 +1321,7 @@ mod tests { #[test] fn test_compile_addr_line_number() { let (lines, mut chars) = make_providers("42"); - let addr = compile_address(&lines, &mut chars).unwrap(); + let addr = compile_address(&lines, &mut chars, &ctx()).unwrap(); assert!(matches!(addr.atype, AddressType::Line)); if let AddressValue::LineNumber(n) = addr.value { assert_eq!(n, 42); @@ -787,7 +1333,7 @@ mod tests { #[test] fn test_compile_addr_relative_line() { let (lines, mut chars) = make_providers("+7"); - let addr = compile_address(&lines, &mut chars).unwrap(); + let addr = 
compile_address(&lines, &mut chars, &ctx()).unwrap(); assert!(matches!(addr.atype, AddressType::RelLine)); if let AddressValue::LineNumber(n) = addr.value { assert_eq!(n, 7); @@ -799,14 +1345,14 @@ mod tests { #[test] fn test_compile_addr_last_line() { let (lines, mut chars) = make_providers("$"); - let addr = compile_address(&lines, &mut chars).unwrap(); + let addr = compile_address(&lines, &mut chars, &ctx()).unwrap(); assert!(matches!(addr.atype, AddressType::Last)); } #[test] fn test_compile_addr_regex() { let (lines, mut chars) = make_providers("/hello/"); - let addr = compile_address(&lines, &mut chars).unwrap(); + let addr = compile_address(&lines, &mut chars, &ctx()).unwrap(); assert!(matches!(addr.atype, AddressType::Re)); if let AddressValue::Regex(re) = addr.value { assert!(re.is_match("hello")); @@ -818,7 +1364,7 @@ mod tests { #[test] fn test_compile_addr_regex_other_delimiter() { let (lines, mut chars) = make_providers("\\#hello#"); - let addr = compile_address(&lines, &mut chars).unwrap(); + let addr = compile_address(&lines, &mut chars, &ctx()).unwrap(); assert!(matches!(addr.atype, AddressType::Re)); if let AddressValue::Regex(re) = addr.value { assert!(re.is_match("hello")); @@ -830,7 +1376,7 @@ mod tests { #[test] fn test_compile_addr_regex_with_modifier() { let (lines, mut chars) = make_providers("/hello/I"); - let addr = compile_address(&lines, &mut chars).unwrap(); + let addr = compile_address(&lines, &mut chars, &ctx()).unwrap(); assert!(matches!(addr.atype, AddressType::Re)); if let AddressValue::Regex(re) = addr.value { assert!(re.is_match("HELLO")); // case-insensitive @@ -841,13 +1387,14 @@ mod tests { #[test] fn test_compile_addr_empty_regex_saved() { + let context = ctx(); // First save a regex let (lines1, mut chars1) = make_providers("/saved/"); - let _ = compile_address(&lines1, &mut chars1).unwrap(); + let _ = compile_address(&lines1, &mut chars1, &context).unwrap(); // Then reuse it with empty regex let (lines2, mut chars2) = 
make_providers("//"); - let addr = compile_address(&lines2, &mut chars2).unwrap(); + let addr = compile_address(&lines2, &mut chars2, &context).unwrap(); assert!(matches!(addr.atype, AddressType::Re)); if let AddressValue::Regex(re) = addr.value { assert!(re.is_match("saved")); @@ -860,12 +1407,12 @@ mod tests { #[test] fn test_compile_single_line_address() { let (lines, mut chars) = make_providers("42"); - let mut cmd = Command::default(); - let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 1); assert!(matches!( - cmd.addr1.as_ref().unwrap().atype, + cmd.borrow().addr1.as_ref().unwrap().atype, AddressType::Line )); } @@ -873,26 +1420,26 @@ mod tests { #[test] fn test_compile_relative_address_range() { let (lines, mut chars) = make_providers("2,+3"); - let mut cmd = Command::default(); - let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 2); assert!(matches!( - cmd.addr1.as_ref().unwrap().atype, + cmd.borrow().addr1.as_ref().unwrap().atype, AddressType::Line )); - let v1 = match &cmd.addr1.as_ref().unwrap().value { + let v1 = match &cmd.borrow().addr1.as_ref().unwrap().value { AddressValue::LineNumber(n) => *n, _ => panic!(), }; assert_eq!(v1, 2); assert!(matches!( - cmd.addr2.as_ref().unwrap().atype, + cmd.borrow().addr2.as_ref().unwrap().atype, AddressType::RelLine )); - let v2 = match &cmd.addr2.as_ref().unwrap().value { + let v2 = match &cmd.borrow().addr2.as_ref().unwrap().value { AddressValue::LineNumber(n) => *n, _ => panic!(), }; @@ -902,12 +1449,12 @@ mod tests { #[test] fn test_compile_last_address() { let (lines, mut chars) = make_providers("$"); - let mut cmd = Command::default(); - 
let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 1); assert!(matches!( - cmd.addr1.as_ref().unwrap().atype, + cmd.borrow().addr1.as_ref().unwrap().atype, AddressType::Last )); } @@ -915,16 +1462,16 @@ mod tests { #[test] fn test_compile_absolute_address_range() { let (lines, mut chars) = make_providers("5,10"); - let mut cmd = Command::default(); - let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 2); assert!(matches!( - cmd.addr1.as_ref().unwrap().atype, + cmd.borrow().addr1.as_ref().unwrap().atype, AddressType::Line )); assert!(matches!( - cmd.addr2.as_ref().unwrap().atype, + cmd.borrow().addr2.as_ref().unwrap().atype, AddressType::Line )); } @@ -932,83 +1479,96 @@ mod tests { #[test] fn test_compile_regex_address() { let (lines, mut chars) = make_providers("/foo/"); - let mut cmd = Command::default(); - let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 1); - assert!(matches!(cmd.addr1.as_ref().unwrap().atype, AddressType::Re)); - if let AddressValue::Regex(re) = &cmd.addr1.as_ref().unwrap().value { + assert!(matches!( + cmd.borrow().addr1.as_ref().unwrap().atype, + AddressType::Re + )); + if let AddressValue::Regex(re) = &cmd.borrow().addr1.as_ref().unwrap().value { assert!(re.is_match("foo")); assert!(!re.is_match("bar")); } else { panic!("expected a regex address"); - } + }; } #[test] fn test_compile_regex_address_range_other_delimiter() { let (lines, mut chars) = make_providers("\\#foo# , \\|bar|"); - let 
mut cmd = Command::default(); - let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 2); - assert!(matches!(cmd.addr1.as_ref().unwrap().atype, AddressType::Re)); - if let AddressValue::Regex(re) = &cmd.addr1.as_ref().unwrap().value { + assert!(matches!( + cmd.borrow().addr1.as_ref().unwrap().atype, + AddressType::Re + )); + if let AddressValue::Regex(re) = &cmd.borrow().addr1.as_ref().unwrap().value { assert!(re.is_match("foo")); assert!(!re.is_match("bar")); } else { panic!("expected a regex address"); } - assert!(matches!(cmd.addr2.as_ref().unwrap().atype, AddressType::Re)); - if let AddressValue::Regex(re) = &cmd.addr2.as_ref().unwrap().value { + assert!(matches!( + cmd.borrow().addr2.as_ref().unwrap().atype, + AddressType::Re + )); + if let AddressValue::Regex(re) = &cmd.borrow().addr2.as_ref().unwrap().value { assert!(re.is_match("bar")); assert!(!re.is_match("foo")); } else { panic!("expected a regex address"); - } + }; } #[test] fn test_compile_regex_with_modifier() { let (lines, mut chars) = make_providers("/foo/I"); - let mut cmd = Command::default(); - let n_addr = compile_address_range(&lines, &mut chars, &mut cmd).unwrap(); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap(); assert_eq!(n_addr, 1); - assert!(matches!(cmd.addr1.as_ref().unwrap().atype, AddressType::Re)); - if let AddressValue::Regex(re) = &cmd.addr1.as_ref().unwrap().value { + assert!(matches!( + cmd.borrow().addr1.as_ref().unwrap().atype, + AddressType::Re + )); + if let AddressValue::Regex(re) = &cmd.borrow().addr1.as_ref().unwrap().value { assert!(re.is_match("FOO")); assert!(re.is_match("foo")); } else { panic!("expected a regex address with case-insensitive match"); - } + }; } #[test] fn 
test_compile_re_reuse_saved() { + let context = ctx(); // First save a regex let (lines1, mut chars1) = make_providers("/abc/"); - let mut cmd1 = Command::default(); - compile_address_range(&lines1, &mut chars1, &mut cmd1).unwrap(); + let mut cmd1 = Rc::new(RefCell::new(Command::default())); + compile_address_range(&lines1, &mut chars1, &mut cmd1, &context).unwrap(); // Now reuse it let (lines2, mut chars2) = make_providers("//"); - let mut cmd2 = Command::default(); - let n_addr = compile_address_range(&lines2, &mut chars2, &mut cmd2).unwrap(); + let mut cmd2 = Rc::new(RefCell::new(Command::default())); + let n_addr = compile_address_range(&lines2, &mut chars2, &mut cmd2, &context).unwrap(); assert_eq!(n_addr, 1); assert!(matches!( - cmd2.addr1.as_ref().unwrap().atype, + cmd2.borrow().addr1.as_ref().unwrap().atype, AddressType::Re )); - if let AddressValue::Regex(re) = &cmd2.addr1.as_ref().unwrap().value { + if let AddressValue::Regex(re) = &cmd2.borrow().addr1.as_ref().unwrap().value { assert!(re.is_match("abc")); - } + }; } - // compile_thread + // compile_sequence fn make_provider(lines: &[&str]) -> ScriptLineProvider { let input = lines .iter() @@ -1017,35 +1577,36 @@ mod tests { ScriptLineProvider::new(input) } - fn make_cli_options() -> CliOptions { - CliOptions::default() + fn empty_line() -> ScriptCharProvider { + ScriptCharProvider::new("") } #[test] - fn test_compile_thread_empty_input() { + fn test_compile_sequence_empty_input() { let mut provider = make_provider(&[]); - let mut opts = make_cli_options(); + let mut opts = ctx(); - let result = compile_thread(&mut provider, &mut opts).unwrap(); + let result = compile_sequence(&mut provider, &mut empty_line(), &mut opts).unwrap(); assert!(result.is_none()); } #[test] - fn test_compile_thread_comment_only() { + fn test_compile_sequence_comment_only() { let mut provider = make_provider(&["# comment", " ", ";;"]); - let mut opts = make_cli_options(); + let mut opts = ctx(); - let result = compile_thread(&mut 
provider, &mut opts).unwrap(); + let result = compile_sequence(&mut provider, &mut empty_line(), &mut opts).unwrap(); assert!(result.is_none()); } #[test] - fn test_compile_thread_single_command() { + fn test_compile_sequence_single_command() { let mut provider = make_provider(&["42q"]); - let mut opts = make_cli_options(); + let mut opts = ctx(); - let result = compile_thread(&mut provider, &mut opts).unwrap(); - let cmd = result.unwrap(); + let result = compile_sequence(&mut provider, &mut empty_line(), &mut opts).unwrap(); + let binding = result.unwrap(); + let cmd = binding.borrow(); assert_eq!(cmd.code, 'q'); assert!(!cmd.non_select); @@ -1063,12 +1624,13 @@ mod tests { } #[test] - fn test_compile_thread_non_selected_single_command() { + fn test_compile_sequence_non_selected_single_command() { let mut provider = make_provider(&["42!p"]); - let mut opts = make_cli_options(); + let mut opts = ctx(); - let result = compile_thread(&mut provider, &mut opts).unwrap(); - let cmd = result.unwrap(); + let result = compile_sequence(&mut provider, &mut empty_line(), &mut opts).unwrap(); + let binding = result.unwrap(); + let cmd = binding.borrow(); assert_eq!(cmd.code, 'p'); assert!(cmd.non_select); @@ -1086,29 +1648,33 @@ mod tests { } #[test] - fn test_compile_thread_multiple_lines() { + fn test_compile_sequence_multiple_lines() { let mut provider = make_provider(&["1q", "2d"]); - let mut opts = make_cli_options(); + let mut opts = ctx(); - let result = compile_thread(&mut provider, &mut opts).unwrap(); - let first = result.unwrap(); + let result = compile_sequence(&mut provider, &mut empty_line(), &mut opts).unwrap(); + let binding = result.unwrap(); + let first = binding.borrow(); assert_eq!(first.code, 'q'); - let second = first.next.unwrap(); + let binding = first.next.clone().unwrap(); + let second = binding.borrow(); assert_eq!(second.code, 'd'); assert!(second.next.is_none()); } #[test] - fn test_compile_thread_single_line_multiple_commands() { + fn 
test_compile_sequence_single_line_multiple_commands() { let mut provider = make_provider(&["1q;2d"]); - let mut opts = make_cli_options(); + let mut opts = ctx(); - let result = compile_thread(&mut provider, &mut opts).unwrap(); - let first = result.unwrap(); + let result = compile_sequence(&mut provider, &mut empty_line(), &mut opts).unwrap(); + let binding = result.unwrap(); + let first = binding.borrow(); assert_eq!(first.code, 'q'); - let second = first.next.unwrap(); + let binding = first.next.clone().unwrap(); + let second = binding.borrow(); assert_eq!(second.code, 'd'); assert!(second.next.is_none()); } @@ -1117,10 +1683,11 @@ mod tests { #[test] fn test_compile_single_command() { let scripts = vec![ScriptValue::StringVal("1q".to_string())]; - let mut opts = CliOptions::default(); + let mut opts = ProcessingContext::default(); let result = compile(scripts, &mut opts).unwrap(); - let cmd = result.unwrap(); + let binding = result.unwrap(); + let cmd = binding.borrow(); assert_eq!(cmd.code, 'q'); @@ -1135,4 +1702,641 @@ mod tests { assert!(cmd.next.is_none()); } + + // compile_replacement + #[test] + fn test_compile_replacement_literal() { + let (mut lines, mut chars) = make_providers("/hello/"); + let template = compile_replacement(&mut lines, &mut chars).unwrap(); + + assert_eq!(template.parts.len(), 1); + assert!(matches!(&template.parts[0], ReplacementPart::Literal(s) if s == "hello")); + } + + #[test] + fn test_compile_replacement_backrefs_and_literal() { + let (mut lines, mut chars) = make_providers("/prefix \\1 and \\2/"); + let template = compile_replacement(&mut lines, &mut chars).unwrap(); + + assert_eq!(template.parts.len(), 4); + assert!(matches!(&template.parts[0], ReplacementPart::Literal(s) if s == "prefix ")); + assert!(matches!(&template.parts[1], ReplacementPart::Group(1))); + assert!(matches!(&template.parts[2], ReplacementPart::Literal(s) if s == " and ")); + assert!(matches!(&template.parts[3], ReplacementPart::Group(2))); + } + + #[test] 
+ fn test_compile_replacement_whole_match() { + let (mut lines, mut chars) = make_providers("/The match was: &/"); + let template = compile_replacement(&mut lines, &mut chars).unwrap(); + + assert_eq!(template.parts.len(), 2); + assert!( + matches!(&template.parts[0], ReplacementPart::Literal(s) if s == "The match was: ") + ); + assert!(matches!(&template.parts[1], ReplacementPart::WholeMatch)); + } + + #[test] + fn test_compile_replacement_ampersand() { + let (mut lines, mut chars) = make_providers("/Simon \\& Garfunkel/"); + let template = compile_replacement(&mut lines, &mut chars).unwrap(); + + assert_eq!(template.parts.len(), 1); + assert!( + matches!(&template.parts[0], ReplacementPart::Literal(s) if s == "Simon & Garfunkel") + ); + } + + #[test] + fn test_compile_replacement_escape_sequences() { + let (mut lines, mut chars) = make_providers("/line\\nnewline\\tend/"); + let template = compile_replacement(&mut lines, &mut chars).unwrap(); + + assert_eq!(template.parts.len(), 1); + assert!(matches!( + &template.parts[0], + ReplacementPart::Literal(s) if s == "line\nnewline\tend" + )); + } + + #[test] + fn test_compile_replacement_line_continuation() { + let script = vec![ + ScriptValue::StringVal("/first line\\".to_string()), + ScriptValue::StringVal(" continued/".to_string()), + ]; + let mut provider = ScriptLineProvider::new(script); + let first_line = provider.next_line().unwrap().unwrap(); + let mut chars = ScriptCharProvider::new(&first_line); + + let template = compile_replacement(&mut provider, &mut chars).unwrap(); + assert_eq!(template.parts.len(), 1); + assert!(matches!( + &template.parts[0], + ReplacementPart::Literal(s) if s == "first line\n continued" + )); + } + + // compile_subst_flags + #[test] + fn test_compile_subst_flag_g() { + let (lines, mut chars) = make_providers("g"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + assert_eq!(subst.occurrence, 0); // 'g' means all occurrences 
+ } + + #[test] + fn test_compile_subst_flag_p() { + let (lines, mut chars) = make_providers("p"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + assert!(subst.print_flag); + } + + #[test] + fn test_compile_subst_flag_uppercase_i() { + let (lines, mut chars) = make_providers("I"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + assert!(subst.ignore_case); + } + + #[test] + fn test_compile_subst_flag_i_lowercase() { + let (lines, mut chars) = make_providers("i"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + assert!(subst.ignore_case); + } + + #[test] + fn test_compile_subst_flag_number() { + let (lines, mut chars) = make_providers("3"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + assert_eq!(subst.occurrence, 3); + } + + #[test] + fn test_compile_subst_flag_g_and_number_should_fail() { + let (lines, mut chars) = make_providers("g3"); + let mut subst = Substitution::default(); + + let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + assert!( + err.to_string() + .contains("multiple 'g' or numeric flags in substitute command") + ); + } + + #[test] + fn test_compile_subst_flag_number_and_g_should_fail() { + let (lines, mut chars) = make_providers("2g"); + let mut subst = Substitution::default(); + + let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + assert!( + err.to_string() + .contains("multiple 'g' or numeric flags in substitute command") + ); + } + + #[test] + fn test_compile_subst_flag_w_missing_filename() { + let (lines, mut chars) = make_providers("w "); + let mut subst = Substitution::default(); + + let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + assert!(err.to_string().contains("missing filename")); + } + + #[test] + fn 
test_compile_subst_flag_w_with_filename() { + let (lines, mut chars) = make_providers("w out.txt"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + assert_eq!( + subst.write_file.as_ref().map(|w| w.borrow().path.clone()), + Some(std::path::PathBuf::from("out.txt")) + ); + } + + #[test] + fn test_compile_subst_flag_invalid_flag() { + let (lines, mut chars) = make_providers("z"); + let mut subst = Substitution::default(); + + let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + assert!(err.to_string().contains("invalid substitute flag")); + } + // compile_subst_command + #[test] + fn test_compile_subst_invalid_delimiter_backslash() { + let (mut lines, mut chars) = make_providers("s\\foo\\bar\\"); + let mut cmd = Command::default(); + + let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + assert!( + err.to_string() + .contains("substitute pattern cannot be delimited") + ); + } + + #[test] + fn test_compile_subst_empty_pattern() { + let (mut lines, mut chars) = make_providers("s//bar/"); + let mut cmd = Command::default(); + + let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + assert!(err.to_string().contains("unterminated substitute pattern")); + } + + #[test] + fn test_compile_subst_extra_characters_at_end() { + let (mut lines, mut chars) = make_providers("s/foo/bar/x"); + let mut cmd = Command::default(); + + let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + assert!(err.to_string().contains("invalid substitute flag")); + } + + #[test] + fn test_compile_subst_semicolon_indicates_continue() { + let (mut lines, mut chars) = make_providers("s/foo/bar/;"); + let mut cmd = Command::default(); + + compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap(); + + if let CommandData::Substitution(subst) = &cmd.data { + assert_eq!(subst.replacement.parts.len(), 
1); + } else { + panic!("Expected CommandData::Substitution"); + } + } + + #[test] + fn test_compile_subst_sets_command_data() { + let (mut lines, mut chars) = make_providers("s/foo/bar/"); + let mut cmd = Command::default(); + + compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap(); + match &cmd.data { + CommandData::Substitution(subst) => { + assert_eq!(subst.replacement.parts.len(), 1); + assert!( + matches!(&subst.replacement.parts[0], ReplacementPart::Literal(s) if s == "bar") + ); + } + _ => panic!("Expected CommandData::Substitution"), + } + } + + #[test] + fn test_compile_subst_invalid_group_number() { + let (mut lines, mut chars) = make_providers(r"s/\(.\)\(.\)/\3\2\1/"); + let mut cmd = Command::default(); + + let err = compile_subst_command(&mut lines, &mut chars, &mut cmd, &ctx()).unwrap_err(); + assert!( + err.to_string() + .contains("group number \\3 is larger than the 2 available RE groups") + ); + } + + // bre_to_ere + #[test] + fn test_bre_group_translation() { + assert_eq!(bre_to_ere(r"\(abc\)"), "(abc)"); + assert_eq!(bre_to_ere(r"a\(b\)c"), "a(b)c"); + } + + #[test] + fn test_ere_metacharacters_escaped() { + assert_eq!(bre_to_ere(r"a+b?c{1}|(d)"), r"a\+b\?c\{1\}\|\(d\)"); + } + + #[test] + fn test_literal_backslashes_preserved() { + assert_eq!(bre_to_ere(r"foo\\bar"), r"foo\\bar"); + assert_eq!(bre_to_ere(r"\."), r"\."); + } + + #[test] + fn test_character_classes_unchanged() { + assert_eq!(bre_to_ere(r"[a-z]"), "[a-z]"); + assert_eq!(bre_to_ere(r"[^0-9]"), "[^0-9]"); + } + + #[test] + fn test_anchors_and_dot_and_star() { + assert_eq!(bre_to_ere(r"^a.*b$"), "^a.*b$"); + } + + #[test] + fn test_trailing_backslash_is_preserved() { + assert_eq!(bre_to_ere(r"abc\"), r"abc\"); + } + + // patch_block_endings + + // Create a command with the specified code. 
+ fn command_with_code(code: char) -> Rc> { + Rc::new(RefCell::new(Command { + code, + ..Default::default() + })) + } + + // Link the vector of passed commands into a list, returning head. + fn link_commands(cmds: Vec>>) -> Option>> { + for i in 0..cmds.len().saturating_sub(1) { + cmds[i].borrow_mut().next = Some(cmds[i + 1].clone()); + } + cmds.first().cloned() + } + + // Return the command codes along the passed linked list. + fn collect_codes(mut head: Option>>) -> Vec { + let mut result = Vec::new(); + while let Some(cmd) = head { + let cmd_ref = cmd.borrow(); + result.push(cmd_ref.code); + head = cmd_ref.next.clone(); + } + result + } + + #[test] + fn test_flat_chain() { + let a = command_with_code('a'); + let b = command_with_code('b'); + let head = link_commands(vec![a.clone(), b.clone()]); + + patch_block_endings(head.clone()); + + assert_eq!(collect_codes(head), vec!['a', 'b']); + } + + #[test] + fn test_simple_block_relinks_tail() { + // a ; { x ; y ; } b + let a = command_with_code('a'); + let block = command_with_code('{'); + let x = command_with_code('x'); + let y = command_with_code('y'); + let b = command_with_code('b'); + + let head = link_commands(vec![a.clone(), block.clone(), b.clone()]); + let sub_head = link_commands(vec![x.clone(), y.clone()]); + block.borrow_mut().data = CommandData::Block(sub_head.clone()); + + patch_block_endings(head.clone()); + + // Expect x -> y -> b + assert_eq!(collect_codes(sub_head), vec!['x', 'y', 'b']); + // Expect a -> { -> b still valid + assert_eq!(collect_codes(Some(a)), vec!['a', '{', 'b']); + } + + #[test] + fn test_empty_block_no_panic() { + let a = command_with_code('a'); + a.borrow_mut().data = CommandData::Block(None); + + patch_block_endings(Some(a.clone())); + + assert_eq!(collect_codes(Some(a)), vec!['a']); + } + + #[test] + fn test_nested_blocks() { + // a + // { + // m + // { + // x + // y + // } + // n + // } + // b + let a = command_with_code('a'); + let b = command_with_code('b'); + let x = 
command_with_code('x'); + let y = command_with_code('y'); + let m = command_with_code('m'); + let n = command_with_code('n'); + let outer_block = command_with_code('{'); + let inner_block = command_with_code('{'); + + let head = link_commands(vec![a.clone(), outer_block.clone(), b.clone()]); + let outer = link_commands(vec![m.clone(), inner_block.clone(), n.clone()]); + let inner = link_commands(vec![x.clone(), y.clone()]); + outer_block.borrow_mut().data = CommandData::Block(outer.clone()); + inner_block.borrow_mut().data = CommandData::Block(inner.clone()); + + patch_block_endings(head.clone()); + + assert_eq!(collect_codes(head), vec!['a', '{', 'b']); + assert_eq!(collect_codes(inner), vec!['x', 'y', 'n', 'b']); + assert_eq!(collect_codes(outer), vec!['m', '{', 'n', 'b']); + } + + #[test] + fn test_empty_nested_blocks() { + // a + // { + // { + // x + // } + // } + // b + let a = command_with_code('a'); + let b = command_with_code('b'); + let x = command_with_code('x'); + let outer_block = command_with_code('{'); + let inner_block = command_with_code('{'); + + let head = link_commands(vec![a.clone(), outer_block.clone(), b.clone()]); + let outer = link_commands(vec![inner_block.clone()]); + let inner = link_commands(vec![x.clone()]); + outer_block.borrow_mut().data = CommandData::Block(outer.clone()); + inner_block.borrow_mut().data = CommandData::Block(inner.clone()); + + patch_block_endings(head.clone()); + + assert_eq!(collect_codes(head), vec!['a', '{', 'b']); + assert_eq!(collect_codes(outer), vec!['{', 'b']); + assert_eq!(collect_codes(inner), vec!['x', 'b']); + } + + // compile_label_command + #[test] + fn test_compile_label_command() { + let (mut lines, mut chars) = make_providers(": foo"); + let mut cmd = Command::default(); + + compile_label_command(&mut lines, &mut chars, &mut cmd).unwrap(); + match &cmd.data { + CommandData::Label(label) => { + let name = label.clone().unwrap(); + assert_eq!(name, "foo"); + } + _ => panic!("Expected 
CommandData::Label"), + } + } + + #[test] + fn test_compile_missing_label_command() { + let (mut lines, mut chars) = make_providers(": ;"); + let mut cmd = Command::default(); + + cmd.code = ':'; + let err = compile_label_command(&mut lines, &mut chars, &mut cmd).unwrap_err(); + assert!(err.to_string().contains("empty label")); + } + + #[test] + fn test_compile_empty_label_command() { + let (mut lines, mut chars) = make_providers("b ;"); + let mut cmd = Command::default(); + + cmd.code = 'b'; + compile_label_command(&mut lines, &mut chars, &mut cmd).unwrap(); + match &cmd.data { + CommandData::Label(label) => { + assert!(label.is_none()); + } + _ => panic!("Expected CommandData::Label(None)"), + } + } + + // populate_label_map + fn command_with_data(data: CommandData) -> Rc> { + Rc::new(RefCell::new(Command { + data, + ..Default::default() + })) + } + + #[test] + fn test_single_label() { + let cmd = command_with_data(CommandData::Label(Some("start".to_string()))); + cmd.borrow_mut().code = ':'; + let mut context = ProcessingContext::default(); + + populate_label_map(Some(cmd.clone()), &mut context).unwrap(); + + assert_eq!(context.label_to_command_map.len(), 1); + assert!(context.label_to_command_map.contains_key("start")); + assert!(Rc::ptr_eq(&context.label_to_command_map["start"], &cmd)); + } + + #[test] + fn test_label_inside_block() { + let nested = command_with_data(CommandData::Label(Some("inside".to_string()))); + nested.borrow_mut().code = ':'; + let block = command_with_data(CommandData::Block(Some(nested.clone()))); + let mut context = ProcessingContext::default(); + + populate_label_map(Some(block.clone()), &mut context).unwrap(); + + assert_eq!(context.label_to_command_map.len(), 1); + assert!(context.label_to_command_map.contains_key("inside")); + assert!(Rc::ptr_eq(&context.label_to_command_map["inside"], &nested)); + } + + #[test] + fn test_multiple_labels() { + let a = command_with_data(CommandData::Label(Some("a".to_string()))); + 
a.borrow_mut().code = ':'; + let b = command_with_data(CommandData::Label(Some("b".to_string()))); + b.borrow_mut().code = ':'; + let head = link_commands(vec![a.clone(), b.clone()]); + + let mut context = ProcessingContext::default(); + populate_label_map(head, &mut context).unwrap(); + + assert_eq!(context.label_to_command_map.len(), 2); + assert!(context.label_to_command_map.contains_key("a")); + assert!(context.label_to_command_map.contains_key("b")); + } + + #[test] + fn test_no_labels() { + let a = command_with_data(CommandData::None); + let b = command_with_data(CommandData::None); + let head = link_commands(vec![a.clone(), b.clone()]); + + let mut context = ProcessingContext::default(); + populate_label_map(head, &mut context).unwrap(); + + assert_eq!(context.label_to_command_map.len(), 0); + } + + #[test] + fn test_label_none_is_ignored() { + let cmd = command_with_data(CommandData::Label(None)); + let mut context = ProcessingContext::default(); + + populate_label_map(Some(cmd.clone()), &mut context).unwrap(); + + // The map should remain empty since the label is None + assert_eq!(context.label_to_command_map.len(), 0); + } + + #[test] + fn test_duplicate_label_gives_error() { + let a1 = command_with_data(CommandData::Label(Some("dup".to_string()))); + a1.borrow_mut().code = ':'; + + let a2 = command_with_data(CommandData::Label(Some("dup".to_string()))); + a2.borrow_mut().code = ':'; + + let head = link_commands(vec![a1.clone(), a2.clone()]); + let mut context = ProcessingContext::default(); + + let result = populate_label_map(head, &mut context); + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("duplicate label `dup'")); + } + + // resolve_branch_targets + #[test] + fn test_branch_target_resolved() { + let target = command_with_data(CommandData::Label(Some("end".to_string()))); + target.borrow_mut().code = ':'; + + let branch = command_with_data(CommandData::Label(Some("end".to_string()))); + 
branch.borrow_mut().code = 'b'; + + let head = link_commands(vec![branch.clone(), target.clone()]); + let mut context = ProcessingContext::default(); + + populate_label_map(head.clone(), &mut context).unwrap(); + let result = resolve_branch_targets(head.clone(), &mut context); + assert!(result.is_ok()); + + match &branch.borrow().data { + CommandData::BranchTarget(Some(ptr)) => { + assert!(Rc::ptr_eq(ptr, &target)); + } + _ => panic!("Expected BranchTarget(Some(...))"), + } + } + + #[test] + fn test_branch_target_missing_label_gives_error() { + let branch = command_with_data(CommandData::Label(Some("nope".to_string()))); + branch.borrow_mut().code = 't'; + + let mut context = ProcessingContext::default(); + let result = resolve_branch_targets(Some(branch.clone()), &mut context); + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("undefined label `nope'")); + } + + #[test] + fn test_branch_with_no_label_resolves_to_none() { + let branch = command_with_data(CommandData::Label(None)); + branch.borrow_mut().code = 'b'; + + let mut context = ProcessingContext::default(); + let result = resolve_branch_targets(Some(branch.clone()), &mut context); + + assert!(result.is_ok()); + match &branch.borrow().data { + CommandData::BranchTarget(None) => {} // ok + _ => panic!("Expected BranchTarget(None)"), + } + } + + #[test] + fn test_non_branch_label_is_unchanged() { + let cmd = command_with_data(CommandData::Label(Some("unchanged".to_string()))); + cmd.borrow_mut().code = 'q'; // not a branch command + + let mut context = ProcessingContext::default(); + let result = resolve_branch_targets(Some(cmd.clone()), &mut context); + assert!(result.is_ok()); + + match &cmd.borrow().data { + CommandData::Label(Some(label)) => assert_eq!(label, "unchanged"), + _ => panic!("Expected Label(Some(...)) to remain unchanged"), + } + } + + #[test] + fn test_branch_in_nested_block() { + let label = 
command_with_data(CommandData::Label(Some("inner".to_string()))); + label.borrow_mut().code = ':'; + + let branch = command_with_data(CommandData::Label(Some("inner".to_string()))); + branch.borrow_mut().code = 't'; + + let block = command_with_data(CommandData::Block(Some(label.clone()))); + let head = link_commands(vec![branch.clone(), block]); + + let mut context = ProcessingContext::default(); + populate_label_map(Some(label.clone()), &mut context).unwrap(); + let result = resolve_branch_targets(head.clone(), &mut context); + + assert!(result.is_ok()); + match &branch.borrow().data { + CommandData::BranchTarget(Some(ptr)) => assert!(Rc::ptr_eq(ptr, &label)), + _ => panic!("Expected BranchTarget(Some(...))"), + } + } } diff --git a/src/uu/sed/src/delimited_parser.rs b/src/uu/sed/src/delimited_parser.rs index 7a5dd269..e96e4b04 100644 --- a/src/uu/sed/src/delimited_parser.rs +++ b/src/uu/sed/src/delimited_parser.rs @@ -101,7 +101,7 @@ fn create_control_char(x: char) -> Option { /// At entry line.current() must have advanced after the `\\`. /// Advance line to the first character not part of the escape. /// Return `None` if an invalid escape has been specified. 
-fn parse_char_escape(line: &mut ScriptCharProvider) -> Option { +pub fn parse_char_escape(line: &mut ScriptCharProvider) -> Option { match line.current() { 'a' => { line.advance(); @@ -783,6 +783,14 @@ mod tests { assert_eq!(line.current(), '/'); } + #[test] + fn test_regex_with_capture() { + let (lines, mut line) = make_providers(r"/\(.\)/c/"); + let parsed = parse_regex(&lines, &mut line).unwrap(); + assert_eq!(parsed, r"\(.\)"); + assert_eq!(line.current(), '/'); + } + #[test] fn test_regex_with_escape_sequence() { let (lines, mut line) = make_providers("/ab\\n/"); diff --git a/src/uu/sed/src/fast_io.rs b/src/uu/sed/src/fast_io.rs new file mode 100644 index 00000000..cb46c7f5 --- /dev/null +++ b/src/uu/sed/src/fast_io.rs @@ -0,0 +1,1213 @@ +// Zero-copy line-based I/O +// +// Abstractions that allow file lines to be processed and output +// in mmapped memory space. By coalescing output requests an +// efficient write(2) system call can be issued for them, bypassing +// the copy required for output through BufWriter. +// Search for "main" to see a usage example. +// +// SPDX-License-Identifier: MIT +// Copyright (c) 2025 Diomidis Spinellis +// +// This file is part of the uutils sed package. +// It is licensed under the MIT License. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +#[cfg(unix)] +use memmap2::Mmap; + +use std::fs::File; +use std::io::{self, BufRead, BufReader, BufWriter, Read, Write}; + +#[cfg(not(unix))] +use std::marker::PhantomData; + +#[cfg(unix)] +use std::os::unix::io::AsRawFd; + +use std::path::PathBuf; +use std::str; + +#[cfg(unix)] +use uucore::libc::{c_void, write}; + +use uucore::error::UError; + +#[cfg(unix)] +use uucore::error::USimpleError; + +// Define two cursors for iterating over lines: +// - MmapLineCursor based on mmap(2), +// - ReadLineCursor based on BufReader. + +/// Cursor for zero-copy iteration over mmap’d file.
+#[cfg(unix)]
+pub struct MmapLineCursor<'a> {
+    data: &'a [u8], // Input being iterated over
+    pos: usize,     // Offset in data of the next line to return
+}
+
+#[cfg(unix)]
+/// Represents the get_line return: one line plus whether it was the last.
+pub struct NextMmapLine<'a> {
+    pub content: &'a [u8],   // Line without its terminating newline
+    pub full_span: &'a [u8], // Line including its terminating newline, if any
+    pub is_last_line: bool,  // True if the line extends to the end of the data
+}
+
+#[cfg(unix)]
+impl<'a> MmapLineCursor<'a> {
+    /// Construct a cursor positioned at the start of the passed data.
+    fn new(data: &'a [u8]) -> Self {
+        Self { data, pos: 0 }
+    }
+
+    /// Return the next line, if available, or None.
+    fn get_line(&mut self) -> io::Result<Option<NextMmapLine<'a>>> {
+        if self.pos >= self.data.len() {
+            return Ok(None);
+        }
+
+        let start = self.pos;
+        // The span ends just after the next \n, or at the end of the
+        // data when the final line is unterminated.
+        let end = match self.data[start..].iter().position(|&b| b == b'\n') {
+            Some(offset) => start + offset + 1, // include \n in full span
+            None => self.data.len(),
+        };
+
+        self.pos = end;
+        let full_span = &self.data[start..end];
+        let content = full_span.strip_suffix(b"\n").unwrap_or(full_span);
+
+        let is_last_line = self.pos >= self.data.len();
+        Ok(Some(NextMmapLine {
+            content,
+            full_span,
+            is_last_line,
+        }))
+    }
+}
+
+/// Buffered line reader from any BufRead input.
+pub struct ReadLineCursor {
+    reader: Box<dyn BufRead>,
+    buffer: String,
+}
+
+impl ReadLineCursor {
+    /// Construct from anything that implements `Read`.
+    fn new<R: Read + 'static>(r: R) -> Self {
+        let buf = BufReader::new(r);
+        Self {
+            reader: Box::new(buf),
+            buffer: String::new(),
+        }
+    }
+
+    /// If a line is available, return it, its \n termination,
+    /// and next line availability, otherwise return None.
+    fn get_line(&mut self) -> io::Result<Option<(String, bool, bool)>> {
+        self.buffer.clear();
+        // read_line *includes* the '\n' if present
+        let bytes_read = self.reader.read_line(&mut self.buffer)?;
+        if bytes_read == 0 {
+            return Ok(None);
+        }
+        // O(1) check whether it ended in '\n'
+        let has_newline = self.buffer.ends_with('\n');
+        // strip it if you don’t want to expose it to the caller
+        if has_newline {
+            self.buffer.pop();
+        }
+        // Hand the String to the caller, leaving an empty buffer behind
+        let line = std::mem::take(&mut self.buffer);
+        // Peek at the reader: an empty fill means this was the last line
+        let is_last_line = self.reader.fill_buf()?.is_empty();
+        Ok(Some((line, has_newline, is_last_line)))
+    }
+}
+
+/// A chunk of data that is input and can be output, often very efficiently
+#[derive(Debug, PartialEq, Eq)]
+pub struct IOChunk<'a> {
+    utf8_verified: bool, // True if the contents are valid UTF-8
+    content: IOChunkContent<'a>,
+}
+
+impl<'a> IOChunk<'a> {
+    /// Construct an IOChunk from the given content
+    fn from_content(content: IOChunkContent<'a>) -> Self {
+        Self {
+            utf8_verified: false,
+            content,
+        }
+    }
+
+    /// Clear the object's contents, converting it into Owned if needed.
+    pub fn clear(&mut self) {
+        self.utf8_verified = true;
+        match &mut self.content {
+            IOChunkContent::Owned {
+                content,
+                has_newline,
+                ..
+            } => {
+                content.clear();
+                *has_newline = false;
+            }
+            #[cfg(unix)]
+            _ => {
+                self.content = IOChunkContent::new_owned(String::new(), false);
+            }
+        }
+    }
+
+    /// Return true if the content ends with a newline.
+    pub fn is_newline_terminated(&self) -> bool {
+        match &self.content {
+            IOChunkContent::Owned { has_newline, .. } => *has_newline,
+            #[cfg(unix)]
+            IOChunkContent::MmapInput { full_span, .. } => full_span.last() == Some(&b'\n'),
+        }
+    }
+
+    /// Set the object's contents to the specified string.
+    /// Convert it into Owned if needed.
+    pub fn set_to_string(&mut self, new_content: String, add_newline: bool) {
+        self.utf8_verified = true;
+        match &mut self.content {
+            IOChunkContent::Owned {
+                content,
+                has_newline,
+                ..
+            } => {
+                *content = new_content;
+                *has_newline = add_newline;
+            }
+            #[cfg(unix)]
+            _ => {
+                self.content = IOChunkContent::new_owned(new_content, add_newline);
+            }
+        }
+    }
+
+    /// Return the content as a str.
+    /// Fails if mmapped content is not valid UTF-8; a failure is reported
+    /// again on every subsequent call.
+    pub fn as_str(&mut self) -> Result<&str, Box<dyn UError>> {
+        match &self.content {
+            #[cfg(unix)]
+            IOChunkContent::MmapInput { content, .. } => {
+                if self.utf8_verified {
+                    // Use cached result
+                    // SAFETY: the flag is set only after the bytes of this
+                    // chunk have passed str::from_utf8 below.
+                    Ok(unsafe { self.content.as_str_unchecked() })
+                } else {
+                    match str::from_utf8(content) {
+                        Ok(valid_str) => {
+                            // Cache only a successful validation: marking
+                            // invalid bytes as verified would send the next
+                            // call through from_utf8_unchecked.
+                            self.utf8_verified = true;
+                            Ok(valid_str)
+                        }
+                        Err(e) => Err(USimpleError::new(1, e.to_string())),
+                    }
+                }
+            }
+            IOChunkContent::Owned { content, .. } => Ok(content),
+        }
+    }
+
+    /// Convert content to the Owned variant if it's not already.
+    /// Fails if the conversion to UTF-8 fails.
+    pub fn ensure_owned(&mut self) -> Result<(), Box<dyn UError>> {
+        match &self.content {
+            IOChunkContent::Owned { .. } => Ok(()), // already owned
+            #[cfg(unix)]
+            IOChunkContent::MmapInput { content, full_span } => {
+                match std::str::from_utf8(content) {
+                    Ok(valid_str) => {
+                        let has_newline = full_span.last().copied() == Some(b'\n');
+                        self.content =
+                            IOChunkContent::new_owned(valid_str.to_string(), has_newline);
+                        self.utf8_verified = true;
+                        Ok(())
+                    }
+                    Err(e) => Err(USimpleError::new(1, e.to_string())),
+                }
+            }
+        }
+    }
+
+    /// Return mutable access to the content and has_newline fields.
+    /// The chunk is first converted to Owned, so this fails if that
+    /// conversion fails.
+    pub fn fields_mut(&mut self) -> Result<(&mut String, &mut bool), Box<dyn UError>> {
+        self.ensure_owned()?;
+
+        match &mut self.content {
+            IOChunkContent::Owned {
+                content,
+                has_newline,
+                ..
+            } => Ok((content, has_newline)),
+            #[allow(unreachable_patterns)]
+            _ => unreachable!("ensure_owned should convert to Owned"),
+        }
+    }
+}
+
+/// Data to be written to a file. It can come from the mmapped
+/// memory space, in which case it is tracked to allow coalescing
+/// and bypassing BufWriter, or it can be other data from the process's
+/// memory space.
+#[derive(Debug, PartialEq, Eq)]
+enum IOChunkContent<'a> {
+    #[cfg(unix)]
+    MmapInput {
+        content: &'a [u8],   // Line without newline
+        full_span: &'a [u8], // Line including original newline, if any
+    },
+    Owned {
+        content: String,   // Line content without newline
+        has_newline: bool, // True if \n-terminated
+        #[cfg(not(unix))]
+        _phantom: PhantomData<&'a ()>, // Silence E0392 warning
+    },
+}
+
+impl IOChunkContent<'_> {
+    /// Construct a new Owned chunk.
+    pub fn new_owned(content: String, has_newline: bool) -> Self {
+        #[cfg(unix)]
+        return IOChunkContent::Owned {
+            content,
+            has_newline,
+        };
+
+        #[cfg(not(unix))]
+        return IOChunkContent::Owned {
+            content,
+            has_newline,
+            // Avoid E0063 missing _phantom initialization errors
+            _phantom: std::marker::PhantomData,
+        };
+    }
+
+    /// Return the content as a str without validating it.
+    ///
+    /// # Safety
+    /// For the MmapInput variant the bytes are passed to
+    /// from_utf8_unchecked, so the caller must know they are valid UTF-8.
+    #[cfg(unix)]
+    unsafe fn as_str_unchecked(&self) -> &str {
+        match self {
+            IOChunkContent::MmapInput { content, .. } => unsafe {
+                std::str::from_utf8_unchecked(content)
+            },
+            IOChunkContent::Owned { content, .. } => content,
+        }
+    }
+}
+
+/// Unified reader that uses mmap when possible, falls back to buffered reading.
+pub enum LineReader {
+    #[cfg(unix)]
+    MmapInput {
+        mapped_file: Mmap, // A handle that can derive the mapped file slice
+        cursor: MmapLineCursor<'static>,
+    },
+    ReadInput(ReadLineCursor),
+}
+
+/// Return a LineReader that uses the ReadInput method for the specified file.
+fn line_reader_read_input(file: File) -> io::Result<LineReader> {
+    // ReadLineCursor::new supplies the BufReader; wrapping the file in
+    // another one here would only copy every byte through a second buffer.
+    Ok(LineReader::ReadInput(ReadLineCursor::new(file)))
+}
+
+impl LineReader {
+    /// Open the specified file for line input.
+    // Use "-" to read from the standard input.
+    // On Unix the file is mmapped; if mapping fails, or on other
+    // platforms, it is read through ReadLineCursor instead.
+    pub fn open(path: &PathBuf) -> io::Result<Self> {
+        if path.as_os_str() == "-" {
+            // As above: ReadLineCursor::new does the buffering itself.
+            return Ok(LineReader::ReadInput(ReadLineCursor::new(
+                io::stdin().lock(),
+            )));
+        }
+
+        let file = File::open(path)?;
+
+        #[cfg(unix)]
+        {
+            match unsafe { Mmap::map(&file) } {
+                Ok(mapped_file) => {
+                    // SAFETY: mmap owns the data and lives in the same variant
+                    // NOTE(review): the 'static lifetime is a promise kept only
+                    // by storing mapped_file next to the cursor; chunks handed
+                    // out by get_line must not outlive this LineReader.
+                    let slice: &'static [u8] = unsafe {
+                        std::slice::from_raw_parts(mapped_file.as_ptr(), mapped_file.len())
+                    };
+                    let cursor = MmapLineCursor::new(slice);
+                    Ok(LineReader::MmapInput {
+                        mapped_file,
+                        cursor,
+                    })
+                }
+                // Fallback to ReadInput
+                Err(_) => line_reader_read_input(file),
+            }
+        }
+
+        #[cfg(not(unix))]
+        {
+            line_reader_read_input(file)
+        }
+    }
+
+    /// Open the specified file to read as a stream.
+    #[cfg(test)]
+    pub fn open_stream(path: &PathBuf) -> io::Result<Self> {
+        let file = File::open(path)?;
+        line_reader_read_input(file)
+    }
+
+    /// Return the next line, if available and also the availability
+    /// of another one, or None at end of file.
+    pub fn get_line(&mut self) -> io::Result<Option<(IOChunk, bool)>> {
+        match self {
+            #[cfg(unix)]
+            LineReader::MmapInput { cursor, .. } => {
+                if let Some(NextMmapLine {
+                    content,
+                    full_span,
+                    is_last_line,
+                }) = cursor.get_line()?
+                {
+                    let chunk =
+                        IOChunk::from_content(IOChunkContent::MmapInput { content, full_span });
+
+                    Ok(Some((chunk, is_last_line)))
+                } else {
+                    Ok(None)
+                }
+            }
+
+            LineReader::ReadInput(cursor) => {
+                if let Some((line, has_newline, is_last_line)) = cursor.get_line()? {
+                    let chunk =
+                        IOChunk::from_content(IOChunkContent::new_owned(line, has_newline));
+                    Ok(Some((chunk, is_last_line)))
+                } else {
+                    Ok(None)
+                }
+            }
+        }
+    }
+}
+
+// Define a trait combining two: workaround for Rust's corresponding inability.
+#[cfg(unix)]
+pub trait OutputWrite: Write + AsRawFd {}
+#[cfg(unix)]
+impl<T: Write + AsRawFd> OutputWrite for T {}
+
+#[cfg(not(unix))]
+pub trait OutputWrite: Write {}
+#[cfg(not(unix))]
+impl<T: Write> OutputWrite for T {}
+
+/// Abstraction for outputting data, potentially from the mmapped file
+/// Outputs from mmapped data are coalesced and written via a write(2)
+/// system call without any copying if worthwhile.
+/// All other output is buffered and written via BufWriter.
+pub struct OutputBuffer {
+    out: BufWriter<Box<dyn OutputWrite>>, // Where to write
+    #[cfg(unix)]
+    mmap_ptr: Option<(*const u8, usize)>, // Start and len of chunk to write
+    #[cfg(test)]
+    writes_issued: usize, // Number of issued write(2) calls
+}
+
+/// Wrapper that issues the write(2) system call
+// write(2) may accept fewer bytes than requested (e.g. on pipes) and
+// may be interrupted by a signal, so keep writing until all len
+// bytes have been handed to the kernel.
+// The caller must pass a ptr that is valid for reading len bytes.
+#[cfg(unix)]
+fn write_syscall(fd: i32, ptr: *const u8, len: usize) -> io::Result<()> {
+    let mut written = 0;
+    while written < len {
+        // SAFETY: written < len, so the range passed stays inside ptr..ptr+len
+        let ret = unsafe { write(fd, ptr.add(written) as *const c_void, len - written) };
+        if ret < 0 {
+            let err = io::Error::last_os_error();
+            if err.kind() == io::ErrorKind::Interrupted {
+                continue; // EINTR: nothing was written; retry
+            }
+            return Err(err);
+        }
+        if ret == 0 {
+            // No progress is possible; fail instead of spinning
+            return Err(io::ErrorKind::WriteZero.into());
+        }
+        written += ret as usize;
+    }
+    Ok(())
+}
+
+/// Threshold to use buffered writes for output
+// These 4k are half the 8k size of the BufWriter buffer.
+// The constant guarantees that, at worst, mmapped output will
+// result in a doubling of the issued write(2) system calls.
+// Taking into account the non-copied data, this should result
+// in overall fewer CPU instructions.
+#[cfg(unix)]
+const MIN_DIRECT_WRITE: usize = 4 * 1024;
+
+/// The maximum size of a pending write buffer
+// Once more than 64k accumulate, issue a write to allow the OS
+// and downstream pipes to handle the output processing in parallel
+// with our processing.
+#[cfg(unix)] +const MAX_PENDING_WRITE: usize = 64 * 1024; + +impl OutputBuffer { + pub fn new(w: Box) -> Self { + Self { + out: BufWriter::new(w), + #[cfg(unix)] + mmap_ptr: None, + #[cfg(test)] + writes_issued: 0, + } + } + + /// Schedule the specified String or &str for eventual output + pub fn write_str>(&mut self, s: S) -> io::Result<()> { + self.write_chunk(&IOChunk::from_content(IOChunkContent::new_owned( + s.into(), + false, + ))) + } +} + +#[cfg(unix)] +impl OutputBuffer { + /// Schedule the specified output chunk for eventual output + pub fn write_chunk(&mut self, chunk: &IOChunk) -> io::Result<()> { + match &chunk.content { + IOChunkContent::MmapInput { full_span, .. } => { + let ptr = full_span.as_ptr(); + let len = full_span.len(); + + if let Some((p, l)) = self.mmap_ptr { + // Coalesce if adjacent + if unsafe { p.add(l) } == ptr && l < MAX_PENDING_WRITE { + self.mmap_ptr = Some((p, l + len)); + return Ok(()); + } else { + self.flush_mmap()?; // not contiguous + } + } + self.mmap_ptr = Some((ptr, len)); + Ok(()) + } + + IOChunkContent::Owned { + content, + has_newline, + .. + } => { + self.flush_mmap()?; + self.out.write_all(content.as_bytes())?; + if *has_newline { + self.out.write_all(b"\n")?; + } + Ok(()) + } + } + } + + // Flush any pending mmap data + #[cfg(unix)] + fn flush_mmap(&mut self) -> io::Result<()> { + if let Some((ptr, len)) = self.mmap_ptr.take() { + if len < MIN_DIRECT_WRITE { + // SAFELY treat as &[u8] and write to buffered writer + let slice = unsafe { std::slice::from_raw_parts(ptr, len) }; + return self.out.write_all(slice); + } else { + // Large enough: write directly using zero-copy + let fd = self.out.get_ref().as_raw_fd(); + self.out.flush()?; // sync any buffered data + #[cfg(test)] + { + self.writes_issued += 1; + } + return write_syscall(fd, ptr, len); + } + } + Ok(()) + } + + /// Flush everything: pending mmap and buffered data.
+ pub fn flush(&mut self) -> io::Result<()> { + self.flush_mmap()?; // flush mmap if any + self.out.flush() // then flush buffered data + } +} + +#[cfg(not(unix))] +impl OutputBuffer { + /// Schedule the specified output chunk for eventual output + pub fn write_chunk(&mut self, chunk: &IOChunk) -> io::Result<()> { + match &chunk.content { + IOChunkContent::Owned { + content, + has_newline, + .. + } => { + self.out.write_all(content.as_bytes())?; + if *has_newline { + self.out.write_all(b"\n")?; + } + Ok(()) + } + } + } + + /// Flush everything: pending mmap and buffered data. + pub fn flush(&mut self) -> io::Result<()> { + self.out.flush() // then flush buffered data + } +} + +// Usage example (never compiled) +#[cfg(any())] +pub fn main() -> io::Result<()> { + let path = std::env::args() + .nth(1) + .map(PathBuf::from) + .unwrap_or_else(|| "-".into()); + let mut reader = LineReader::open(&path)?; + let stdout = Box::new(io::stdout().lock()); + let mut output = OutputBuffer::new(stdout); + + while let Some(chunk) = reader.get_line()? { + output.write_chunk(&chunk)?; + } + + output.flush() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + #[cfg(unix)] + use std::fs::File; + #[cfg(unix)] + use std::io::{self, Write}; + use tempfile::NamedTempFile; + + /// Helper: produce a 4k-byte Vec of `'.'`s ending in `'\n'`. 
+ #[cfg(unix)] + fn make_dot_line_4k() -> Vec { + let mut buf = Vec::with_capacity(4096); + buf.extend(std::iter::repeat(b'.').take(4095)); + buf.push(b'\n'); + buf + } + + #[test] + fn test_owned_line_output() -> io::Result<()> { + let tmp = NamedTempFile::new()?; + { + let file = tmp.reopen()?; + let mut out = OutputBuffer::new(Box::new(file)); + out.write_str("foo\n")?; + out.write_str("bar\n")?; + out.flush()?; + assert_eq!(out.writes_issued, 0); + } // File closes here as it leaves the scope + + let contents = fs::read(tmp.path())?; + assert_eq!(contents.as_slice(), b"foo\nbar\n"); + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_mmap_line_output_single() -> io::Result<()> { + use std::fs; + use std::io::Write; + use tempfile::NamedTempFile; + + // Prepare the input buffer: two lines in one contiguous mmap region + let mmap_data = b"line one\nline two\n"; + + // Write that into a temp file + let mut input = NamedTempFile::new()?; + input.write_all(mmap_data)?; + input.flush()?; + let input_path = input.path().to_path_buf(); + + // Open the reader on that file + let mut reader = LineReader::open(&input_path)?; + + // Prepare an output temp file and wrap it in our OutputBuffer + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = std::fs::File::create(&output_path)?; + let mut out = OutputBuffer::new(Box::new(Box::new(out_file))); + + // Drain reader → writer + while let Some((chunk, _last_line)) = reader.get_line()? 
{ + out.write_chunk(&chunk)?; + } + out.flush()?; + + assert_eq!(out.writes_issued, 0); + + let written = fs::read(&output_path)?; + assert_eq!(written.as_slice(), mmap_data); + + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_mixed_output_order_preserved() -> io::Result<()> { + use std::fs; + use std::fs::File; + use std::io::Write; + use tempfile::NamedTempFile; + + // Prepare an input file containing two lines: "zero\none\n" + let data = b"zero\none\n"; + let mut input = NamedTempFile::new()?; + input.write_all(data)?; + input.flush()?; + let input_path = input.path().to_path_buf(); + let mut reader = LineReader::open(&input_path)?; + + // Prepare an empty output file + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = File::create(&output_path)?; + let mut out = OutputBuffer::new(Box::new(out_file)); + + // Read the first mmap line ("zero\n") and write it + if let Some((chunk, _last_line)) = reader.get_line()? { + out.write_chunk(&chunk)?; + } + + // Write an owned line ("middle\n") + out.write_str("middle\n")?; + + // Read the second mmap line ("one\n") and write it + if let Some((chunk, _last_line)) = reader.get_line()? 
+ { + out.write_chunk(&chunk)?; + } + + out.flush()?; + + // Since all writes are small (<4K), we expect zero zero-copy syscalls + assert_eq!(out.writes_issued, 0); + + // Build the expected bytes and compare them with the output file + let expected = { + let mut v = Vec::new(); + v.extend_from_slice(b"zero\n"); + v.extend_from_slice(b"middle\n"); + v.extend_from_slice(b"one\n"); + v + }; + let actual = fs::read(&output_path)?; + assert_eq!(actual, expected); + + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_large_file_zero_copy() -> io::Result<()> { + // Create and fill the input temp file: + let mut input = NamedTempFile::new()?; + write!(input, "first line\nsecond line\n")?; + let dot_line = make_dot_line_4k(); + input.write_all(&dot_line)?; + input.flush()?; + let input_path = input.path().to_path_buf(); + + // Open reader on input file: + let mut reader = LineReader::open(&input_path)?; + + // Create the output temp file (empty): + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = File::create(&output_path)?; + + // Wrap it in an OutputBuffer and run the loop: + let mut out = OutputBuffer::new(Box::new(out_file)); + let mut nline = 0; + while let Some((chunk, _last_line)) = reader.get_line()?
{ + out.write_chunk(&chunk)?; + nline += 1; + } + assert_eq!(nline, 3); + + out.flush()?; + assert_eq!(out.writes_issued, 1); + + // Verify that files match: + let expected = fs::read(&input_path)?; + let actual = fs::read(&output_path)?; + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_large_file_zero_copy_unterminated() -> io::Result<()> { + // Create and fill the input temp file: + let mut input = NamedTempFile::new()?; + write!(input, "first line\nsecond line\n")?; + let dot_line = make_dot_line_4k(); + input.write_all(&dot_line)?; + write!(input, "last line (unterminated)")?; + input.flush()?; + let input_path = input.path().to_path_buf(); + + // Open reader on input file: + let mut reader = LineReader::open(&input_path)?; + + // Create the output temp file (empty): + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = File::create(&output_path)?; + + // Wrap it in your OutputBuffer and run the loop: + let mut out = OutputBuffer::new(Box::new(out_file)); + let mut nline = 0; + while let Some((chunk, _last_line)) = reader.get_line()? 
{ + out.write_chunk(&chunk)?; + nline += 1; + } + assert_eq!(nline, 4); + + out.flush()?; + assert_eq!(out.writes_issued, 1); + + // Verify that files match: + let expected = fs::read(&input_path)?; + let actual = fs::read(&output_path)?; + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn test_small_file_unterminated() -> io::Result<()> { + // Create and fill the input temp file: + let mut input = NamedTempFile::new()?; + write!(input, "first line\nsecond line\nlast line (unterminated)")?; + input.flush()?; + let input_path = input.path().to_path_buf(); + + // Open reader on input file: + let mut reader = LineReader::open(&input_path)?; + + // Create the output temp file (empty): + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = File::create(&output_path)?; + + // Wrap it in your OutputBuffer and run the loop: + let mut out = OutputBuffer::new(Box::new(out_file)); + let mut nline = 0; + while let Some((chunk, _last_line)) = reader.get_line()? 
{ + out.write_chunk(&chunk)?; + nline += 1; + } + assert_eq!(nline, 3); + + out.flush()?; + assert_eq!(out.writes_issued, 0); + + // Verify that files match: + let expected = fs::read(&input_path)?; + let actual = fs::read(&output_path)?; + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn test_small_file_unterminated_stream() -> io::Result<()> { + // Create and fill the input temp file: + let mut input = NamedTempFile::new()?; + write!(input, "first line\nsecond line\nlast line (unterminated)")?; + input.flush()?; + let input_path = input.path().to_path_buf(); + + // Open reader on input file: + let mut reader = LineReader::open_stream(&input_path)?; + + // Create the output temp file (empty): + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = File::create(&output_path)?; + + // Wrap it in your OutputBuffer and run the loop: + let mut out = OutputBuffer::new(Box::new(out_file)); + let mut nline = 0; + while let Some((chunk, _last_line)) = reader.get_line()? 
{ + out.write_chunk(&chunk)?; + nline += 1; + } + assert_eq!(nline, 3); + + out.flush()?; + assert_eq!(out.writes_issued, 0); + + // Verify that files match: + let expected = fs::read(&input_path)?; + let actual = fs::read(&output_path)?; + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_large_file_zero_copy_with_flush() -> io::Result<()> { + // Create and fill the input temp file: + let mut input = NamedTempFile::new()?; + write!(input, "first line\nsecond line\n")?; + let dot_line = make_dot_line_4k(); + // Write 64k + 16k to ensure one flush when writing + for _i in 0..20 { + input.write_all(&dot_line)?; + } + input.flush()?; + let input_path = input.path().to_path_buf(); + + // Open reader on input file: + let mut reader = LineReader::open(&input_path)?; + + // Create the output temp file (empty): + let output = NamedTempFile::new()?; + let output_path = output.path().to_path_buf(); + let out_file = File::create(&output_path)?; + + // Wrap it in your OutputBuffer and run the loop: + let mut out = OutputBuffer::new(Box::new(out_file)); + let mut nline = 0; + while let Some((chunk, _last_line)) = reader.get_line()? { + out.write_chunk(&chunk)?; + nline += 1; + } + assert_eq!(nline, 22); + + out.flush()?; + assert_eq!(out.writes_issued, 2); + + // Verify that files match: + let expected = fs::read(&input_path)?; + let actual = fs::read(&output_path)?; + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn test_stream_read() -> std::io::Result<()> { + // Create temporary file with known contents + let mut tmp = NamedTempFile::new()?; + write!(tmp, "first line\nsecond line\nlast line\n")?; + tmp.flush()?; + + let path = tmp.path().to_path_buf(); + let mut reader = LineReader::open_stream(&path)?; + + // Verify the reader's operation + if let Some(( + IOChunk { + content: + IOChunkContent::Owned { + content, + has_newline, + .. + }, + utf8_verified, + .. + }, + last_line, + )) = reader.get_line()? 
+ { + assert_eq!(content, "first line"); + assert!(has_newline); + assert!(!utf8_verified); + assert!(!last_line); + } else { + panic!("Expected IOChunkContent::Owned"); + } + + if let Some(( + IOChunk { + content: + IOChunkContent::Owned { + content, + has_newline, + .. + }, + .. + }, + last_line, + )) = reader.get_line()? + { + assert_eq!(content, "second line"); + assert!(has_newline); + assert!(!last_line); + } else { + panic!("Expected IOChunkContent::Owned"); + } + + if let Some((mut content, last_line)) = reader.get_line()? { + assert_eq!(content.as_str().unwrap(), "last line"); + assert!(last_line); + } else { + panic!("Expected IOChunk"); + } + + assert_eq!(reader.get_line()?, None); + + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_mmap_read() -> std::io::Result<()> { + // Create temporary file with known contents + let mut tmp = NamedTempFile::new()?; + write!(tmp, "first line\nsecond line\nlast line\n")?; + tmp.flush()?; + + let path = tmp.path().to_path_buf(); + let mut reader = LineReader::open(&path)?; + + // Verify the reader's operation + if let Some(( + IOChunk { + content: + IOChunkContent::MmapInput { + content, full_span, .. + }, + utf8_verified, + .. + }, + last_line, + )) = reader.get_line()? + { + assert_eq!(content, b"first line"); + assert_eq!(full_span, b"first line\n"); + assert!(!utf8_verified); + assert!(!last_line); + } else { + panic!("Expected IOChunkContent::MapInput"); + } + + if let Some(( + IOChunk { + content: + IOChunkContent::MmapInput { + content, full_span, .. + }, + utf8_verified, + .. + }, + last_line, + )) = reader.get_line()? + { + assert_eq!(content, b"second line"); + assert_eq!(full_span, b"second line\n"); + assert!(!utf8_verified); + assert!(!last_line); + } else { + panic!("Expected IOChunkContent::MapInput"); + } + + if let Some((mut content, last_line)) = reader.get_line()? 
{ + assert_eq!(content.as_str().unwrap(), "last line"); + assert!(content.utf8_verified); + assert!(last_line); + // Cached version + assert_eq!(content.as_str().unwrap(), "last line"); + } else { + panic!("Expected IOChunk"); + } + + assert_eq!(reader.get_line()?, None); + + Ok(()) + } + + // is_newline_terminated + #[test] + fn test_owned_newline_terminated() { + let chunk = IOChunk::from_content(IOChunkContent::new_owned("line".to_string(), true)); + assert!(chunk.is_newline_terminated()); + } + + #[test] + fn test_owned_not_newline_terminated() { + let chunk = IOChunk::from_content(IOChunkContent::new_owned("line".to_string(), false)); + assert!(!chunk.is_newline_terminated()); + } + + #[cfg(unix)] + #[test] + fn test_mmap_newline_terminated() { + let content = b"line"; + let full_span = b"line\n"; + let chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + assert!(chunk.is_newline_terminated()); + } + + #[cfg(unix)] + #[test] + fn test_mmap_not_newline_terminated() { + let content = b"line"; + let full_span = b"line"; + let chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + assert!(!chunk.is_newline_terminated()); + } + + #[cfg(unix)] + #[test] + fn test_mmap_empty() { + let content = b""; + let full_span = b""; + let chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + assert!(!chunk.is_newline_terminated()); + } + + // ensure_owned() + #[test] + fn test_ensure_owned_on_owned() { + let mut chunk = + IOChunk::from_content(IOChunkContent::new_owned("already owned".to_string(), true)); + + let result = chunk.ensure_owned(); + assert!(result.is_ok()); + + // Content must be unchanged + match &chunk.content { + IOChunkContent::Owned { + content, + has_newline, + .. 
+ } => { + assert_eq!(content, "already owned"); + assert!(*has_newline); + } + #[cfg(unix)] + _ => panic!("Expected Owned variant"), + } + } + + #[cfg(unix)] + #[test] + fn test_ensure_owned_on_mmap_valid_utf8() { + let content = b"mmap string"; + let full_span = b"mmap string\n"; + + let mut chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + + let result = chunk.ensure_owned(); + assert!(result.is_ok()); + + match &chunk.content { + IOChunkContent::Owned { + content, + has_newline, + .. + } => { + assert_eq!(content, "mmap string"); + assert!(*has_newline); + } + _ => panic!("Expected Owned variant after ensure_owned"), + } + } + + #[cfg(unix)] + #[test] + fn test_ensure_owned_on_mmap_valid_utf8_no_newline() { + let content = b"no newline"; + let full_span = b"no newline"; + + let mut chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + + let result = chunk.ensure_owned(); + assert!(result.is_ok()); + + match &chunk.content { + IOChunkContent::Owned { + content, + has_newline, + .. 
+ } => { + assert_eq!(content, "no newline"); + assert!(!*has_newline); + } + _ => panic!("Expected Owned variant after ensure_owned"), + } + } + + #[cfg(unix)] + #[test] + fn test_ensure_owned_on_mmap_invalid_utf8() { + let content = b"bad\xFFutf8"; + let full_span = b"bad\xFFutf8\n"; + + let mut chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + + let result = chunk.ensure_owned(); + assert!(result.is_err()); + let err_msg = format!("{}", result.unwrap_err()); + assert!( + err_msg.contains("invalid utf-8"), + "Unexpected error message: {}", + err_msg + ); + } + + // fields_mut + #[test] + fn test_fields_mut_on_owned() { + let mut chunk = + IOChunk::from_content(IOChunkContent::new_owned("hello".to_string(), false)); + + let (s, _) = chunk.fields_mut().unwrap(); + s.push_str(" world"); + + assert_eq!(chunk.as_str().unwrap(), "hello world"); + } + + #[cfg(unix)] + #[test] + fn test_fields_mut_on_mmap_input_valid_utf8() { + let content = b"foo"; + let full_span = b"foo\n"; + let mut chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + + { + let (s, _) = chunk.fields_mut().unwrap(); + s.push_str("bar"); + } + + assert_eq!(chunk.as_str().unwrap(), "foobar"); + } + + #[cfg(unix)] + #[test] + fn test_fields_mut_on_utf8_multibyte() { + let content = "Ζωντανά!".as_bytes(); + let full_span = "Ζωντανά!\n".as_bytes(); + let mut chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + + let (s, _) = chunk.fields_mut().unwrap(); + s.push_str(" Δεδομένα"); + + assert_eq!(chunk.as_str().unwrap(), "Ζωντανά! 
Δεδομένα"); + } + + #[cfg(unix)] + #[test] + fn test_fields_mut_invalid_utf8() { + let content = b"abc\xFF"; // invalid UTF-8 + let full_span = b"abc\xFF\n"; + let mut chunk = IOChunk::from_content(IOChunkContent::MmapInput { content, full_span }); + + let result = chunk.fields_mut(); + assert!(result.is_err()); + assert!(format!("{}", result.unwrap_err()).contains("invalid utf-8")); + } +} diff --git a/src/uu/sed/src/in_place.rs b/src/uu/sed/src/in_place.rs new file mode 100644 index 00000000..eb4d9403 --- /dev/null +++ b/src/uu/sed/src/in_place.rs @@ -0,0 +1,46 @@ +// Support for in-place editing +// +// SPDX-License-Identifier: MIT +// Copyright (c) 2025 Diomidis Spinellis +// +// This file is part of the uutils sed package. +// It is licensed under the MIT License. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use crate::command::ProcessingContext; +use crate::fast_io::OutputBuffer; +use std::io::stdout; +use std::path::Path; +use uucore::error::UResult; + +/// Context for in-place editing +pub struct InPlace { + pub output: OutputBuffer, + pub processing_context: ProcessingContext, +} + +impl InPlace { + /// Create a new `ProcessingContext` taking ownership of processing_context + pub fn new(processing_context: ProcessingContext) -> UResult { + let output = OutputBuffer::new(Box::new(stdout())); + + Ok(InPlace { + output, + processing_context, + }) + } + + /// Return an OutputBuffer for outputting the edits to the specified file. + pub fn begin(&mut self, _file_name: &Path) -> UResult<&mut OutputBuffer> { + // TODO: Adjust output for in-place editing, if needed. + Ok(&mut self.output) + } + + /// Finish in-place editing. + pub fn end(&mut self) -> UResult<()> { + self.output.flush()?; + // TODO: Rename and delete output file, if needed. 
+ Ok(()) + } +} diff --git a/src/uu/sed/src/named_writer.rs b/src/uu/sed/src/named_writer.rs new file mode 100644 index 00000000..26482acf --- /dev/null +++ b/src/uu/sed/src/named_writer.rs @@ -0,0 +1,78 @@ +// An abstraction for output files created on entry and flushed on exit +// +// SPDX-License-Identifier: MIT +// Copyright (c) 2025 Diomidis Spinellis +// +// This file is part of the uutils sed package. +// It is licensed under the MIT License. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::cell::RefCell; +use std::fs::{File, OpenOptions}; +use std::io::{BufWriter, Write}; +use std::path::PathBuf; +use std::rc::Rc; + +use uucore::error::{UResult, USimpleError}; + +thread_local! { + /// Global list of all writers that should be flushed at shutdown + static FLUSH_LIST: RefCell>>> = const { RefCell::new(Vec::new()) }; +} + +#[derive(Debug)] +/// Writer that tracks its file name for better error messages +pub struct NamedWriter { + pub path: PathBuf, + pub writer: BufWriter, +} + +impl NamedWriter { + /// Create a new writer, truncate the file, and register it for flushing. + pub fn new(path: PathBuf) -> UResult>> { + let file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(&path) + .map_err(|e| { + USimpleError::new( + 2, + format!("Error opening output file {}: {}", path.display(), e), + ) + })?; + + let writer = Rc::new(RefCell::new(NamedWriter { + path, + writer: BufWriter::new(file), + })); + + FLUSH_LIST.with(|list| list.borrow_mut().push(Rc::clone(&writer))); + Ok(writer) + } + + /// Write a line to the file with a newline, returning descriptive errors. + pub fn write_line(&mut self, line: &str) -> UResult<()> { + writeln!(self.writer, "{}", line) + .map_err(|e| USimpleError::new(2, format!("{}: {}", self.path.display(), e))) + } + + /// Flush the writer, returning a descriptive error. 
+ pub fn flush(&mut self) -> UResult<()> { + self.writer + .flush() + .map_err(|e| USimpleError::new(2, format!("{}: {}", self.path.display(), e))) + } +} + +/// Flush every writer registered in FLUSH_LIST, stopping at the first descriptive error. +pub fn flush_all() -> UResult<()> { + FLUSH_LIST.with(|cell| { + for handle in cell.borrow().iter() { + handle.borrow_mut().flush()?; + } + + Ok(()) + }) +} diff --git a/src/uu/sed/src/processor.rs b/src/uu/sed/src/processor.rs index 4d5b3c15..a799ceb9 100644 --- a/src/uu/sed/src/processor.rs +++ b/src/uu/sed/src/processor.rs @@ -8,16 +8,512 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use crate::command::CliOptions; -use crate::command::Command; +use crate::command::{ + Address, AddressType, AddressValue, Command, CommandData, InputAction, ProcessingContext, + Substitution, Transliteration, +}; +use crate::fast_io::{IOChunk, LineReader, OutputBuffer}; +use crate::in_place::InPlace; +use crate::named_writer; +use atty::Stream; +use std::cell::RefCell; use std::path::PathBuf; -use uucore::error::UResult; +use std::rc::Rc; +use uucore::error::{UResult, USimpleError}; -pub fn process( - _code: Option>, - _files: Vec, - _cli_options: &mut CliOptions, +/// Return true if the passed address matches the current I/O context. +fn match_address( + addr: &Address, + pattern: &mut IOChunk, + context: &ProcessingContext, +) -> UResult { + match addr.atype { + AddressType::Re => { + if let AddressValue::Regex(ref re) = addr.value { + Ok(re.is_match(pattern.as_str()?)) + } else { + Ok(false) + } + } + + AddressType::Line => { + if let AddressValue::LineNumber(lineno) = addr.value { + Ok(context.line_number == lineno) + } else { + Ok(false) + } + } + + // Recognize "$" as the last line of last file.
This is consistent + // with the original 7th Research Edition implementation: + // https://github.com/dspinellis/unix-history-repo/blob/Research-V7/usr/src/cmd/sed/sed1.c#L665 + // The FreeBSD version checked for subsequent empty files, but this + // can lead to destructive reads (e.g. from named pipes), + // and is probably an overkill. + AddressType::Last => Ok(context.last_line && (context.last_file || context.separate)), + + _ => panic!("invalid address type in match_address"), + } +} + +#[allow(dead_code)] +/// Return true if the command applies to the given pattern. +fn applies( + command: &mut Command, + pattern: &mut IOChunk, + context: &mut ProcessingContext, +) -> UResult { + let linenum = context.line_number; + + let result = if command.addr1.is_none() && command.addr2.is_none() { + Ok(true) + } else if let Some(addr2) = &command.addr2 { + if let Some(start) = command.start_line { + match addr2.atype { + AddressType::RelLine => { + if let AddressValue::LineNumber(n) = addr2.value { + if linenum - start <= n { + Ok(true) + } else { + command.start_line = None; + Ok(false) + } + } else { + Ok(false) + } + } + _ => { + if match_address(addr2, pattern, context)? { + command.start_line = None; + context.last_address = true; + Ok(true) + } else if addr2.atype == AddressType::Line { + if let AddressValue::LineNumber(n) = addr2.value { + if linenum > n { + command.start_line = None; + Ok(false) + } else { + Ok(true) + } + } else { + Ok(true) + } + } else { + Ok(true) + } + } + } + } else if let Some(addr1) = &command.addr1 { + if match_address(addr1, pattern, context)? 
{ + match addr2.atype { + AddressType::Line => { + if let AddressValue::LineNumber(n) = addr2.value { + if linenum >= n { + context.last_address = true; + } else { + command.start_line = Some(linenum); + } + } + } + AddressType::RelLine => { + if let AddressValue::LineNumber(0) = addr2.value { + context.last_address = true; + } else { + command.start_line = Some(linenum); + } + } + _ => { + command.start_line = Some(linenum); + } + } + Ok(true) + } else { + Ok(false) + } + } else { + Ok(false) + } + } else if let Some(addr1) = &command.addr1 { + Ok(match_address(addr1, pattern, context)?) + } else { + Ok(false) + }; + + if command.non_select { + result.map(|v| !v) + } else { + result + } +} + +/// Write the specified chunk to the output for a given processing context. +fn write_chunk( + output: &mut OutputBuffer, + context: &ProcessingContext, + chunk: &IOChunk, +) -> std::io::Result<()> { + output.write_chunk(chunk)?; + + if context.unbuffered { + output.flush()?; + } + + Ok(()) +} + +/// Perform the specified RE replacement in the provided pattern space. +fn substitute( + pattern: &mut IOChunk, + sub: &mut Substitution, + context: &mut ProcessingContext, + output: &mut OutputBuffer, +) -> UResult<()> { + let mut count = 0; + let mut last_end = 0; + let mut result = String::new(); + let mut replaced = false; + + let text = pattern.as_str()?; + + for caps in sub.regex.captures_iter(text) { + count += 1; + let m = caps.get(0).unwrap(); + + // Always write the unmatched text before this match. + result.push_str(&text[last_end..m.start()]); + + if sub.occurrence == 0 || count == sub.occurrence { + let replacement = sub.replacement.apply(&caps)?; + result.push_str(&replacement); + replaced = true; + } else { + // Not the target match — leave the match unchanged. + result.push_str(m.as_str()); + } + + last_end = m.end(); + } + + // Handle substitution success. 
+ if replaced { + result.push_str(&text[last_end..]); + + pattern.set_to_string(result, pattern.is_newline_terminated()); + + if sub.print_flag { + write_chunk(output, context, pattern)?; + } + + // Write to file if needed. + if let Some(ref writer) = sub.write_file { + writer.borrow_mut().write_line(pattern.as_str()?)?; + } + context.substitution_made = true; + } + + Ok(()) +} + +/// Apply the specified transliteration in the provided pattern space. +fn transliterate(pattern: &mut IOChunk, trans: &Transliteration) -> UResult<()> { + let text = pattern.as_str()?; + let mut result = String::with_capacity(text.len()); + let mut replaced = false; + + // Perform the transliteration. + for ch in text.chars() { + let mapped = trans.lookup(ch); + if mapped != ch { + replaced = true; + } + result.push(mapped); + } + + // Lazy replace. + if replaced { + pattern.set_to_string(result, pattern.is_newline_terminated()); + } + + Ok(()) +} + +/// Process a single input file +fn process_file( + commands: &Option>>, + reader: &mut LineReader, + output: &mut OutputBuffer, + context: &mut ProcessingContext, +) -> UResult<()> { + // Loop over the input lines as pattern space. + 'lines: while let Some((mut pattern, last_line)) = reader.get_line()? { + context.last_line = last_line; + context.line_number += 1; + context.substitution_made = false; + // Set the script command from which to start. + let mut current: Option>> = + if let Some(action) = context.input_action.take() { + // Continue processing the `N` command. + let current_line = pattern.as_str()?; + let mut combined_lines = action.prepend; + combined_lines.push('\n'); + combined_lines.push_str(current_line); + + pattern.set_to_string(combined_lines, pattern.is_newline_terminated()); + action.next_command + } else { + // Start from the script top. + commands.clone() + }; + + // Loop over script commands. 
+ while let Some(command_rc) = current.clone() { + let mut command = command_rc.borrow_mut(); + + if !applies(&mut command, &mut pattern, context)? { + // Advance to next command + current = command.next.clone(); + continue; + } + + match command.code { + '{' => { + // Block begin; start processing the enclosed ones. + let CommandData::Block(body) = &command.data else { + panic!("Expected Block command data"); + }; + current = body.clone(); + continue; + } + '}' => { + // Block end: continue with the block's patched next. + } + 'a' => { + // TODO + } + 'b' => { + // Branch to the specified label or end if none is given. + let CommandData::BranchTarget(target) = &command.data else { + panic!("Expected BranchTarget command data"); + }; + if target.is_some() { + // New command to execute + current = target.clone(); + continue; + } else { + // Branch to the end of the script. + break; + } + } + 'c' => { + // TODO + } + 'd' => { + // Delete the pattern space and start the next cycle. + pattern.clear(); + break; + } + 'D' => { + // Delete up to \n and start a new cycle without new input. + if let Some(pos) = pattern.as_str()?.find('\n') { + let (s, _) = pattern.fields_mut()?; + s.drain(..=pos); + current = commands.clone(); + continue; + } else { + // Same as d + pattern.clear(); + break; + } + } + 'g' => { + // Replace pattern with the contents of the hold space. + pattern.set_to_string(context.hold.content.clone(), context.hold.has_newline); + } + 'G' => { + // Append to pattern \n followed by hold space contents. + let (pat_content, pat_has_newline) = pattern.fields_mut()?; + pat_content.push('\n'); + pat_content.push_str(&context.hold.content); + *pat_has_newline = context.hold.has_newline; + } + 'h' => { + // Replace hold with the contents of the pattern space. + context.hold.content = pattern.as_str()?.to_string(); + context.hold.has_newline = pattern.is_newline_terminated(); + } + 'H' => { + // Append to hold \n followed by pattern space contents. 
+ context.hold.content.push('\n'); + context.hold.content.push_str(pattern.as_str()?); + context.hold.has_newline = pattern.is_newline_terminated(); + } + 'i' => { + // TODO + } + 'l' => { + // TODO + } + 'n' => { + break; + } + 'N' => { + // Append to pattern `\n` and the next line + // Rather than reading input here, which would result + // in a double borrow on reader, modify the action + // to perform when the next line is read. + context.input_action = Some(InputAction { + next_command: command.next.clone(), + prepend: pattern.as_str()?.to_string(), + }); + continue 'lines; + } + 'p' => { + // Write the pattern space to standard output. + write_chunk(output, context, &pattern)?; + } + 'P' => { + // Output pattern space, up to the first \n. + let line = pattern.as_str()?; + match line.find('\n') { + Some(pos) => { + output.write_str(&line[..=pos])?; + } + None => { + output.write_str(line)?; + output.write_str("\n")?; + } + } + } + 'q' => { + context.stop_processing = true; + break; + } + 'r' => { + // TODO + } + 's' => { + let subst = match &mut command.data { + CommandData::Substitution(subst) => subst, + _ => panic!("Expected Substitution command data"), + }; + + substitute(&mut pattern, &mut *subst, context, output)?; + } + 't' if !context.substitution_made => { /* Do nothing. */ } + 't' => { + // Branch to the specified label or end if none is given + // if a substitution was made since last cycle or t. + let CommandData::BranchTarget(target) = &command.data else { + panic!("Expected BranchTarget command data"); + }; + context.substitution_made = false; + if target.is_some() { + // New command to execute + current = target.clone(); + continue; + } else { + // Branch to the end of the script. + break; + } + } + 'w' => { + // TODO + } + 'x' => { + // Exchange the contents of the pattern and hold spaces. 
+ let (pat_content, pat_has_newline) = pattern.fields_mut()?; + std::mem::swap(pat_content, &mut context.hold.content); + std::mem::swap(pat_has_newline, &mut context.hold.has_newline); + } + 'y' => { + let trans = match &mut command.data { + CommandData::Transliteration(trans) => trans, + _ => panic!("Expected Transliteration command data"), + }; + + transliterate(&mut pattern, trans)?; + } + ':' => { + // Branch target; do nothing. + } + '=' => { + // TODO + } + // The compilation should supply only valid codes. + _ => panic!("invalid command code"), + } // match + // Advance to next command. + current = command.next.clone(); + } + + if !context.quiet { + write_chunk(output, context, &pattern)?; + } + + if context.stop_processing { + break; + } + } + + // Handle any N command remains. + if context.separate && !context.quiet { + if let Some(action) = context.input_action.take() { + let mut pending = action.prepend; + pending.push('\n'); + output.write_str(pending)?; + if context.unbuffered { + output.flush()?; + } + } + } + + Ok(()) +} + +/// Process all input files +pub fn process_all_files( + commands: Option>>, + files: Vec, + mut context: ProcessingContext, ) -> UResult<()> { - // TODO + context.unbuffered = context.unbuffered || atty::is(Stream::Stdout); + + let mut in_place = InPlace::new(context.clone())?; + let last_file_index = files.len() - 1; + + for (index, path) in files.iter().enumerate() { + context.last_file = index == last_file_index; + let mut reader = LineReader::open(path).map_err(|e| { + USimpleError::new( + 2, + format!("Error opening input file {}: {}", path.display(), e), + ) + })?; + let output = in_place.begin(path)?; + + if context.separate { + context.line_number = 0; + } + process_file(&commands, &mut reader, output, &mut context)?; + + // Handle any N command remains. 
+ if context.last_file && !context.separate && !context.quiet { + if let Some(action) = context.input_action.take() { + let mut pending = action.prepend; + pending.push('\n'); + output.write_str(pending)?; + } + } + + in_place.end()?; + + if context.stop_processing { + break; + } + } + + // Flush all output files + named_writer::flush_all()?; + Ok(()) } diff --git a/src/uu/sed/src/script_char_provider.rs b/src/uu/sed/src/script_char_provider.rs index 52b54e01..a3e3a85f 100644 --- a/src/uu/sed/src/script_char_provider.rs +++ b/src/uu/sed/src/script_char_provider.rs @@ -8,6 +8,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +#[derive(Debug)] pub struct ScriptCharProvider { line: Vec, pos: usize, diff --git a/src/uu/sed/src/script_line_provider.rs b/src/uu/sed/src/script_line_provider.rs index 24841d5d..f287e982 100644 --- a/src/uu/sed/src/script_line_provider.rs +++ b/src/uu/sed/src/script_line_provider.rs @@ -9,9 +9,12 @@ // file that was distributed with this source code. use crate::command::ScriptValue; +use std::fmt; use std::fs::File; use std::io::{self, BufRead, BufReader}; +use uucore::error::{UResult, USimpleError}; +#[derive(Debug)] /// The provider of script lines across all specified scripts /// Scripts can be specified to sed as files or as strings. pub struct ScriptLineProvider { @@ -19,7 +22,7 @@ pub struct ScriptLineProvider { state: State, } -// Encapsulation of the script line provider's state +/// Encapsulation of the script line provider's state enum State { NotStarted, // Processing has not yet started Active { @@ -57,7 +60,7 @@ impl ScriptLineProvider { } /// Return the next script line to process across all scripts. 
- pub fn next_line(&mut self) -> io::Result> { + pub fn next_line(&mut self) -> UResult> { let mut line = String::new(); loop { @@ -75,6 +78,10 @@ impl ScriptLineProvider { Some(*index + 1) // finished reading this source } else { *line_number += 1; + // Remove trailing newline + if line.ends_with('\n') { + line.pop(); + } return Ok(Some(line)); } } @@ -90,7 +97,7 @@ impl ScriptLineProvider { } // Move to the next available script source. - fn advance_source(&mut self, next_index: usize) -> io::Result<()> { + fn advance_source(&mut self, next_index: usize) -> UResult<()> { if next_index >= self.sources.len() { self.state = State::Done; return Ok(()); @@ -124,7 +131,12 @@ impl ScriptLineProvider { line_number: 0, }; } else { - let file = File::open(p)?; + let file = File::open(p).map_err(|e| { + USimpleError::new( + 2, + format!("Error opening script file {}: {}", p.display(), e), + ) + })?; self.state = State::Active { index: next_index, reader: Box::new(BufReader::new(file)), @@ -139,6 +151,27 @@ impl ScriptLineProvider { } } +impl fmt::Debug for State { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + State::NotStarted => f.debug_struct("NotStarted").finish(), + State::Done => f.debug_struct("Done").finish(), + State::Active { + index, + input_name, + line_number, + .. 
+ } => f + .debug_struct("Active") + .field("index", index) + .field("input_name", input_name) + .field("line_number", line_number) + .field("reader", &"") + .finish(), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/uu/sed/src/sed.rs b/src/uu/sed/src/sed.rs index 20541e96..776edede 100644 --- a/src/uu/sed/src/sed.rs +++ b/src/uu/sed/src/sed.rs @@ -11,14 +11,19 @@ pub mod command; pub mod compiler; pub mod delimited_parser; +pub mod fast_io; +pub mod in_place; +pub mod named_writer; pub mod processor; pub mod script_char_provider; pub mod script_line_provider; -use crate::command::{CliOptions, ScriptValue}; +use crate::command::{ProcessingContext, ScriptValue, StringSpace}; use crate::compiler::compile; -use crate::processor::process; +use crate::processor::process_all_files; use clap::{Arg, ArgMatches, Command, arg}; +use std::cell::RefCell; +use std::collections::HashMap; use std::path::PathBuf; use uucore::error::{UResult, UUsageError}; use uucore::format_usage; @@ -30,10 +35,10 @@ const USAGE: &str = "sed [OPTION]... 
[script] [file]..."; pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().try_get_matches_from(args)?; let (scripts, files) = get_scripts_files(&matches)?; - let mut cli_options = build_context(&matches); + let mut processing_context = build_context(&matches); - let executable = compile(scripts, &mut cli_options)?; - process(executable, files, &mut cli_options)?; + let executable = compile(scripts, &mut processing_context)?; + process_all_files(executable, files, processing_context)?; Ok(()) } @@ -169,12 +174,12 @@ fn get_scripts_files(matches: &ArgMatches) -> UResult<(Vec, Vec CliOptions { - CliOptions { +// Parse CLI flag arguments and return a ProcessingContext struct based on them +fn build_context(matches: &ArgMatches) -> ProcessingContext { + ProcessingContext { all_output_files: matches.get_flag("all-output-files"), debug: matches.get_flag("debug"), - regexp_extended: matches.get_flag("regexp-extended"), + regex_extended: matches.get_flag("regexp-extended"), follow_symlinks: matches.get_flag("follow-symlinks"), in_place: matches.contains_id("in-place"), in_place_suffix: matches @@ -190,6 +195,19 @@ fn build_context(matches: &ArgMatches) -> CliOptions { sandbox: matches.get_flag("sandbox"), unbuffered: matches.get_flag("unbuffered"), null_data: matches.get_flag("null-data"), + + // Other context + line_number: 0, + last_address: false, + last_line: false, + last_file: false, + stop_processing: false, + saved_regex: const { RefCell::new(None) }, + input_action: None, + hold: StringSpace::default(), + parsed_block_nesting: 0, + label_to_command_map: HashMap::new(), + substitution_made: false, } } @@ -303,7 +321,7 @@ mod tests { assert!(!ctx.all_output_files); assert!(!ctx.debug); - assert!(!ctx.regexp_extended); + assert!(!ctx.regex_extended); assert!(!ctx.follow_symlinks); assert!(!ctx.in_place); assert_eq!(ctx.in_place_suffix, None); @@ -338,7 +356,7 @@ mod tests { assert!(ctx.all_output_files); assert!(ctx.debug); - 
assert!(ctx.regexp_extended); + assert!(ctx.regex_extended); assert!(ctx.follow_symlinks); assert!(ctx.in_place); assert!(ctx.in_place_suffix.is_none()); diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index f98b07d7..74517d81 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -8,7 +8,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use std::io::Write; +use std::fs; +use std::io::{Read, Write}; use tempfile::NamedTempFile; use uutests::new_ucmd; use uutests::util::TestScenario; @@ -22,12 +23,12 @@ fn test_invalid_arg() { #[test] fn test_debug() { - new_ucmd!().arg("--debug").arg("").succeeds(); + new_ucmd!().args(&["--debug", ""]).succeeds(); } #[test] fn test_silent_alias() { - new_ucmd!().arg("--silent").arg("").succeeds(); + new_ucmd!().args(&["--silent", ""]).succeeds(); } #[test] @@ -43,9 +44,14 @@ fn test_positional_script_ok() { new_ucmd!().arg("l").succeeds().code_is(0); } +#[test] +fn test_empty_positional_script_ok() { + new_ucmd!().arg("").succeeds().code_is(0); +} + #[test] fn test_e_script_ok() { - new_ucmd!().arg("-e").arg("l").succeeds(); + new_ucmd!().args(&["-e", "l"]).succeeds(); } #[test] @@ -56,3 +62,406 @@ fn test_f_script_ok() { new_ucmd!().arg("-f").arg(path).succeeds(); } + +const INPUT_FILES: &[&str] = &[ + "input/two-lines.txt", + "input/no-new-line.txt", + "input/dots-4k.txt", + "input/dots-8k.txt", + "input/dots-64k.txt", +]; + +#[test] +fn test_no_script_stdin() { + for fixture in INPUT_FILES { + new_ucmd!() + .arg("") + .pipe_in_fixture(fixture) + .succeeds() + .stdout_is_fixture(fixture); + } +} + +#[test] +fn test_no_script_file() { + for fixture in INPUT_FILES { + new_ucmd!() + .args(&["-e", "", fixture]) + .succeeds() + .stdout_is_fixture(fixture); + } +} + +#[test] +fn test_delete_stdin() { + for fixture in INPUT_FILES { + new_ucmd!() + .arg("d") + .pipe_in_fixture(fixture) + .succeeds() + .no_stdout(); + } 
+} + +#[test] +fn test_delete_file() { + for fixture in INPUT_FILES { + new_ucmd!() + .args(&["-e", "d", fixture]) + .succeeds() + .no_stdout(); + } +} + +/// Create a new test function to verify an execution for specified output. +macro_rules! check_output { + ($name:ident, $args:expr) => { + #[test] + fn $name() { + new_ucmd!() + .args(&$args) + .succeeds() + .stdout_is_fixture(&format!("output/{}", stringify!($name))); + } + }; +} + +// Input files +const LINES1: &str = "input/lines1"; +const LINES2: &str = "input/lines2"; +const NO_NEW_LINE: &str = "input/no-new-line.txt"; + +// Test address ranges +check_output!(addr_one_line, ["-n", "-e", "4p", LINES1]); +check_output!(addr_straddle, ["-n", "-e", "20p", LINES1, LINES2]); +check_output!(addr_last_one_file, ["-n", "-e", "$p", LINES1]); +check_output!(addr_last_two_files, ["-n", "-e", "$p", LINES1, LINES2]); + +// TODO: Enable and configure for Unix/Windows, when "a" is implemented. +#[cfg(any())] +check_output!(addr_append_with_empty, ["-e", "$a\nhello", "/dev/null"]); + +#[cfg(unix)] +check_output!( + addr_last_with_empty, + ["-n", "-e", "$p", LINES1, "/dev/null", LINES2] +); + +#[cfg(windows)] +check_output!( + addr_last_with_empty, + ["-n", "-e", "$p", LINES1, "NUL", LINES2] +); + +check_output!(addr_past_last, ["-n", "-e", "20p", LINES1]); +check_output!(addr_not_found, ["-n", "-e", "/NOTFOUND/p", LINES1]); +check_output!(addr_found, ["-n", "/l1_7/p", LINES1]); +check_output!(addr_found_space, ["-n", " /l1_7/ p", LINES1]); +check_output!(addr_escaped_delimiter, ["-n", "\\_l1\\_7_p", LINES1]); +check_output!(addr_range_numeric, ["-n", "1,4p", LINES1]); +check_output!(addr_range_to_last, ["-n", "1,$p", LINES1, LINES2]); +check_output!(addr_range_to_pattern, ["-n", "1,/l2_9/p", LINES1, LINES2]); +check_output!(addr_pattern_to_last, ["-n", "/4/,$p", LINES1, LINES2]); +check_output!(addr_pattern_to_straddle, ["-n", "/4/,20p", LINES1, LINES2]); +check_output!(addr_pattern_to_pattern, ["-n", "/4/,/10/p", LINES1, 
LINES2]); +check_output!( + addr_pattern_straddle, + ["-n", "/l2_3/,/l1_8/p", LINES1, LINES2] +); +check_output!(addr_range_reverse, ["-n", "12,3p", LINES1, LINES2]); +check_output!( + addr_pattern_range_reverse, + ["-n", "/l1_7/,3p", LINES1, LINES2] +); +check_output!(addr_numeric_to_relative, ["-n", "13,+4p", LINES1, LINES2]); +check_output!( + addr_pattern_to_relative, + ["-n", "/l1_6/,+2p", LINES1, LINES2] +); +check_output!(addr_numeric_relative_straddle, ["-n", "12,+1p", LINES1]); +check_output!( + addr_first_separate, + ["-n", "--separate", "1p", LINES1, LINES2] +); +check_output!(addr_last_separate, ["-ns", "$p", LINES1, LINES2]); +check_output!(addr_two_lines_semicolon, ["-n", "-e", "4p;8p", LINES1]); +check_output!(addr_two_lines_newline, ["-n", "-e", "4p\n8p", LINES1]); +check_output!(addr_three_lines_semicolon, ["-n", "-e", "4p;8p;1p", LINES1]); +check_output!(addr_one_line_negate, ["-n", "-e", "4!p", LINES1]); +check_output!(addr_range_numeric_negate, ["-n", "1,4!p", LINES1]); +check_output!( + addr_pattern_to_pattern_negate, + ["-n", "/1_4/,/10/!p", LINES1] +); +check_output!(addr_empty_re_reuse, ["-n", "/_2/,//p", LINES1, LINES2]); +check_output!(addr_simple_negation, ["-e", r"4,12!s/^/^/", LINES1]); + +// Test substitutions +check_output!(subst_any, ["-e", r"s/./X/g", LINES1]); +check_output!(subst_any_global, ["-e", r"s,.,X,g", LINES1]); +check_output!(subst_escaped_magic_separator, ["-e", r"s.\..X.g", LINES1]); +check_output!(subst_escaped_braced_separator, ["-e", r"s/[\/]/Q/", LINES1]); +check_output!(subst_escaped_separator, ["-e", r"s_\__X_", LINES1]); +check_output!(subst_whole_match_group, ["-e", r"s/./(&)/g", LINES1]); +check_output!(subst_print, ["-ne", "s/1_1/S&/p", LINES1]); +check_output!( + subst_escaped_whole_match_group, + ["-e", r"s/./(\&)/g", LINES1] +); +check_output!( + subst_numerical_groups, + ["-e", r"s/\(.\)\(.\)\(.\)/x\3x\2x\1/g", LINES1] +); +check_output!( + subst_ere_numerical_groups, + [ + "--regexp-extended", + "-e", + 
r"s/(.)(.)(.)/x\3x\2x\1/g", + LINES1 + ] +); +check_output!(subst_multiline, ["-e", "s/_/u0\\\nu1\\\nu2/g", LINES1]); +check_output!(subst_numbered_replacement, ["-e", r"s/./X/4", LINES1]); +check_output!(subst_brace, ["-e", r"s/[123]/X/g", LINES1]); +check_output!(subst_case_insensitive, ["-e", r"s/L/Line/", LINES1]); +check_output!(subst_no_new_line, ["-e", r"s/l/L/g", NO_NEW_LINE]); + +#[test] +fn subst_write_file() -> std::io::Result<()> { + let temp = NamedTempFile::new()?; + let path = temp.path(); + let cmd = format!("s/_1/S_1/w {}", path.display()); + + new_ucmd!().args(&["-n", &cmd, LINES1]).succeeds(); + + let mut actual = String::new(); + temp.reopen()?.read_to_string(&mut actual)?; + + let expected = fs::read_to_string("tests/fixtures/sed/output/subst_write_file")?; + assert_eq!(actual, expected, "Output did not match fixture"); + + Ok(()) +} + +check_output!(trans_simple, ["-e", r"y/0123456789/9876543210/", LINES1]); +check_output!( + trans_delimiter, + ["-e", r"y10\123456789198765432\101", LINES1] +); +check_output!(trans_no_new_line, ["-e", r"y/l/L/", NO_NEW_LINE]); +check_output!(trans_newline, ["-e", r"1N;2y/\n/X/", LINES1]); +check_output!(subst_newline_class, ["-n", r"1{;N;s/[\n]/X/;p;}", LINES1]); +check_output!(subst_newline_re, ["-n", r"1{;N;s/\n/X/;p;}", LINES1]); +check_output!(print_to_newline, ["-n", r"1{;N;P;P;p;}", LINES1]); + +check_output!(pattern_next_print, ["-n", r"N;N;P", LINES1]); +check_output!(pattern_delete_to_newline, ["-n", r"2N;3p;3D;3p", LINES1]); +check_output!(pattern_delete_no_newline, ["-e", r"2D", LINES1]); +check_output!(pattern_delete_print, ["-n", r"4d;p", LINES1]); + +// FreeBSD sed does not produce any output for the following two +check_output!(pattern_append_delete, ["-e", r"N;N;N;D", LINES1]); +check_output!(pattern_append_delete_2, ["-e", r"N;N;N;D", LINES1, LINES2]); + +check_output!( + pattern_append_delete2_separate, + ["-s", r"N;N;N;D", LINES1, LINES2] +); +check_output!( + pattern_hold_append_swap, + 
["-e", r"2h;3H;4g;5G;6x;6p;6x;6p", LINES1] +); +check_output!(pattern_next_output, ["-e", r"4n", LINES1]); +check_output!(pattern_next_no_output, ["-n", "-e", r"4n", LINES1]); +check_output!(pattern_next_print_output, ["-e", r"4n;p", LINES1]); +check_output!(pattern_next_print_no_output, ["-n", "-e", r"4n;p", LINES1]); +check_output!(pattern_quit, [r"5q", LINES1]); +check_output!(pattern_quit_2, [r"5q", LINES1, LINES2]); + +check_output!( + block_simple_range, + [ + "-e", + r#" +4,12 { + s/^/^/ + s/$/$/ + s/_/T/ +}"#, + LINES1 + ] +); + +check_output!( + block_negative_range, + [ + "-e", + r#" +4,12 !{ + s/^/^/ + s/$/$/ + s/_/T/ +}"#, + LINES1 + ] +); + +check_output!( + block_negative_range_2, + [ + "-e", + r#" +4,12 !{ + s/^/^/ + s/$/$/ + s/_/T/ +}"#, + LINES1, + LINES2 + ] +); + +check_output!( + block_nested_selection, + [ + "-e", + r#" +4,12 { + s/^/^/ + /6/,/10/ { + s/$/$/ + /8/ s/_/T/ + } +}"#, + LINES1 + ] +); + +check_output!( + block_nested_negative_selection, + [ + "-e", + r#" +4,12 !{ + s/^/^/ + /6/,/10/ !{ + s/$/$/ + /8/ !s/_/T/ + } +}"#, + LINES1 + ] +); + +check_output!( + branch_plain, + [ + "-n", + "-e", + r#" +b label4 +:label3 +s/^/label3_/p +b end +:label4 +2,12b label1 +b label2 +:label1 +s/^/label1_/p +b +:label2 +s/^/label2_/p +b label3 +:end +"#, + LINES1 + ] +); + +check_output!( + branch_conditional_simple, + [ + "-n", + "-e", + r#" +s/l1_/l2_/ +t ok +b +:ok +s/^/tested /p +"#, + LINES1, + LINES2 + ] +); + +// SunOS and GNU sed behave as follows: lines 9-$ aren't printed at all +check_output!( + branch_to_block, + [ + "-n", + "-e", + r#" +5,8b inside +1,5 { + s/^/^/p + :inside + s/$/$/p +} +"#, + LINES1 + ] +); + +// Check that t clears the substitution done flag +check_output!( + branch_test_clears, + [ + "-n", + "-e", + r#" +1,8s/^/^/ +t l1 +:l1 +t l2 +s/$/$/p +b +:l2 +s/^/ERROR/ +"#, + LINES1 + ] +); + +// Check that reading a line clears the substitution done flag +check_output!( + branch_cycle_clears, + [ + "-n", + "-e", + r#" +t l2
+1,8s/^/^/p +2,7N +b +:l2 +s/^/ERROR/p +"#, + LINES1 + ] +); + +check_output!( + branch_conditional_boundary, + [ + "-e", + r#" +{ +:b +} +s/l/m/ +tb"#, + LINES1 + ] +); diff --git a/tests/fixtures/sed/input/dots-4k.txt b/tests/fixtures/sed/input/dots-4k.txt new file mode 100644 index 00000000..50a372b2 --- /dev/null +++ b/tests/fixtures/sed/input/dots-4k.txt @@ -0,0 +1 @@ +.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... diff --git a/tests/fixtures/sed/input/dots-64k.txt b/tests/fixtures/sed/input/dots-64k.txt new file mode 100644 index 00000000..9c9fd559 --- /dev/null +++ b/tests/fixtures/sed/input/dots-64k.txt @@ -0,0 +1,16 @@ +.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. +.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................. +.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................... +...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................................................... +.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................... +.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. +.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. +.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................. diff --git a/tests/fixtures/sed/input/dots-8k.txt b/tests/fixtures/sed/input/dots-8k.txt new file mode 100644 index 00000000..cdfcce14 --- /dev/null +++ b/tests/fixtures/sed/input/dots-8k.txt @@ -0,0 +1,2 @@ +.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................... +...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... diff --git a/tests/fixtures/sed/input/lines1 b/tests/fixtures/sed/input/lines1 new file mode 100644 index 00000000..3bcc601e --- /dev/null +++ b/tests/fixtures/sed/input/lines1 @@ -0,0 +1,14 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/input/lines2 b/tests/fixtures/sed/input/lines2 new file mode 100644 index 00000000..d2ff3827 --- /dev/null +++ b/tests/fixtures/sed/input/lines2 @@ -0,0 +1,9 @@ +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/input/no-new-line.txt b/tests/fixtures/sed/input/no-new-line.txt new file mode 100644 index 00000000..5ab2f8a4 --- /dev/null +++ b/tests/fixtures/sed/input/no-new-line.txt @@ -0,0 +1 @@ +Hello \ No newline at end of file diff --git a/tests/fixtures/sed/input/two-lines.txt b/tests/fixtures/sed/input/two-lines.txt new file mode 100644 index 00000000..e5c5c558 --- /dev/null +++ b/tests/fixtures/sed/input/two-lines.txt @@ -0,0 +1,2 @@ +line one +line two diff --git a/tests/fixtures/sed/output/addr_append_empty b/tests/fixtures/sed/output/addr_append_empty new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/sed/output/addr_empty_re_reuse b/tests/fixtures/sed/output/addr_empty_re_reuse new file mode 100644 index 00000000..feeeea41 --- /dev/null +++ b/tests/fixtures/sed/output/addr_empty_re_reuse @@ -0,0 +1,15 @@ +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 
diff --git a/tests/fixtures/sed/output/addr_escaped_delimiter b/tests/fixtures/sed/output/addr_escaped_delimiter new file mode 100644 index 00000000..8e6f085d --- /dev/null +++ b/tests/fixtures/sed/output/addr_escaped_delimiter @@ -0,0 +1 @@ +l1_7 diff --git a/tests/fixtures/sed/output/addr_first_separate b/tests/fixtures/sed/output/addr_first_separate new file mode 100644 index 00000000..07b5fe88 --- /dev/null +++ b/tests/fixtures/sed/output/addr_first_separate @@ -0,0 +1,2 @@ +l1_1 +l2_1 diff --git a/tests/fixtures/sed/output/addr_found b/tests/fixtures/sed/output/addr_found new file mode 100644 index 00000000..8e6f085d --- /dev/null +++ b/tests/fixtures/sed/output/addr_found @@ -0,0 +1 @@ +l1_7 diff --git a/tests/fixtures/sed/output/addr_found_space b/tests/fixtures/sed/output/addr_found_space new file mode 100644 index 00000000..8e6f085d --- /dev/null +++ b/tests/fixtures/sed/output/addr_found_space @@ -0,0 +1 @@ +l1_7 diff --git a/tests/fixtures/sed/output/addr_last_one_file b/tests/fixtures/sed/output/addr_last_one_file new file mode 100644 index 00000000..6165ce87 --- /dev/null +++ b/tests/fixtures/sed/output/addr_last_one_file @@ -0,0 +1 @@ +l1_14 diff --git a/tests/fixtures/sed/output/addr_last_separate b/tests/fixtures/sed/output/addr_last_separate new file mode 100644 index 00000000..3c6d6e9f --- /dev/null +++ b/tests/fixtures/sed/output/addr_last_separate @@ -0,0 +1,2 @@ +l1_14 +l2_9 diff --git a/tests/fixtures/sed/output/addr_last_two_files b/tests/fixtures/sed/output/addr_last_two_files new file mode 100644 index 00000000..1502f007 --- /dev/null +++ b/tests/fixtures/sed/output/addr_last_two_files @@ -0,0 +1 @@ +l2_9 diff --git a/tests/fixtures/sed/output/addr_last_with_empty b/tests/fixtures/sed/output/addr_last_with_empty new file mode 100644 index 00000000..1502f007 --- /dev/null +++ b/tests/fixtures/sed/output/addr_last_with_empty @@ -0,0 +1 @@ +l2_9 diff --git a/tests/fixtures/sed/output/addr_not_found b/tests/fixtures/sed/output/addr_not_found 
new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/sed/output/addr_numeric_relative_straddle b/tests/fixtures/sed/output/addr_numeric_relative_straddle new file mode 100644 index 00000000..543c0574 --- /dev/null +++ b/tests/fixtures/sed/output/addr_numeric_relative_straddle @@ -0,0 +1,2 @@ +l1_12 +l1_13 diff --git a/tests/fixtures/sed/output/addr_numeric_to_relative b/tests/fixtures/sed/output/addr_numeric_to_relative new file mode 100644 index 00000000..1a9e0660 --- /dev/null +++ b/tests/fixtures/sed/output/addr_numeric_to_relative @@ -0,0 +1,5 @@ +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 diff --git a/tests/fixtures/sed/output/addr_one_line b/tests/fixtures/sed/output/addr_one_line new file mode 100644 index 00000000..a7c92f0e --- /dev/null +++ b/tests/fixtures/sed/output/addr_one_line @@ -0,0 +1 @@ +l1_4 diff --git a/tests/fixtures/sed/output/addr_one_line_negate b/tests/fixtures/sed/output/addr_one_line_negate new file mode 100644 index 00000000..77e5cc0e --- /dev/null +++ b/tests/fixtures/sed/output/addr_one_line_negate @@ -0,0 +1,13 @@ +l1_1 +l1_2 +l1_3 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/addr_past_last b/tests/fixtures/sed/output/addr_past_last new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/sed/output/addr_pattern_range_reverse b/tests/fixtures/sed/output/addr_pattern_range_reverse new file mode 100644 index 00000000..8e6f085d --- /dev/null +++ b/tests/fixtures/sed/output/addr_pattern_range_reverse @@ -0,0 +1 @@ +l1_7 diff --git a/tests/fixtures/sed/output/addr_pattern_straddle b/tests/fixtures/sed/output/addr_pattern_straddle new file mode 100644 index 00000000..c4d558b1 --- /dev/null +++ b/tests/fixtures/sed/output/addr_pattern_straddle @@ -0,0 +1,7 @@ +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/addr_pattern_to_last b/tests/fixtures/sed/output/addr_pattern_to_last new file mode 100644 index 00000000..1f20a523 --- 
/dev/null +++ b/tests/fixtures/sed/output/addr_pattern_to_last @@ -0,0 +1,20 @@ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/addr_pattern_to_pattern b/tests/fixtures/sed/output/addr_pattern_to_pattern new file mode 100644 index 00000000..df7978fd --- /dev/null +++ b/tests/fixtures/sed/output/addr_pattern_to_pattern @@ -0,0 +1,17 @@ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/addr_pattern_to_pattern_negate b/tests/fixtures/sed/output/addr_pattern_to_pattern_negate new file mode 100644 index 00000000..4b859514 --- /dev/null +++ b/tests/fixtures/sed/output/addr_pattern_to_pattern_negate @@ -0,0 +1,7 @@ +l1_1 +l1_2 +l1_3 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/addr_pattern_to_relative b/tests/fixtures/sed/output/addr_pattern_to_relative new file mode 100644 index 00000000..b1827918 --- /dev/null +++ b/tests/fixtures/sed/output/addr_pattern_to_relative @@ -0,0 +1,3 @@ +l1_6 +l1_7 +l1_8 diff --git a/tests/fixtures/sed/output/addr_pattern_to_straddle b/tests/fixtures/sed/output/addr_pattern_to_straddle new file mode 100644 index 00000000..4d8dc1e3 --- /dev/null +++ b/tests/fixtures/sed/output/addr_pattern_to_straddle @@ -0,0 +1,17 @@ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 diff --git a/tests/fixtures/sed/output/addr_range_numeric b/tests/fixtures/sed/output/addr_range_numeric new file mode 100644 index 00000000..ddd49762 --- /dev/null +++ b/tests/fixtures/sed/output/addr_range_numeric @@ -0,0 +1,4 @@ +l1_1 +l1_2 +l1_3 +l1_4 diff --git a/tests/fixtures/sed/output/addr_range_numeric_negate b/tests/fixtures/sed/output/addr_range_numeric_negate new file mode 100644 index 00000000..18476253 --- /dev/null +++ b/tests/fixtures/sed/output/addr_range_numeric_negate @@ -0,0 
+1,10 @@ +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/addr_range_reverse b/tests/fixtures/sed/output/addr_range_reverse new file mode 100644 index 00000000..7fb81db4 --- /dev/null +++ b/tests/fixtures/sed/output/addr_range_reverse @@ -0,0 +1 @@ +l1_12 diff --git a/tests/fixtures/sed/output/addr_range_to_last b/tests/fixtures/sed/output/addr_range_to_last new file mode 100644 index 00000000..d08d35c6 --- /dev/null +++ b/tests/fixtures/sed/output/addr_range_to_last @@ -0,0 +1,23 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/addr_range_to_pattern b/tests/fixtures/sed/output/addr_range_to_pattern new file mode 100644 index 00000000..d08d35c6 --- /dev/null +++ b/tests/fixtures/sed/output/addr_range_to_pattern @@ -0,0 +1,23 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/addr_simple_negation b/tests/fixtures/sed/output/addr_simple_negation new file mode 100644 index 00000000..67f03ef6 --- /dev/null +++ b/tests/fixtures/sed/output/addr_simple_negation @@ -0,0 +1,14 @@ +^l1_1 +^l1_2 +^l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +^l1_13 +^l1_14 diff --git a/tests/fixtures/sed/output/addr_straddle b/tests/fixtures/sed/output/addr_straddle new file mode 100644 index 00000000..fe925b0a --- /dev/null +++ b/tests/fixtures/sed/output/addr_straddle @@ -0,0 +1 @@ +l2_6 diff --git a/tests/fixtures/sed/output/addr_three_lines_semicolon b/tests/fixtures/sed/output/addr_three_lines_semicolon new file mode 100644 index 00000000..d01034e5 --- /dev/null +++ b/tests/fixtures/sed/output/addr_three_lines_semicolon @@ -0,0 +1,3 @@ +l1_1 +l1_4 +l1_8 diff --git a/tests/fixtures/sed/output/addr_two_lines_newline 
b/tests/fixtures/sed/output/addr_two_lines_newline new file mode 100644 index 00000000..f209cac5 --- /dev/null +++ b/tests/fixtures/sed/output/addr_two_lines_newline @@ -0,0 +1,2 @@ +l1_4 +l1_8 diff --git a/tests/fixtures/sed/output/addr_two_lines_semicolon b/tests/fixtures/sed/output/addr_two_lines_semicolon new file mode 100644 index 00000000..f209cac5 --- /dev/null +++ b/tests/fixtures/sed/output/addr_two_lines_semicolon @@ -0,0 +1,2 @@ +l1_4 +l1_8 diff --git a/tests/fixtures/sed/output/block_negative_range b/tests/fixtures/sed/output/block_negative_range new file mode 100644 index 00000000..5b15dae5 --- /dev/null +++ b/tests/fixtures/sed/output/block_negative_range @@ -0,0 +1,14 @@ +^l1T1$ +^l1T2$ +^l1T3$ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +^l1T13$ +^l1T14$ diff --git a/tests/fixtures/sed/output/block_negative_range_2 b/tests/fixtures/sed/output/block_negative_range_2 new file mode 100644 index 00000000..d5484cb6 --- /dev/null +++ b/tests/fixtures/sed/output/block_negative_range_2 @@ -0,0 +1,23 @@ +^l1T1$ +^l1T2$ +^l1T3$ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +^l1T13$ +^l1T14$ +^l2T1$ +^l2T2$ +^l2T3$ +^l2T4$ +^l2T5$ +^l2T6$ +^l2T7$ +^l2T8$ +^l2T9$ diff --git a/tests/fixtures/sed/output/block_nested_negative_selection b/tests/fixtures/sed/output/block_nested_negative_selection new file mode 100644 index 00000000..5b15dae5 --- /dev/null +++ b/tests/fixtures/sed/output/block_nested_negative_selection @@ -0,0 +1,14 @@ +^l1T1$ +^l1T2$ +^l1T3$ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +^l1T13$ +^l1T14$ diff --git a/tests/fixtures/sed/output/block_nested_selection b/tests/fixtures/sed/output/block_nested_selection new file mode 100644 index 00000000..488e94f6 --- /dev/null +++ b/tests/fixtures/sed/output/block_nested_selection @@ -0,0 +1,14 @@ +l1_1 +l1_2 +l1_3 +^l1_4 +^l1_5 +^l1_6$ +^l1_7$ +^l1T8$ +^l1_9$ +^l1_10$ +^l1_11 +^l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/block_simple_range 
b/tests/fixtures/sed/output/block_simple_range new file mode 100644 index 00000000..f963b03f --- /dev/null +++ b/tests/fixtures/sed/output/block_simple_range @@ -0,0 +1,14 @@ +l1_1 +l1_2 +l1_3 +^l1T4$ +^l1T5$ +^l1T6$ +^l1T7$ +^l1T8$ +^l1T9$ +^l1T10$ +^l1T11$ +^l1T12$ +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/branch_conditional_boundary b/tests/fixtures/sed/output/branch_conditional_boundary new file mode 100644 index 00000000..1557318a --- /dev/null +++ b/tests/fixtures/sed/output/branch_conditional_boundary @@ -0,0 +1,14 @@ +m1_1 +m1_2 +m1_3 +m1_4 +m1_5 +m1_6 +m1_7 +m1_8 +m1_9 +m1_10 +m1_11 +m1_12 +m1_13 +m1_14 diff --git a/tests/fixtures/sed/output/branch_conditional_simple b/tests/fixtures/sed/output/branch_conditional_simple new file mode 100644 index 00000000..ec339f43 --- /dev/null +++ b/tests/fixtures/sed/output/branch_conditional_simple @@ -0,0 +1,14 @@ +tested l2_1 +tested l2_2 +tested l2_3 +tested l2_4 +tested l2_5 +tested l2_6 +tested l2_7 +tested l2_8 +tested l2_9 +tested l2_10 +tested l2_11 +tested l2_12 +tested l2_13 +tested l2_14 diff --git a/tests/fixtures/sed/output/branch_cycle_clears b/tests/fixtures/sed/output/branch_cycle_clears new file mode 100644 index 00000000..6766fea1 --- /dev/null +++ b/tests/fixtures/sed/output/branch_cycle_clears @@ -0,0 +1,5 @@ +^l1_1 +^l1_2 +^l1_4 +^l1_6 +^l1_8 diff --git a/tests/fixtures/sed/output/branch_plain b/tests/fixtures/sed/output/branch_plain new file mode 100644 index 00000000..ac9202e6 --- /dev/null +++ b/tests/fixtures/sed/output/branch_plain @@ -0,0 +1,17 @@ +label2_l1_1 +label3_label2_l1_1 +label1_l1_2 +label1_l1_3 +label1_l1_4 +label1_l1_5 +label1_l1_6 +label1_l1_7 +label1_l1_8 +label1_l1_9 +label1_l1_10 +label1_l1_11 +label1_l1_12 +label2_l1_13 +label3_label2_l1_13 +label2_l1_14 +label3_label2_l1_14 diff --git a/tests/fixtures/sed/output/branch_test_clears b/tests/fixtures/sed/output/branch_test_clears new file mode 100644 index 00000000..a9ec85bf --- /dev/null +++ 
b/tests/fixtures/sed/output/branch_test_clears @@ -0,0 +1,14 @@ +^l1_1$ +^l1_2$ +^l1_3$ +^l1_4$ +^l1_5$ +^l1_6$ +^l1_7$ +^l1_8$ +l1_9$ +l1_10$ +l1_11$ +l1_12$ +l1_13$ +l1_14$ diff --git a/tests/fixtures/sed/output/branch_to_block b/tests/fixtures/sed/output/branch_to_block new file mode 100644 index 00000000..e1ddb547 --- /dev/null +++ b/tests/fixtures/sed/output/branch_to_block @@ -0,0 +1,12 @@ +^l1_1 +^l1_1$ +^l1_2 +^l1_2$ +^l1_3 +^l1_3$ +^l1_4 +^l1_4$ +l1_5$ +l1_6$ +l1_7$ +l1_8$ diff --git a/tests/fixtures/sed/output/pattern_append_delete b/tests/fixtures/sed/output/pattern_append_delete new file mode 100644 index 00000000..18476253 --- /dev/null +++ b/tests/fixtures/sed/output/pattern_append_delete @@ -0,0 +1,10 @@ +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_append_delete2_separate b/tests/fixtures/sed/output/pattern_append_delete2_separate new file mode 100644 index 00000000..cc300c2f --- /dev/null +++ b/tests/fixtures/sed/output/pattern_append_delete2_separate @@ -0,0 +1,17 @@ +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/pattern_append_delete_2 b/tests/fixtures/sed/output/pattern_append_delete_2 new file mode 100644 index 00000000..ae3aa47f --- /dev/null +++ b/tests/fixtures/sed/output/pattern_append_delete_2 @@ -0,0 +1,16 @@ +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 diff --git a/tests/fixtures/sed/output/pattern_delete_no_newline b/tests/fixtures/sed/output/pattern_delete_no_newline new file mode 100644 index 00000000..8c5285bb --- /dev/null +++ b/tests/fixtures/sed/output/pattern_delete_no_newline @@ -0,0 +1,13 @@ +l1_1 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_delete_print b/tests/fixtures/sed/output/pattern_delete_print new file mode 100644 index 
00000000..77e5cc0e --- /dev/null +++ b/tests/fixtures/sed/output/pattern_delete_print @@ -0,0 +1,13 @@ +l1_1 +l1_2 +l1_3 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_delete_to_newline b/tests/fixtures/sed/output/pattern_delete_to_newline new file mode 100644 index 00000000..b4b743ef --- /dev/null +++ b/tests/fixtures/sed/output/pattern_delete_to_newline @@ -0,0 +1,3 @@ +l1_2 +l1_3 +l1_3 diff --git a/tests/fixtures/sed/output/pattern_hold_append_swap b/tests/fixtures/sed/output/pattern_hold_append_swap new file mode 100644 index 00000000..e9169dc7 --- /dev/null +++ b/tests/fixtures/sed/output/pattern_hold_append_swap @@ -0,0 +1,20 @@ +l1_1 +l1_2 +l1_3 +l1_2 +l1_3 +l1_5 +l1_2 +l1_3 +l1_2 +l1_3 +l1_6 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_next_no_output b/tests/fixtures/sed/output/pattern_next_no_output new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/sed/output/pattern_next_output b/tests/fixtures/sed/output/pattern_next_output new file mode 100644 index 00000000..3bcc601e --- /dev/null +++ b/tests/fixtures/sed/output/pattern_next_output @@ -0,0 +1,14 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_next_print b/tests/fixtures/sed/output/pattern_next_print new file mode 100644 index 00000000..8cf6fd76 --- /dev/null +++ b/tests/fixtures/sed/output/pattern_next_print @@ -0,0 +1,4 @@ +l1_1 +l1_4 +l1_7 +l1_10 diff --git a/tests/fixtures/sed/output/pattern_next_print_no_output b/tests/fixtures/sed/output/pattern_next_print_no_output new file mode 100644 index 00000000..77e5cc0e --- /dev/null +++ b/tests/fixtures/sed/output/pattern_next_print_no_output @@ -0,0 +1,13 @@ +l1_1 +l1_2 +l1_3 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_next_print_output 
b/tests/fixtures/sed/output/pattern_next_print_output new file mode 100644 index 00000000..30d6f04d --- /dev/null +++ b/tests/fixtures/sed/output/pattern_next_print_output @@ -0,0 +1,27 @@ +l1_1 +l1_1 +l1_2 +l1_2 +l1_3 +l1_3 +l1_4 +l1_5 +l1_5 +l1_6 +l1_6 +l1_7 +l1_7 +l1_8 +l1_8 +l1_9 +l1_9 +l1_10 +l1_10 +l1_11 +l1_11 +l1_12 +l1_12 +l1_13 +l1_13 +l1_14 +l1_14 diff --git a/tests/fixtures/sed/output/pattern_quit b/tests/fixtures/sed/output/pattern_quit new file mode 100644 index 00000000..7ffbba3e --- /dev/null +++ b/tests/fixtures/sed/output/pattern_quit @@ -0,0 +1,5 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 diff --git a/tests/fixtures/sed/output/pattern_quit_2 b/tests/fixtures/sed/output/pattern_quit_2 new file mode 100644 index 00000000..7ffbba3e --- /dev/null +++ b/tests/fixtures/sed/output/pattern_quit_2 @@ -0,0 +1,5 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 diff --git a/tests/fixtures/sed/output/print_to_newline b/tests/fixtures/sed/output/print_to_newline new file mode 100644 index 00000000..67a31ba0 --- /dev/null +++ b/tests/fixtures/sed/output/print_to_newline @@ -0,0 +1,4 @@ +l1_1 +l1_1 +l1_1 +l1_2 diff --git a/tests/fixtures/sed/output/subst_any b/tests/fixtures/sed/output/subst_any new file mode 100644 index 00000000..dcf124b4 --- /dev/null +++ b/tests/fixtures/sed/output/subst_any @@ -0,0 +1,14 @@ +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXXX +XXXXX +XXXXX +XXXXX +XXXXX diff --git a/tests/fixtures/sed/output/subst_any_global b/tests/fixtures/sed/output/subst_any_global new file mode 100644 index 00000000..dcf124b4 --- /dev/null +++ b/tests/fixtures/sed/output/subst_any_global @@ -0,0 +1,14 @@ +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXXX +XXXXX +XXXXX +XXXXX +XXXXX diff --git a/tests/fixtures/sed/output/subst_brace b/tests/fixtures/sed/output/subst_brace new file mode 100644 index 00000000..ded4e981 --- /dev/null +++ b/tests/fixtures/sed/output/subst_brace @@ -0,0 +1,14 @@ +lX_X +lX_X +lX_X +lX_4 +lX_5 +lX_6 +lX_7 +lX_8 +lX_9 +lX_X0 +lX_XX 
+lX_XX +lX_XX +lX_X4 diff --git a/tests/fixtures/sed/output/subst_case_insensitive b/tests/fixtures/sed/output/subst_case_insensitive new file mode 100644 index 00000000..3bcc601e --- /dev/null +++ b/tests/fixtures/sed/output/subst_case_insensitive @@ -0,0 +1,14 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/subst_ere_numerical_groups b/tests/fixtures/sed/output/subst_ere_numerical_groups new file mode 100644 index 00000000..08bbf8cf --- /dev/null +++ b/tests/fixtures/sed/output/subst_ere_numerical_groups @@ -0,0 +1,14 @@ +x_x1xl1 +x_x1xl2 +x_x1xl3 +x_x1xl4 +x_x1xl5 +x_x1xl6 +x_x1xl7 +x_x1xl8 +x_x1xl9 +x_x1xl10 +x_x1xl11 +x_x1xl12 +x_x1xl13 +x_x1xl14 diff --git a/tests/fixtures/sed/output/subst_escaped_braced_separator b/tests/fixtures/sed/output/subst_escaped_braced_separator new file mode 100644 index 00000000..3bcc601e --- /dev/null +++ b/tests/fixtures/sed/output/subst_escaped_braced_separator @@ -0,0 +1,14 @@ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/subst_escaped_magic_separator b/tests/fixtures/sed/output/subst_escaped_magic_separator new file mode 100644 index 00000000..dcf124b4 --- /dev/null +++ b/tests/fixtures/sed/output/subst_escaped_magic_separator @@ -0,0 +1,14 @@ +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXXX +XXXXX +XXXXX +XXXXX +XXXXX diff --git a/tests/fixtures/sed/output/subst_escaped_separator b/tests/fixtures/sed/output/subst_escaped_separator new file mode 100644 index 00000000..833e1ba4 --- /dev/null +++ b/tests/fixtures/sed/output/subst_escaped_separator @@ -0,0 +1,14 @@ +l1X1 +l1X2 +l1X3 +l1X4 +l1X5 +l1X6 +l1X7 +l1X8 +l1X9 +l1X10 +l1X11 +l1X12 +l1X13 +l1X14 diff --git a/tests/fixtures/sed/output/subst_escaped_whole_match_group b/tests/fixtures/sed/output/subst_escaped_whole_match_group new file mode 100644 index 00000000..0ff0b9e7 --- /dev/null +++ 
b/tests/fixtures/sed/output/subst_escaped_whole_match_group @@ -0,0 +1,14 @@ +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) diff --git a/tests/fixtures/sed/output/subst_multiline b/tests/fixtures/sed/output/subst_multiline new file mode 100644 index 00000000..e0cd8b50 --- /dev/null +++ b/tests/fixtures/sed/output/subst_multiline @@ -0,0 +1,42 @@ +l1u0 +u1 +u21 +l1u0 +u1 +u22 +l1u0 +u1 +u23 +l1u0 +u1 +u24 +l1u0 +u1 +u25 +l1u0 +u1 +u26 +l1u0 +u1 +u27 +l1u0 +u1 +u28 +l1u0 +u1 +u29 +l1u0 +u1 +u210 +l1u0 +u1 +u211 +l1u0 +u1 +u212 +l1u0 +u1 +u213 +l1u0 +u1 +u214 diff --git a/tests/fixtures/sed/output/subst_newline_class b/tests/fixtures/sed/output/subst_newline_class new file mode 100644 index 00000000..0c5c10a5 --- /dev/null +++ b/tests/fixtures/sed/output/subst_newline_class @@ -0,0 +1 @@ +l1_1Xl1_2 diff --git a/tests/fixtures/sed/output/subst_newline_re b/tests/fixtures/sed/output/subst_newline_re new file mode 100644 index 00000000..0c5c10a5 --- /dev/null +++ b/tests/fixtures/sed/output/subst_newline_re @@ -0,0 +1 @@ +l1_1Xl1_2 diff --git a/tests/fixtures/sed/output/subst_no_new_line b/tests/fixtures/sed/output/subst_no_new_line new file mode 100644 index 00000000..accf6276 --- /dev/null +++ b/tests/fixtures/sed/output/subst_no_new_line @@ -0,0 +1 @@ +HeLLo \ No newline at end of file diff --git a/tests/fixtures/sed/output/subst_numbered_replacement b/tests/fixtures/sed/output/subst_numbered_replacement new file mode 100644 index 00000000..86f75c15 --- /dev/null +++ b/tests/fixtures/sed/output/subst_numbered_replacement @@ -0,0 +1,14 @@ +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X0 +l1_X1 +l1_X2 +l1_X3 +l1_X4 diff --git a/tests/fixtures/sed/output/subst_numerical_groups b/tests/fixtures/sed/output/subst_numerical_groups new file mode 100644 index 00000000..08bbf8cf --- /dev/null +++ 
b/tests/fixtures/sed/output/subst_numerical_groups @@ -0,0 +1,14 @@ +x_x1xl1 +x_x1xl2 +x_x1xl3 +x_x1xl4 +x_x1xl5 +x_x1xl6 +x_x1xl7 +x_x1xl8 +x_x1xl9 +x_x1xl10 +x_x1xl11 +x_x1xl12 +x_x1xl13 +x_x1xl14 diff --git a/tests/fixtures/sed/output/subst_print b/tests/fixtures/sed/output/subst_print new file mode 100644 index 00000000..9f44d0c9 --- /dev/null +++ b/tests/fixtures/sed/output/subst_print @@ -0,0 +1,6 @@ +lS1_1 +lS1_10 +lS1_11 +lS1_12 +lS1_13 +lS1_14 diff --git a/tests/fixtures/sed/output/subst_whole_match_group b/tests/fixtures/sed/output/subst_whole_match_group new file mode 100644 index 00000000..47c08aeb --- /dev/null +++ b/tests/fixtures/sed/output/subst_whole_match_group @@ -0,0 +1,14 @@ +(l)(1)(_)(1) +(l)(1)(_)(2) +(l)(1)(_)(3) +(l)(1)(_)(4) +(l)(1)(_)(5) +(l)(1)(_)(6) +(l)(1)(_)(7) +(l)(1)(_)(8) +(l)(1)(_)(9) +(l)(1)(_)(1)(0) +(l)(1)(_)(1)(1) +(l)(1)(_)(1)(2) +(l)(1)(_)(1)(3) +(l)(1)(_)(1)(4) diff --git a/tests/fixtures/sed/output/subst_write_file b/tests/fixtures/sed/output/subst_write_file new file mode 100644 index 00000000..952ff972 --- /dev/null +++ b/tests/fixtures/sed/output/subst_write_file @@ -0,0 +1,6 @@ +l1S_1 +l1S_10 +l1S_11 +l1S_12 +l1S_13 +l1S_14 diff --git a/tests/fixtures/sed/output/trans_delimiter b/tests/fixtures/sed/output/trans_delimiter new file mode 100644 index 00000000..48646d1a --- /dev/null +++ b/tests/fixtures/sed/output/trans_delimiter @@ -0,0 +1,14 @@ +l8_8 +l8_7 +l8_6 +l8_5 +l8_4 +l8_3 +l8_2 +l8_1 +l8_0 +l8_89 +l8_88 +l8_87 +l8_86 +l8_85 diff --git a/tests/fixtures/sed/output/trans_newline b/tests/fixtures/sed/output/trans_newline new file mode 100644 index 00000000..f414bb53 --- /dev/null +++ b/tests/fixtures/sed/output/trans_newline @@ -0,0 +1,13 @@ +l1_1Xl1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 diff --git a/tests/fixtures/sed/output/trans_no_new_line b/tests/fixtures/sed/output/trans_no_new_line new file mode 100644 index 00000000..accf6276 --- /dev/null +++ 
b/tests/fixtures/sed/output/trans_no_new_line @@ -0,0 +1 @@ +HeLLo \ No newline at end of file diff --git a/tests/fixtures/sed/output/trans_simple b/tests/fixtures/sed/output/trans_simple new file mode 100644 index 00000000..48646d1a --- /dev/null +++ b/tests/fixtures/sed/output/trans_simple @@ -0,0 +1,14 @@ +l8_8 +l8_7 +l8_6 +l8_5 +l8_4 +l8_3 +l8_2 +l8_1 +l8_0 +l8_89 +l8_88 +l8_87 +l8_86 +l8_85 diff --git a/tests/fixtures/sed/script/hanoi.sed b/tests/fixtures/sed/script/hanoi.sed new file mode 100644 index 00000000..e93d358b --- /dev/null +++ b/tests/fixtures/sed/script/hanoi.sed @@ -0,0 +1,98 @@ +# Towers of Hanoi in sed. +# Ex: +# Run "sed -f hanoi.sed", and enter: +# +# :abcd: : : +# +# note -- TWO carriage returns were once required, this will output the +# sequence of states involved in moving 4 rings, the largest called "a" and +# the smallest called "d", from the first to the second of three towers, so +# that the rings on any tower at any time are in descending order of size. +# You can start with a different arrangement and a different number of rings, +# say :ce:b:ax: and it will give the shortest procedure for moving them all +# to the middle tower. The rules are: the names of the rings must all be +# lower-case letters, they must be input within 3 fields (representing the +# towers) and delimited by 4 colons, such that the letters within each field +# are in alphabetical order (i.e. rings are in descending order of size). +# +# For the benefit of anyone who wants to figure out the script, an "internal" +# line of the form +# b:0abx:1a2b3 :2 :3x2 +# has the following meaning: the material after the three markers :1, :2, +# and :3 represents the three towers; in this case the current set-up is +# ":ab : :x :". The numbers after a, b and x in these fields indicate +# that the next time it gets a chance, it will move a to tower 2, move b +# to tower 3, and move x to tower 2. 
The string after :0 just keeps track +# of the alphabetical order of the names of the rings. The b at the +# beginning means that it is now dealing with ring b (either about to move +# it, or re-evaluating where it should next be moved to). +# +# Although this version is "limited" to 26 rings because of the size of the +# alphabet, one could write a script using the same idea in which the rings +# were represented by arbitrary [strings][within][brackets], and in place of +# the built-in line of the script giving the order of the letters of the +# alphabet, it would accept from the user a line giving the ordering to be +# assumed, e.g. [ucbvax][decvax][hplabs][foo][bar]. +# +# George Bergman +# Math, UC Berkeley 94720 USA + +# cleaning, diagnostics +s/ *//g +/^$/d +/[^a-z:]/{a\ +Illegal characters: use only a-z and ":". Try again. +d +} +/^:[a-z]*:[a-z]*:[a-z]*:$/!{a\ +Incorrect format: use\ +\ : string1 : string2 : string3 :\ +Try again. +d +} +/\([a-z]\).*\1/{a\ +Repeated letters not allowed. Try again. 
+d +} +# initial formatting +h +s/[a-z]/ /g +G +s/^:\( *\):\( *\):\( *\):\n:\([a-z]*\):\([a-z]*\):\([a-z]*\):$/:1\4\2\3:2\5\1\3:3\6\1\2:0/ +s/[a-z]/&2/g +s/^/abcdefghijklmnopqrstuvwxyz/ +:a +s/^\(.\).*\1.*/&\1/ +s/.// +/^[^:]/ba +s/\([^0]*\)\(:0.*\)/\2\1:/ +s/^[^0]*0\(.\)/\1&/ +:b +# outputting current state without markers +h +s/.*:1/:/ +s/[123]//gp +g +:c +# establishing destinations +/^\(.\).*\1:1/td +/^\(.\).*:1[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ +/^\(.\).*:1[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ +/^\(.\).*:1[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ +/^\(.\).*:2[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ +/^\(.\).*:2[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ +/^\(.\).*:2[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ +/^\(.\).*:3[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/ +/^\(.\).*:3[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/ +/^\(.\).*:3[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/ +bc +# iterate back to find smallest out-of-place ring +:d +s/^\(.\)\(:0[^:]*\([^:]\)\1.*:\([123]\)[^:]*\1\)\4/\3\2\4/ +td +# move said ring (right, resp. left) +s/^\(.\)\(.*\)\1\([23]\)\(.*:\3[^ ]*\) /\1\2 \4\1\3/ +s/^\(.\)\(.*:\([12]\)[^ ]*\) \(.*\)\1\3/\1\2\1\3\4 / +tb +s/.*/Done! Try another, or end with ^D./p +d diff --git a/tests/fixtures/sed/script/math.sed b/tests/fixtures/sed/script/math.sed new file mode 100644 index 00000000..509e6c3e --- /dev/null +++ b/tests/fixtures/sed/script/math.sed @@ -0,0 +1,438 @@ +# This is ksb's infamous sed calculator. (ksb@sa.fedex.com) +# +# +# $Id: math.sed,v 2.5 1998/08/02 13:23:34 ksb Exp ksb $ +# expr ::= (expr) | expr! | +# expr ^ expr | +# -expr | expr * expr | expr / expr | expr % expr | +# expr + expr | expr - expr | +# [0-9][0-9]* ; +# Bugs: some sign combinations don't work, and I got sick of added cases +# for unary +. Don't depend on signed math working all the time. 
-- ksb +# +# $Compile: echo "4+7*3+2^7/3" | sed -f %f + +# make sure the expression is well formed +s/[ ]//g +/[*\/^%+-]$/{ + a\ + poorly formed expression, dyadic operator on the end + q +} +/^[*\/^%]/{ + a\ + poorly formed expression, leading dyadic operator + q +} + +# fill hold space with done token +x +s/^.*/done/ +x + +# main loop, process operators ((), !, *, /, %, +, and -) +: loop +# uncomment the print below to follow the "logic" -- ksb +#p +/^[+]/{ + s/// + b loop +} +/^--/{ + s/// + b loop +} +# eval parenthesised sub expressions first +/^\(.*\)(\([^)]*\))\(.*\)$/{ + H + s//\2/ + x + s/^\(.*\)\n\(.*\)(\([^()]*\))\(.*\)$/()\2@\4@\1/ + x + b loop +} +# reduce a^b^c -> a^(b^c) +/\([0-9][0-9]*^\)\([0-9][0-9]*^[0-9][0-9^]*\)/{ + s//\1(\2)/ + b loop +} +# pull any buried exponents +/^\(.*[^0-9]\)\([0-9][0-9]*^[0-9][0-9]*\)$/{ + s//\1(\2)/ + b loop +} +/^\(.*[^0-9]\)\([0-9][0-9]*^[0-9][0-9]*\)\([^0-9].*\)$/{ + s//\1(\2)\3/ + b loop +} +/^\([0-9][0-9]*^[0-9][0-9]*\)\([^0-9].*\)$/{ + s//(\1)\2/ + b loop +} +/^\([-]*[0-9]*\)^0*$/{ + s//1/ + b loop +} +/^\([-]*[0-9]*\)^0*1$/{ + s//\1/ + b loop +} +/^\([-]*[0-9]*\)^-[0-9]*$/{ + s//0/ + b loop +} +/^\([-]*\)\([0-9]*\)^\([0-9][0-9]*[13579]\)$/{ + s//\1\2*((\2*\2)^(\3\/2))/ + b loop +} +/^[-]*\([0-9]*\)^\([0-9][0-9]*[02468]\)$/{ + s//(\1*\1)^(\2\/2)/ + b loop +} +# single digit powers (2 3,9 4,6,8 5,7 +/^[-]*\([0-9]*\)^0*2$/{ + s//(\1*\1)/ + b loop +} +/^\([-]*\)\([0-9]*\)^0*\([39]\)$/{ + s//\1(\2*(\2*\2))^(\3\/3)/ + b loop +} +/^[-]*\([0-9]*\)^0*\([468]\)$/{ + s//(\1*\1)^(\2\/2)/ + b loop +} +# 5 7 +/^\([-]*[0-9]*\)^\([0-9]*\)$/{ + s//\1*(\1^(\2-1))/ + b loop +} +# reduce all number factorials +/^0*[01]!/{ + s//1/ + b loop +} +/\([*+-/%^]\)0*[01]!/{ + s//\11/ + b loop +} +/\([0-9]*\)!/{ + s//(\1-1)!*\1/ + b loop +} +# sign simplifications +/^-\([0-9]*\)\([*/%]\)-\([0-9]*\)$/{ + s//\1\2\3/ + b loop +} +/^\([0-9]*\)\([*/%]\)-\([0-9]*\)$/{ + s//-\1\2\3/ + b loop +} +/^-\([0-9][0-9]*\)[+]*-\([0-9][0-9]*\)$/{ + s//\1+\2/ 
+ x + s/\(.*\)/()-@@\1/ + x + b loop +} +/^-\([0-9]*\)[+]\([0-9]\)*$/{ + s//\2-\1/ + b loop +} +/^-.*[-+*/%].*/{ + H + s/^-// + x + s/^\(.*\)\n-.*$/()-@@\1/ + x + b loop +} +# can we simplify multiplications +/^\([0-9]*\)\([*][0-9]*[1-9]\)00*$/{ + H + s//\1\2/ + x + s/^\(.*\)\n[0-9]*[*][0-9]*[1-9]\(00*\)$/()@\2@\1/ + x + b loop +} +/^\([0-9][1-9]*\)00*\([*][0-9]*\)$/{ + H + s//\1\2/ + x + s/^\(.*\)\n[0-9][1-9]*\(00*\)[*][0-9]*$/()@\2@\1/ + x + b loop +} +# can we simplify division (20/30 -> 2/3) +/^\([0-9][0-9]*\)0\([/%]\)\([0-9][0-9]*\)0$/{ + s//\1\2\3/ + b loop +} +# n/1 -> n +/^0*\([0-9][0-9]*\)0[/]0*1$/{ + s//\1/ + b loop +} +# n%2 -> last_digit(n)%2 (same for 1, BTW) N.B. NO LOOP +/^[0-9]*\([0-9]\)%0*\([12]\)$/{ + s//\1%\2/ +} +# move any mul/divs to the front via parans +/^\([0-9+]*\)\([-+]\)\([0-9]*[*/][0-9*/]*\)/{ + s//\1\2(\3)/ + b loop +} +# can we div or mul +/^[0-9]*[*][0-9]*$/{ + b mul +} +/^[0-9]*[/%]0*$/{ + i\ +divide by zero + d +} +/^[0-9]*[/%][0-9]*$/{ + H + s/\([0-9]\).*[/%]/\1-/ + x + s/^\(.*\)\n\([0-9]\)\([0-9]*\)\([/%]\)\([0-9]*\).*$/.\4\3q0r\2-\5@\1/ + x + b loop +} +/^\([0-9]*[*/%][0-9]*\)\(.*\)/{ + H + s//\1/ + x + s/^\(.*\)\n\([0-9]*[*/][0-9]*\)\(.*\)$/()@\3@\1/ + x + b loop +} +# can we add or subtract -- note subtract hold expression for underflow +/^[0-9]*[+][0-9]*$/{ + s/$/=/ + b add +} +/^[0-9][0-9]*-[0-9]*$/{ + H + s/$/=/ + b sub +} +/^\([0-9][0-9]*[-+][0-9]*\)\(.*\)/{ + H + s//\1/ + x + s/^\(.*\)\n\([0-9]*[-+][0-9]*\)\(.*\)$/()@\3@\1/ + x + b loop +} +# look in hold space for stack to reduce +x +/^done$/{ + x + s/^0*\([0-9][0-9]*\)/\1/ + p + d +} +# .[/%] numerator q quotient r remainder-divisor @stack +/^\./{ + x + /^[^-]/{ + H + x + s/.\(.\)\([0-9]*\)q\([^r]*\)r\([0-9]*\)-\([0-9]*\)@\(.*\)\n\(.*\)/.\1\2q\3+1r\7-\5@\6/ + h + s/..[0-9]*q[^r]*r\([0-9]*-[0-9]*\)@.*/\1/ + b loop + } + /^-/{ + g + /.\(.\)\([0-9]\)\([0-9]*\)q\([^r]*\)r0*\([0-9]*\)-\([^@]*\)@.*/{ + s//\5\2-\6/ + x + 
s/.\(.\)\([0-9]\)\([0-9]*\)q\([^r]*\)r0*\([0-9]*\)-\([0-9]*\)@\(.*\)/.\1\3q(\4)*10r\5\2-\6@\7/ + x + b loop + } +# no digits to shift on + s/^\.[/]q\([^r]*\)r[^@]*@.*/\1/ + s/^\.[%]q[^r]*r0*\([0-9][0-9]*\)-[^@]*@.*/\1/ + /^\./{ + i\ +divide error + q + } + x + s/^\.[/%]q[^r]*r[^@]*@\(.*\)/\1/ + x + b loop + } +} +/^()/{ + s/// + x + G + s/\(.*\)\n\([^@]*\)@\([^@]*\)@\(.*\)/\2\1\3/ + x + s/[^@]*@[^@]*@\(.*\)/\1/ + x + b loop +} +i\ +help, stack problem - the hold space +p +x +i\ +and the pat space +p +i\ +quit +q + +# turn mul into add until 1*x -> x, 0*x -> 0 +: mul +/^00*\*.*/{ + s//0/ + b loop +} +/^0*1\*/{ + s/// +: leading + s/^0*\([0-9][0-9]*\)/\1/ + b loop +} +s/^\([0-9]*\)0\*\([0-9]*\)/\1*\20/ +s/^\([0-9]*\)1\*\([0-9]*\)/\1*\20+\2/ +s/^\([0-9]*\)2\*\([0-9]*\)/\1*\20+(\2+\2)/ +s/^\([0-9]*\)3\*\([0-9]*\)/\1*\20+(\2+\2+\2)/ +s/^\([0-9]*\)4\*\([0-9]*\)/\1*\20+(\2+\2+\2+\2)/ +s/^\([0-9]*\)5\*\([0-9]*\)/\1*\20+(\2+\2+\2+\2+\2)/ +s/^\([0-9]*\)6\*\([0-9]*\)/\1*\20+(\2+\2+\2+\2+\2+\2)/ +s/^\([0-9]*\)7\*\([0-9]*\)/\1*\20+(\2+\2+\2+\2+\2+\2+\2)/ +s/^\([0-9]*\)8\*\([0-9]*\)/\1*\20+(\2+\2+\2+\2+\2+\2+\2+\2)/ +s/^\([0-9]*\)9\*\([0-9]*\)/\1*\20+(\2+\2+\2+\2+\2+\2+\2+\2+\2)/ +/^0*\*[0-9]*[+]*\(.*\)/{ + s//\1/ + b loop +} +b mul + +# get rid of a plus term until 0+x -> x +: add +/^[+]\([0-9+*]*\)=/{ + s//\1/ + b leading +} +/^\([0-9*]*\)[+]=/{ + s//\1/ + b loop +} +/^\([0-9]*\)0[+]\([0-9]*\)\([0-9]\)=/{ + s//\1+\2=\3/ + b add +} +/^\([0-9]*\)\([0-9]\)[+]\([0-9]*\)0=/{ + s//\1+\3=\2/ + b add +} +s/^\([0-9]*\)1[+]/\10+/ +s/^\([0-9]*\)2[+]/\11+/ +s/^\([0-9]*\)3[+]/\12+/ +s/^\([0-9]*\)4[+]/\13+/ +s/^\([0-9]*\)5[+]/\14+/ +s/^\([0-9]*\)6[+]/\15+/ +s/^\([0-9]*\)7[+]/\16+/ +s/^\([0-9]*\)8[+]/\17+/ +s/^\([0-9]*\)9[+]/\18+/ + +s/9=\([0-9]*\)$/_=\1/ +s/8=\([0-9]*\)$/9=\1/ +s/7=\([0-9]*\)$/8=\1/ +s/6=\([0-9]*\)$/7=\1/ +s/5=\([0-9]*\)$/6=\1/ +s/4=\([0-9]*\)$/5=\1/ +s/3=\([0-9]*\)$/4=\1/ +s/2=\([0-9]*\)$/3=\1/ +s/1=\([0-9]*\)$/2=\1/ +/_/{ + s//_0/ + : inc + s/9_/_0/ + s/8_/9/ + s/7_/8/ + 
s/6_/7/ + s/5_/6/ + s/4_/5/ + s/3_/4/ + s/2_/3/ + s/1_/2/ + s/0_/1/ + s/[+]_/+1/ + /_/b inc +} +b add + +# get rid of a sub term until /-0*=/ or underflow +: sub +/^\([0-9]*\)-0*=/{ + s//\1/ + x + s/\(.*\)\n.*$/\1/ + x + b leading +} +/^-\([0-9].*\)=/{ +: under + g + s/.*\n\([0-9]*\)-\([0-9]*\).*/-(\2-\1)/ + x + s/\(.*\)\n.*/\1/ + x + b loop +} +/^\([0-9]*\)\([0-9]\)-\([0-9]*\)0=/{ + s//\1-\3=\2/ + b sub +} +s/1=/0=/ +s/2=/1=/ +s/3=/2=/ +s/4=/3=/ +s/5=/4=/ +s/6=/5=/ +s/7=/6=/ +s/8=/7=/ +s/9=/8=/ + +s/^\([0-9]*\)1-/\1_-/ +s/^\([0-9]*\)2-/\11-/ +s/^\([0-9]*\)3-/\12-/ +s/^\([0-9]*\)4-/\13-/ +s/^\([0-9]*\)5-/\14-/ +s/^\([0-9]*\)6-/\15-/ +s/^\([0-9]*\)7-/\16-/ +s/^\([0-9]*\)8-/\17-/ +s/^\([0-9]*\)9-/\18-/ +s/^\([0-9]*\)0-/\1'9-/ +s/_/0/ + +: scarry +/0'/{ + s//'9/ + b scarry +} +/^'/{ + b under +} +s/1'/0/ +s/2'/1/ +s/3'/2/ +s/4'/3/ +s/5'/4/ +s/6'/5/ +s/7'/6/ +s/8'/7/ +s/9'/8/ + +b sub