From d1ed9c189d9e69a5eacf66f8a5ffaf2b6e07530e Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 8 Aug 2025 12:09:57 +0100 Subject: [PATCH 1/6] Refactor CLI build handling --- src/runner.rs | 97 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 72 insertions(+), 25 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index aca4d7c1..210eeb07 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -13,7 +13,7 @@ use std::io::{self, BufRead, BufReader, Write}; use std::path::Path; use std::process::{Command, Stdio}; use std::thread; -use tempfile::Builder; +use tempfile::{Builder, NamedTempFile}; use tracing::{debug, info}; #[derive(Debug, Clone)] @@ -74,26 +74,10 @@ pub fn run(cli: &Cli) -> Result<()> { targets: Vec::new(), })); match command { - Commands::Build(args) => { - let ninja = generate_ninja(cli)?; - let targets = BuildTargets::new(args.targets); - if let Some(path) = args.emit { - write_and_log(&path, &ninja)?; - run_ninja(Path::new("ninja"), cli, &path, &targets)?; - } else { - let tmp = Builder::new() - .prefix("netsuke.") - .suffix(".ninja") - .tempfile() - .context("create temp file")?; - write_and_log(tmp.path(), &ninja)?; - run_ninja(Path::new("ninja"), cli, tmp.path(), &targets)?; - } - Ok(()) - } + Commands::Build(args) => handle_build(cli, &args), Commands::Manifest { file } => { let ninja = generate_ninja(cli)?; - write_and_log(&file, &ninja)?; + write_ninja_file(&file, &ninja)?; Ok(()) } Commands::Clean => { @@ -107,6 +91,56 @@ pub fn run(cli: &Cli) -> Result<()> { } } +/// Resolve the manifest, generate the Ninja file and invoke the build. +/// +/// # Errors +/// +/// Returns an error if manifest generation or Ninja execution fails. 
+/// +/// # Examples +/// ```ignore +/// use netsuke::cli::{BuildArgs, Cli}; +/// use netsuke::runner::handle_build; +/// let cli = Cli { file: "Netsukefile".into(), directory: None, jobs: None, verbose: false, command: None }; +/// let args = BuildArgs { emit: None, targets: vec![] }; +/// handle_build(&cli, &args).unwrap(); +/// ``` +fn handle_build(cli: &Cli, args: &BuildArgs) -> Result<()> { + let ninja = generate_ninja(cli)?; + let targets = BuildTargets::new(args.targets.clone()); + + if let Some(path) = &args.emit { + write_ninja_file(path, &ninja)?; + run_ninja(Path::new("ninja"), cli, path, &targets)?; + } else { + let tmp = create_temp_ninja_file(&ninja)?; + run_ninja(Path::new("ninja"), cli, tmp.path(), &targets)?; + } + Ok(()) +} + +/// Create a temporary Ninja file on disk containing `content`. +/// +/// # Errors +/// +/// Returns an error if the file cannot be created or written. +/// +/// # Examples +/// ```ignore +/// use netsuke::runner::{create_temp_ninja_file, NinjaContent}; +/// let tmp = create_temp_ninja_file(&NinjaContent::new("".into())).unwrap(); +/// assert!(tmp.path().to_string_lossy().ends_with(".ninja")); +/// ``` +fn create_temp_ninja_file(content: &NinjaContent) -> Result<NamedTempFile> { + let tmp = Builder::new() + .prefix("netsuke.") + .suffix(".ninja") + .tempfile() + .context("create temp file")?; + write_ninja_file(tmp.path(), content)?; + Ok(tmp) +} + /// Write `content` to `path` and log the file's location. 
/// /// # Errors @@ -116,9 +150,9 @@ pub fn run(cli: &Cli) -> Result<()> { /// # Examples /// ```ignore /// let content = NinjaContent::new("rule cc\n".to_string()); -/// write_and_log(Path::new("out.ninja"), &content).unwrap(); +/// write_ninja_file(Path::new("out.ninja"), &content).unwrap(); /// ``` -fn write_and_log(path: &Path, content: &NinjaContent) -> Result<()> { +fn write_ninja_file(path: &Path, content: &NinjaContent) -> Result<()> { fs::write(path, content.as_str()) .with_context(|| format!("failed to write Ninja file to {}", path.display()))?; info!("Generated Ninja file at {}", path.display()); @@ -146,10 +180,7 @@ fn write_and_log(path: &Path, content: &NinjaContent) -> Result<()> { /// assert!(ninja.as_str().contains("rule")); /// ``` fn generate_ninja(cli: &Cli) -> Result<NinjaContent> { - let manifest_path = cli - .directory - .as_ref() - .map_or_else(|| cli.file.clone(), |dir| dir.join(&cli.file)); + let manifest_path = resolve_manifest_path(cli); let manifest = manifest::from_path(&manifest_path) .with_context(|| format!("loading manifest at {}", manifest_path.display()))?; let ast_json = serde_json::to_string_pretty(&manifest).context("serialising manifest")?; @@ -158,6 +189,22 @@ fn generate_ninja(cli: &Cli) -> Result<NinjaContent> { Ok(NinjaContent::new(ninja_gen::generate(&graph))) } +/// Determine the manifest path respecting the CLI's directory option. +/// +/// # Examples +/// ``` +/// use netsuke::cli::Cli; +/// use netsuke::runner::resolve_manifest_path; +/// let cli = Cli { file: "Netsukefile".into(), directory: None, jobs: None, verbose: false, command: None }; +/// assert!(resolve_manifest_path(&cli).ends_with("Netsukefile")); +/// ``` +#[must_use] +fn resolve_manifest_path(cli: &Cli) -> std::path::PathBuf { + cli.directory + .as_ref() + .map_or_else(|| cli.file.clone(), |dir| dir.join(&cli.file)) +} + /// Check if `arg` contains a sensitive keyword. 
/// /// # Examples From 9930c3a11e41175616307e743127f4c7817d3d36 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 8 Aug 2025 13:24:11 +0100 Subject: [PATCH 2/6] Deduplicate Ninja invocation and expand tests --- AGENTS.md | 32 ++- docs/srgn.md | 544 ++++++++++++++++++++++++++--------- src/runner.rs | 39 +-- tests/runner_tests.rs | 6 +- tests/steps/process_steps.rs | 2 +- tests/support/mod.rs | 5 + 6 files changed, 466 insertions(+), 162 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8dfd23e2..4646a738 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -150,13 +150,13 @@ project: with a backslash. - Prefer single line versions of functions where appropriate. I.e., - ``` + ```rust pub fn new(id: u64) -> Self { Self(id) } ``` Instead of: - ``` + ```rust pub fn new(id: u64) -> Self { Self(id) } @@ -205,12 +205,17 @@ project: The following tooling is available in this environment: - `mbake` – A Makefile validator. Run using `mbake validate Makefile`. -- `strace` – Traces system calls and signals made by a process; useful for debugging runtime behaviour and syscalls. -- `gdb` – The GNU Debugger, for inspecting and controlling programs as they execute (or post-mortem via core dumps). -- `ripgrep` – Fast, recursive text search tool (`grep` alternative) that respects `.gitignore` files. +- `strace` – Traces system calls and signals made by a process; useful for + debugging runtime behaviour and syscalls. +- `gdb` – The GNU Debugger, for inspecting and controlling programs as they + execute (or post-mortem via core dumps). +- `ripgrep` – Fast, recursive text search tool (`grep` alternative) that + respects `.gitignore` files. - `ltrace` – Traces calls to dynamic library functions made by a process. -- `valgrind` – Suite for detecting memory leaks, profiling, and debugging low-level memory errors. -- `bpftrace` – High-level tracing tool for eBPF, using a custom scripting language for kernel and application tracing. 
+- `valgrind` – Suite for detecting memory leaks, profiling, and debugging + low-level memory errors. +- `bpftrace` – High-level tracing tool for eBPF, using a custom scripting + language for kernel and application tracing. - `lsof` – Lists open files and the processes using them. - `htop` – Interactive process viewer (visual upgrade to `top`). - `iotop` – Displays and monitors I/O usage by processes. @@ -219,16 +224,21 @@ The following tooling is available in this environment: - `bat` – `cat` clone with syntax highlighting, Git integration, and paging. - `delta` – Syntax-highlighted pager for Git and diff output. - `tcpdump` – Captures and analyses network traffic at the packet level. -- `nmap` – Network scanner for host discovery, port scanning, and service identification. +- `nmap` – Network scanner for host discovery, port scanning, and service + identification. - `lldb` – LLVM debugger, alternative to `gdb`. - `eza` – Modern `ls` replacement with more features and better defaults. - `fzf` – Interactive fuzzy finder for selecting files, commands, etc. - `hyperfine` – Command-line benchmarking tool with statistical output. - `shellcheck` – Linter for shell scripts, identifying errors and bad practices. - `fd` – Fast, user-friendly `find` alternative with sensible defaults. -- `checkmake` – Linter for `Makefile`s, ensuring they follow best practices and conventions. -- `srgn` – [Structural grep](https://github.com/alexpovel/srgn), searches code and enables editing by syntax tree patterns (see `docs/srgn.md` for a complete guide). -- `difft` **(Difftastic)** – Semantic diff tool that compares code structure rather than just text differences. +- `checkmake` – Linter for `Makefile`s, ensuring they follow best practices and + conventions. +- `srgn` – [Structural grep](https://github.com/alexpovel/srgn), searches code + and enables editing by syntax tree patterns (see `docs/srgn.md` for a + complete guide). 
+- `difft` **(Difftastic)** – Semantic diff tool that compares code structure + rather than just text differences. ## Key Takeaway diff --git a/docs/srgn.md b/docs/srgn.md index 6d97671c..6c661b41 100644 --- a/docs/srgn.md +++ b/docs/srgn.md @@ -4,47 +4,99 @@ ### 1.1 Beyond Grep: The Need for Syntactic Precision -In the arsenal of any command-line proficient developer, tools like `grep`, `sed`, and `ripgrep` are indispensable instruments for searching and manipulating text. They are fast, powerful, and universally available. However, they share a fundamental limitation: they perceive source code as a flat stream of characters, oblivious to its intricate grammatical structure. This blindness prevents them from reliably performing context-aware refactoring, where a change in one syntactic location (e.g., a function signature) should not affect another (e.g., a string literal). - -This is the precise gap that `srgn`, the "code surgeon," is designed to fill.1 It operates as a powerful hybrid, blending the regex-based pattern matching of - -`grep`, the stream-editing capabilities of `tr` and `sed`, and the syntactic intelligence of the `tree-sitter` parsing framework.1 - -`srgn` complements traditional tools by operating on a different "dimension" of code analysis.1 It is not a replacement for full-featured IDE refactoring engines but a specialized scalpel for tasks that are too complex for a simple regex and too specific for a generic IDE command. - -The ideal use case for `srgn` emerges when a refactoring task requires precision that text-based tools cannot provide, yet falls outside the scope of standard IDE functions like "Rename All" or "Find All References." For example, a global regex replacement to change a function call `foo()` to `bar()` might incorrectly alter variable names like `my_foo` or text within comments. 
Conversely, an IDE's rename function operates on a specific symbol's definition and usages but cannot execute a rule-based transformation, such as "convert all top-level `print()` calls to `logging.debug()`." `srgn` excels at this kind of precise, rule-based, cross-file surgery, making it a strategic asset for enforcing coding standards, executing targeted API migrations, and performing complex cleanups. +In the arsenal of any command-line proficient developer, tools like `grep`, +`sed`, and `ripgrep` are indispensable instruments for searching and +manipulating text. They are fast, powerful, and universally available. However, +they share a fundamental limitation: they perceive source code as a flat stream +of characters, oblivious to its intricate grammatical structure. This blindness +prevents them from reliably performing context-aware refactoring, where a +change in one syntactic location (e.g., a function signature) should not affect +another (e.g., a string literal). + +This is the precise gap that `srgn`, the "code surgeon," is designed to fill.1 +It operates as a powerful hybrid, blending the regex-based pattern matching of + +`grep`, the stream-editing capabilities of `tr` and `sed`, and the syntactic +intelligence of the `tree-sitter` parsing framework.1 + +`srgn` complements traditional tools by operating on a different "dimension" of +code analysis.1 It is not a replacement for full-featured IDE refactoring +engines but a specialized scalpel for tasks that are too complex for a simple +regex and too specific for a generic IDE command. + +The ideal use case for `srgn` emerges when a refactoring task requires +precision that text-based tools cannot provide, yet falls outside the scope of +standard IDE functions like "Rename All" or "Find All References." For example, +a global regex replacement to change a function call `foo()` to `bar()` might +incorrectly alter variable names like `my_foo` or text within comments. 
+Conversely, an IDE's rename function operates on a specific symbol's definition +and usages but cannot execute a rule-based transformation, such as "convert all +top-level `print()` calls to `logging.debug()`." `srgn` excels at this kind of +precise, rule-based, cross-file surgery, making it a strategic asset for +enforcing coding standards, executing targeted API migrations, and performing +complex cleanups. ### 1.2 Disambiguation: Identifying the Correct srgn -To ensure clarity, it is essential to acknowledge that the name "srgn" is overloaded across different domains. This guide is exclusively dedicated to `alexpovel/srgn`, the command-line code search and manipulation utility.1 Other projects bearing a similar name are unrelated to the tool discussed here. These include, but are not limited to, SRGAN, a Generative Adversarial Network for image super-resolution 3; SRGN, a high-energy physics technique for parameter estimation 4; and SRGN (SolRagon), a cryptocurrency token.6 This report focuses solely on the code refactoring tool. +To ensure clarity, it is essential to acknowledge that the name "srgn" is +overloaded across different domains. This guide is exclusively dedicated to +`alexpovel/srgn`, the command-line code search and manipulation utility.1 Other +projects bearing a similar name are unrelated to the tool discussed here. These +include, but are not limited to, SRGAN, a Generative Adversarial Network for +image super-resolution 3; SRGN, a high-energy physics technique for parameter +estimation 4; and SRGN (SolRagon), a cryptocurrency token.6 This report focuses +solely on the code refactoring tool. 
### 1.3 Core Philosophy: Scopes, Actions, and Intentional Simplicity -The design of `srgn` is built upon two foundational pillars: **Scopes** and **Actions**.2 Scopes define +The design of `srgn` is built upon two foundational pillars: **Scopes** and +**Actions**.2 Scopes define -*where* in the code an operation should take place, while Actions define *what* should be done to the text within that scope. This separation of concerns is central to the tool's power and usability. +*where* in the code an operation should take place, while Actions define *what* +should be done to the text within that scope. This separation of concerns is +central to the tool's power and usability. -A core tenet of `srgn` is its intentional simplicity. The documentation states its design goal clearly: "if you know regex and the basics of the language you are working with, you are good to go".2 This philosophy distinguishes +A core tenet of `srgn` is its intentional simplicity. The documentation states +its design goal clearly: "if you know regex and the basics of the language you +are working with, you are good to go".2 This philosophy distinguishes -`srgn` from other advanced code-querying tools. While tools like Semgrep use a declarative, template-based syntax with metavariables (`$X`) and ellipses (`...`) to find code that matches an abstract pattern 8, +`srgn` from other advanced code-querying tools. While tools like Semgrep use a +declarative, template-based syntax with metavariables (`$X`) and ellipses +(`...`) to find code that matches an abstract pattern 8, `srgn` employs a more direct approach. -`srgn` does not use a proprietary structural pattern language. Instead, it functions as a highly precise location filter. It answers the question, "Find text matching this regex, but only at *this kind of location* (e.g., inside a Python class definition)." This is fundamentally different from a tool that answers, "Find code that *looks like this abstract pattern*." 
`srgn`'s power derives from its compositional filtering model—layering predefined grammar queries and user-supplied regular expressions—rather than from a complex, abstract query language. This design choice makes its mechanisms transparent and its learning curve gentle for anyone already comfortable with the command line. +`srgn` does not use a proprietary structural pattern language. Instead, it +functions as a highly precise location filter. It answers the question, "Find +text matching this regex, but only at *this kind of location* (e.g., inside a +Python class definition)." This is fundamentally different from a tool that +answers, "Find code that *looks like this abstract pattern*." `srgn`'s power +derives from its compositional filtering model—layering predefined grammar +queries and user-supplied regular expressions—rather than from a complex, +abstract query language. This design choice makes its mechanisms transparent +and its learning curve gentle for anyone already comfortable with the command +line. ## Part 2: Getting Started - Installation and First Cuts ### 2.1 Installation: Preparing the Operating Theater -`srgn` can be installed across various platforms, catering to the diverse environments of command-line users. The following methods are officially supported 1: +`srgn` can be installed across various platforms, catering to the diverse +environments of command-line users. The following methods are officially +supported 1: -- **Prebuilt Binaries**: The most straightforward method is to download a prebuilt binary for your specific architecture directly from the project's GitHub Releases page.1 +- **Prebuilt Binaries**: The most straightforward method is to download a + prebuilt binary for your specific architecture directly from the project's + GitHub Releases page.1 -- `cargo-binstall`: For users with the Rust toolchain, this is the recommended installation method. 
It is significantly faster than compiling from source as it downloads prebuilt binaries when available. It is tested in the project's CI and serves as a reliable installation vector.1 +- `cargo-binstall`: For users with the Rust toolchain, this is the recommended + installation method. It is significantly faster than compiling from source as + it downloads prebuilt binaries when available. It is tested in the project's + CI and serves as a reliable installation vector.1 Bash - ``` + ```bash # Install the Rust toolchain if you haven't already # Then, install cargo-binstall cargo install cargo-binstall @@ -53,16 +105,19 @@ A core tenet of `srgn` is its intentional simplicity. The documentation states i ``` -- `cargo install`: The traditional method of compiling from source using Rust's package manager. This requires a C compiler to be present on the system (`gcc` on Linux, `clang` on macOS, or MSVC on Windows).1 +- `cargo install`: The traditional method of compiling from source using Rust's + package manager. This requires a C compiler to be present on the system + (`gcc` on Linux, `clang` on macOS, or MSVC on Windows).1 Bash - ``` + ```bash cargo install srgn ``` -- **Package Managers**: `srgn` is available through several system package managers, offering convenient installation and updates 1: +- **Package Managers**: `srgn` is available through several system package + managers, offering convenient installation and updates 1: - **Homebrew (macOS/Linux):** `brew install srgn` @@ -72,302 +127,484 @@ A core tenet of `srgn` is its intentional simplicity. 
The documentation states i - **MacPorts (macOS):** `sudo port install srgn` -For integration into automated workflows, a GitHub Action is available for `cargo-binstall`, allowing for easy installation of `srgn` in CI/CD pipelines.1 +For integration into automated workflows, a GitHub Action is available for +`cargo-binstall`, allowing for easy installation of `srgn` in CI/CD pipelines.1 ### 2.2 The Anatomy of a srgn Command -The fundamental structure of a `srgn` command is analogous to familiar Unix tools, making it intuitive for experienced users. The general syntax is: +The fundamental structure of a `srgn` command is analogous to familiar Unix +tools, making it intuitive for experienced users. The general syntax is: `srgn '' -- ''` -Each component has a distinct role, as illustrated by the canonical `tr`-like example from the documentation 1: +Each component has a distinct role, as illustrated by the canonical `tr`-like +example from the documentation 1: Bash -``` +```bash echo 'Hello World!' | srgn '[wW]orld' -- 'there' # Output: Hello there! ``` -- \`\`: These are flags that specify Actions (e.g., `--upper`, `--delete`) or language-aware grammar Scopes (e.g., `--python`, `--rust`). +- \`\`: These are flags that specify Actions (e.g., `--upper`, `--delete`) or + language-aware grammar Scopes (e.g., `--python`, `--rust`). -- `''`: This is the mandatory, positional regular expression that defines the final layer of text to be matched. In the example, it's `'[wW]orld'`. +- `''`: This is the mandatory, positional regular expression that defines the + final layer of text to be matched. In the example, it's `'[wW]orld'`. -- \`\`: These are optional file or directory paths. If omitted, `srgn` reads from standard input (`stdin`). If a directory is provided, `srgn` performs a high-speed, recursive search for relevant files based on extensions and shebangs.1 +- \`\`: These are optional file or directory paths. If omitted, `srgn` reads + from standard input (`stdin`). 
If a directory is provided, `srgn` performs a + high-speed, recursive search for relevant files based on extensions and + shebangs.1 -- `-- ''`: This is the optional replacement string. The `--` separator is a critical safety feature that disambiguates the replacement string from file paths or other arguments, especially when the replacement itself might resemble a flag.1 +- `-- ''`: This is the optional replacement string. The `--` separator is a + critical safety feature that disambiguates the replacement string from file + paths or other arguments, especially when the replacement itself might + resemble a flag.1 -If no replacement string or action flags are provided, `srgn` may enter its "search mode," which transforms it into a powerful, syntax-aware search tool.1 +If no replacement string or action flags are provided, `srgn` may enter its +"search mode," which transforms it into a powerful, syntax-aware search tool.1 ### 2.3 Search Mode: ripgrep with Syntactic Superpowers -When a language flag (e.g., `--python` or its shorthand `--py` 9) is provided without any accompanying actions or a replacement string, +When a language flag (e.g., `--python` or its shorthand `--py` 9) is provided +without any accompanying actions or a replacement string, -`srgn` enters search mode.1 The documentation describes this mode as "'ripgrep but with syntactical language elements'".2 +`srgn` enters search mode.1 The documentation describes this mode as "'ripgrep +but with syntactical language elements'".2 For instance, to find all class definitions in a Python project, one could run: Bash -``` +```bash srgn --python 'class'. 
``` -The output mimics `grep` and `ripgrep`, prepending the file name and line number to each match, making it easy to integrate into standard command-line workflows.2 +The output mimics `grep` and `ripgrep`, prepending the file name and line +number to each match, making it easy to integrate into standard command-line +workflows.2 -This mode is not only precise but also exceptionally fast. A benchmark cited in the documentation demonstrates its performance: `srgn` can find approximately 140,000 occurrences of a regex pattern within Go string literals across the entire Kubernetes codebase (\~3 million lines of code) in under 3 seconds on a modern multi-core machine.1 This combination of speed and syntactic precision makes search mode a formidable tool for code exploration and auditing. +This mode is not only precise but also exceptionally fast. A benchmark cited in +the documentation demonstrates its performance: `srgn` can find approximately +140,000 occurrences of a regex pattern within Go string literals across the +entire Kubernetes codebase (\~3 million lines of code) in under 3 seconds on a +modern multi-core machine.1 This combination of speed and syntactic precision +makes search mode a formidable tool for code exploration and auditing. ## Part 3: The Core Concept - Surgical Scoping ### 3.1 What srgn Means by 'Scope': Textual Regions, Not Semantic Namespaces -The term "scope" carries significant weight in programming, often referring to semantic concepts of visibility and lifetime, such as Python's LEGB rule (Local, Enclosing, Global, Built-in) or Rust's complex ownership and lifetime scopes.10 A critical step in mastering +The term "scope" carries significant weight in programming, often referring to +semantic concepts of visibility and lifetime, such as Python's LEGB rule +(Local, Enclosing, Global, Built-in) or Rust's complex ownership and lifetime +scopes.10 A critical step in mastering `srgn` is understanding that its use of the term is different. 
-In `srgn`, a "language grammar-aware scope" does not refer to a semantic namespace but to a **textual region** of the source code that corresponds to a specific node in its Abstract Syntax Tree (AST), as parsed by `tree-sitter`.2 For example, the +In `srgn`, a "language grammar-aware scope" does not refer to a semantic +namespace but to a **textual region** of the source code that corresponds to a +specific node in its Abstract Syntax Tree (AST), as parsed by `tree-sitter`.2 +For example, the -`--python 'function'` scope selects the entire block of text that constitutes a function definition, from the `def` keyword to the end of its body. It does not understand which variables are accessible within that function. +`--python 'function'` scope selects the entire block of text that constitutes a +function definition, from the `def` keyword to the end of its body. It does not +understand which variables are accessible within that function. -This distinction is paramount. `srgn` operates on the code's grammatical structure, not its compiled or interpreted meaning. It can identify all comments, all string literals, or all function definitions, but it cannot resolve a variable name to its declaration. This focus on syntactic structure is the source of its speed and simplicity, but it also defines the boundaries of its capabilities. +This distinction is paramount. `srgn` operates on the code's grammatical +structure, not its compiled or interpreted meaning. It can identify all +comments, all string literals, or all function definitions, but it cannot +resolve a variable name to its declaration. This focus on syntactic structure +is the source of its speed and simplicity, but it also defines the boundaries +of its capabilities. 
### 3.2 The Scoping Pipeline: Layering with Logical AND -The precision of `srgn` comes from its default mechanism of combining scopes: a left-to-right, progressively narrowing filter that acts as a logical AND.2 Each subsequent scope operates only on the text that was passed through by the previous one. +The precision of `srgn` comes from its default mechanism of combining scopes: a +left-to-right, progressively narrowing filter that acts as a logical AND.2 Each +subsequent scope operates only on the text that was passed through by the +previous one. Consider the following command: Bash -``` +```bash # Find all occurrences of 'github.com' but only inside docstrings of Python classes. srgn --python 'class' --python 'doc-strings' 'github\.com' my_project/ ``` The execution pipeline for this command is as follows: -1. **Initial Scope**: `srgn` first parses all files in `my_project/` and identifies the textual regions of all `class` definitions. All other code is discarded from consideration. +1. **Initial Scope**: `srgn` first parses all files in `my_project/` and + identifies the textual regions of all `class` definitions. All other code is + discarded from consideration. -2. **Intersection**: *Within the text of the class definitions only*, it then identifies all regions corresponding to `doc-strings`. +2. **Intersection**: *Within the text of the class definitions only*, it then + identifies all regions corresponding to `doc-strings`. -3. **Final Match**: Finally, *within the text of those docstrings only*, it applies the regex `'github\.com'` to find the ultimate matches. +3. **Final Match**: Finally, *within the text of those docstrings only*, it + applies the regex `'github\.com'` to find the ultimate matches. -This directional, filtering nature means the order of scopes is crucial. 
The documentation provides a clear example of a nonsensical query, `srgn --python 'doc-strings' --python 'class'`, which would attempt to find a class definition *inside* a docstring and would almost certainly return no results.1 This illustrates the power and predictability of the intersectional pipeline. +This directional, filtering nature means the order of scopes is crucial. The +documentation provides a clear example of a nonsensical query, +`srgn --python 'doc-strings' --python 'class'`, which would attempt to find a +class definition *inside* a docstring and would almost certainly return no +results.1 This illustrates the power and predictability of the intersectional +pipeline. ### 3.3 Broadening the Search: Joining Scopes with Logical OR -While the default AND logic is excellent for drilling down, some tasks require a broader search across different types of syntax. For this, `srgn` provides the `--join-language-scopes` flag (or its shorthand, `-j`).2 This flag alters the behavior for language scopes, changing the operation from intersection (AND) to a union (OR). +While the default AND logic is excellent for drilling down, some tasks require +a broader search across different types of syntax. For this, `srgn` provides +the `--join-language-scopes` flag (or its shorthand, `-j`).2 This flag alters +the behavior for language scopes, changing the operation from intersection +(AND) to a union (OR). A practical example from the release notes demonstrates its utility 9: Bash -``` +```bash # Find all TODOs, whether they are in comments or docstrings. srgn -j --python comments --python doc-strings 'TODO:' src/ ``` -Without the `-j` flag, this command would nonsensically search for docstrings *inside* of comments. With `-j`, it creates a combined scope of all text that is *either* a comment *or* a docstring, and then applies the `'TODO:'` regex to that combined set. This is a common and powerful pattern for code maintenance tasks. 
+Without the `-j` flag, this command would nonsensically search for docstrings +*inside* of comments. With `-j`, it creates a combined scope of all text that +is *either* a comment *or* a docstring, and then applies the `'TODO:'` regex to +that combined set. This is a common and powerful pattern for code maintenance +tasks. ### 3.4 The Two Fundamental Scope Types -To summarize, all `srgn` operations are built from two fundamental types of scopes: +To summarize, all `srgn` operations are built from two fundamental types of +scopes: -1. **Language Grammar Scopes**: These are the predefined syntactic elements specified with the `-- ''` syntax (e.g., `--python 'class'`, `--rust 'unsafe'`). They leverage `tree-sitter` to provide the foundational context awareness that sets `srgn` apart.1 A reference list of known scopes is provided in the Appendix. +1. **Language Grammar Scopes**: These are the predefined syntactic elements + specified with the `-- ''` syntax (e.g., + `--python 'class'`, `--rust 'unsafe'`). They leverage `tree-sitter` to + provide the foundational context awareness that sets `srgn` apart.1 A + reference list of known scopes is provided in the Appendix. -2. **Regular Expression Scope**: This is the mandatory, positional argument that provides the final, fine-grained pattern matching. It is always the last filter applied in the pipeline, operating only on the text selected by the preceding language scopes.2 +2. **Regular Expression Scope**: This is the mandatory, positional argument + that provides the final, fine-grained pattern matching. It is always the + last filter applied in the pipeline, operating only on the text selected by + the preceding language scopes.2 ## Part 4: Taking Action - Manipulation and Refactoring ### 4.1 Simple and Dynamic Replacement -The simplest action in `srgn` is replacement, specified with the `-- 'replacement'` syntax. However, for any meaningful refactoring, dynamic replacements are essential. 
`srgn` supports this through regex capture groups (`$1`, `$2`, etc.), which substitute parts of the matched text into the replacement string.2 +The simplest action in `srgn` is replacement, specified with the +`-- 'replacement'` syntax. However, for any meaningful refactoring, dynamic +replacements are essential. `srgn` supports this through regex capture groups +(`$1`, `$2`, etc.), which substitute parts of the matched text into the +replacement string.2 -A rich example from the documentation showcases several advanced features at once 2: +A rich example from the documentation showcases several advanced features at +once 2: Bash -``` +```bash srgn --python 'doc-strings' '(?` or `--py `) - - -

Scope Name

Description

Example Command

class

Selects entire class definitions, from class to the end of the block.

srgn --py 'class' 'MyClass'

function

Selects entire function definitions, from def to the end of the block.

srgn --py 'function' 'my_func'

doc-strings

Selects the content of docstrings ("""...""" or '''...''').

srgn --py 'doc-strings' 'TODO'

comments

Selects the content of line comments (#...).

srgn --py 'comments' 'FIXME'

strings

Selects the content of all string literals.

srgn --py 'strings' 'hardcoded-secret'

identifiers

Selects language identifiers (variable names, function names, etc.).

srgn --py 'identifiers' '^temp_\w+'

module-names-in-imports

Selects only the module names in import and from... import statements.

srgn --py 'module-names-in-imports' 'old_lib'

call

Selects entire function or method call expressions (e.g., foo(bar, baz)).

srgn --py 'call' '^print\('

+| Scope Name | Description | Example Command | +| ----------------------- | ------------------------------------------------------------------------- | --------------------------------------------- | +| class | Selects entire class definitions, from class to the end of the block. | srgn --py 'class' 'MyClass' | +| function | Selects entire function definitions, from def to the end of the block. | srgn --py 'function' 'my_func' | +| doc-strings | Selects the content of docstrings ("""...""" or '''...'''). | srgn --py 'doc-strings' 'TODO' | +| comments | Selects the content of line comments (#...). | srgn --py 'comments' 'FIXME' | +| strings | Selects the content of all string literals. | srgn --py 'strings' 'hardcoded-secret' | +| identifiers | Selects language identifiers (variable names, function names, etc.). | srgn --py 'identifiers' '^temp_\w+' | +| module-names-in-imports | Selects only the module names in import and from... import statements. | srgn --py 'module-names-in-imports' 'old_lib' | +| call | Selects entire function or method call expressions (e.g., foo(bar, baz)). | srgn --py 'call' '^print\(' | ### A.3 Table: Rust Grammar Scopes (`--rust ` or `--rs `) - - -

Scope Name

Description

Example Command

unsafe

Selects unsafe blocks and unsafe function definitions.

srgn --rs 'unsafe' '.'

comments

Selects the content of line (//) and block (/*... */) comments.

srgn --rs 'comments' 'HACK'

strings

Selects the content of all string literals.

srgn --rs 'strings' 'password'

attribute

Selects the content of attributes (#[...] and #![...]).

srgn --rs 'attribute' 'deprecated'

names-in-uses-declarations

Selects only the crate/module paths within use statements.

srgn --rs 'names-in-uses-declarations' 'old_crate'

pub-enum

Selects public enum definitions.

srgn --rs 'pub-enum' 'MyEnum'

type-identifier

Selects identifiers that refer to a type.

srgn --rs 'pub-enum' --rs 'type-identifier' 'Subgenre'

struct

Selects struct definitions.

srgn --rs 'struct' 'RequestPayload'

impl

Selects impl blocks.

srgn --rs 'impl' 'MyTrait for MyStruct'

fn

Selects function definitions.

srgn --rs 'fn' 'main'

extern-crate

Selects extern crate...; declarations.

srgn --rs 'extern-crate' 'libc'

+| Scope Name | Description | Example Command | +| -------------------------- | -------------------------------------------------------------- | ------------------------------------------------------ | +| unsafe | Selects unsafe blocks and unsafe function definitions. | srgn --rs 'unsafe' '.' | +| comments | Selects the content of line (//) and block (/*...*/) comments. | srgn --rs 'comments' 'HACK' | +| strings | Selects the content of all string literals. | srgn --rs 'strings' 'password' | +| attribute | Selects the content of attributes (#[...] and #![...]). | srgn --rs 'attribute' 'deprecated' | +| names-in-uses-declarations | Selects only the crate/module paths within use statements. | srgn --rs 'names-in-uses-declarations' 'old_crate' | +| pub-enum | Selects public enum definitions. | srgn --rs 'pub-enum' 'MyEnum' | +| type-identifier | Selects identifiers that refer to a type. | srgn --rs 'pub-enum' --rs 'type-identifier' 'Subgenre' | +| struct | Selects struct definitions. | srgn --rs 'struct' 'RequestPayload' | +| impl | Selects impl blocks. | srgn --rs 'impl' 'MyTrait for MyStruct' | +| fn | Selects function definitions. | srgn --rs 'fn' 'main' | +| extern-crate | Selects extern crate...; declarations. | srgn --rs 'extern-crate' 'libc' | ## Works Cited - 1. alexpovel/srgn: A grep-like tool which understands source code syntax and allows for manipulation in addition to search - GitHub, accessed on July 11, 2025, + 1. alexpovel/srgn: A grep-like tool which understands source code syntax and + allows for manipulation in addition to search - GitHub, accessed on July + 11, 2025, - 2. srgn/[README.md](http://README.md) at main · alexpovel/srgn · GitHub, accessed on July 11, 2025, + 2. srgn/[README.md](http://README.md) at main · alexpovel/srgn · GitHub, + accessed on July 11, 2025, + - 3. Lornatang/SRGAN-PyTorch: A simple and complete implementation of super-resolution paper. - GitHub, accessed on July 11, 2025, + 3. 
Lornatang/SRGAN-PyTorch: A simple and complete implementation of + super-resolution paper. - GitHub, accessed on July 11, 2025, + - 4. hep-lbdl/SRGN - GitHub, accessed on July 11, 2025, + 4. hep-lbdl/SRGN - GitHub, accessed on July 11, 2025, + - 5. Security - hep-lbdl/SRGN - GitHub, accessed on July 11, 2025, + 5. Security - hep-lbdl/SRGN - GitHub, accessed on July 11, 2025, + - 6. How to Open and Manage Leveraged $SRGN (SolRagon) Trades on Hyperliquid: A Beginner's Tutorial · Issue #5 · synthesizearrayHSy/generatemonitorGhZ - GitHub, accessed on July 11, 2025, + 6. How to Open and Manage Leveraged $SRGN (SolRagon) Trades on Hyperliquid: A + Beginner's Tutorial · Issue #5 · synthesizearrayHSy/generatemonitorGhZ - + GitHub, accessed on July 11, 2025, + - 7. srgn - Rust - [Docs.rs](http://Docs.rs), accessed on July 11, 2025, + 7. srgn - Rust - [Docs.rs](http://Docs.rs), accessed on July 11, 2025, + - 8. Pattern syntax - Semgrep, accessed on July 11, 2025, + 8. Pattern syntax - Semgrep, accessed on July 11, 2025, + - 9. Releases · alexpovel/srgn - GitHub, accessed on July 11, 2025, + 9. Releases · alexpovel/srgn - GitHub, accessed on July 11, 2025, + -10. Python Scope & the LEGB Rule: Resolving Names in Your Code, accessed on July 11, 2025, +10. Python Scope & the LEGB Rule: Resolving Names in Your Code, accessed on + July 11, 2025, -11. Scopes - The Rust Reference, accessed on July 11, 2025, +11. Scopes - The Rust Reference, accessed on July 11, 2025, + -12. I can't understand the Rust "scope" definition (Rust Programming Language, 2nd Ed. Klabnik & Nichols) - Stack Overflow, accessed on July 11, 2025, +12. I can't understand the Rust "scope" definition (Rust Programming Language, + 2nd Ed. Klabnik & Nichols) - Stack Overflow, accessed on July 11, 2025, + -13. betterletter/[README.md](http://README.md) at main · alexpovel/betterletter · GitHub, accessed on July 11, 2025, +13. 
betterletter/[README.md](http://README.md) at main · alexpovel/betterletter + · GitHub, accessed on July 11, 2025, + -14. srgn - Rust Package Registry - [Crates.io](http://Crates.io), accessed on July 11, 2025, +14. srgn - Rust Package Registry - [Crates.io](http://Crates.io), accessed on + July 11, 2025, -15. accessed on January 1, 1970, +15. accessed on January 1, 1970, + -16. accessed on January 1, 1970, +16. accessed on January 1, 1970, + diff --git a/src/runner.rs b/src/runner.rs index 210eeb07..9976127c 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -10,12 +10,15 @@ use anyhow::{Context, Result}; use serde_json; use std::fs; use std::io::{self, BufRead, BufReader, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; use std::thread; use tempfile::{Builder, NamedTempFile}; use tracing::{debug, info}; +/// Default Ninja executable to invoke. +const NINJA_PROGRAM: &str = "ninja"; + #[derive(Debug, Clone)] pub struct NinjaContent(String); impl NinjaContent { @@ -46,16 +49,16 @@ impl CommandArg { } } -#[derive(Debug, Clone)] -pub struct BuildTargets(Vec<String>); -impl BuildTargets { +#[derive(Debug, Clone, Copy)] +pub struct BuildTargets<'a>(&'a [String]); +impl<'a> BuildTargets<'a> { #[must_use] - pub fn new(targets: Vec<String>) -> Self { + pub fn new(targets: &'a [String]) -> Self { Self(targets) } #[must_use] - pub fn as_slice(&self) -> &[String] { - &self.0 + pub fn as_slice(&self) -> &'a [String] { + self.0 } #[must_use] pub fn is_empty(&self) -> bool { @@ -107,15 +110,19 @@ pub fn run(cli: &Cli) -> Result<()> { /// ``` fn handle_build(cli: &Cli, args: &BuildArgs) -> Result<()> { let ninja = generate_ninja(cli)?; - let targets = BuildTargets::new(args.targets.clone()); + let targets = BuildTargets::new(&args.targets); - if let Some(path) = &args.emit { + // Normalise the build file path and keep the temporary file alive for the + // duration of the Ninja invocation.
+ let (build_path, _tmp): (PathBuf, Option<NamedTempFile>) = if let Some(path) = &args.emit { write_ninja_file(path, &ninja)?; - run_ninja(Path::new("ninja"), cli, path, &targets)?; + (path.clone(), None) } else { let tmp = create_temp_ninja_file(&ninja)?; - run_ninja(Path::new("ninja"), cli, tmp.path(), &targets)?; - } + (tmp.path().to_path_buf(), Some(tmp)) + }; + + run_ninja(Path::new(NINJA_PROGRAM), cli, &build_path, &targets)?; Ok(()) } @@ -192,9 +199,9 @@ fn generate_ninja(cli: &Cli) -> Result<NinjaContent> { /// Determine the manifest path respecting the CLI's directory option. /// /// # Examples -/// ``` -/// use netsuke::cli::Cli; -/// use netsuke::runner::resolve_manifest_path; +/// ```ignore +/// use crate::cli::Cli; +/// use crate::runner::resolve_manifest_path; /// let cli = Cli { file: "Netsukefile".into(), directory: None, jobs: None, verbose: false, command: None }; /// assert!(resolve_manifest_path(&cli).ends_with("Netsukefile")); /// ``` @@ -284,7 +291,7 @@ pub fn run_ninja( program: &Path, cli: &Cli, build_file: &Path, - targets: &BuildTargets, + targets: &BuildTargets<'_>, ) -> io::Result<()> { let mut cmd = Command::new(program); if let Some(dir) = &cli.directory { diff --git a/tests/runner_tests.rs b/tests/runner_tests.rs index d0eef025..a8e76afd 100644 --- a/tests/runner_tests.rs +++ b/tests/runner_tests.rs @@ -40,7 +40,7 @@ fn run_ninja_not_found() { targets: vec![], })), }; - let targets = BuildTargets::new(vec![]); + let targets = BuildTargets::new(&[]); let err = run_ninja( Path::new("does-not-exist"), &cli, @@ -54,7 +54,7 @@ fn run_ninja_not_found() { #[rstest] #[serial] fn run_executes_ninja_without_persisting_file() { - let (ninja_dir, ninja_path) = support::fake_ninja(0); + let (ninja_dir, ninja_path) = support::fake_ninja_check_build_file(); let original_path = std::env::var_os("PATH").unwrap_or_default(); let mut paths: Vec<_> = std::env::split_paths(&original_path).collect(); paths.insert(0, ninja_dir.path().to_path_buf()); @@ -93,7 +93,7 @@ fn
run_executes_ninja_without_persisting_file() { #[test] #[serial] fn run_build_with_emit_keeps_file() { - let (ninja_dir, ninja_path) = support::fake_ninja(0); + let (ninja_dir, ninja_path) = support::fake_ninja_check_build_file(); let original_path = std::env::var_os("PATH").unwrap_or_default(); let mut paths: Vec<_> = std::env::split_paths(&original_path).collect(); paths.insert(0, ninja_dir.path().to_path_buf()); diff --git a/tests/steps/process_steps.rs b/tests/steps/process_steps.rs index 7edbc961..00c7ce0a 100644 --- a/tests/steps/process_steps.rs +++ b/tests/steps/process_steps.rs @@ -108,7 +108,7 @@ fn run(world: &mut CliWorld) { } else { Path::new("ninja") }; - let targets = BuildTargets::new(vec![]); + let targets = BuildTargets::new(&[]); match runner::run_ninja(program, cli, Path::new("build.ninja"), &targets) { Ok(()) => { world.run_status = Some(true); diff --git a/tests/support/mod.rs b/tests/support/mod.rs index caba6283..1b339506 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -14,6 +14,11 @@ use tracing_subscriber::fmt; /// Create a fake Ninja executable that exits with `exit_code`. /// /// Returns the temporary directory and the path to the executable. 
+#[allow( + unfulfilled_lint_expectations, + reason = "used only in some test crates", +)] +#[expect(dead_code, reason = "used in CLI behaviour tests")] pub fn fake_ninja(exit_code: i32) -> (TempDir, PathBuf) { let dir = TempDir::new().expect("temp dir"); let path = dir.path().join("ninja"); From d1378516eb96b1ae4e997855ea673a256745e0ea Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 8 Aug 2025 14:33:38 +0100 Subject: [PATCH 3/6] Remove trailing comma from lint attribute --- tests/support/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/support/mod.rs b/tests/support/mod.rs index 1b339506..3a8e84bc 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -16,7 +16,7 @@ use tracing_subscriber::fmt; /// Returns the temporary directory and the path to the executable. #[allow( unfulfilled_lint_expectations, - reason = "used only in some test crates", + reason = "used only in some test crates" )] #[expect(dead_code, reason = "used in CLI behaviour tests")] pub fn fake_ninja(exit_code: i32) -> (TempDir, PathBuf) { From 452c9d9992421fb1737484eeda89260ef9c89a46 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 8 Aug 2025 15:33:22 +0100 Subject: [PATCH 4/6] Apply mdtablefix to srgn doc --- docs/srgn.md | 118 ++++++++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 57 deletions(-) diff --git a/docs/srgn.md b/docs/srgn.md index 6c661b41..73308493 100644 --- a/docs/srgn.md +++ b/docs/srgn.md @@ -13,14 +13,15 @@ prevents them from reliably performing context-aware refactoring, where a change in one syntactic location (e.g., a function signature) should not affect another (e.g., a string literal). 
-This is the precise gap that `srgn`, the "code surgeon," is designed to fill.1 -It operates as a powerful hybrid, blending the regex-based pattern matching of +This is the precise gap that `srgn`, the "code surgeon," is designed to +fill.[^1] It operates as a powerful hybrid, blending the regex-based pattern +matching of `grep`, the stream-editing capabilities of `tr` and `sed`, and the syntactic -intelligence of the `tree-sitter` parsing framework.1 +intelligence of the `tree-sitter` parsing framework.[^1] `srgn` complements traditional tools by operating on a different "dimension" of -code analysis.1 It is not a replacement for full-featured IDE refactoring +code analysis.[^1] It is not a replacement for full-featured IDE refactoring engines but a specialized scalpel for tasks that are too complex for a simple regex and too specific for a generic IDE command. @@ -40,17 +41,17 @@ complex cleanups. To ensure clarity, it is essential to acknowledge that the name "srgn" is overloaded across different domains. This guide is exclusively dedicated to -`alexpovel/srgn`, the command-line code search and manipulation utility.1 Other -projects bearing a similar name are unrelated to the tool discussed here. These -include, but are not limited to, SRGAN, a Generative Adversarial Network for -image super-resolution 3; SRGN, a high-energy physics technique for parameter -estimation 4; and SRGN (SolRagon), a cryptocurrency token.6 This report focuses -solely on the code refactoring tool. +`alexpovel/srgn`, the command-line code search and manipulation utility.[^1] +Other projects bearing a similar name are unrelated to the tool discussed here. +These include, but are not limited to, SRGAN, a Generative Adversarial Network +for image super-resolution 3; SRGN, a high-energy physics technique for +parameter estimation 4; and SRGN (SolRagon), a cryptocurrency token.[^6] This +report focuses solely on the code refactoring tool. 
### 1.3 Core Philosophy: Scopes, Actions, and Intentional Simplicity The design of `srgn` is built upon two foundational pillars: **Scopes** and -**Actions**.2 Scopes define +**Actions**.[^2] Scopes define *where* in the code an operation should take place, while Actions define *what* should be done to the text within that scope. This separation of concerns is @@ -58,7 +59,7 @@ central to the tool's power and usability. A core tenet of `srgn` is its intentional simplicity. The documentation states its design goal clearly: "if you know regex and the basics of the language you -are working with, you are good to go".2 This philosophy distinguishes +are working with, you are good to go".[^2] This philosophy distinguishes `srgn` from other advanced code-querying tools. While tools like Semgrep use a declarative, template-based syntax with metavariables (`$X`) and ellipses @@ -87,12 +88,12 @@ supported 1: - **Prebuilt Binaries**: The most straightforward method is to download a prebuilt binary for your specific architecture directly from the project's - GitHub Releases page.1 + GitHub Releases page.[^1] - `cargo-binstall`: For users with the Rust toolchain, this is the recommended installation method. It is significantly faster than compiling from source as it downloads prebuilt binaries when available. It is tested in the project's - CI and serves as a reliable installation vector.1 + CI and serves as a reliable installation vector.[^1] Bash @@ -107,7 +108,7 @@ supported 1: - `cargo install`: The traditional method of compiling from source using Rust's package manager. 
This requires a C compiler to be present on the system - (`gcc` on Linux, `clang` on macOS, or MSVC on Windows).1 + (`gcc` on Linux, `clang` on macOS, or MSVC on Windows).[^1] Bash @@ -128,7 +129,8 @@ supported 1: - **MacPorts (macOS):** `sudo port install srgn` For integration into automated workflows, a GitHub Action is available for -`cargo-binstall`, allowing for easy installation of `srgn` in CI/CD pipelines.1 +`cargo-binstall`, allowing for easy installation of `srgn` in CI/CD +pipelines.[^1] ### 2.2 The Anatomy of a srgn Command @@ -156,23 +158,24 @@ echo 'Hello World!' | srgn '[wW]orld' -- 'there' - \`\`: These are optional file or directory paths. If omitted, `srgn` reads from standard input (`stdin`). If a directory is provided, `srgn` performs a high-speed, recursive search for relevant files based on extensions and - shebangs.1 + shebangs.[^1] - `-- ''`: This is the optional replacement string. The `--` separator is a critical safety feature that disambiguates the replacement string from file paths or other arguments, especially when the replacement itself might - resemble a flag.1 + resemble a flag.[^1] If no replacement string or action flags are provided, `srgn` may enter its -"search mode," which transforms it into a powerful, syntax-aware search tool.1 +"search mode," which transforms it into a powerful, syntax-aware search +tool.[^1] ### 2.3 Search Mode: ripgrep with Syntactic Superpowers When a language flag (e.g., `--python` or its shorthand `--py` 9) is provided without any accompanying actions or a replacement string, -`srgn` enters search mode.1 The documentation describes this mode as "'ripgrep -but with syntactical language elements'".2 +`srgn` enters search mode.[^1] The documentation describes this mode as +"'ripgrep but with syntactical language elements'".[^2] For instance, to find all class definitions in a Python project, one could run: @@ -184,14 +187,14 @@ srgn --python 'class'. 
The output mimics `grep` and `ripgrep`, prepending the file name and line number to each match, making it easy to integrate into standard command-line -workflows.2 +workflows.[^2] This mode is not only precise but also exceptionally fast. A benchmark cited in the documentation demonstrates its performance: `srgn` can find approximately 140,000 occurrences of a regex pattern within Go string literals across the entire Kubernetes codebase (\~3 million lines of code) in under 3 seconds on a -modern multi-core machine.1 This combination of speed and syntactic precision -makes search mode a formidable tool for code exploration and auditing. +modern multi-core machine.[^1] This combination of speed and syntactic +precision makes search mode a formidable tool for code exploration and auditing. ## Part 3: The Core Concept - Surgical Scoping @@ -200,14 +203,14 @@ makes search mode a formidable tool for code exploration and auditing. The term "scope" carries significant weight in programming, often referring to semantic concepts of visibility and lifetime, such as Python's LEGB rule (Local, Enclosing, Global, Built-in) or Rust's complex ownership and lifetime -scopes.10 A critical step in mastering +scopes.[^10] A critical step in mastering `srgn` is understanding that its use of the term is different. In `srgn`, a "language grammar-aware scope" does not refer to a semantic namespace but to a **textual region** of the source code that corresponds to a -specific node in its Abstract Syntax Tree (AST), as parsed by `tree-sitter`.2 -For example, the +specific node in its Abstract Syntax Tree (AST), as parsed by +`tree-sitter`.[^2] For example, the `--python 'function'` scope selects the entire block of text that constitutes a function definition, from the `def` keyword to the end of its body. It does not @@ -223,8 +226,8 @@ of its capabilities. 
### 3.2 The Scoping Pipeline: Layering with Logical AND The precision of `srgn` comes from its default mechanism of combining scopes: a -left-to-right, progressively narrowing filter that acts as a logical AND.2 Each -subsequent scope operates only on the text that was passed through by the +left-to-right, progressively narrowing filter that acts as a logical AND.[^2] +Each subsequent scope operates only on the text that was passed through by the previous one. Consider the following command: @@ -252,16 +255,16 @@ This directional, filtering nature means the order of scopes is crucial. The documentation provides a clear example of a nonsensical query, `srgn --python 'doc-strings' --python 'class'`, which would attempt to find a class definition *inside* a docstring and would almost certainly return no -results.1 This illustrates the power and predictability of the intersectional -pipeline. +results.[^1] This illustrates the power and predictability of the +intersectional pipeline. ### 3.3 Broadening the Search: Joining Scopes with Logical OR While the default AND logic is excellent for drilling down, some tasks require a broader search across different types of syntax. For this, `srgn` provides -the `--join-language-scopes` flag (or its shorthand, `-j`).2 This flag alters -the behavior for language scopes, changing the operation from intersection -(AND) to a union (OR). +the `--join-language-scopes` flag (or its shorthand, `-j`).[^2] This flag +alters the behavior for language scopes, changing the operation from +intersection (AND) to a union (OR). A practical example from the release notes demonstrates its utility 9: @@ -286,13 +289,13 @@ scopes: 1. **Language Grammar Scopes**: These are the predefined syntactic elements specified with the `-- ''` syntax (e.g., `--python 'class'`, `--rust 'unsafe'`). 
They leverage `tree-sitter` to - provide the foundational context awareness that sets `srgn` apart.1 A + provide the foundational context awareness that sets `srgn` apart.[^1] A reference list of known scopes is provided in the Appendix. 2. **Regular Expression Scope**: This is the mandatory, positional argument that provides the final, fine-grained pattern matching. It is always the last filter applied in the pipeline, operating only on the text selected by - the preceding language scopes.2 + the preceding language scopes.[^2] ## Part 4: Taking Action - Manipulation and Refactoring @@ -302,7 +305,7 @@ The simplest action in `srgn` is replacement, specified with the `-- 'replacement'` syntax. However, for any meaningful refactoring, dynamic replacements are essential. `srgn` supports this through regex capture groups (`$1`, `$2`, etc.), which substitute parts of the matched text into the -replacement string.2 +replacement string.[^2] A rich example from the documentation showcases several advanced features at once 2: @@ -330,38 +333,39 @@ This command deconstructs as follows: Beyond simple replacement, `srgn` offers a suite of built-in actions specified via command-line flags. These actions are applied in a defined order *after* -the main replacement has occurred.2 +the main replacement has occurred.[^2] The command `srgn --upper '[wW]orld' -- 'you'` illustrates this two-stage process. First, the regex match `World` is replaced with `you`. Second, the -`--upper` action is applied to that result, yielding the final output `YOU`.2 +`--upper` action is applied to that result, yielding the final output `YOU`.[^2] Common built-in action flags include: -- `--upper`, `--lower`, `--titlecase`: For changing the case of matched text.2 +- `--upper`, `--lower`, `--titlecase`: For changing the case of matched + text.[^2] - `--delete`: Removes the matched text. 
As a safety measure, this action will produce an error if no scope is specified, preventing the accidental deletion - of an entire file's content.1 + of an entire file's content.[^1] - `--squeeze`: Collapses sequences of whitespace. Like `--delete`, this - requires an explicit scope.1 + requires an explicit scope.[^1] - `--german`: A specialized action that correctly handles German orthography, such as converting "Ueberflieger" to "Überflieger," demonstrating the - potential for domain-specific transformations.7 + potential for domain-specific transformations.[^7] ### 4.3 In-place File Modification and Operational Safety To apply changes directly to files on disk, one can provide a path to `srgn` instead of piping from `stdin`. For more complex file selections, the `--glob` -option accepts a glob pattern.1 +option accepts a glob pattern.[^1] It is crucial to heed the official documentation's warning: `srgn` is currently in beta (major version 0). **Any in-place modifications should only be -performed on files that are safely under version control**.1 +performed on files that are safely under version control**.[^1] -To mitigate risk, the `--dry-run` flag is an indispensable safety feature.9 +To mitigate risk, the `--dry-run` flag is an indispensable safety feature.[^9] When used, `srgn` will print a `diff`-like output of the changes it *would* make without @@ -390,8 +394,8 @@ challenges by combining `srgn`'s scoping and action capabilities. - **Explanation**: This command's precision comes from the `'module-names-in-imports'` grammar scope, a feature highlighted in the - project's release notes.9 This scope surgically targets only the module names - within + project's release notes.[^9] This scope surgically targets only the module + names within `import` and `from... import` statements, completely avoiding the risk of altering variables or strings that happen to contain the text `old_utils`. 
@@ -468,7 +472,7 @@ showcasing `srgn`'s versatility across different languages. `#[expect(some_lint)]`. This ensures that if the underlying code is fixed and no longer triggers the lint, the build will fail, forcing the removal of the now-unnecessary attribute. This exact use case is mentioned as an example in - the `srgn` documentation.2 + the `srgn` documentation.[^2] - **Command**: @@ -526,7 +530,7 @@ showcasing `srgn`'s versatility across different languages. - **Explanation**: This operation's surgical precision is enabled by the `'names-in-uses-declarations'` scope, a powerful feature documented in the - release notes.9 This scope targets + release notes.[^9] This scope targets *only* the paths inside `use...;` statements. It will correctly change `use old_api::prelude::*;` to `use new_api::prelude::*;` and @@ -542,7 +546,7 @@ showcasing `srgn`'s versatility across different languages. For the most demanding refactoring tasks, `srgn` offers an escape hatch beyond the command line. It is a dual-use tool, available not only as a binary but -also as a Rust library that can be added to a project with `cargo add srgn`.7 +also as a Rust library that can be added to a project with `cargo add srgn`.[^7] This library interface provides the ultimate level of control for power users. For extremely complex, multi-pass, or stateful refactoring scenarios where the @@ -610,20 +614,20 @@ modifications that would otherwise be tedious and error-prone. ## Appendix: Grammar Scope Reference -### A.1 A Note on This List +### A.1 A Note on This List The following tables list the known language grammar scopes for Python and Rust.
This reference has been meticulously compiled from the official `srgn` -documentation, README examples, and GitHub release notes.2 As direct inspection -of the +documentation, README examples, and GitHub release notes.[^2] As direct +inspection of the `PreparedQuery` source enum was not possible during research 15, this list should be considered comprehensive but potentially subject to change in future `srgn` versions. Users can often discover available scopes by providing an -invalid one, as `srgn` will helpfully list the valid options.9 +invalid one, as `srgn` will helpfully list the valid options.[^9] -### A.2 Table: Python Grammar Scopes (`--python ` or `--py `) +### A.2 Table: Python Grammar Scopes (`--python ` or `--py `) | Scope Name | Description | Example Command | | ----------------------- | ------------------------------------------------------------------------- | --------------------------------------------- | @@ -636,7 +640,7 @@ invalid one, as `srgn` will helpfully list the valid options.9 | module-names-in-imports | Selects only the module names in import and from... import statements. | srgn --py 'module-names-in-imports' 'old_lib' | | call | Selects entire function or method call expressions (e.g., foo(bar, baz)). 
| srgn --py 'call' '^print\(' | -### A.3 Table: Rust Grammar Scopes (`--rust ` or `--rs `) +### A.3 Table: Rust Grammar Scopes (`--rust ` or `--rs `) | Scope Name | Description | Example Command | | -------------------------- | -------------------------------------------------------------- | ------------------------------------------------------ | From 348bea55cde703e9b8be61b8177ce9a843349914 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 8 Aug 2025 17:49:02 +0100 Subject: [PATCH 5/6] Expose Ninja constant for reuse - publish NINJA_PROGRAM so tests can import the default executable\n- point cucumber steps at the constant instead of repeating the string\n- tidy srgn guide paragraph and narrow test helper lint --- docs/srgn.md | 6 ++---- src/runner.rs | 2 +- tests/steps/process_steps.rs | 4 ++-- tests/support/mod.rs | 6 +++++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/srgn.md b/docs/srgn.md index 73308493..92703b77 100644 --- a/docs/srgn.md +++ b/docs/srgn.md @@ -15,10 +15,8 @@ another (e.g., a string literal). This is the precise gap that `srgn`, the "code surgeon," is designed to fill.[^1] It operates as a powerful hybrid, blending the regex-based pattern -matching of - -`grep`, the stream-editing capabilities of `tr` and `sed`, and the syntactic -intelligence of the `tree-sitter` parsing framework.[^1] +matching of `grep`, the stream-editing capabilities of `tr` and `sed`, and the +syntactic intelligence of the `tree-sitter` parsing framework.[^1] `srgn` complements traditional tools by operating on a different "dimension" of code analysis.[^1] It is not a replacement for full-featured IDE refactoring diff --git a/src/runner.rs b/src/runner.rs index 9976127c..4ba0a0f4 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -17,7 +17,7 @@ use tempfile::{Builder, NamedTempFile}; use tracing::{debug, info}; /// Default Ninja executable to invoke. 
-const NINJA_PROGRAM: &str = "ninja"; +pub const NINJA_PROGRAM: &str = "ninja"; #[derive(Debug, Clone)] pub struct NinjaContent(String); diff --git a/tests/steps/process_steps.rs b/tests/steps/process_steps.rs index 00c7ce0a..2e70c505 100644 --- a/tests/steps/process_steps.rs +++ b/tests/steps/process_steps.rs @@ -2,7 +2,7 @@ use crate::{CliWorld, support}; use cucumber::{given, then, when}; -use netsuke::runner::{self, BuildTargets}; +use netsuke::runner::{self, BuildTargets, NINJA_PROGRAM}; use std::fs; use std::path::{Path, PathBuf}; use tempfile::{NamedTempFile, TempDir}; @@ -106,7 +106,7 @@ fn run(world: &mut CliWorld) { let program = if let Some(ninja) = &world.ninja { Path::new(ninja) } else { - Path::new("ninja") + Path::new(NINJA_PROGRAM) }; let targets = BuildTargets::new(&[]); match runner::run_ninja(program, cli, Path::new("build.ninja"), &targets) { diff --git a/tests/support/mod.rs b/tests/support/mod.rs index 3a8e84bc..443d7221 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -148,7 +148,11 @@ pub fn fake_ninja_pwd() -> (TempDir, PathBuf) { /// The manifest declares a single `hello` target that prints a greeting. /// This must be `allow` as `expect` will trigger an unfulfilled warning /// despite the lint violation arising. 
-#[allow(dead_code, reason = "shared test utility not used in all crates")] +#[allow( + unfulfilled_lint_expectations, + reason = "shared test utility not used in all crates" +)] +#[expect(dead_code, reason = "shared test utility not used in all crates")] pub fn write_manifest(file: &mut impl Write) -> io::Result<()> { writeln!( file, From fe477d6b8fee19659cd1123488419b5b4ec9614e Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 8 Aug 2025 23:29:56 +0100 Subject: [PATCH 6/6] Support Ninja override via env var --- src/runner.rs | 13 ++++++++++-- tests/runner_tests.rs | 41 ++++++++++++++++++++++++++++++++++-- tests/steps/process_steps.rs | 2 +- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index 4ba0a0f4..ff170246 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -18,6 +18,8 @@ use tracing::{debug, info}; /// Default Ninja executable to invoke. pub const NINJA_PROGRAM: &str = "ninja"; +/// Environment variable override for the Ninja executable. +pub const NINJA_ENV: &str = "NETSUKE_NINJA"; #[derive(Debug, Clone)] pub struct NinjaContent(String); @@ -49,7 +51,7 @@ impl CommandArg { } } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Default)] pub struct BuildTargets<'a>(&'a [String]); impl<'a> BuildTargets<'a> { #[must_use] @@ -122,7 +124,8 @@ fn handle_build(cli: &Cli, args: &BuildArgs) -> Result<()> { (tmp.path().to_path_buf(), Some(tmp)) }; - run_ninja(Path::new(NINJA_PROGRAM), cli, &build_path, &targets)?; + let program = resolve_ninja_program(); + run_ninja(program.as_path(), cli, &build_path, &targets)?; Ok(()) } @@ -212,6 +215,12 @@ fn resolve_manifest_path(cli: &Cli) -> std::path::PathBuf { .map_or_else(|| cli.file.clone(), |dir| dir.join(&cli.file)) } +/// Determine which Ninja executable to invoke. +#[must_use] +fn resolve_ninja_program() -> PathBuf { + std::env::var_os(NINJA_ENV).map_or_else(|| PathBuf::from(NINJA_PROGRAM), PathBuf::from) +} + /// Check if `arg` contains a sensitive keyword. 
/// /// # Examples diff --git a/tests/runner_tests.rs b/tests/runner_tests.rs index a8e76afd..5d4ace19 100644 --- a/tests/runner_tests.rs +++ b/tests/runner_tests.rs @@ -1,5 +1,5 @@ use netsuke::cli::{BuildArgs, Cli, Commands}; -use netsuke::runner::{BuildTargets, run, run_ninja}; +use netsuke::runner::{BuildTargets, NINJA_ENV, run, run_ninja}; use rstest::rstest; use serial_test::serial; use std::path::{Path, PathBuf}; @@ -40,7 +40,7 @@ fn run_ninja_not_found() { targets: vec![], })), }; - let targets = BuildTargets::new(&[]); + let targets = BuildTargets::default(); let err = run_ninja( Path::new("does-not-exist"), &cli, @@ -163,3 +163,40 @@ fn run_manifest_subcommand_writes_file() { std::env::set_var("PATH", original_path); } } + +#[test] +#[serial] +fn run_respects_env_override_for_ninja() { + let (temp_dir, ninja_path) = support::fake_ninja(0); + let original = std::env::var_os(NINJA_ENV); + unsafe { + std::env::set_var(NINJA_ENV, &ninja_path); + } + + let temp = tempfile::tempdir().expect("temp dir"); + let manifest_path = temp.path().join("Netsukefile"); + std::fs::copy("tests/data/minimal.yml", &manifest_path).expect("copy manifest"); + let cli = Cli { + file: manifest_path.clone(), + directory: Some(temp.path().to_path_buf()), + jobs: None, + verbose: false, + command: Some(Commands::Build(BuildArgs { + emit: None, + targets: vec![], + })), + }; + + let result = run(&cli); + assert!(result.is_ok()); + + unsafe { + if let Some(val) = original { + std::env::set_var(NINJA_ENV, val); + } else { + std::env::remove_var(NINJA_ENV); + } + } + drop(ninja_path); + drop(temp_dir); +} diff --git a/tests/steps/process_steps.rs b/tests/steps/process_steps.rs index 2e70c505..bffcb331 100644 --- a/tests/steps/process_steps.rs +++ b/tests/steps/process_steps.rs @@ -108,7 +108,7 @@ fn run(world: &mut CliWorld) { } else { Path::new(NINJA_PROGRAM) }; - let targets = BuildTargets::new(&[]); + let targets = BuildTargets::default(); match runner::run_ninja(program, cli, 
Path::new("build.ninja"), &targets) { Ok(()) => { world.run_status = Some(true);