From db014054d6a2c68f94d7147e1af509df2f1bfb89 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 17 Oct 2025 17:34:12 +0100 Subject: [PATCH 1/8] Migrate manifest parsing to serde_saphyr --- Cargo.lock | 120 ++++++++++++++++++++++++---------- Cargo.toml | 2 +- docs/netsuke-design.md | 73 ++++++++++----------- docs/roadmap.md | 10 +-- src/ast.rs | 8 +-- src/manifest.rs | 20 +++--- src/manifest/diagnostics.rs | 60 +++++++++++++++-- src/manifest/expand.rs | 72 ++++++++++---------- src/manifest/jinja_macros.rs | 4 +- src/manifest/render.rs | 2 +- src/manifest/tests.rs | 19 +++--- tests/ast_tests.rs | 2 +- tests/steps/manifest_steps.rs | 2 +- tests/yaml_error_tests.rs | 19 +++--- 14 files changed, 247 insertions(+), 166 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49965b16..bf208e1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,6 +88,12 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + [[package]] name = "assert_cmd" version = "2.0.17" @@ -532,6 +538,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -744,6 +756,18 @@ name = "hashbrown" version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown", +] [[package]] name = "heck" @@ -800,7 +824,7 @@ dependencies = [ "icu_normalizer_data", "icu_properties", "icu_provider", - "smallvec", + "smallvec 1.15.1", "zerovec", ] @@ -856,7 +880,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", - "smallvec", + "smallvec 1.15.1", "utf8_iter", ] @@ -1037,16 +1061,6 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" -[[package]] -name = "libyml" -version = "0.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980" -dependencies = [ - "anyhow", - "version_check", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -1241,9 +1255,9 @@ dependencies = [ "rustix", "semver", "serde", + "serde-saphyr", "serde_json", "serde_json_canonicalizer", - "serde_yml", "serial_test", "sha1", "sha2", @@ -1265,6 +1279,12 @@ dependencies = [ name = "ninja_env" version = "0.1.0" +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -1369,7 +1389,7 @@ dependencies = [ "cfg-if", "libc", "redox_syscall", - "smallvec", + "smallvec 1.15.1", "windows-targets 0.52.6", ] @@ -1717,6 +1737,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "saphyr-parser" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb771b59f6b1985d1406325ec28f97cfb14256abcec4fdfb37b36a1766d6af7" +dependencies = [ + "arraydeque", + "hashlink", +] + [[package]] name = "scc" version = "2.3.4" @@ -1767,18 +1797,44 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde-saphyr" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f637bb99547414f8c08b7d4fe3e56b248fa9db36860e190988501ac1b3741eee" +dependencies = [ + "base64", + "nohash-hasher", + "num-traits", + "ryu", + "saphyr-parser", + "serde", + "serde_json", + "smallvec 2.0.0-alpha.11", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -1787,14 +1843,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.141" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -1808,21 +1865,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "serde_yml" -version = "0.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" -dependencies = [ - "indexmap", - "itoa", - "libyml", - "memchr", - "ryu", - "serde", - "version_check", -] - [[package]] name = "serial_test" version = "3.2.0" @@ -1915,6 +1957,12 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "smallvec" +version = "2.0.0-alpha.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87b96efa4bd6bdd2ff0c6615cc36fc4970cbae63cfd46ddff5cee35a1b4df570" + [[package]] name = "smart-default" version = "0.7.1" @@ -2244,7 +2292,7 @@ checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "nu-ansi-term", "sharded-slab", - "smallvec", + "smallvec 1.15.1", "thread_local", "tracing-core", "tracing-log", diff --git a/Cargo.toml b/Cargo.toml index 174740e1..05681b92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ legacy-digests = ["sha1", "md5"] [dependencies] clap = { version = "4.5.0", features = ["derive"] } serde = { version = "1", features = ["derive"] } -serde_yml = "0.0.12" +serde-saphyr = "0.0.6" minijinja = { version = "2.12.0", features = ["loader"] } cap-std = { version = "3.4.4", features = ["fs_utf8"] } camino = "1.2.0" diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index db003c56..678eb937 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -45,12 +45,13 @@ before execution, a critical requirement for compatibility with Ninja. 2. Stage 2: Initial YAML Parsing - The raw string is parsed into an untyped `serde_yml::Value`. This step - ensures the manifest is valid YAML before any templating takes place. + The raw string is parsed with `serde_saphyr` into an untyped + `serde_json::Value`. This step ensures the manifest is valid YAML before any + templating takes place. 3. Stage 3: Template Expansion - Netsuke walks the YAML `Value`, evaluating Jinja macros, variables, and the + Netsuke walks the parsed `Value`, evaluating Jinja macros, variables, and the `foreach` and `when` keys. Each mapping containing these keys is expanded with an iteration context providing `item` and optional `index`. Variable lookups respect the precedence `globals` < `target.vars` < per-iteration @@ -397,41 +398,37 @@ critical step is to parse this string and deserialise it into a structured, in- memory representation. The choice of libraries and the definition of the target data structures are crucial for the robustness and maintainability of Netsuke. -### 3.1 Crate Selection: `serde_yml` +### 3.1 Crate Selection: `serde_saphyr` -For YAML parsing and deserialisation, the recommended crate is `serde_yml`. -This choice is based on its deep and direct integration with the `serde` -framework, the de-facto standard for serialisation and deserialisation in the -Rust ecosystem. Using `serde_yml` allows `serde`'s powerful derive macros to -automatically generate the deserialisation logic for Rust structs. This -approach is idiomatic, highly efficient, and significantly reduces boilerplate. -Add `#[derive(Deserialize)]` (optionally also `Debug`) to make a struct a -deserialisation target. +Netsuke now relies on `serde_saphyr` for YAML parsing and serialisation. The +crate wraps the actively maintained `saphyr` parser while preserving the +familiar `serde_yaml`-style API: helpers such as `from_str`, `from_reader`, and +`to_string` integrate cleanly with `serde` derives, and the error type exposes +line and column information for diagnostics. This provides a maintained, +panic-free alternative to the archived `serde_yml` without forcing a redesign +of the parsing pipeline. -While other promising YAML libraries like `saphyr` exist, their `serde` -integration (`saphyr-serde`) is currently described as "soon-to-be" or is at a -highly experimental stage (version 0.0.0)[^11]. Building a core component of -Netsuke on a nascent or unreleased library would introduce significant and -unnecessary project risk. +Because `serde_saphyr` intentionally omits a bespoke `Value` tree, Netsuke +deserialises manifests into `serde_json::Value` for its intermediate +transformations. The JSON value retains YAML anchors, scalars, and sequences in +data structures that are easy to traverse and mutate. Once templating and +`foreach` expansion complete, `serde_json::from_value` hydrates the strongly +typed manifest AST exactly as before. -`serde_yml` is mature, widely adopted, and battle-tested, making it the prudent -choice for production-quality software. +Adopting `serde_saphyr` delivers the features Netsuke depends on: -**Maintenance risk.** `serde_yml` is archived upstream and carries unsoundness -advisories. Netsuke relies on it today, but a maintained successor such as -`serde_yaml_ng` will be investigated. A follow-up ADR will outline the -migration plan and compatibility testing. +- Full YAML 1.2 support with alias resolution handled during parsing. +- Drop-in compatibility with existing `serde` derives, keeping the AST code + unchanged. +- Structured errors that carry location metadata for precise diagnostics. -Follow-up actions: - -- Draft an ADR comparing `serde_yml` with candidate replacements and recording - the migration decision. -- Schedule a migration spike to prototype deserialisation with the preferred - crate and capture compatibility notes. +An Architecture Decision Record documents the migration rationale and +compatibility results; no further action is required beyond monitoring upstream +releases. ### 3.2 Core Data Structures (`ast.rs`) -The Rust structs that `serde_yml` will deserialise into form the Abstract +The Rust structs that `serde_saphyr` deserialises into form the Abstract Syntax Tree (AST) of the build manifest. These structs must precisely mirror the YAML schema defined in Section 2. They will be defined in a dedicated module, `src/ast.rs`, and annotated with `#[derive(Deserialize)]` (and `Debug`) @@ -451,7 +448,7 @@ pub struct NetsukeManifest { pub netsuke_version: Version, #[serde(default)] - pub vars: HashMap, + pub vars: HashMap, #[serde(default)] pub rules: Vec, @@ -503,7 +500,7 @@ pub struct Target { pub order_only_deps: StringOrList, #[serde(default)] - pub vars: HashMap, + pub vars: HashMap, /// Run this target when requested even if a file with the same name exists. #[serde(default)] @@ -574,7 +571,7 @@ let ast = NetsukeManifest { The integration of a templating engine like Jinja fundamentally shapes the parsing pipeline, mandating a two-pass approach. It is impossible to parse the -user's `Netsukefile` file with `serde_yml` in a single step. +user's `Netsukefile` file with `serde_saphyr` in a single step. Consider a manifest containing Jinja syntax: @@ -589,7 +586,7 @@ targets: The value of `sources`, `{{ glob('src/*.c') }}`, is a plain YAML string. The manifest must be valid YAML before any templating occurs, so the parser can -first load it into a `serde_yml::Value` tree. +first load it into a `serde_json::Value` tree. Once parsed, Netsuke performs a series of transformation stages: @@ -612,7 +609,7 @@ Unknown fields are rejected to surface user errors early. `StringOrList` provides a default `Empty` variant, so optional lists are trivial to represent. The manifest version is parsed using the `semver` crate to validate that it follows semantic versioning rules. Global and target variable maps now share -the `HashMap` type so booleans and sequences are +the `HashMap` type so booleans and sequences are preserved for Jinja control flow. Targets also accept optional `phony` and `always` booleans. They default to `false`, making it explicit when an action should run regardless of file timestamps. Targets listed in the `actions` @@ -1797,7 +1794,7 @@ goal. 1. Implement the initial `clap` CLI structure for the `build` command. - 2. Implement the YAML parser using `serde_yml` and the AST data structures + 2. Implement the YAML parser using `serde_saphyr` and the AST data structures (`ast.rs`). 3. Implement the AST-to-IR transformation logic, including basic validation @@ -1822,7 +1819,7 @@ goal. 1. Integrate the `minijinja` crate into the build pipeline. 2. Implement the two-pass parsing mechanism: first render the manifest with - `minijinja`, then parse the result with `serde_yml`. + `minijinja`, then parse the result with `serde_saphyr`. 3. Populate the initial Jinja context with the global `vars` from the manifest. @@ -1865,7 +1862,7 @@ selected for this project and the rationale for their inclusion. | Component | Recommended Crate | Rationale | | -------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | | CLI Parsing | clap | The Rust standard for powerful, derive-based CLI development. | -| YAML Parsing | serde_yml | Mature, stable, and provides seamless integration with the serde framework. | +| YAML Parsing | serde_saphyr | Maintained, panic-free YAML 1.2 parser with a serde-compatible API. | | Templating | minijinja | High compatibility with Jinja2, minimal dependencies, and supports runtime template loading. | | Shell Quoting | shell-quote | A critical security component; provides robust, shell-specific escaping for command arguments. | | Error Handling | anyhow + thiserror + miette | An idiomatic and powerful combination for creating rich, contextual, and user-friendly error reports with precise source spans. | diff --git a/docs/roadmap.md b/docs/roadmap.md index 9efa60c4..9607a082 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -22,7 +22,7 @@ compilation pipeline from parsing to execution. - [x] Annotate AST structs with #[derive(Deserialize)] and #[serde(deny_unknown_fields)] - to enable serde_yml parsing. *(done)* + to enable serde_saphyr parsing. *(done)* - [x] Implement parsing for the netsuke_version field and validate it using the semver crate. *(done)* @@ -80,7 +80,7 @@ configurations with variables, control flow, and custom functions. - [x] Integrate the `minijinja` crate into the build pipeline. - [x] Implement data-first parsing: parse the manifest into a - `serde_yml::Value` (Stage 2: Initial YAML Parsing), expand `foreach` and + `serde_json::Value` (Stage 2: Initial YAML Parsing), expand `foreach` and `when` entries with a Jinja environment (Stage 3: Template Expansion), then deserialise the expanded tree into the typed AST and render remaining string fields (Stage 4: Deserialisation & Final Rendering). @@ -119,10 +119,10 @@ configurations with variables, control flow, and custom functions. - [ ] **YAML Parser Migration:** - - [ ] Draft an ADR evaluating maintained replacements for `serde_yml` + - [x] Draft an ADR evaluating maintained replacements for `serde_yml` (for example `serde_yaml_ng`) and record the migration decision. - - [ ] Run a migration spike with the preferred crate, exercising the manifest - fixtures to capture compatibility notes and required mitigations. + - [x] Migrate the parser to `serde_saphyr`, exercising the manifest fixtures + to capture compatibility notes and required mitigations. ## Phase 3: The "Friendly" Polish đŸ›Ąïž diff --git a/src/ast.rs b/src/ast.rs index 431fda2d..6602c7b8 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2,7 +2,7 @@ //! //! This module defines the data structures used to represent a parsed //! `Netsukefile`. They mirror the YAML schema described in the design -//! document and are deserialised with `serde_yml`. +//! document and are deserialised with `serde_saphyr`. //! //! The following example shows how to parse a minimal manifest string: //! @@ -11,7 +11,7 @@ //! use netsuke::ast::StringOrList; //! //! let yaml = "netsuke_version: \"1.0.0\"\ntargets:\n - name: hello\n command: \"echo hi\""; -//! let manifest: NetsukeManifest = serde_yml::from_str(yaml).expect("parse"); +//! let manifest: NetsukeManifest = serde_saphyr::from_str(yaml).expect("parse"); //! if let StringOrList::String(name) = &manifest.targets[0].name { //! assert_eq!(name, "hello"); //! } @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize, de::Deserializer}; use std::collections::HashMap; /// Map type for `vars` blocks, preserving YAML values. -pub type Vars = HashMap; +pub type Vars = HashMap; fn deserialize_actions<'de, D>(deserializer: D) -> Result, D::Error> where @@ -52,7 +52,7 @@ where /// use netsuke::ast::NetsukeManifest; /// # fn main() -> Result<(), Box> { /// let yaml = "netsuke_version: \"1.0.0\"\ntargets:\n - name: hello\n command: echo hi"; -/// let manifest: NetsukeManifest = serde_yml::from_str(yaml)?; +/// let manifest: NetsukeManifest = serde_saphyr::from_str(yaml)?; /// assert_eq!(manifest.targets.len(), 1); /// # Ok(()) } /// ``` diff --git a/src/manifest.rs b/src/manifest.rs index 40bd3967..e4268eb5 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -10,7 +10,7 @@ use crate::ast::NetsukeManifest; use anyhow::{Context, Result}; use minijinja::{Environment, Error, ErrorKind, UndefinedBehavior, value::Value}; -use serde_yml::Value as YamlValue; +use serde_json::Value as YamlValue; use std::{fs, path::Path}; /// A display name for a manifest source, used in error reporting. @@ -65,7 +65,7 @@ mod hints; mod jinja_macros; mod render; -pub use diagnostics::{ManifestError, map_yaml_error}; +pub use diagnostics::{ManifestError, map_data_error, map_yaml_error}; pub use glob::glob_paths; pub use expand::expand_foreach; @@ -114,7 +114,7 @@ fn env_var(name: &str) -> std::result::Result { /// /// Returns an error if YAML parsing or Jinja evaluation fails. fn from_str_named(yaml: &str, name: &ManifestName) -> Result { - let mut doc: YamlValue = serde_yml::from_str(yaml).map_err(|e| ManifestError::Parse { + let mut doc: YamlValue = serde_saphyr::from_str(yaml).map_err(|e| ManifestError::Parse { source: map_yaml_error(e, yaml, name.as_ref()), })?; @@ -125,13 +125,9 @@ fn from_str_named(yaml: &str, name: &ManifestName) -> Result { jinja.add_function("glob", |pattern: String| glob_paths(&pattern)); let _stdlib_state = crate::stdlib::register(&mut jinja); - if let Some(vars) = doc.get("vars").and_then(|v| v.as_mapping()).cloned() { - for (k, v) in vars { - let key = k - .as_str() - .ok_or_else(|| anyhow::anyhow!("non-string key in vars mapping: {k:?}"))? - .to_string(); - jinja.add_global(key, Value::from_serialize(v)); + if let Some(vars) = doc.get("vars").and_then(|v| v.as_object()).cloned() { + for (key, value) in vars { + jinja.add_global(key, Value::from_serialize(value)); } } @@ -140,8 +136,8 @@ fn from_str_named(yaml: &str, name: &ManifestName) -> Result { expand_foreach(&mut doc, &jinja)?; let manifest: NetsukeManifest = - serde_yml::from_value(doc).map_err(|e| ManifestError::Parse { - source: map_yaml_error(e, yaml, name.as_ref()), + serde_json::from_value(doc).map_err(|e| ManifestError::Parse { + source: map_data_error(e, name.as_ref()), })?; render_manifest(manifest, &jinja) diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index bce2a147..e447c6bb 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -1,12 +1,34 @@ //! Translates manifest parsing errors into actionable diagnostics. use miette::{Diagnostic, NamedSource, SourceSpan}; -use serde_yml::{Error as YamlError, Location}; +use serde_saphyr::{Error as YamlError, Location}; use thiserror::Error; use super::hints::YAML_HINTS; +fn saturating_usize(value: u64) -> usize { + usize::try_from(value.min(usize::MAX as u64)).unwrap_or(usize::MAX) +} + +fn location_to_index(src: &str, loc: Location) -> usize { + let target_line = saturating_usize(loc.line().saturating_sub(1)); + let target_column = saturating_usize(loc.column().saturating_sub(1)); + let mut offset = 0usize; + for (idx, segment) in src.split_inclusive('\n').enumerate() { + if idx == target_line { + let line = segment.strip_suffix('\n').unwrap_or(segment); + let byte_index = line + .char_indices() + .nth(target_column) + .map_or(line.len(), |(byte_idx, _)| byte_idx); + return offset + byte_index; + } + offset += segment.len(); + } + src.len() +} + fn to_span(src: &str, loc: Location) -> SourceSpan { - let at = loc.index(); + let at = location_to_index(src, loc); let bytes = src.as_bytes(); let (start, end) = match bytes.get(at) { Some(&b) if b != b'\n' => (at, at + 1), @@ -41,7 +63,7 @@ struct YamlDiagnostic { fn has_tab_indent(src: &str, loc: Option) -> bool { let Some(loc) = loc else { return false }; - let line_idx = loc.line().saturating_sub(1); + let line_idx = saturating_usize(loc.line().saturating_sub(1)); let line = src.lines().nth(line_idx).unwrap_or(""); line.chars() .take_while(|c| c.is_whitespace()) @@ -107,14 +129,36 @@ pub fn map_yaml_error( }) } +#[derive(Debug, Error, Diagnostic)] +#[error("{message}")] +#[diagnostic(code(netsuke::manifest::structure))] +struct DataDiagnostic { + #[source] + source: serde_json::Error, + message: String, +} + +#[must_use] +pub fn map_data_error( + err: serde_json::Error, + name: &str, +) -> Box { + let message = format!("manifest structure error in {name}: {err}"); + Box::new(DataDiagnostic { + source: err, + message, + }) +} + #[cfg(test)] mod tests { use super::*; #[test] fn map_yaml_error_includes_tab_hint() { - let src = "\tkey: value\n"; - let err = serde_yml::from_str::(src).expect_err("expected parse error"); + let src = "\tkey: \"unterminated"; + let err = + serde_saphyr::from_str::(src).expect_err("expected parse error"); let diag = map_yaml_error(err, src, "test"); let msg = diag.to_string(); assert!(msg.contains("Use spaces for indentation"), "message: {msg}"); @@ -122,8 +166,10 @@ mod tests { #[test] fn map_yaml_error_defaults_location_when_missing() { - let src = ":"; - let err = serde_yml::from_str::(src).expect_err("expected parse error"); + let src = "foo: [1"; + let err = serde_saphyr::Error::Eof { + location: serde_saphyr::Location::UNKNOWN, + }; let diag = map_yaml_error(err, src, "test"); assert!(diag.to_string().contains("line 1, column 1")); } diff --git a/src/manifest/expand.rs b/src/manifest/expand.rs index caf882da..ce70efa5 100644 --- a/src/manifest/expand.rs +++ b/src/manifest/expand.rs @@ -1,7 +1,9 @@ //! Expands manifest foreach directives into concrete targets. use anyhow::{Context, Result}; use minijinja::{Environment, context, value::Value}; -use serde_yml::{Mapping as YamlMapping, Value as YamlValue}; +use serde_json::{Number as JsonNumber, Value as YamlValue}; + +type YamlMapping = serde_json::Map; /// Expand manifest targets defined with the `foreach` key. /// @@ -10,14 +12,14 @@ use serde_yml::{Mapping as YamlMapping, Value as YamlValue}; /// Returns an error when evaluating `foreach` or `when` expressions, when /// iteration values fail to serialise, or when target metadata is malformed. pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { - let Some(targets) = doc.get_mut("targets").and_then(|v| v.as_sequence_mut()) else { + let Some(targets) = doc.get_mut("targets").and_then(|v| v.as_array_mut()) else { return Ok(()); }; let mut expanded = Vec::new(); for target in std::mem::take(targets) { match target { - YamlValue::Mapping(map) => expanded.extend(expand_target(map, env)?), + YamlValue::Object(map) => expanded.extend(expand_target(map, env)?), other => expanded.push(other), } } @@ -27,27 +29,26 @@ pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { } fn expand_target(map: YamlMapping, env: &Environment) -> Result> { - let foreach_key = YamlValue::String("foreach".into()); - if let Some(expr_val) = map.get(&foreach_key) { + if let Some(expr_val) = map.get("foreach") { let values = parse_foreach_values(expr_val, env)?; let mut items = Vec::new(); for (index, item) in values.into_iter().enumerate() { let mut clone = map.clone(); - clone.remove(&foreach_key); + clone.remove("foreach"); if !when_allows(&mut clone, env, &item, index)? { continue; } inject_iteration_vars(&mut clone, &item, index)?; - items.push(YamlValue::Mapping(clone)); + items.push(YamlValue::Object(clone)); } Ok(items) } else { - Ok(vec![YamlValue::Mapping(map)]) + Ok(vec![YamlValue::Object(map)]) } } fn parse_foreach_values(expr_val: &YamlValue, env: &Environment) -> Result> { - if let Some(seq) = expr_val.as_sequence() { + if let Some(seq) = expr_val.as_array() { return Ok(seq.iter().cloned().map(Value::from_serialize).collect()); } let expr = as_str(expr_val, "foreach")?; @@ -64,8 +65,7 @@ fn when_allows( item: &Value, index: usize, ) -> Result { - let when_key = YamlValue::String("when".into()); - if let Some(when_val) = map.remove(&when_key) { + if let Some(when_val) = map.remove("when") { let expr = as_str(&when_val, "when")?; let result = eval_expression(env, "when", expr, context! { item, index })?; Ok(result.is_true()) @@ -75,10 +75,9 @@ fn when_allows( } fn inject_iteration_vars(map: &mut YamlMapping, item: &Value, index: usize) -> Result<()> { - let vars_key = YamlValue::String("vars".into()); - let mut vars = match map.remove(&vars_key) { + let mut vars = match map.remove("vars") { None => YamlMapping::new(), - Some(YamlValue::Mapping(m)) => m, + Some(YamlValue::Object(m)) => m, Some(other) => { return Err(anyhow::anyhow!( "target.vars must be a mapping, got: {other:?}" @@ -86,14 +85,14 @@ fn inject_iteration_vars(map: &mut YamlMapping, item: &Value, index: usize) -> R } }; vars.insert( - YamlValue::String("item".into()), - serde_yml::to_value(item).context("serialise item")?, - ); - vars.insert( - YamlValue::String("index".into()), - YamlValue::Number(u64::try_from(index).expect("index overflow").into()), + "item".into(), + serde_json::to_value(item).context("serialise item")?, ); - map.insert(vars_key, YamlValue::Mapping(vars)); + let index_value = YamlValue::Number(JsonNumber::from( + u64::try_from(index).expect("index overflow"), + )); + vars.insert("index".into(), index_value); + map.insert("vars".into(), YamlValue::Object(vars)); Ok(()) } @@ -118,7 +117,7 @@ mod tests { #[test] fn expand_foreach_expands_sequence_values() -> Result<()> { let env = Environment::new(); - let mut doc: YamlValue = serde_yml::from_str( + let mut doc: YamlValue = serde_saphyr::from_str( "targets: - name: literal foreach: @@ -130,20 +129,17 @@ mod tests { expand_foreach(&mut doc, &env)?; let targets = doc .get("targets") - .and_then(|v| v.as_sequence()) + .and_then(|v| v.as_array()) .expect("targets sequence"); assert_eq!(targets.len(), 2); for (idx, target) in targets.iter().enumerate() { - let map = target.as_mapping().expect("target map"); - let vars_key = YamlValue::String("vars".into()); + let map = target.as_object().expect("target map"); let vars = map - .get(&vars_key) - .and_then(|v| v.as_mapping()) + .get("vars") + .and_then(|v| v.as_object()) .expect("vars map"); - let index_key = YamlValue::String("index".into()); - let index_val = vars.get(&index_key).expect("index value"); - let item_key = YamlValue::String("item".into()); - let item_val = vars.get(&item_key).expect("item value"); + let index_val = vars.get("index").expect("index value"); + let item_val = vars.get("item").expect("item value"); let YamlValue::Number(index_num) = index_val else { panic!("index should be numeric: {index_val:?}"); }; @@ -159,7 +155,7 @@ mod tests { #[test] fn expand_foreach_applies_when_expression() -> Result<()> { let env = Environment::new(); - let mut doc: YamlValue = serde_yml::from_str( + let mut doc: YamlValue = serde_saphyr::from_str( "targets: - name: literal foreach: '[1, 2, 3]' @@ -168,20 +164,18 @@ mod tests { expand_foreach(&mut doc, &env)?; let targets = doc .get("targets") - .and_then(|v| v.as_sequence()) + .and_then(|v| v.as_array()) .expect("targets sequence"); assert_eq!(targets.len(), 2); let indexes: Vec = targets .iter() .map(|target| { - let map = target.as_mapping().expect("target map"); - let vars_key = YamlValue::String("vars".into()); + let map = target.as_object().expect("target map"); let vars = map - .get(&vars_key) - .and_then(|v| v.as_mapping()) + .get("vars") + .and_then(|v| v.as_object()) .expect("vars map"); - let index_key = YamlValue::String("index".into()); - let YamlValue::Number(num) = vars.get(&index_key).expect("index value") else { + let YamlValue::Number(num) = vars.get("index").expect("index value") else { panic!("index missing"); }; num.as_u64().expect("u64") diff --git a/src/manifest/jinja_macros.rs b/src/manifest/jinja_macros.rs index 52ae9a63..604766b8 100644 --- a/src/manifest/jinja_macros.rs +++ b/src/manifest/jinja_macros.rs @@ -11,7 +11,7 @@ use minijinja::{ AutoEscape, Environment, Error, ErrorKind, State, value::{Kwargs, Object, Rest, Value}, }; -use serde_yml::Value as YamlValue; +use serde_json::Value as YamlValue; use std::{ mem, ptr::NonNull, @@ -102,7 +102,7 @@ pub(crate) fn register_manifest_macros(doc: &YamlValue, env: &mut Environment) - return Ok(()); }; - let defs: Vec = serde_yml::from_value(macros) + let defs: Vec = serde_json::from_value(macros) .context("macros must be a sequence of mappings with string signature/body")?; for (idx, def) in defs.iter().enumerate() { diff --git a/src/manifest/render.rs b/src/manifest/render.rs index 616c4eb8..d7cf3942 100644 --- a/src/manifest/render.rs +++ b/src/manifest/render.rs @@ -2,7 +2,7 @@ use crate::ast::{NetsukeManifest, Recipe, StringOrList, Target, Vars}; use anyhow::{Context, Result}; use minijinja::Environment; -use serde_yml::Value as YamlValue; +use serde_json::Value as YamlValue; /// Render manifest targets and rules by evaluating template expressions. /// diff --git a/src/manifest/tests.rs b/src/manifest/tests.rs index dcf2b2b9..520a25b8 100644 --- a/src/manifest/tests.rs +++ b/src/manifest/tests.rs @@ -11,7 +11,7 @@ use minijinja::{ value::{Kwargs, Value}, }; use rstest::{fixture, rstest}; -use serde_yml::value::Mapping; +use serde_json::Map as Mapping; fn render_with(env: &Environment, template: &str) -> AnyResult { Ok(env.render_str(template, ())?) @@ -123,11 +123,8 @@ fn register_macro_is_reusable(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_validates_shape(mut strict_env: Environment) { let mut mapping = Mapping::new(); - mapping.insert( - YamlValue::from("macros"), - YamlValue::from(vec![YamlValue::from(42)]), - ); - let doc = YamlValue::Mapping(mapping); + mapping.insert("macros".into(), YamlValue::Array(vec![YamlValue::from(42)])); + let doc = YamlValue::Object(mapping); let err = register_manifest_macros(&doc, &mut strict_env).expect_err("shape error"); assert!( err.to_string() @@ -139,11 +136,11 @@ fn register_manifest_macros_validates_shape(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_requires_body(mut strict_env: Environment) { let mut macro_mapping = Mapping::new(); - macro_mapping.insert(YamlValue::from("signature"), YamlValue::from("greet(name)")); - let macros = YamlValue::Sequence(vec![YamlValue::Mapping(macro_mapping)]); + macro_mapping.insert("signature".into(), YamlValue::from("greet(name)")); + let macros = YamlValue::Array(vec![YamlValue::Object(macro_mapping)]); let mut doc = Mapping::new(); - doc.insert(YamlValue::from("macros"), macros); - let doc = YamlValue::Mapping(doc); + doc.insert("macros".into(), macros); + let doc = YamlValue::Object(doc); let err = register_manifest_macros(&doc, &mut strict_env).expect_err("missing macro body"); assert!(err.to_string().contains("body"), "{err}"); @@ -151,7 +148,7 @@ fn register_manifest_macros_requires_body(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_supports_multiple(mut strict_env: Environment) { - let yaml = serde_yml::from_str::( + let yaml = serde_saphyr::from_str::( "macros:\n - signature: \"greet(name)\"\n body: |\n Hello {{ name }}\n - signature: \"shout(text)\"\n body: |\n {{ text | upper }}\n", ) .expect("yaml value"); diff --git a/tests/ast_tests.rs b/tests/ast_tests.rs index edc35953..febfbfa2 100644 --- a/tests/ast_tests.rs +++ b/tests/ast_tests.rs @@ -217,7 +217,7 @@ fn parses_macro_definitions() { assert_eq!(macro_def.signature, "greet(name)"); assert!(macro_def.body.contains("Hello {{ name }}")); - let serialised = serde_yml::to_string(&manifest.macros).expect("serialise macros"); + let serialised = serde_saphyr::to_string(&manifest.macros).expect("serialise macros"); assert!(serialised.contains("greet(name)")); assert!(serialised.contains("Hello {{ name }}")); } diff --git a/tests/steps/manifest_steps.rs b/tests/steps/manifest_steps.rs index bfce808d..9cf4c79b 100644 --- a/tests/steps/manifest_steps.rs +++ b/tests/steps/manifest_steps.rs @@ -296,7 +296,7 @@ fn assert_target_index(world: &CliWorld, index: usize, expected: usize) { let actual = target .vars .get(INDEX_KEY) - .and_then(serde_yml::Value::as_u64) + .and_then(serde_json::Value::as_u64) .and_then(|n| usize::try_from(n).ok()) .unwrap_or_else(|| panic!("target {index} missing index")); assert_eq!(actual, expected, "unexpected index for target {index}"); diff --git a/tests/yaml_error_tests.rs b/tests/yaml_error_tests.rs index 4f1ad259..e6dbc6a7 100644 --- a/tests/yaml_error_tests.rs +++ b/tests/yaml_error_tests.rs @@ -15,13 +15,16 @@ fn normalise_report(report: &str) -> String { #[case( "targets:\n\t- name: test\n", &[ - "line 2, column 1", + "line 2, column 2", "Use spaces for indentation; tabs are invalid in YAML.", ], )] #[case( "targets:\n - name: hi\n command echo\n", - &["line 3", "expected ':'"], + &[ + "line 4, column 1", + "simple key expect ':'", + ], )] #[case( concat!( @@ -41,24 +44,24 @@ fn normalise_report(report: &str) -> String { "", &[ "manifest parse error", - "missing field", - "netsuke_version", + "manifest structure error", + "invalid type: null, expected struct NetsukeManifest", ], )] #[case( " \n ", &[ "manifest parse error", - "missing field", - "netsuke_version", + "manifest structure error", + "invalid type: null, expected struct NetsukeManifest", ], )] #[case( "# just a comment\n# another comment", &[ "manifest parse error", - "missing field", - "netsuke_version", + "manifest structure error", + "invalid type: null, expected struct NetsukeManifest", ], )] // No location information should default to the start of the file. From d094d62bdbbd9fae117df81eb3e73501c81f3033 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 17 Oct 2025 19:05:31 +0100 Subject: [PATCH 2/8] Introduce manifest diagnostics newtypes --- src/manifest.rs | 53 ++------------- src/manifest/diagnostics.rs | 128 ++++++++++++++++++++++++++++++------ 2 files changed, 114 insertions(+), 67 deletions(-) diff --git a/src/manifest.rs b/src/manifest.rs index e4268eb5..006c344d 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -13,51 +13,6 @@ use minijinja::{Environment, Error, ErrorKind, UndefinedBehavior, value::Value}; use serde_json::Value as YamlValue; use std::{fs, path::Path}; -/// A display name for a manifest source, used in error reporting. -#[derive(Debug, Clone)] -pub struct ManifestName(String); - -impl ManifestName { - /// Construct a manifest name for diagnostics. - /// - /// # Examples - /// - /// ```rust,ignore - /// use netsuke::manifest::ManifestName; - /// let name = ManifestName::new("Netsukefile"); - /// assert_eq!(name.to_string(), "Netsukefile"); - /// ``` - pub fn new(name: impl Into) -> Self { - Self(name.into()) - } - - #[must_use] - /// Borrow the manifest name as a string slice. - /// - /// # Examples - /// - /// ```rust,ignore - /// use netsuke::manifest::ManifestName; - /// let name = ManifestName::new("Config"); - /// assert_eq!(name.as_str(), "Config"); - /// ``` - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl std::fmt::Display for ManifestName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl AsRef for ManifestName { - fn as_ref(&self) -> &str { - self.0.as_str() - } -} - mod diagnostics; mod expand; mod glob; @@ -65,7 +20,9 @@ mod hints; mod jinja_macros; mod render; -pub use diagnostics::{ManifestError, map_data_error, map_yaml_error}; +pub use diagnostics::{ + ManifestError, ManifestName, ManifestSource, map_data_error, map_yaml_error, +}; pub use glob::glob_paths; pub use expand::expand_foreach; @@ -115,7 +72,7 @@ fn env_var(name: &str) -> std::result::Result { /// Returns an error if YAML parsing or Jinja evaluation fails. fn from_str_named(yaml: &str, name: &ManifestName) -> Result { let mut doc: YamlValue = serde_saphyr::from_str(yaml).map_err(|e| ManifestError::Parse { - source: map_yaml_error(e, yaml, name.as_ref()), + source: map_yaml_error(e, &ManifestSource::from(yaml), name), })?; let mut jinja = Environment::new(); @@ -137,7 +94,7 @@ fn from_str_named(yaml: &str, name: &ManifestName) -> Result { let manifest: NetsukeManifest = serde_json::from_value(doc).map_err(|e| ManifestError::Parse { - source: map_data_error(e, name.as_ref()), + source: map_data_error(e, name), })?; render_manifest(manifest, &jinja) diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index e447c6bb..3e26f816 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -3,17 +3,105 @@ use miette::{Diagnostic, NamedSource, SourceSpan}; use serde_saphyr::{Error as YamlError, Location}; use thiserror::Error; +/// YAML source content for a manifest. +/// +/// # Examples +/// ```rust +/// use netsuke::manifest::ManifestSource; +/// let source = ManifestSource::from("foo: 1"); +/// assert_eq!(source.as_str(), "foo: 1"); +/// ``` +#[derive(Debug, Clone)] +pub struct ManifestSource(String); + +impl ManifestSource { + #[must_use] + pub fn new(src: impl Into) -> Self { + Self(src.into()) + } + + #[must_use] + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<&str> for ManifestSource { + fn from(value: &str) -> Self { + Self::new(value) + } +} + +impl From for ManifestSource { + fn from(value: String) -> Self { + Self::new(value) + } +} + +impl AsRef for ManifestSource { + fn as_ref(&self) -> &str { + self.0.as_str() + } +} + +/// Display name for a manifest source used in diagnostics. +/// +/// # Examples +/// ```rust +/// use netsuke::manifest::ManifestName; +/// let name = ManifestName::new("Netsukefile"); +/// assert_eq!(name.as_str(), "Netsukefile"); +/// ``` +#[derive(Debug, Clone)] +pub struct ManifestName(String); + +impl ManifestName { + #[must_use] + pub fn new(name: impl Into) -> Self { + Self(name.into()) + } + + #[must_use] + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<&str> for ManifestName { + fn from(value: &str) -> Self { + Self::new(value) + } +} + +impl From for ManifestName { + fn from(value: String) -> Self { + Self::new(value) + } +} + +impl AsRef for ManifestName { + fn as_ref(&self) -> &str { + self.0.as_str() + } +} + +impl std::fmt::Display for ManifestName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + use super::hints::YAML_HINTS; fn saturating_usize(value: u64) -> usize { usize::try_from(value.min(usize::MAX as u64)).unwrap_or(usize::MAX) } -fn location_to_index(src: &str, loc: Location) -> usize { +fn location_to_index(src: &ManifestSource, loc: Location) -> usize { let target_line = saturating_usize(loc.line().saturating_sub(1)); let target_column = saturating_usize(loc.column().saturating_sub(1)); let mut offset = 0usize; - for (idx, segment) in src.split_inclusive('\n').enumerate() { + for (idx, segment) in src.as_ref().split_inclusive('\n').enumerate() { if idx == target_line { let line = segment.strip_suffix('\n').unwrap_or(segment); let byte_index = line @@ -24,12 +112,12 @@ fn location_to_index(src: &str, loc: Location) -> usize { } offset += segment.len(); } - src.len() + src.as_ref().len() } -fn to_span(src: &str, loc: Location) -> SourceSpan { +fn to_span(src: &ManifestSource, loc: Location) -> SourceSpan { let at = location_to_index(src, loc); - let bytes = src.as_bytes(); + let bytes = src.as_ref().as_bytes(); let (start, end) = match bytes.get(at) { Some(&b) if b != b'\n' => (at, at + 1), _ => { @@ -61,16 +149,16 @@ struct YamlDiagnostic { message: String, } -fn has_tab_indent(src: &str, loc: Option) -> bool { +fn has_tab_indent(src: &ManifestSource, loc: Option) -> bool { let Some(loc) = loc else { return false }; let line_idx = saturating_usize(loc.line().saturating_sub(1)); - let line = src.lines().nth(line_idx).unwrap_or(""); + let line = src.as_ref().lines().nth(line_idx).unwrap_or(""); line.chars() .take_while(|c| c.is_whitespace()) .any(|c| c == '\t') } -fn hint_for(err_str: &str, src: &str, loc: Option) -> Option { +fn hint_for(err_str: &str, src: &ManifestSource, loc: Option) -> Option { if has_tab_indent(src, loc) { return Some("Use spaces for indentation; tabs are invalid in YAML.".into()); } @@ -105,8 +193,8 @@ pub enum ManifestError { #[must_use] pub fn map_yaml_error( err: YamlError, - src: &str, - name: &str, + src: &ManifestSource, + name: &ManifestName, ) -> Box { let loc = err.location(); let (line, col, span) = loc.map_or((1, 1, None), |l| { @@ -121,7 +209,7 @@ pub fn map_yaml_error( } Box::new(YamlDiagnostic { - src: NamedSource::new(name, src.to_string()), + src: NamedSource::new(name.as_ref(), src.as_ref().to_string()), span, help: hint, source: err, @@ -141,9 +229,9 @@ struct DataDiagnostic { #[must_use] pub fn map_data_error( err: serde_json::Error, - name: &str, + name: &ManifestName, ) -> Box { - let message = format!("manifest structure error in {name}: {err}"); + let message = format!("manifest structure error in {}: {err}", name.as_ref()); Box::new(DataDiagnostic { source: err, message, @@ -156,21 +244,23 @@ mod tests { #[test] fn map_yaml_error_includes_tab_hint() { - let src = "\tkey: \"unterminated"; - let err = - serde_saphyr::from_str::(src).expect_err("expected parse error"); - let diag = map_yaml_error(err, src, "test"); + let src = ManifestSource::from("\tkey: \"unterminated"); + let err = serde_saphyr::from_str::(src.as_ref()) + .expect_err("expected parse error"); + let name = ManifestName::from("test"); + let diag = map_yaml_error(err, &src, &name); let msg = diag.to_string(); assert!(msg.contains("Use spaces for indentation"), "message: {msg}"); } #[test] fn map_yaml_error_defaults_location_when_missing() { - let src = "foo: [1"; + let src = ManifestSource::from("foo: [1"); let err = serde_saphyr::Error::Eof { location: serde_saphyr::Location::UNKNOWN, }; - let diag = map_yaml_error(err, src, "test"); + let name = ManifestName::from("test"); + let diag = map_yaml_error(err, &src, &name); assert!(diag.to_string().contains("line 1, column 1")); } } From 75a78820c387b01555b41293a4dcd569808fc318 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 17 Oct 2025 19:08:19 +0100 Subject: [PATCH 3/8] Refine manifest JSON diagnostics --- src/manifest.rs | 23 ++++++++++--- src/manifest/diagnostics.rs | 62 ++++++++++++++++++++++++++++------- src/manifest/expand.rs | 43 ++++++++++++------------ src/manifest/jinja_macros.rs | 4 +-- src/manifest/render.rs | 15 +++++---- src/manifest/tests.rs | 57 ++++++++++++++++++++++++++------ tests/ast_tests.rs | 44 +++++++++++++++++++++++++ tests/manifest_jinja_tests.rs | 4 ++- tests/steps/manifest_steps.rs | 4 +-- tests/yaml_error_tests.rs | 15 +++++++++ 10 files changed, 211 insertions(+), 60 deletions(-) diff --git a/src/manifest.rs b/src/manifest.rs index 006c344d..59230a48 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -10,7 +10,7 @@ use crate::ast::NetsukeManifest; use anyhow::{Context, Result}; use minijinja::{Environment, Error, ErrorKind, UndefinedBehavior, value::Value}; -use serde_json::Value as YamlValue; +use serde::de::Error as _; use std::{fs, path::Path}; mod diagnostics; @@ -20,6 +20,9 @@ mod hints; mod jinja_macros; mod render; +pub type ManifestValue = serde_json::Value; +pub type ManifestMap = serde_json::Map; + pub use diagnostics::{ ManifestError, ManifestName, ManifestSource, map_data_error, map_yaml_error, }; @@ -71,9 +74,10 @@ fn env_var(name: &str) -> std::result::Result { /// /// Returns an error if YAML parsing or Jinja evaluation fails. fn from_str_named(yaml: &str, name: &ManifestName) -> Result { - let mut doc: YamlValue = serde_saphyr::from_str(yaml).map_err(|e| ManifestError::Parse { - source: map_yaml_error(e, &ManifestSource::from(yaml), name), - })?; + let mut doc: ManifestValue = + serde_saphyr::from_str(yaml).map_err(|e| ManifestError::Parse { + source: map_yaml_error(e, &ManifestSource::from(yaml), name), + })?; let mut jinja = Environment::new(); jinja.set_undefined_behavior(UndefinedBehavior::Strict); @@ -82,7 +86,16 @@ fn from_str_named(yaml: &str, name: &ManifestName) -> Result { jinja.add_function("glob", |pattern: String| glob_paths(&pattern)); let _stdlib_state = crate::stdlib::register(&mut jinja); - if let Some(vars) = doc.get("vars").and_then(|v| v.as_object()).cloned() { + if let Some(vars_value) = doc.get("vars") { + let vars = vars_value + .as_object() + .cloned() + .ok_or_else(|| ManifestError::Parse { + source: map_data_error( + serde_json::Error::custom("manifest vars must be an object with string keys"), + name, + ), + })?; for (key, value) in vars { jinja.add_global(key, Value::from_serialize(value)); } diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index 3e26f816..4be29c35 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -93,30 +93,30 @@ impl std::fmt::Display for ManifestName { use super::hints::YAML_HINTS; -fn saturating_usize(value: u64) -> usize { - usize::try_from(value.min(usize::MAX as u64)).unwrap_or(usize::MAX) +fn byte_index(src: &ManifestSource, loc: Location) -> usize { + byte_index_at(src.as_ref(), loc.line(), loc.column()) } -fn location_to_index(src: &ManifestSource, loc: Location) -> usize { - let target_line = saturating_usize(loc.line().saturating_sub(1)); - let target_column = saturating_usize(loc.column().saturating_sub(1)); +fn byte_index_at(src: &str, line: u64, column: u64) -> usize { + let target_line = usize::try_from(line.saturating_sub(1)).unwrap_or(usize::MAX); + let target_column = usize::try_from(column.saturating_sub(1)).unwrap_or(usize::MAX); let mut offset = 0usize; - for (idx, segment) in src.as_ref().split_inclusive('\n').enumerate() { + for (idx, segment) in src.split_inclusive('\n').enumerate() { if idx == target_line { let line = segment.strip_suffix('\n').unwrap_or(segment); - let byte_index = line + let column_offset = line .char_indices() .nth(target_column) .map_or(line.len(), |(byte_idx, _)| byte_idx); - return offset + byte_index; + return offset + column_offset; } offset += segment.len(); } - src.as_ref().len() + src.len() } fn to_span(src: &ManifestSource, loc: Location) -> SourceSpan { - let at = location_to_index(src, loc); + let at = byte_index(src, loc); let bytes = src.as_ref().as_bytes(); let (start, end) = match bytes.get(at) { Some(&b) if b != b'\n' => (at, at + 1), @@ -151,7 +151,7 @@ struct YamlDiagnostic { fn has_tab_indent(src: &ManifestSource, loc: Option) -> bool { let Some(loc) = loc else { return false }; - let line_idx = saturating_usize(loc.line().saturating_sub(1)); + let line_idx = usize::try_from(loc.line().saturating_sub(1)).unwrap_or(usize::MAX); let line = src.as_ref().lines().nth(line_idx).unwrap_or(""); line.chars() .take_while(|c| c.is_whitespace()) @@ -245,7 +245,7 @@ mod tests { #[test] fn map_yaml_error_includes_tab_hint() { let src = ManifestSource::from("\tkey: \"unterminated"); - let err = serde_saphyr::from_str::(src.as_ref()) + let err = serde_saphyr::from_str::(src.as_ref()) .expect_err("expected parse error"); let name = ManifestName::from("test"); let diag = map_yaml_error(err, &src, &name); @@ -264,3 +264,41 @@ mod tests { assert!(diag.to_string().contains("line 1, column 1")); } } + +#[cfg(test)] +fn expected_offset(src: &str, column: u64) -> usize { + src.chars() + .take(usize::try_from(column.saturating_sub(1)).unwrap_or(usize::MAX)) + .map(char::len_utf8) + .sum() +} + +#[cfg(test)] +mod byte_index_tests { + use super::{byte_index_at, expected_offset}; + + #[test] + fn byte_index_accounts_for_multibyte_characters() { + let line = "emoji: 😀value"; + let column = 9; // just after the emoji, before the 'v'. + let offset = byte_index_at(line, 1, column); + assert_eq!(offset, expected_offset(line, column)); + } + + #[test] + fn byte_index_clamps_past_line_end() { + let line = "short"; + let column = 42; + let offset = byte_index_at(line, 1, column); + assert_eq!(offset, line.len()); + } + + #[test] + fn byte_index_advances_over_previous_lines() { + let src = "one\ntwo\nthree"; + let column = 3; // 'r' in "three" + let offset = byte_index_at(src, 3, column); + let expected = "one\ntwo\n".len() + expected_offset("three", column); + assert_eq!(offset, expected); + } +} diff --git a/src/manifest/expand.rs b/src/manifest/expand.rs index ce70efa5..f78a89d6 100644 --- a/src/manifest/expand.rs +++ b/src/manifest/expand.rs @@ -1,9 +1,8 @@ //! Expands manifest foreach directives into concrete targets. +use super::{ManifestMap, ManifestValue}; use anyhow::{Context, Result}; use minijinja::{Environment, context, value::Value}; -use serde_json::{Number as JsonNumber, Value as YamlValue}; - -type YamlMapping = serde_json::Map; +use serde_json::Number as JsonNumber; /// Expand manifest targets defined with the `foreach` key. /// @@ -11,7 +10,7 @@ type YamlMapping = serde_json::Map; /// /// Returns an error when evaluating `foreach` or `when` expressions, when /// iteration values fail to serialise, or when target metadata is malformed. -pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { +pub fn expand_foreach(doc: &mut ManifestValue, env: &Environment) -> Result<()> { let Some(targets) = doc.get_mut("targets").and_then(|v| v.as_array_mut()) else { return Ok(()); }; @@ -19,7 +18,7 @@ pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { let mut expanded = Vec::new(); for target in std::mem::take(targets) { match target { - YamlValue::Object(map) => expanded.extend(expand_target(map, env)?), + ManifestValue::Object(map) => expanded.extend(expand_target(map, env)?), other => expanded.push(other), } } @@ -28,7 +27,7 @@ pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { Ok(()) } -fn expand_target(map: YamlMapping, env: &Environment) -> Result> { +fn expand_target(map: ManifestMap, env: &Environment) -> Result> { if let Some(expr_val) = map.get("foreach") { let values = parse_foreach_values(expr_val, env)?; let mut items = Vec::new(); @@ -39,15 +38,15 @@ fn expand_target(map: YamlMapping, env: &Environment) -> Result> continue; } inject_iteration_vars(&mut clone, &item, index)?; - items.push(YamlValue::Object(clone)); + items.push(ManifestValue::Object(clone)); } Ok(items) } else { - Ok(vec![YamlValue::Object(map)]) + Ok(vec![ManifestValue::Object(map)]) } } -fn parse_foreach_values(expr_val: &YamlValue, env: &Environment) -> Result> { +fn parse_foreach_values(expr_val: &ManifestValue, env: &Environment) -> Result> { if let Some(seq) = expr_val.as_array() { return Ok(seq.iter().cloned().map(Value::from_serialize).collect()); } @@ -60,7 +59,7 @@ fn parse_foreach_values(expr_val: &YamlValue, env: &Environment) -> Result Result<()> { +fn inject_iteration_vars(map: &mut ManifestMap, item: &Value, index: usize) -> Result<()> { let mut vars = match map.remove("vars") { - None => YamlMapping::new(), - Some(YamlValue::Object(m)) => m, + None => ManifestMap::new(), + Some(ManifestValue::Object(m)) => m, Some(other) => { return Err(anyhow::anyhow!( - "target.vars must be a mapping, got: {other:?}" + "target.vars must be an object, got: {other:?}" )); } }; @@ -88,15 +87,15 @@ fn inject_iteration_vars(map: &mut YamlMapping, item: &Value, index: usize) -> R "item".into(), serde_json::to_value(item).context("serialise item")?, ); - let index_value = YamlValue::Number(JsonNumber::from( + let index_value = ManifestValue::Number(JsonNumber::from( u64::try_from(index).expect("index overflow"), )); vars.insert("index".into(), index_value); - map.insert("vars".into(), YamlValue::Object(vars)); + map.insert("vars".into(), ManifestValue::Object(vars)); Ok(()) } -fn as_str<'a>(value: &'a YamlValue, field: &str) -> Result<&'a str> { +fn as_str<'a>(value: &'a ManifestValue, field: &str) -> Result<&'a str> { value .as_str() .ok_or_else(|| anyhow::anyhow!("{field} must be a string expression")) @@ -117,7 +116,7 @@ mod tests { #[test] fn expand_foreach_expands_sequence_values() -> Result<()> { let env = Environment::new(); - let mut doc: YamlValue = serde_saphyr::from_str( + let mut doc: ManifestValue = serde_saphyr::from_str( "targets: - name: literal foreach: @@ -140,11 +139,11 @@ mod tests { .expect("vars map"); let index_val = vars.get("index").expect("index value"); let item_val = vars.get("item").expect("item value"); - let YamlValue::Number(index_num) = index_val else { + let ManifestValue::Number(index_num) = index_val else { panic!("index should be numeric: {index_val:?}"); }; assert_eq!(index_num.as_u64().expect("u64"), idx as u64); - let YamlValue::Number(item_num) = item_val else { + let ManifestValue::Number(item_num) = item_val else { panic!("item should be numeric: {item_val:?}"); }; assert_eq!(item_num.as_u64().expect("u64"), (idx + 1) as u64); @@ -155,7 +154,7 @@ mod tests { #[test] fn expand_foreach_applies_when_expression() -> Result<()> { let env = Environment::new(); - let mut doc: YamlValue = serde_saphyr::from_str( + let mut doc: ManifestValue = serde_saphyr::from_str( "targets: - name: literal foreach: '[1, 2, 3]' @@ -175,7 +174,7 @@ mod tests { .get("vars") .and_then(|v| v.as_object()) .expect("vars map"); - let YamlValue::Number(num) = vars.get("index").expect("index value") else { + let ManifestValue::Number(num) = vars.get("index").expect("index value") else { panic!("index missing"); }; num.as_u64().expect("u64") diff --git a/src/manifest/jinja_macros.rs b/src/manifest/jinja_macros.rs index 604766b8..b52ffc69 100644 --- a/src/manifest/jinja_macros.rs +++ b/src/manifest/jinja_macros.rs @@ -5,13 +5,13 @@ //! rendering environment so manifest templates can invoke them like built-in //! helpers. +use super::ManifestValue; use crate::ast::MacroDefinition; use anyhow::{Context, Result}; use minijinja::{ AutoEscape, Environment, Error, ErrorKind, State, value::{Kwargs, Object, Rest, Value}, }; -use serde_json::Value as YamlValue; use std::{ mem, ptr::NonNull, @@ -97,7 +97,7 @@ pub(crate) fn register_macro( /// /// Returns an error if the YAML shape is invalid, any macro signature is /// malformed, or template compilation fails. -pub(crate) fn register_manifest_macros(doc: &YamlValue, env: &mut Environment) -> Result<()> { +pub(crate) fn register_manifest_macros(doc: &ManifestValue, env: &mut Environment) -> Result<()> { let Some(macros) = doc.get("macros").cloned() else { return Ok(()); }; diff --git a/src/manifest/render.rs b/src/manifest/render.rs index d7cf3942..0740128e 100644 --- a/src/manifest/render.rs +++ b/src/manifest/render.rs @@ -1,8 +1,8 @@ //! Renders manifest templates using `MiniJinja` before execution. +use super::ManifestValue; use crate::ast::{NetsukeManifest, Recipe, StringOrList, Target, Vars}; use anyhow::{Context, Result}; use minijinja::Environment; -use serde_json::Value as YamlValue; /// Render manifest targets and rules by evaluating template expressions. /// @@ -67,7 +67,7 @@ fn render_target(target: &mut Target, env: &Environment) -> Result<()> { fn render_vars(vars: &mut Vars, env: &Environment) -> Result<()> { let snapshot = vars.clone(); for (key, value) in vars.iter_mut() { - if let YamlValue::String(s) = value { + if let ManifestValue::String(s) = value { *s = render_str_with(env, s, &snapshot, || format!("render var '{key}'"))?; } } @@ -107,11 +107,11 @@ mod tests { fn sample_manifest() -> Result { let mut target_vars = Vars::new(); - target_vars.insert("greet".into(), YamlValue::String("hello".into())); - target_vars.insert("subject".into(), YamlValue::String("world".into())); + target_vars.insert("greet".into(), ManifestValue::String("hello".into())); + target_vars.insert("subject".into(), ManifestValue::String("world".into())); target_vars.insert( "message".into(), - YamlValue::String("{{ greet }} {{ subject }}".into()), + ManifestValue::String("{{ greet }} {{ subject }}".into()), ); let target = Target { @@ -137,7 +137,10 @@ mod tests { }; let mut manifest_vars = Vars::new(); - manifest_vars.insert("message".into(), YamlValue::String("hello world".into())); + manifest_vars.insert( + "message".into(), + ManifestValue::String("hello world".into()), + ); Ok(NetsukeManifest { netsuke_version: Version::parse("1.0.0")?, diff --git a/src/manifest/tests.rs b/src/manifest/tests.rs index 520a25b8..4bdaee52 100644 --- a/src/manifest/tests.rs +++ b/src/manifest/tests.rs @@ -11,7 +11,6 @@ use minijinja::{ value::{Kwargs, Value}, }; use rstest::{fixture, rstest}; -use serde_json::Map as Mapping; fn render_with(env: &Environment, template: &str) -> AnyResult { Ok(env.render_str(template, ())?) @@ -122,9 +121,12 @@ fn register_macro_is_reusable(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_validates_shape(mut strict_env: Environment) { - let mut mapping = Mapping::new(); - mapping.insert("macros".into(), YamlValue::Array(vec![YamlValue::from(42)])); - let doc = YamlValue::Object(mapping); + let mut mapping = ManifestMap::new(); + mapping.insert( + "macros".into(), + ManifestValue::Array(vec![ManifestValue::from(42)]), + ); + let doc = ManifestValue::Object(mapping); let err = register_manifest_macros(&doc, &mut strict_env).expect_err("shape error"); assert!( err.to_string() @@ -133,14 +135,49 @@ fn register_manifest_macros_validates_shape(mut strict_env: Environment) { ); } +#[rstest] +fn register_manifest_macros_rejects_non_string_values(mut strict_env: Environment) { + let mut macro_mapping = ManifestMap::new(); + macro_mapping.insert("signature".into(), ManifestValue::from("greet(name)")); + macro_mapping.insert( + "body".into(), + ManifestValue::Number(serde_json::Number::from(42)), + ); + let macros = ManifestValue::Array(vec![ManifestValue::Object(macro_mapping)]); + let mut doc = ManifestMap::new(); + doc.insert("macros".into(), macros); + let doc = ManifestValue::Object(doc); + + let err = register_manifest_macros(&doc, &mut strict_env) + .expect_err("non-string macro body should fail"); + let msg = err.to_string(); + assert!(msg.contains("macros"), "unexpected error: {msg}"); +} + +#[test] +fn manifest_macros_with_non_string_keys_fail_to_parse() { + let yaml = r#" +macros: + - ? [not, string] + : signature: "greet(name)" + body: "Hello" +"#; + let err = serde_saphyr::from_str::(yaml).expect_err("expected parse failure"); + let msg = err.to_string(); + assert!( + msg.contains("expected string scalar") || msg.contains("key") || msg.contains("mapping"), + "{msg}" + ); +} + #[rstest] fn register_manifest_macros_requires_body(mut strict_env: Environment) { - let mut macro_mapping = Mapping::new(); - macro_mapping.insert("signature".into(), YamlValue::from("greet(name)")); - let macros = YamlValue::Array(vec![YamlValue::Object(macro_mapping)]); - let mut doc = Mapping::new(); + let mut macro_mapping = ManifestMap::new(); + macro_mapping.insert("signature".into(), ManifestValue::from("greet(name)")); + let macros = ManifestValue::Array(vec![ManifestValue::Object(macro_mapping)]); + let mut doc = ManifestMap::new(); doc.insert("macros".into(), macros); - let doc = YamlValue::Object(doc); + let doc = ManifestValue::Object(doc); let err = register_manifest_macros(&doc, &mut strict_env).expect_err("missing macro body"); assert!(err.to_string().contains("body"), "{err}"); @@ -148,7 +185,7 @@ fn register_manifest_macros_requires_body(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_supports_multiple(mut strict_env: Environment) { - let yaml = serde_saphyr::from_str::( + let yaml = serde_saphyr::from_str::( "macros:\n - signature: \"greet(name)\"\n body: |\n Hello {{ name }}\n - signature: \"shout(text)\"\n body: |\n {{ text | upper }}\n", ) .expect("yaml value"); diff --git a/tests/ast_tests.rs b/tests/ast_tests.rs index febfbfa2..80622052 100644 --- a/tests/ast_tests.rs +++ b/tests/ast_tests.rs @@ -81,6 +81,28 @@ fn unknown_fields() { assert!(parse_manifest(yaml).is_err()); } +#[test] +fn vars_section_must_be_object() { + let yaml = r#" + netsuke_version: "1.0.0" + vars: + - not: mapping + targets: + - name: hello + command: "echo hi" + "#; + let err = parse_manifest(yaml).expect_err("vars should be an object"); + let chain = err + .chain() + .map(ToString::to_string) + .collect::>() + .join("\n"); + assert!( + chain.contains("vars must be an object with string keys"), + "unexpected error message: {chain}" + ); +} + #[test] fn empty_lists_and_maps() { let yaml = r#" @@ -222,6 +244,28 @@ fn parses_macro_definitions() { assert!(serialised.contains("Hello {{ name }}")); } +#[test] +fn macro_serialization_with_special_characters_round_trips() { + let special_signature = "greet_special(name, emoji='😀', note=\"hi\")"; + let special_body = "Hello \"{{ name }}\"\nLine two with unicode 😀"; + + let macro_def = MacroDefinition { + signature: special_signature.to_string(), + body: special_body.to_string(), + }; + + let serialised = serde_saphyr::to_string(&vec![macro_def.clone()]).expect("serialise macros"); + assert!(serialised.contains("greet_special")); + assert!(serialised.contains("unicode 😀")); + + let deserialised: Vec = + serde_saphyr::from_str(&serialised).expect("deserialise macros"); + assert_eq!(deserialised.len(), 1); + let recovered = deserialised.first().expect("macro entry"); + assert_eq!(recovered.signature, macro_def.signature); + assert_eq!(recovered.body, macro_def.body); +} + #[test] fn macro_definition_rejects_invalid_types() { let yaml = r#" diff --git a/tests/manifest_jinja_tests.rs b/tests/manifest_jinja_tests.rs index 79f059e2..5c7d8e3e 100644 --- a/tests/manifest_jinja_tests.rs +++ b/tests/manifest_jinja_tests.rs @@ -449,7 +449,9 @@ fn foreach_vars_must_be_mapping() { let err = manifest::from_str(&yaml).expect_err("parse should fail"); assert!( - err.to_string().contains("target.vars must be a mapping"), + err.chain() + .map(ToString::to_string) + .any(|msg| msg.contains("target.vars must be an object")), "unexpected error: {err}" ); } diff --git a/tests/steps/manifest_steps.rs b/tests/steps/manifest_steps.rs index 9cf4c79b..a3065abf 100644 --- a/tests/steps/manifest_steps.rs +++ b/tests/steps/manifest_steps.rs @@ -4,7 +4,7 @@ use crate::CliWorld; use cucumber::{given, then, when}; use netsuke::{ ast::{Recipe, StringOrList, Target}, - manifest, + manifest::{self, ManifestValue}, }; use std::collections::BTreeSet; use std::ffi::OsStr; @@ -296,7 +296,7 @@ fn assert_target_index(world: &CliWorld, index: usize, expected: usize) { let actual = target .vars .get(INDEX_KEY) - .and_then(serde_json::Value::as_u64) + .and_then(ManifestValue::as_u64) .and_then(|n| usize::try_from(n).ok()) .unwrap_or_else(|| panic!("target {index} missing index")); assert_eq!(actual, expected, "unexpected index for target {index}"); diff --git a/tests/yaml_error_tests.rs b/tests/yaml_error_tests.rs index e6dbc6a7..905bf230 100644 --- a/tests/yaml_error_tests.rs +++ b/tests/yaml_error_tests.rs @@ -26,6 +26,21 @@ fn normalise_report(report: &str) -> String { "simple key expect ':'", ], )] +#[case( + concat!( + "netsuke_version: '1.0.0'\n", + "targets:\n", + " - name: root\n", + " command: echo\n", + " vars:\n", + " nested:\n", + " deeper: { key: value\n", + ), + &[ + "line 8, column 1", + "did not find expected ',' or '}'", + ], +)] #[case( concat!( "targets:\n", From badb000e177b413db18e052cd8edf4d5484d25c8 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 17 Oct 2025 20:04:59 +0100 Subject: [PATCH 4/8] Clarify manifest docs and diagnostics --- src/ast.rs | 15 ++++++++++++- src/manifest/diagnostics.rs | 42 +++++++++++++++++++++++++++++++++++-- src/manifest/expand.rs | 4 +--- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 6602c7b8..a0818928 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -16,12 +16,25 @@ //! assert_eq!(name, "hello"); //! } //! ``` +//! +//! For most applications you should prefer the high-level +//! [`manifest::from_str`](crate::manifest::from_str) helper, which validates and +//! reports diagnostics consistently: +//! +//! ```rust +//! use netsuke::manifest; +//! +//! let yaml = "netsuke_version: \"1.0.0\"\ntargets:\n - name: hello\n command: \"echo hi\""; +//! let manifest = manifest::from_str(yaml).expect("parse"); +//! assert_eq!(manifest.targets.len(), 1); +//! ``` use semver::Version; use serde::{Deserialize, Serialize, de::Deserializer}; use std::collections::HashMap; -/// Map type for `vars` blocks, preserving YAML values. +/// Map type for `vars` blocks, preserving JSON values produced by the YAML +/// parser. pub type Vars = HashMap; fn deserialize_actions<'de, D>(deserializer: D) -> Result, D::Error> diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index 4be29c35..8153e42c 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -104,6 +104,7 @@ fn byte_index_at(src: &str, line: u64, column: u64) -> usize { for (idx, segment) in src.split_inclusive('\n').enumerate() { if idx == target_line { let line = segment.strip_suffix('\n').unwrap_or(segment); + let line = line.strip_suffix('\r').unwrap_or(line); let column_offset = line .char_indices() .nth(target_column) @@ -118,10 +119,11 @@ fn byte_index_at(src: &str, line: u64, column: u64) -> usize { fn to_span(src: &ManifestSource, loc: Location) -> SourceSpan { let at = byte_index(src, loc); let bytes = src.as_ref().as_bytes(); + let is_line_break = |b: u8| b == b'\n' || b == b'\r'; let (start, end) = match bytes.get(at) { - Some(&b) if b != b'\n' => (at, at + 1), + Some(&b) if !is_line_break(b) => (at, at + 1), _ => { - let start = if at > 0 && bytes.get(at - 1).is_some_and(|p| *p != b'\n') { + let start = if at > 0 && bytes.get(at - 1).is_some_and(|p| !is_line_break(*p)) { at - 1 } else { at @@ -190,6 +192,11 @@ pub enum ManifestError { }, } +/// Map a `serde_saphyr` YAML parse error into a [`miette`] diagnostic. +/// +/// The diagnostic includes the offending span when `serde_saphyr` reports byte +/// offsets, and attempts to attach contextual hints for common mistakes such as +/// tab indentation. #[must_use] pub fn map_yaml_error( err: YamlError, @@ -226,6 +233,10 @@ struct DataDiagnostic { message: String, } +/// Map a [`serde_json`] structural error into a diagnostic without a source +/// span. `serde_json` does not report byte offsets for data validation failures, +/// so the resulting diagnostic only carries the manifest name and error +/// message. #[must_use] pub fn map_data_error( err: serde_json::Error, @@ -241,6 +252,7 @@ pub fn map_data_error( #[cfg(test)] mod tests { use super::*; + use std::error::Error as StdError; #[test] fn map_yaml_error_includes_tab_hint() { @@ -263,6 +275,23 @@ mod tests { let diag = map_yaml_error(err, &src, &name); assert!(diag.to_string().contains("line 1, column 1")); } + + #[test] + fn map_yaml_error_span_skips_carriage_return() { + let src = ManifestSource::from("targets:\r\n - name: hi\r\n command echo\r\n"); + let err = serde_saphyr::from_str::(src.as_ref()) + .expect_err("expected parse error"); + let name = ManifestName::from("test"); + let diag = map_yaml_error(err, &src, &name); + let yaml_diag = (&*diag as &(dyn StdError + 'static)) + .downcast_ref::() + .expect("expected YAML diagnostic"); + let span = yaml_diag.span.expect("span present"); + let offset = span.offset(); + if let Some(byte) = src.as_ref().as_bytes().get(offset) { + assert_ne!(*byte, b'\r'); + } + } } #[cfg(test)] @@ -301,4 +330,13 @@ mod byte_index_tests { let expected = "one\ntwo\n".len() + expected_offset("three", column); assert_eq!(offset, expected); } + + #[test] + fn byte_index_handles_crlf_lines() { + let src = "one\r\ntwo\r\nthree"; + let column = 2; // 'w' in "two" + let offset = byte_index_at(src, 2, column); + let expected = "one\r\n".len() + expected_offset("two", column); + assert_eq!(offset, expected); + } } diff --git a/src/manifest/expand.rs b/src/manifest/expand.rs index f78a89d6..24a2ca3d 100644 --- a/src/manifest/expand.rs +++ b/src/manifest/expand.rs @@ -87,9 +87,7 @@ fn inject_iteration_vars(map: &mut ManifestMap, item: &Value, index: usize) -> R "item".into(), serde_json::to_value(item).context("serialise item")?, ); - let index_value = ManifestValue::Number(JsonNumber::from( - u64::try_from(index).expect("index overflow"), - )); + let index_value = ManifestValue::Number(JsonNumber::from(index as u64)); vars.insert("index".into(), index_value); map.insert("vars".into(), ManifestValue::Object(vars)); Ok(()) From 16538c74e6dfbef2d576cc6c8a19577e1db5ef76 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 17 Oct 2025 21:55:09 +0100 Subject: [PATCH 5/8] Clarify manifest API typing and docs --- ...adr-replace-serde-yml-with-serdy-saphyr.md | 41 ++++++------ docs/netsuke-design.md | 64 +++++++++++-------- src/manifest.rs | 11 ++++ src/manifest/diagnostics.rs | 15 +++-- 4 files changed, 81 insertions(+), 50 deletions(-) diff --git a/docs/adr-replace-serde-yml-with-serdy-saphyr.md b/docs/adr-replace-serde-yml-with-serdy-saphyr.md index a632e58a..1a1bebcd 100644 --- a/docs/adr-replace-serde-yml-with-serdy-saphyr.md +++ b/docs/adr-replace-serde-yml-with-serdy-saphyr.md @@ -13,11 +13,12 @@ even preferred. The `serde-yml` crate has recently been **deprecated/archived** and raised concerns around maintenance and safety. It was a fork of `serde_yaml` using the C libyaml parser via unsafe code, and introduced unsound behaviour (segfaults -were demonstrated)([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L176-L184))([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L194-L203)). -This situation prompts the need to choose a more robust and actively maintained -YAML+Serde library. There are no constraints requiring extremely small binaries -or WebAssembly support, so the selection can focus on the best available -solution without platform limitations. +were +demonstrated)([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L176-L184))([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L194-L203)). + This situation prompts the need to choose a more robust and actively +maintained YAML+Serde library. There are no constraints requiring extremely +small binaries or WebAssembly support, so the selection can focus on the best +available solution without platform limitations. ## Decision Outcome (Summary) @@ -28,13 +29,12 @@ it offers: - **Safety:** A pure Rust implementation with **no `unsafe` libyaml dependencies**([2](https://www.reddit.com/r/rust/comments/1bo5dle/we_lost_serdeyaml_whats_the_next_one/#:~:text=For%20the%20,key%20support%20and%20nested%20enums))([2](https://www.reddit.com/r/rust/comments/1bo5dle/we_lost_serdeyaml_whats_the_next_one/#:~:text=Those%20who%20dislike%20unsafe%20statements,it%20is%20also%20notably%20faster)), - eliminating C library risks. + eliminating C library risks. - **Spec Compliance:** Full YAML 1.2 support (including proper handling of anchors and merge keys)([3](https://github.com/saphyr-rs/saphyr#:~:text=Specification%20Compliance)), - which aligns with Netsuke requirements and future-proofs the manifest - format. + which aligns with Netsuke requirements and future-proofs the manifest format. - **Robustness:** Built-in handling for resource limits and duplicate keys (configurable budgets and policies) to prevent pathological YAML from causing @@ -89,8 +89,7 @@ Several YAML+Serde libraries were evaluated against the project requirements: but as of now it hasn’t eliminated the core safety issue. It also does not add new features beyond what `serde_yaml` had. Given that Netsuke does not need YAML 1.1 quirks and prefers to avoid C library dependencies, this is - only a - marginal improvement over `serde_yml`. + only a marginal improvement over `serde_yml`. - **`serde_norway`:** Another community fork of `serde_yaml` (“norway” being a codename) which is similarly intended to maintain YAML support. It is also @@ -225,7 +224,7 @@ It’s worth noting that `serde_yaml_ng`’s maintainer is working on integratin safer libyaml or alternative parser([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L224-L232)), and `serde_norway` may similarly evolve. However, these efforts are uncertain - in timeline and still wouldn’t necessarily achieve YAML 1.2 compliance or the +in timeline and still wouldn’t necessarily achieve YAML 1.2 compliance or the one-pass design delivered by Saphyr. Meanwhile, `serde_saphyr` is already delivering these advantages today. Adopting it now positions Netsuke on a forward-looking path with minimal downsides. @@ -256,10 +255,10 @@ forward-looking path with minimal downsides. refactor that call site. `serde_saphyr` does **not use** a `Value` model internally. Netsuke currently parses directly into concrete structs (e.g., manifest definitions), so this scenario is unlikely, but confirm that no - intermediate YAML value logic is required (such as merging two YAML - documents or manually inspecting the YAML DOM). If such behaviour exists, - consider using the `saphyr` crate’s `Yaml` type or adjusting the design. For - now, the expectation is that no change is needed. + intermediate YAML value logic is required (such as merging two YAML documents + or manually inspecting the YAML DOM). If such behaviour exists, consider + using the `saphyr` crate’s `Yaml` type or adjusting the design. For now, the + expectation is that no change is needed. - Anchors and aliases in manifests will be automatically resolved by `serde_saphyr` during parsing. Netsuke does not output YAML with anchors, so @@ -313,8 +312,8 @@ forward-looking path with minimal downsides. serialisation of the data structures to a YAML string is sufficient. - **Documentation:** Update Netsuke’s documentation (README or user guide) to - note that Netsuke now supports **YAML 1.2** fully. If there are any changes in - accepted syntax from YAML 1.1 (e.g., the booleans example), call that out. + note that Netsuke now supports **YAML 1.2** fully. If there are any changes + in accepted syntax from YAML 1.1 (e.g., the booleans example), call that out. Also, document that duplicate keys in manifests will result in an error (if it wasn’t already documented as invalid). Essentially, clarify that manifests should adhere to standard YAML 1.2. @@ -337,8 +336,8 @@ forward-looking path with minimal downsides. **Positive consequences:** - Eliminates a deprecated and potentially unsafe dependency (`serde_yml` and - its underlying libyaml) in favour of a safer, pure-Rust solution. This reduces - the chance of YAML parsing causing crashes or security issues in + its underlying libyaml) in favour of a safer, pure-Rust solution. This + reduces the chance of YAML parsing causing crashes or security issues in Netsuke([5](https://users.rust-lang.org/t/new-serde-deserialization-framework-for-yaml-data-that-parses-yaml-into-rust-structures-without-building-syntax-tree/134306#:~:text=The%20library%20relies%20on%20saphyr,bw)). - Aligns with the YAML 1.2 spec, which means better consistency going forward @@ -372,8 +371,8 @@ forward-looking path with minimal downsides. their manifests could break. The existing manifests already use explicit booleans and similar constructs, but communicate this change clearly. This is more of an education/updating issue than a technical problem. The stricter - behaviour (and error on duplicate keys) will actually surface potential config - errors that could otherwise go unnoticed. + behaviour (and error on duplicate keys) will actually surface potential + config errors that could otherwise go unnoticed. - **Dependency churn:** Introducing a new library always carries a small risk in dependency tree changes. `serde-saphyr` brings in the `saphyr-parser` and diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 678eb937..7b562b95 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -51,14 +51,14 @@ before execution, a critical requirement for compatibility with Ninja. 3. Stage 3: Template Expansion - Netsuke walks the parsed `Value`, evaluating Jinja macros, variables, and the - `foreach` and `when` keys. Each mapping containing these keys is expanded - with an iteration context providing `item` and optional `index`. Variable - lookups respect the precedence `globals` < `target.vars` < per-iteration - locals, and this context is preserved for later rendering. At this stage - Jinja must not modify the YAML structure directly; control constructs live - only within these explicit keys. Structural Jinja blocks (`{% ... %}`) are - not permitted to reshape mappings or sequences. + Netsuke walks the parsed `Value`, evaluating Jinja macros, variables, and + the `foreach` and `when` keys. Each mapping containing these keys is + expanded with an iteration context providing `item` and optional `index`. + Variable lookups respect the precedence `globals` < `target.vars` < + per-iteration locals, and this context is preserved for later rendering. At + this stage Jinja must not modify the YAML structure directly; control + constructs live only within these explicit keys. Structural Jinja blocks + (`{% ... %}`) are not permitted to reshape mappings or sequences. 4. Stage 4: Deserialisation & Final Rendering @@ -165,7 +165,9 @@ level keys. - `vars`: A mapping of global key-value pairs. Keys must be strings. Values may be strings, numbers, booleans, or sequences. These variables seed the Jinja - templating context and drive control flow within the manifest. + templating context and drive control flow within the manifest. Non-string + YAML keys (for example integers) trigger a parse-time diagnostic because + Netsuke loads the values into a JSON object before Jinja evaluation. - `macros`: An optional list of Jinja macro definitions. Each item provides a `signature` string using standard Jinja syntax and a `body` declared with the @@ -428,11 +430,11 @@ releases. ### 3.2 Core Data Structures (`ast.rs`) -The Rust structs that `serde_saphyr` deserialises into form the Abstract -Syntax Tree (AST) of the build manifest. These structs must precisely mirror -the YAML schema defined in Section 2. They will be defined in a dedicated -module, `src/ast.rs`, and annotated with `#[derive(Deserialize)]` (and `Debug`) -to enable automatic deserialisation and easy debugging. +The Rust structs that `serde_saphyr` deserialises into form the Abstract Syntax +Tree (AST) of the build manifest. These structs must precisely mirror the YAML +schema defined in Section 2. They will be defined in a dedicated module, +`src/ast.rs`, and annotated with `#[derive(Deserialize)]` (and `Debug`) to +enable automatic deserialisation and easy debugging. Rust @@ -609,14 +611,18 @@ Unknown fields are rejected to surface user errors early. `StringOrList` provides a default `Empty` variant, so optional lists are trivial to represent. The manifest version is parsed using the `semver` crate to validate that it follows semantic versioning rules. Global and target variable maps now share -the `HashMap` type so booleans and sequences are -preserved for Jinja control flow. Targets also accept optional `phony` and -`always` booleans. They default to `false`, making it explicit when an action -should run regardless of file timestamps. Targets listed in the `actions` -section are deserialised using a custom helper so they are always treated as -`phony` tasks. This ensures preparation actions never generate build artefacts. -Convenience functions in `src/manifest.rs` load a manifest from a string or a -file path, returning `anyhow::Result` for straightforward error handling. +the `ManifestMap` alias (`serde_json::Map`) so booleans +and sequences are preserved for Jinja control flow while presenting a stable +public API surface. Targets also accept optional `phony` and `always` booleans. +They default to `false`, making it explicit when an action should run +regardless of file timestamps. Targets listed in the `actions` section are +deserialised using a custom helper so they are always treated as `phony` tasks. +This ensures preparation actions never generate build artefacts. Convenience +functions in `src/manifest.rs` load a manifest from a string or a file path, +returning `anyhow::Result` for straightforward error handling. Diagnostics now +wrap source and manifest identifiers in the `ManifestSource` and `ManifestName` +newtypes, allowing downstream tooling to reuse the strongly typed strings when +producing errors or logs. The ingestion pipeline now parses the manifest as YAML before any Jinja evaluation. A dedicated expansion pass handles `foreach` and `when`, and string @@ -1772,6 +1778,14 @@ with their staging directories to avoid collisions before attaching them to the GitHub release draft. This automated pipeline guarantees parity across Windows, Linux, and macOS without custom GoReleaser logic. +### 8.7 Release Notes + +Netsuke's manifest loader now re-exports the `ManifestValue` and `ManifestMap` +aliases alongside the `ManifestSource`, `ManifestName`, `map_yaml_error`, and +`map_data_error` helpers. Library consumers should upgrade to these symbols +when interacting with manifest data or diagnostics; the change is user-visible +and must be highlighted in the next crate release summary. + ## Section 9: Implementation Roadmap and Strategic Recommendations This final section outlines a strategic plan for implementing Netsuke, along @@ -1794,7 +1808,8 @@ goal. 1. Implement the initial `clap` CLI structure for the `build` command. - 2. Implement the YAML parser using `serde_saphyr` and the AST data structures + 2. Implement the YAML parser using `serde_saphyr` and the AST data + structures (`ast.rs`). 3. Implement the AST-to-IR transformation logic, including basic validation @@ -1862,7 +1877,7 @@ selected for this project and the rationale for their inclusion. | Component | Recommended Crate | Rationale | | -------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | | CLI Parsing | clap | The Rust standard for powerful, derive-based CLI development. | -| YAML Parsing | serde_saphyr | Maintained, panic-free YAML 1.2 parser with a serde-compatible API. | +| YAML Parsing | serde_saphyr | Maintained, panic-free YAML 1.2 parser with a serde-compatible API. | | Templating | minijinja | High compatibility with Jinja2, minimal dependencies, and supports runtime template loading. | | Shell Quoting | shell-quote | A critical security component; provides robust, shell-specific escaping for command arguments. | | Error Handling | anyhow + thiserror + miette | An idiomatic and powerful combination for creating rich, contextual, and user-friendly error reports with precise source spans. | @@ -1934,7 +1949,6 @@ projects. [^8]: "The Ninja build system." Ninja. Accessed on 12 July 2025\. -[^11]: "Saphyr libraries." crates.io. Accessed on 12 July 2025\. [^15]: "minijinja." crates.io. Accessed on 12 July 2025\. diff --git a/src/manifest.rs b/src/manifest.rs index 59230a48..79bab044 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -6,6 +6,17 @@ //! exposes `env()` to read environment variables and `glob()` to expand //! filesystem patterns during template evaluation. Both helpers fail fast when //! inputs are missing or patterns are invalid. +//! +//! Consumers interact with the intermediate manifest through the re-exported +//! [`ManifestValue`] and [`ManifestMap`] aliases. Diagnostics wrap manifest +//! identifiers in [`ManifestName`] and YAML source strings in +//! [`ManifestSource`] so callers pass domain-specific types instead of raw +//! strings. +//! +//! The optional `vars` section must deserialise into a JSON object with string +//! keys. YAML manifests that use non-string keys (for example integers) now +//! fail with a [`ManifestError::Parse`] diagnostic, matching the Jinja context +//! semantics and preventing ambiguous variable lookup. use crate::ast::NetsukeManifest; use anyhow::{Context, Result}; diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index 8153e42c..d8785e44 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -11,7 +11,7 @@ use thiserror::Error; /// let source = ManifestSource::from("foo: 1"); /// assert_eq!(source.as_str(), "foo: 1"); /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ManifestSource(String); impl ManifestSource { @@ -44,6 +44,12 @@ impl AsRef for ManifestSource { } } +impl std::fmt::Display for ManifestSource { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.0.as_str()) + } +} + /// Display name for a manifest source used in diagnostics. /// /// # Examples @@ -52,7 +58,7 @@ impl AsRef for ManifestSource { /// let name = ManifestName::new("Netsukefile"); /// assert_eq!(name.as_str(), "Netsukefile"); /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ManifestName(String); impl ManifestName { @@ -87,7 +93,7 @@ impl AsRef for ManifestName { impl std::fmt::Display for ManifestName { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) + f.write_str(self.0.as_str()) } } @@ -242,7 +248,8 @@ pub fn map_data_error( err: serde_json::Error, name: &ManifestName, ) -> Box { - let message = format!("manifest structure error in {}: {err}", name.as_ref()); + let message = + format!("[netsuke::manifest::structure] manifest structure error in {name}: {err}"); Box::new(DataDiagnostic { source: err, message, From 29011aba77476f03b62f05fcd9431132142ab441 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 18 Oct 2025 03:14:42 +0100 Subject: [PATCH 6/8] Refine manifest diagnostics index helpers --- src/manifest/diagnostics.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index d8785e44..048b11d3 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -99,7 +99,7 @@ impl std::fmt::Display for ManifestName { use super::hints::YAML_HINTS; -fn byte_index(src: &ManifestSource, loc: Location) -> usize { +fn location_to_index(src: &ManifestSource, loc: Location) -> usize { byte_index_at(src.as_ref(), loc.line(), loc.column()) } @@ -123,7 +123,7 @@ fn byte_index_at(src: &str, line: u64, column: u64) -> usize { } fn to_span(src: &ManifestSource, loc: Location) -> SourceSpan { - let at = byte_index(src, loc); + let at = location_to_index(src, loc); let bytes = src.as_ref().as_bytes(); let is_line_break = |b: u8| b == b'\n' || b == b'\r'; let (start, end) = match bytes.get(at) { @@ -248,8 +248,10 @@ pub fn map_data_error( err: serde_json::Error, name: &ManifestName, ) -> Box { - let message = - format!("[netsuke::manifest::structure] manifest structure error in {name}: {err}"); + let message = format!( + "[netsuke::manifest::structure] manifest structure error in {}: {err}", + name.as_ref() + ); Box::new(DataDiagnostic { source: err, message, @@ -299,6 +301,20 @@ mod tests { assert_ne!(*byte, b'\r'); } } + + #[test] + fn location_to_index_handles_utf8() { + // cafĂ©: 'Ă©' is multi-byte + let src = ManifestSource::from("cafĂ©: [\n"); + let err = serde_saphyr::from_str::(src.as_ref()) + .expect_err("expected parse error"); + let loc = err.location().expect("location present"); + let idx = location_to_index(&src, loc); + assert!(src.as_ref().is_char_boundary(idx)); + let e_idx = src.as_ref().find('Ă©').expect("contains Ă©"); + assert!(idx > e_idx, "index {idx} must follow Ă© at {e_idx}"); + assert!(idx <= src.as_ref().len()); + } } #[cfg(test)] From 659d5a9d634725c5c3ba971b5e0f6ec9709ef51e Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 18 Oct 2025 03:14:47 +0100 Subject: [PATCH 7/8] Preserve manifest ordering in foreach expansion --- Cargo.lock | 1 + Cargo.toml | 2 +- docs/netsuke-design.md | 9 ++++-- src/manifest/diagnostics.rs | 24 ++++++++++----- src/manifest/expand.rs | 58 +++++++++++++++++++++++++++++++------ 5 files changed, 75 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf208e1a..f38f1a04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1847,6 +1847,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 05681b92..1d18bd42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ indexmap = { version = "2.5", features = ["serde"] } glob = "0.3.3" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt"] } -serde_json = "1" +serde_json = { version = "1", features = ["preserve_order"] } serde_json_canonicalizer = "0.3" tempfile = "3.8.0" ninja_env = { path = "ninja_env" } diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 7b562b95..4fafbb40 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -624,6 +624,12 @@ wrap source and manifest identifiers in the `ManifestSource` and `ManifestName` newtypes, allowing downstream tooling to reuse the strongly typed strings when producing errors or logs. +`serde_json` is built with the `preserve_order` feature so the backing +`ManifestMap` retains the insertion order observed in the YAML manifest. This +guarantees that downstream consumers see keys in a stable sequence after +foreach expansion, matching the authoring intent and keeping diagnostics and +serialised output predictable. + The ingestion pipeline now parses the manifest as YAML before any Jinja evaluation. A dedicated expansion pass handles `foreach` and `when`, and string fields are rendered only after deserialisation, keeping data and templating @@ -1809,8 +1815,7 @@ goal. 1. Implement the initial `clap` CLI structure for the `build` command. 2. Implement the YAML parser using `serde_saphyr` and the AST data - structures - (`ast.rs`). + structures (`ast.rs`). 3. Implement the AST-to-IR transformation logic, including basic validation like checking for rule existence. diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index 048b11d3..5fbfbe57 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -100,10 +100,20 @@ impl std::fmt::Display for ManifestName { use super::hints::YAML_HINTS; fn location_to_index(src: &ManifestSource, loc: Location) -> usize { - byte_index_at(src.as_ref(), loc.line(), loc.column()) + byte_index(src, loc) } -fn byte_index_at(src: &str, line: u64, column: u64) -> usize { +fn byte_index(src: &ManifestSource, loc: Location) -> usize { + byte_index_components(src.as_ref(), loc.line(), loc.column()) +} + +/// Reconstruct the byte offset for a `serde_saphyr::Location`. +/// +/// `serde_saphyr` exposes only line and column accessors, so we derive the +/// byte index by iterating over the manifest source directly. The logic clamps +/// offsets that exceed the current line and tolerates both Unix (`\n`) and +/// Windows (`\r\n`) newlines. +fn byte_index_components(src: &str, line: u64, column: u64) -> usize { let target_line = usize::try_from(line.saturating_sub(1)).unwrap_or(usize::MAX); let target_column = usize::try_from(column.saturating_sub(1)).unwrap_or(usize::MAX); let mut offset = 0usize; @@ -327,13 +337,13 @@ fn expected_offset(src: &str, column: u64) -> usize { #[cfg(test)] mod byte_index_tests { - use super::{byte_index_at, expected_offset}; + use super::{byte_index_components, expected_offset}; #[test] fn byte_index_accounts_for_multibyte_characters() { let line = "emoji: 😀value"; let column = 9; // just after the emoji, before the 'v'. - let offset = byte_index_at(line, 1, column); + let offset = byte_index_components(line, 1, column); assert_eq!(offset, expected_offset(line, column)); } @@ -341,7 +351,7 @@ mod byte_index_tests { fn byte_index_clamps_past_line_end() { let line = "short"; let column = 42; - let offset = byte_index_at(line, 1, column); + let offset = byte_index_components(line, 1, column); assert_eq!(offset, line.len()); } @@ -349,7 +359,7 @@ mod byte_index_tests { fn byte_index_advances_over_previous_lines() { let src = "one\ntwo\nthree"; let column = 3; // 'r' in "three" - let offset = byte_index_at(src, 3, column); + let offset = byte_index_components(src, 3, column); let expected = "one\ntwo\n".len() + expected_offset("three", column); assert_eq!(offset, expected); } @@ -358,7 +368,7 @@ mod byte_index_tests { fn byte_index_handles_crlf_lines() { let src = "one\r\ntwo\r\nthree"; let column = 2; // 'w' in "two" - let offset = byte_index_at(src, 2, column); + let offset = byte_index_components(src, 2, column); let expected = "one\r\n".len() + expected_offset("two", column); assert_eq!(offset, expected); } diff --git a/src/manifest/expand.rs b/src/manifest/expand.rs index 24a2ca3d..e6b5c45b 100644 --- a/src/manifest/expand.rs +++ b/src/manifest/expand.rs @@ -2,7 +2,7 @@ use super::{ManifestMap, ManifestValue}; use anyhow::{Context, Result}; use minijinja::{Environment, context, value::Value}; -use serde_json::Number as JsonNumber; +use serde_json::{Number as JsonNumber, map::Entry}; /// Expand manifest targets defined with the `foreach` key. /// @@ -74,22 +74,30 @@ fn when_allows( } fn inject_iteration_vars(map: &mut ManifestMap, item: &Value, index: usize) -> Result<()> { - let mut vars = match map.remove("vars") { - None => ManifestMap::new(), - Some(ManifestValue::Object(m)) => m, - Some(other) => { - return Err(anyhow::anyhow!( - "target.vars must be an object, got: {other:?}" - )); + let vars_value = match map.entry("vars") { + Entry::Vacant(slot) => slot.insert(ManifestValue::Object(ManifestMap::new())), + Entry::Occupied(slot) => { + let value = slot.into_mut(); + match value { + ManifestValue::Object(_) => value, + other => { + return Err(anyhow::anyhow!( + "target.vars must be an object, got: {other:?}" + )); + } + } } }; + + let vars = vars_value + .as_object_mut() + .expect("vars entry ensured to be an object"); vars.insert( "item".into(), serde_json::to_value(item).context("serialise item")?, ); let index_value = ManifestValue::Number(JsonNumber::from(index as u64)); vars.insert("index".into(), index_value); - map.insert("vars".into(), ManifestValue::Object(vars)); Ok(()) } @@ -181,4 +189,36 @@ mod tests { assert_eq!(indexes, vec![1, 2]); Ok(()) } + + #[test] + fn expand_foreach_preserves_object_key_order() -> Result<()> { + let env = Environment::new(); + let yaml = r"targets: + - name: literal + vars: + existing: keep + foreach: + - 1 + - 2 + when: 'true' + after: done +"; + let mut doc: ManifestValue = serde_saphyr::from_str(yaml)?; + expand_foreach(&mut doc, &env)?; + let targets = doc + .get("targets") + .and_then(|v| v.as_array()) + .expect("targets sequence"); + assert_eq!(targets.len(), 2); + for target in targets { + let map = target.as_object().expect("target object"); + let keys: Vec<&str> = map.keys().map(String::as_str).collect(); + assert_eq!( + keys, + ["name", "vars", "after"], + "key order should remain stable" + ); + } + Ok(()) + } } From 01319231111987843cc4ac8ff502e3f1aa357a85 Mon Sep 17 00:00:00 2001 From: Payton McIntosh Date: Sat, 18 Oct 2025 23:46:48 +0100 Subject: [PATCH 8/8] Clarify manifest documentation - Spell out the `E0001` diagnostic emitted for non-string `vars` keys so consumers understand the parse-time failure mode. - Introduce the explicit `ManifestMap` alias snippet and connect it to `serde_json`'s `preserve_order` guarantee for determinism. - Reflow the surrounding narrative to keep the type alias and target flags discussion cohesive. Tests: make fmt make check-fmt make lint make markdownlint make nixie (with sandbox disabled) make test (fails: cucumber suite reports "missing build file: $2") --- docs/netsuke-design.md | 45 +++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index 4fafbb40..99ed8de9 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -166,8 +166,9 @@ level keys. - `vars`: A mapping of global key-value pairs. Keys must be strings. Values may be strings, numbers, booleans, or sequences. These variables seed the Jinja templating context and drive control flow within the manifest. Non-string - YAML keys (for example integers) trigger a parse-time diagnostic because - Netsuke loads the values into a JSON object before Jinja evaluation. + YAML keys (for example integers such as `1: value`) trigger a parse-time + diagnostic (`E0001: "vars key must be a string"`) because Netsuke loads the + values into a JSON object before Jinja evaluation. - `macros`: An optional list of Jinja macro definitions. Each item provides a `signature` string using standard Jinja syntax and a `body` declared with the @@ -611,24 +612,28 @@ Unknown fields are rejected to surface user errors early. `StringOrList` provides a default `Empty` variant, so optional lists are trivial to represent. The manifest version is parsed using the `semver` crate to validate that it follows semantic versioning rules. Global and target variable maps now share -the `ManifestMap` alias (`serde_json::Map`) so booleans -and sequences are preserved for Jinja control flow while presenting a stable -public API surface. Targets also accept optional `phony` and `always` booleans. -They default to `false`, making it explicit when an action should run -regardless of file timestamps. Targets listed in the `actions` section are -deserialised using a custom helper so they are always treated as `phony` tasks. -This ensures preparation actions never generate build artefacts. Convenience -functions in `src/manifest.rs` load a manifest from a string or a file path, -returning `anyhow::Result` for straightforward error handling. Diagnostics now -wrap source and manifest identifiers in the `ManifestSource` and `ManifestName` -newtypes, allowing downstream tooling to reuse the strongly typed strings when -producing errors or logs. - -`serde_json` is built with the `preserve_order` feature so the backing -`ManifestMap` retains the insertion order observed in the YAML manifest. This -guarantees that downstream consumers see keys in a stable sequence after -foreach expansion, matching the authoring intent and keeping diagnostics and -serialised output predictable. +the `ManifestMap` alias: + +```rust +type ManifestMap = serde_json::Map; +``` + +This alias preserves booleans and sequences needed for Jinja control flow while +presenting a stable public API surface. The `serde_json` library is built with +the `preserve_order` feature so the backing `ManifestMap` retains the insertion +order observed in the YAML manifest. This guarantees that downstream consumers +see keys in a stable sequence after foreach expansion, matching the authoring +intent and keeping diagnostics and serialised output predictable. Targets also +accept optional `phony` and `always` booleans. They default to `false`, making +it explicit when an action should run regardless of file timestamps. Targets +listed in the `actions` section are deserialised using a custom helper so they +are always treated as `phony` tasks. This ensures preparation actions never +generate build artefacts. Convenience functions in `src/manifest.rs` load a +manifest from a string or a file path, returning `anyhow::Result` for +straightforward error handling. Diagnostics now wrap source and manifest +identifiers in the `ManifestSource` and `ManifestName` newtypes, allowing +downstream tooling to reuse the strongly typed strings when producing errors or +logs. The ingestion pipeline now parses the manifest as YAML before any Jinja evaluation. A dedicated expansion pass handles `foreach` and `when`, and string