diff --git a/Cargo.lock b/Cargo.lock index 49965b16..f38f1a04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,6 +88,12 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + [[package]] name = "assert_cmd" version = "2.0.17" @@ -532,6 +538,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -744,6 +756,18 @@ name = "hashbrown" version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown", +] [[package]] name = "heck" @@ -800,7 +824,7 @@ dependencies = [ "icu_normalizer_data", "icu_properties", "icu_provider", - "smallvec", + "smallvec 1.15.1", "zerovec", ] @@ -856,7 +880,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", - "smallvec", + "smallvec 1.15.1", "utf8_iter", ] @@ -1037,16 +1061,6 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" -[[package]] -name = "libyml" -version = "0.0.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980" -dependencies = [ - "anyhow", - "version_check", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -1241,9 +1255,9 @@ dependencies = [ "rustix", "semver", "serde", + "serde-saphyr", "serde_json", "serde_json_canonicalizer", - "serde_yml", "serial_test", "sha1", "sha2", @@ -1265,6 +1279,12 @@ dependencies = [ name = "ninja_env" version = "0.1.0" +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -1369,7 +1389,7 @@ dependencies = [ "cfg-if", "libc", "redox_syscall", - "smallvec", + "smallvec 1.15.1", "windows-targets 0.52.6", ] @@ -1717,6 +1737,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "saphyr-parser" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb771b59f6b1985d1406325ec28f97cfb14256abcec4fdfb37b36a1766d6af7" +dependencies = [ + "arraydeque", + "hashlink", +] + [[package]] name = "scc" version = "2.3.4" @@ -1767,18 +1797,44 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde-saphyr" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f637bb99547414f8c08b7d4fe3e56b248fa9db36860e190988501ac1b3741eee" +dependencies = [ + "base64", + "nohash-hasher", + "num-traits", + "ryu", + "saphyr-parser", + "serde", + "serde_json", + "smallvec 2.0.0-alpha.11", +] + +[[package]] +name = 
"serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -1787,14 +1843,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.141" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -1808,21 +1866,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "serde_yml" -version = "0.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" -dependencies = [ - "indexmap", - "itoa", - "libyml", - "memchr", - "ryu", - "serde", - "version_check", -] - [[package]] name = "serial_test" version = "3.2.0" @@ -1915,6 +1958,12 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "smallvec" +version = "2.0.0-alpha.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87b96efa4bd6bdd2ff0c6615cc36fc4970cbae63cfd46ddff5cee35a1b4df570" + [[package]] name = "smart-default" version = "0.7.1" @@ -2244,7 +2293,7 @@ checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "nu-ansi-term", "sharded-slab", - 
"smallvec", + "smallvec 1.15.1", "thread_local", "tracing-core", "tracing-log", diff --git a/Cargo.toml b/Cargo.toml index 174740e1..1d18bd42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ legacy-digests = ["sha1", "md5"] [dependencies] clap = { version = "4.5.0", features = ["derive"] } serde = { version = "1", features = ["derive"] } -serde_yml = "0.0.12" +serde-saphyr = "0.0.6" minijinja = { version = "2.12.0", features = ["loader"] } cap-std = { version = "3.4.4", features = ["fs_utf8"] } camino = "1.2.0" @@ -37,7 +37,7 @@ indexmap = { version = "2.5", features = ["serde"] } glob = "0.3.3" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt"] } -serde_json = "1" +serde_json = { version = "1", features = ["preserve_order"] } serde_json_canonicalizer = "0.3" tempfile = "3.8.0" ninja_env = { path = "ninja_env" } diff --git a/docs/adr-replace-serde-yml-with-serdy-saphyr.md b/docs/adr-replace-serde-yml-with-serdy-saphyr.md index a632e58a..1a1bebcd 100644 --- a/docs/adr-replace-serde-yml-with-serdy-saphyr.md +++ b/docs/adr-replace-serde-yml-with-serdy-saphyr.md @@ -13,11 +13,12 @@ even preferred. The `serde-yml` crate has recently been **deprecated/archived** and raised concerns around maintenance and safety. It was a fork of `serde_yaml` using the C libyaml parser via unsafe code, and introduced unsound behaviour (segfaults -were demonstrated)([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L176-L184))([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L194-L203)). -This situation prompts the need to choose a more robust and actively maintained -YAML+Serde library. There are no constraints requiring extremely small binaries -or WebAssembly support, so the selection can focus on the best available -solution without platform limitations. 
+were +demonstrated)([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L176-L184))([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L194-L203)). + This situation prompts the need to choose a more robust and actively +maintained YAML+Serde library. There are no constraints requiring extremely +small binaries or WebAssembly support, so the selection can focus on the best +available solution without platform limitations. ## Decision Outcome (Summary) @@ -28,13 +29,12 @@ it offers: - **Safety:** A pure Rust implementation with **no `unsafe` libyaml dependencies**([2](https://www.reddit.com/r/rust/comments/1bo5dle/we_lost_serdeyaml_whats_the_next_one/#:~:text=For%20the%20,key%20support%20and%20nested%20enums))([2](https://www.reddit.com/r/rust/comments/1bo5dle/we_lost_serdeyaml_whats_the_next_one/#:~:text=Those%20who%20dislike%20unsafe%20statements,it%20is%20also%20notably%20faster)), - eliminating C library risks. + eliminating C library risks. - **Spec Compliance:** Full YAML 1.2 support (including proper handling of anchors and merge keys)([3](https://github.com/saphyr-rs/saphyr#:~:text=Specification%20Compliance)), - which aligns with Netsuke requirements and future-proofs the manifest - format. + which aligns with Netsuke requirements and future-proofs the manifest format. - **Robustness:** Built-in handling for resource limits and duplicate keys (configurable budgets and policies) to prevent pathological YAML from causing @@ -89,8 +89,7 @@ Several YAML+Serde libraries were evaluated against the project requirements: but as of now it hasn’t eliminated the core safety issue. It also does not add new features beyond what `serde_yaml` had. Given that Netsuke does not need YAML 1.1 quirks and prefers to avoid C library dependencies, this is - only a - marginal improvement over `serde_yml`. + only a marginal improvement over `serde_yml`. 
- **`serde_norway`:** Another community fork of `serde_yaml` (“norway” being a codename) which is similarly intended to maintain YAML support. It is also @@ -225,7 +224,7 @@ It’s worth noting that `serde_yaml_ng`’s maintainer is working on integratin safer libyaml or alternative parser([1](https://github.com/acatton/serde-yaml-ng/blob/3628102977f3ec9e02b95ef32fcec30b3df91390/README.md#L224-L232)), and `serde_norway` may similarly evolve. However, these efforts are uncertain - in timeline and still wouldn’t necessarily achieve YAML 1.2 compliance or the +in timeline and still wouldn’t necessarily achieve YAML 1.2 compliance or the one-pass design delivered by Saphyr. Meanwhile, `serde_saphyr` is already delivering these advantages today. Adopting it now positions Netsuke on a forward-looking path with minimal downsides. @@ -256,10 +255,10 @@ forward-looking path with minimal downsides. refactor that call site. `serde_saphyr` does **not use** a `Value` model internally. Netsuke currently parses directly into concrete structs (e.g., manifest definitions), so this scenario is unlikely, but confirm that no - intermediate YAML value logic is required (such as merging two YAML - documents or manually inspecting the YAML DOM). If such behaviour exists, - consider using the `saphyr` crate’s `Yaml` type or adjusting the design. For - now, the expectation is that no change is needed. + intermediate YAML value logic is required (such as merging two YAML documents + or manually inspecting the YAML DOM). If such behaviour exists, consider + using the `saphyr` crate’s `Yaml` type or adjusting the design. For now, the + expectation is that no change is needed. - Anchors and aliases in manifests will be automatically resolved by `serde_saphyr` during parsing. Netsuke does not output YAML with anchors, so @@ -313,8 +312,8 @@ forward-looking path with minimal downsides. serialisation of the data structures to a YAML string is sufficient. 
- **Documentation:** Update Netsuke’s documentation (README or user guide) to - note that Netsuke now supports **YAML 1.2** fully. If there are any changes in - accepted syntax from YAML 1.1 (e.g., the booleans example), call that out. + note that Netsuke now supports **YAML 1.2** fully. If there are any changes + in accepted syntax from YAML 1.1 (e.g., the booleans example), call that out. Also, document that duplicate keys in manifests will result in an error (if it wasn’t already documented as invalid). Essentially, clarify that manifests should adhere to standard YAML 1.2. @@ -337,8 +336,8 @@ forward-looking path with minimal downsides. **Positive consequences:** - Eliminates a deprecated and potentially unsafe dependency (`serde_yml` and - its underlying libyaml) in favour of a safer, pure-Rust solution. This reduces - the chance of YAML parsing causing crashes or security issues in + its underlying libyaml) in favour of a safer, pure-Rust solution. This + reduces the chance of YAML parsing causing crashes or security issues in Netsuke([5](https://users.rust-lang.org/t/new-serde-deserialization-framework-for-yaml-data-that-parses-yaml-into-rust-structures-without-building-syntax-tree/134306#:~:text=The%20library%20relies%20on%20saphyr,bw)). - Aligns with the YAML 1.2 spec, which means better consistency going forward @@ -372,8 +371,8 @@ forward-looking path with minimal downsides. their manifests could break. The existing manifests already use explicit booleans and similar constructs, but communicate this change clearly. This is more of an education/updating issue than a technical problem. The stricter - behaviour (and error on duplicate keys) will actually surface potential config - errors that could otherwise go unnoticed. + behaviour (and error on duplicate keys) will actually surface potential + config errors that could otherwise go unnoticed. - **Dependency churn:** Introducing a new library always carries a small risk in dependency tree changes. 
`serde-saphyr` brings in the `saphyr-parser` and diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index db003c56..99ed8de9 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -45,19 +45,20 @@ before execution, a critical requirement for compatibility with Ninja. 2. Stage 2: Initial YAML Parsing - The raw string is parsed into an untyped `serde_yml::Value`. This step - ensures the manifest is valid YAML before any templating takes place. + The raw string is parsed with `serde_saphyr` into an untyped + `serde_json::Value`. This step ensures the manifest is valid YAML before any + templating takes place. 3. Stage 3: Template Expansion - Netsuke walks the YAML `Value`, evaluating Jinja macros, variables, and the - `foreach` and `when` keys. Each mapping containing these keys is expanded - with an iteration context providing `item` and optional `index`. Variable - lookups respect the precedence `globals` < `target.vars` < per-iteration - locals, and this context is preserved for later rendering. At this stage - Jinja must not modify the YAML structure directly; control constructs live - only within these explicit keys. Structural Jinja blocks (`{% ... %}`) are - not permitted to reshape mappings or sequences. + Netsuke walks the parsed `Value`, evaluating Jinja macros, variables, and + the `foreach` and `when` keys. Each mapping containing these keys is + expanded with an iteration context providing `item` and optional `index`. + Variable lookups respect the precedence `globals` < `target.vars` < + per-iteration locals, and this context is preserved for later rendering. At + this stage Jinja must not modify the YAML structure directly; control + constructs live only within these explicit keys. Structural Jinja blocks + (`{% ... %}`) are not permitted to reshape mappings or sequences. 4. Stage 4: Deserialisation & Final Rendering @@ -164,7 +165,10 @@ level keys. - `vars`: A mapping of global key-value pairs. Keys must be strings. 
Values may be strings, numbers, booleans, or sequences. These variables seed the Jinja - templating context and drive control flow within the manifest. + templating context and drive control flow within the manifest. Non-string + YAML keys (for example integers such as `1: value`) trigger a parse-time + diagnostic (`E0001: "vars key must be a string"`) because Netsuke loads the + values into a JSON object before Jinja evaluation. - `macros`: An optional list of Jinja macro definitions. Each item provides a `signature` string using standard Jinja syntax and a `body` declared with the @@ -397,45 +401,41 @@ critical step is to parse this string and deserialise it into a structured, in- memory representation. The choice of libraries and the definition of the target data structures are crucial for the robustness and maintainability of Netsuke. -### 3.1 Crate Selection: `serde_yml` +### 3.1 Crate Selection: `serde_saphyr` -For YAML parsing and deserialisation, the recommended crate is `serde_yml`. -This choice is based on its deep and direct integration with the `serde` -framework, the de-facto standard for serialisation and deserialisation in the -Rust ecosystem. Using `serde_yml` allows `serde`'s powerful derive macros to -automatically generate the deserialisation logic for Rust structs. This -approach is idiomatic, highly efficient, and significantly reduces boilerplate. -Add `#[derive(Deserialize)]` (optionally also `Debug`) to make a struct a -deserialisation target. +Netsuke now relies on `serde_saphyr` for YAML parsing and serialisation. The +crate wraps the actively maintained `saphyr` parser while preserving the +familiar `serde_yaml`-style API: helpers such as `from_str`, `from_reader`, and +`to_string` integrate cleanly with `serde` derives, and the error type exposes +line and column information for diagnostics. This provides a maintained, +panic-free alternative to the archived `serde_yml` without forcing a redesign +of the parsing pipeline. 
-While other promising YAML libraries like `saphyr` exist, their `serde` -integration (`saphyr-serde`) is currently described as "soon-to-be" or is at a -highly experimental stage (version 0.0.0)[^11]. Building a core component of -Netsuke on a nascent or unreleased library would introduce significant and -unnecessary project risk. +Because `serde_saphyr` intentionally omits a bespoke `Value` tree, Netsuke +deserialises manifests into `serde_json::Value` for its intermediate +transformations. The JSON value retains YAML anchors, scalars, and sequences in +data structures that are easy to traverse and mutate. Once templating and +`foreach` expansion complete, `serde_json::from_value` hydrates the strongly +typed manifest AST exactly as before. -`serde_yml` is mature, widely adopted, and battle-tested, making it the prudent -choice for production-quality software. +Adopting `serde_saphyr` delivers the features Netsuke depends on: -**Maintenance risk.** `serde_yml` is archived upstream and carries unsoundness -advisories. Netsuke relies on it today, but a maintained successor such as -`serde_yaml_ng` will be investigated. A follow-up ADR will outline the -migration plan and compatibility testing. +- Full YAML 1.2 support with alias resolution handled during parsing. +- Drop-in compatibility with existing `serde` derives, keeping the AST code + unchanged. +- Structured errors that carry location metadata for precise diagnostics. -Follow-up actions: - -- Draft an ADR comparing `serde_yml` with candidate replacements and recording - the migration decision. -- Schedule a migration spike to prototype deserialisation with the preferred - crate and capture compatibility notes. +An Architecture Decision Record documents the migration rationale and +compatibility results; no further action is required beyond monitoring upstream +releases. 
### 3.2 Core Data Structures (`ast.rs`) -The Rust structs that `serde_yml` will deserialise into form the Abstract -Syntax Tree (AST) of the build manifest. These structs must precisely mirror -the YAML schema defined in Section 2. They will be defined in a dedicated -module, `src/ast.rs`, and annotated with `#[derive(Deserialize)]` (and `Debug`) -to enable automatic deserialisation and easy debugging. +The Rust structs that `serde_saphyr` deserialises into form the Abstract Syntax +Tree (AST) of the build manifest. These structs must precisely mirror the YAML +schema defined in Section 2. They will be defined in a dedicated module, +`src/ast.rs`, and annotated with `#[derive(Deserialize)]` (and `Debug`) to +enable automatic deserialisation and easy debugging. Rust @@ -451,7 +451,7 @@ pub struct NetsukeManifest { pub netsuke_version: Version, #[serde(default)] - pub vars: HashMap, + pub vars: HashMap, #[serde(default)] pub rules: Vec, @@ -503,7 +503,7 @@ pub struct Target { pub order_only_deps: StringOrList, #[serde(default)] - pub vars: HashMap, + pub vars: HashMap, /// Run this target when requested even if a file with the same name exists. #[serde(default)] @@ -574,7 +574,7 @@ let ast = NetsukeManifest { The integration of a templating engine like Jinja fundamentally shapes the parsing pipeline, mandating a two-pass approach. It is impossible to parse the -user's `Netsukefile` file with `serde_yml` in a single step. +user's `Netsukefile` file with `serde_saphyr` in a single step. Consider a manifest containing Jinja syntax: @@ -589,7 +589,7 @@ targets: The value of `sources`, `{{ glob('src/*.c') }}`, is a plain YAML string. The manifest must be valid YAML before any templating occurs, so the parser can -first load it into a `serde_yml::Value` tree. +first load it into a `serde_json::Value` tree. Once parsed, Netsuke performs a series of transformation stages: @@ -612,14 +612,28 @@ Unknown fields are rejected to surface user errors early. 
`StringOrList` provides a default `Empty` variant, so optional lists are trivial to represent. The manifest version is parsed using the `semver` crate to validate that it follows semantic versioning rules. Global and target variable maps now share -the `HashMap` type so booleans and sequences are -preserved for Jinja control flow. Targets also accept optional `phony` and -`always` booleans. They default to `false`, making it explicit when an action -should run regardless of file timestamps. Targets listed in the `actions` -section are deserialised using a custom helper so they are always treated as -`phony` tasks. This ensures preparation actions never generate build artefacts. -Convenience functions in `src/manifest.rs` load a manifest from a string or a -file path, returning `anyhow::Result` for straightforward error handling. +the `ManifestMap` alias: + +```rust +type ManifestMap = serde_json::Map; +``` + +This alias preserves booleans and sequences needed for Jinja control flow while +presenting a stable public API surface. The `serde_json` library is built with +the `preserve_order` feature so the backing `ManifestMap` retains the insertion +order observed in the YAML manifest. This guarantees that downstream consumers +see keys in a stable sequence after foreach expansion, matching the authoring +intent and keeping diagnostics and serialised output predictable. Targets also +accept optional `phony` and `always` booleans. They default to `false`, making +it explicit when an action should run regardless of file timestamps. Targets +listed in the `actions` section are deserialised using a custom helper so they +are always treated as `phony` tasks. This ensures preparation actions never +generate build artefacts. Convenience functions in `src/manifest.rs` load a +manifest from a string or a file path, returning `anyhow::Result` for +straightforward error handling. 
Diagnostics now wrap source and manifest +identifiers in the `ManifestSource` and `ManifestName` newtypes, allowing +downstream tooling to reuse the strongly typed strings when producing errors or +logs. The ingestion pipeline now parses the manifest as YAML before any Jinja evaluation. A dedicated expansion pass handles `foreach` and `when`, and string @@ -1775,6 +1789,14 @@ with their staging directories to avoid collisions before attaching them to the GitHub release draft. This automated pipeline guarantees parity across Windows, Linux, and macOS without custom GoReleaser logic. +### 8.7 Release Notes + +Netsuke's manifest loader now re-exports the `ManifestValue` and `ManifestMap` +aliases alongside the `ManifestSource`, `ManifestName`, `map_yaml_error`, and +`map_data_error` helpers. Library consumers should upgrade to these symbols +when interacting with manifest data or diagnostics; the change is user-visible +and must be highlighted in the next crate release summary. + ## Section 9: Implementation Roadmap and Strategic Recommendations This final section outlines a strategic plan for implementing Netsuke, along @@ -1797,8 +1819,8 @@ goal. 1. Implement the initial `clap` CLI structure for the `build` command. - 2. Implement the YAML parser using `serde_yml` and the AST data structures - (`ast.rs`). + 2. Implement the YAML parser using `serde_saphyr` and the AST data + structures (`ast.rs`). 3. Implement the AST-to-IR transformation logic, including basic validation like checking for rule existence. @@ -1822,7 +1844,7 @@ goal. 1. Integrate the `minijinja` crate into the build pipeline. 2. Implement the two-pass parsing mechanism: first render the manifest with - `minijinja`, then parse the result with `serde_yml`. + `minijinja`, then parse the result with `serde_saphyr`. 3. Populate the initial Jinja context with the global `vars` from the manifest. @@ -1865,7 +1887,7 @@ selected for this project and the rationale for their inclusion. 
| Component | Recommended Crate | Rationale | | -------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | | CLI Parsing | clap | The Rust standard for powerful, derive-based CLI development. | -| YAML Parsing | serde_yml | Mature, stable, and provides seamless integration with the serde framework. | +| YAML Parsing | serde_saphyr | Maintained, panic-free YAML 1.2 parser with a serde-compatible API. | | Templating | minijinja | High compatibility with Jinja2, minimal dependencies, and supports runtime template loading. | | Shell Quoting | shell-quote | A critical security component; provides robust, shell-specific escaping for command arguments. | | Error Handling | anyhow + thiserror + miette | An idiomatic and powerful combination for creating rich, contextual, and user-friendly error reports with precise source spans. | @@ -1937,7 +1959,6 @@ projects. [^8]: "The Ninja build system." Ninja. Accessed on 12 July 2025\. -[^11]: "Saphyr libraries." crates.io. Accessed on 12 July 2025\. [^15]: "minijinja." crates.io. Accessed on 12 July 2025\. diff --git a/docs/roadmap.md b/docs/roadmap.md index 9efa60c4..9607a082 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -22,7 +22,7 @@ compilation pipeline from parsing to execution. - [x] Annotate AST structs with #[derive(Deserialize)] and #[serde(deny_unknown_fields)] - to enable serde_yml parsing. *(done)* + to enable serde_saphyr parsing. *(done)* - [x] Implement parsing for the netsuke_version field and validate it using the semver crate. *(done)* @@ -80,7 +80,7 @@ configurations with variables, control flow, and custom functions. - [x] Integrate the `minijinja` crate into the build pipeline. 
- [x] Implement data-first parsing: parse the manifest into a - `serde_yml::Value` (Stage 2: Initial YAML Parsing), expand `foreach` and + `serde_json::Value` (Stage 2: Initial YAML Parsing), expand `foreach` and `when` entries with a Jinja environment (Stage 3: Template Expansion), then deserialise the expanded tree into the typed AST and render remaining string fields (Stage 4: Deserialisation & Final Rendering). @@ -119,10 +119,10 @@ configurations with variables, control flow, and custom functions. - [ ] **YAML Parser Migration:** - - [ ] Draft an ADR evaluating maintained replacements for `serde_yml` + - [x] Draft an ADR evaluating maintained replacements for `serde_yml` (for example `serde_yaml_ng`) and record the migration decision. - - [ ] Run a migration spike with the preferred crate, exercising the manifest - fixtures to capture compatibility notes and required mitigations. + - [x] Migrate the parser to `serde_saphyr`, exercising the manifest fixtures + to capture compatibility notes and required mitigations. ## Phase 3: The "Friendly" Polish 🛡️ diff --git a/src/ast.rs b/src/ast.rs index 431fda2d..a0818928 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2,7 +2,7 @@ //! //! This module defines the data structures used to represent a parsed //! `Netsukefile`. They mirror the YAML schema described in the design -//! document and are deserialised with `serde_yml`. +//! document and are deserialised with `serde_saphyr`. //! //! The following example shows how to parse a minimal manifest string: //! @@ -11,18 +11,31 @@ //! use netsuke::ast::StringOrList; //! //! let yaml = "netsuke_version: \"1.0.0\"\ntargets:\n - name: hello\n command: \"echo hi\""; -//! let manifest: NetsukeManifest = serde_yml::from_str(yaml).expect("parse"); +//! let manifest: NetsukeManifest = serde_saphyr::from_str(yaml).expect("parse"); //! if let StringOrList::String(name) = &manifest.targets[0].name { //! assert_eq!(name, "hello"); //! } //! ``` +//! +//! 
For most applications you should prefer the high-level +//! [`manifest::from_str`](crate::manifest::from_str) helper, which validates and +//! reports diagnostics consistently: +//! +//! ```rust +//! use netsuke::manifest; +//! +//! let yaml = "netsuke_version: \"1.0.0\"\ntargets:\n - name: hello\n command: \"echo hi\""; +//! let manifest = manifest::from_str(yaml).expect("parse"); +//! assert_eq!(manifest.targets.len(), 1); +//! ``` use semver::Version; use serde::{Deserialize, Serialize, de::Deserializer}; use std::collections::HashMap; -/// Map type for `vars` blocks, preserving YAML values. -pub type Vars = HashMap; +/// Map type for `vars` blocks, preserving JSON values produced by the YAML +/// parser. +pub type Vars = HashMap; fn deserialize_actions<'de, D>(deserializer: D) -> Result, D::Error> where @@ -52,7 +65,7 @@ where /// use netsuke::ast::NetsukeManifest; /// # fn main() -> Result<(), Box> { /// let yaml = "netsuke_version: \"1.0.0\"\ntargets:\n - name: hello\n command: echo hi"; -/// let manifest: NetsukeManifest = serde_yml::from_str(yaml)?; +/// let manifest: NetsukeManifest = serde_saphyr::from_str(yaml)?; /// assert_eq!(manifest.targets.len(), 1); /// # Ok(()) } /// ``` diff --git a/src/manifest.rs b/src/manifest.rs index 40bd3967..79bab044 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -6,58 +6,24 @@ //! exposes `env()` to read environment variables and `glob()` to expand //! filesystem patterns during template evaluation. Both helpers fail fast when //! inputs are missing or patterns are invalid. +//! +//! Consumers interact with the intermediate manifest through the re-exported +//! [`ManifestValue`] and [`ManifestMap`] aliases. Diagnostics wrap manifest +//! identifiers in [`ManifestName`] and YAML source strings in +//! [`ManifestSource`] so callers pass domain-specific types instead of raw +//! strings. +//! +//! The optional `vars` section must deserialise into a JSON object with string +//! keys. 
YAML manifests that use non-string keys (for example integers) now +//! fail with a [`ManifestError::Parse`] diagnostic, matching the Jinja context +//! semantics and preventing ambiguous variable lookup. use crate::ast::NetsukeManifest; use anyhow::{Context, Result}; use minijinja::{Environment, Error, ErrorKind, UndefinedBehavior, value::Value}; -use serde_yml::Value as YamlValue; +use serde::de::Error as _; use std::{fs, path::Path}; -/// A display name for a manifest source, used in error reporting. -#[derive(Debug, Clone)] -pub struct ManifestName(String); - -impl ManifestName { - /// Construct a manifest name for diagnostics. - /// - /// # Examples - /// - /// ```rust,ignore - /// use netsuke::manifest::ManifestName; - /// let name = ManifestName::new("Netsukefile"); - /// assert_eq!(name.to_string(), "Netsukefile"); - /// ``` - pub fn new(name: impl Into) -> Self { - Self(name.into()) - } - - #[must_use] - /// Borrow the manifest name as a string slice. - /// - /// # Examples - /// - /// ```rust,ignore - /// use netsuke::manifest::ManifestName; - /// let name = ManifestName::new("Config"); - /// assert_eq!(name.as_str(), "Config"); - /// ``` - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl std::fmt::Display for ManifestName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl AsRef for ManifestName { - fn as_ref(&self) -> &str { - self.0.as_str() - } -} - mod diagnostics; mod expand; mod glob; @@ -65,7 +31,12 @@ mod hints; mod jinja_macros; mod render; -pub use diagnostics::{ManifestError, map_yaml_error}; +pub type ManifestValue = serde_json::Value; +pub type ManifestMap = serde_json::Map; + +pub use diagnostics::{ + ManifestError, ManifestName, ManifestSource, map_data_error, map_yaml_error, +}; pub use glob::glob_paths; pub use expand::expand_foreach; @@ -114,9 +85,10 @@ fn env_var(name: &str) -> std::result::Result { /// /// Returns an error if YAML parsing or Jinja evaluation fails. 
fn from_str_named(yaml: &str, name: &ManifestName) -> Result { - let mut doc: YamlValue = serde_yml::from_str(yaml).map_err(|e| ManifestError::Parse { - source: map_yaml_error(e, yaml, name.as_ref()), - })?; + let mut doc: ManifestValue = + serde_saphyr::from_str(yaml).map_err(|e| ManifestError::Parse { + source: map_yaml_error(e, &ManifestSource::from(yaml), name), + })?; let mut jinja = Environment::new(); jinja.set_undefined_behavior(UndefinedBehavior::Strict); @@ -125,13 +97,18 @@ fn from_str_named(yaml: &str, name: &ManifestName) -> Result { jinja.add_function("glob", |pattern: String| glob_paths(&pattern)); let _stdlib_state = crate::stdlib::register(&mut jinja); - if let Some(vars) = doc.get("vars").and_then(|v| v.as_mapping()).cloned() { - for (k, v) in vars { - let key = k - .as_str() - .ok_or_else(|| anyhow::anyhow!("non-string key in vars mapping: {k:?}"))? - .to_string(); - jinja.add_global(key, Value::from_serialize(v)); + if let Some(vars_value) = doc.get("vars") { + let vars = vars_value + .as_object() + .cloned() + .ok_or_else(|| ManifestError::Parse { + source: map_data_error( + serde_json::Error::custom("manifest vars must be an object with string keys"), + name, + ), + })?; + for (key, value) in vars { + jinja.add_global(key, Value::from_serialize(value)); } } @@ -140,8 +117,8 @@ fn from_str_named(yaml: &str, name: &ManifestName) -> Result { expand_foreach(&mut doc, &jinja)?; let manifest: NetsukeManifest = - serde_yml::from_value(doc).map_err(|e| ManifestError::Parse { - source: map_yaml_error(e, yaml, name.as_ref()), + serde_json::from_value(doc).map_err(|e| ManifestError::Parse { + source: map_data_error(e, name), })?; render_manifest(manifest, &jinja) diff --git a/src/manifest/diagnostics.rs b/src/manifest/diagnostics.rs index bce2a147..5fbfbe57 100644 --- a/src/manifest/diagnostics.rs +++ b/src/manifest/diagnostics.rs @@ -1,17 +1,145 @@ //! Translates manifest parsing errors into actionable diagnostics. 
use miette::{Diagnostic, NamedSource, SourceSpan}; -use serde_yml::{Error as YamlError, Location}; +use serde_saphyr::{Error as YamlError, Location}; use thiserror::Error; +/// YAML source content for a manifest. +/// +/// # Examples +/// ```rust +/// use netsuke::manifest::ManifestSource; +/// let source = ManifestSource::from("foo: 1"); +/// assert_eq!(source.as_str(), "foo: 1"); +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ManifestSource(String); + +impl ManifestSource { + #[must_use] + pub fn new(src: impl Into) -> Self { + Self(src.into()) + } + + #[must_use] + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<&str> for ManifestSource { + fn from(value: &str) -> Self { + Self::new(value) + } +} + +impl From for ManifestSource { + fn from(value: String) -> Self { + Self::new(value) + } +} + +impl AsRef for ManifestSource { + fn as_ref(&self) -> &str { + self.0.as_str() + } +} + +impl std::fmt::Display for ManifestSource { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.0.as_str()) + } +} + +/// Display name for a manifest source used in diagnostics. 
+/// +/// # Examples +/// ```rust +/// use netsuke::manifest::ManifestName; +/// let name = ManifestName::new("Netsukefile"); +/// assert_eq!(name.as_str(), "Netsukefile"); +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ManifestName(String); + +impl ManifestName { + #[must_use] + pub fn new(name: impl Into) -> Self { + Self(name.into()) + } + + #[must_use] + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<&str> for ManifestName { + fn from(value: &str) -> Self { + Self::new(value) + } +} + +impl From for ManifestName { + fn from(value: String) -> Self { + Self::new(value) + } +} + +impl AsRef for ManifestName { + fn as_ref(&self) -> &str { + self.0.as_str() + } +} + +impl std::fmt::Display for ManifestName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.0.as_str()) + } +} + use super::hints::YAML_HINTS; -fn to_span(src: &str, loc: Location) -> SourceSpan { - let at = loc.index(); - let bytes = src.as_bytes(); +fn location_to_index(src: &ManifestSource, loc: Location) -> usize { + byte_index(src, loc) +} + +fn byte_index(src: &ManifestSource, loc: Location) -> usize { + byte_index_components(src.as_ref(), loc.line(), loc.column()) +} + +/// Reconstruct the byte offset for a `serde_saphyr::Location`. +/// +/// `serde_saphyr` exposes only line and column accessors, so we derive the +/// byte index by iterating over the manifest source directly. The logic clamps +/// offsets that exceed the current line and tolerates both Unix (`\n`) and +/// Windows (`\r\n`) newlines. 
+fn byte_index_components(src: &str, line: u64, column: u64) -> usize { + let target_line = usize::try_from(line.saturating_sub(1)).unwrap_or(usize::MAX); + let target_column = usize::try_from(column.saturating_sub(1)).unwrap_or(usize::MAX); + let mut offset = 0usize; + for (idx, segment) in src.split_inclusive('\n').enumerate() { + if idx == target_line { + let line = segment.strip_suffix('\n').unwrap_or(segment); + let line = line.strip_suffix('\r').unwrap_or(line); + let column_offset = line + .char_indices() + .nth(target_column) + .map_or(line.len(), |(byte_idx, _)| byte_idx); + return offset + column_offset; + } + offset += segment.len(); + } + src.len() +} + +fn to_span(src: &ManifestSource, loc: Location) -> SourceSpan { + let at = location_to_index(src, loc); + let bytes = src.as_ref().as_bytes(); + let is_line_break = |b: u8| b == b'\n' || b == b'\r'; let (start, end) = match bytes.get(at) { - Some(&b) if b != b'\n' => (at, at + 1), + Some(&b) if !is_line_break(b) => (at, at + 1), _ => { - let start = if at > 0 && bytes.get(at - 1).is_some_and(|p| *p != b'\n') { + let start = if at > 0 && bytes.get(at - 1).is_some_and(|p| !is_line_break(*p)) { at - 1 } else { at @@ -39,16 +167,16 @@ struct YamlDiagnostic { message: String, } -fn has_tab_indent(src: &str, loc: Option) -> bool { +fn has_tab_indent(src: &ManifestSource, loc: Option) -> bool { let Some(loc) = loc else { return false }; - let line_idx = loc.line().saturating_sub(1); - let line = src.lines().nth(line_idx).unwrap_or(""); + let line_idx = usize::try_from(loc.line().saturating_sub(1)).unwrap_or(usize::MAX); + let line = src.as_ref().lines().nth(line_idx).unwrap_or(""); line.chars() .take_while(|c| c.is_whitespace()) .any(|c| c == '\t') } -fn hint_for(err_str: &str, src: &str, loc: Option) -> Option { +fn hint_for(err_str: &str, src: &ManifestSource, loc: Option) -> Option { if has_tab_indent(src, loc) { return Some("Use spaces for indentation; tabs are invalid in YAML.".into()); } @@ -80,11 
+208,16 @@ pub enum ManifestError { }, } +/// Map a `serde_saphyr` YAML parse error into a [`miette`] diagnostic. +/// +/// The diagnostic includes the offending span when `serde_saphyr` reports a +/// source location, and attempts to attach contextual hints for common mistakes such as +/// tab indentation. #[must_use] pub fn map_yaml_error( err: YamlError, - src: &str, - name: &str, + src: &ManifestSource, + name: &ManifestName, ) -> Box { let loc = err.location(); let (line, col, span) = loc.map_or((1, 1, None), |l| { @@ -99,7 +232,7 @@ pub fn map_yaml_error( } Box::new(YamlDiagnostic { - src: NamedSource::new(name, src.to_string()), + src: NamedSource::new(name.as_ref(), src.as_ref().to_string()), span, help: hint, source: err, @@ -107,24 +240,136 @@ pub fn map_yaml_error( }) } +#[derive(Debug, Error, Diagnostic)] +#[error("{message}")] +#[diagnostic(code(netsuke::manifest::structure))] +struct DataDiagnostic { + #[source] + source: serde_json::Error, + message: String, +} + +/// Map a [`serde_json`] structural error into a diagnostic without a source +/// span. `serde_json` does not report byte offsets for data validation failures, +/// so the resulting diagnostic only carries the manifest name and error +/// message. 
+#[must_use] +pub fn map_data_error( + err: serde_json::Error, + name: &ManifestName, +) -> Box { + let message = format!( + "[netsuke::manifest::structure] manifest structure error in {}: {err}", + name.as_ref() + ); + Box::new(DataDiagnostic { + source: err, + message, + }) +} + #[cfg(test)] mod tests { use super::*; + use std::error::Error as StdError; #[test] fn map_yaml_error_includes_tab_hint() { - let src = "\tkey: value\n"; - let err = serde_yml::from_str::(src).expect_err("expected parse error"); - let diag = map_yaml_error(err, src, "test"); + let src = ManifestSource::from("\tkey: \"unterminated"); + let err = serde_saphyr::from_str::(src.as_ref()) + .expect_err("expected parse error"); + let name = ManifestName::from("test"); + let diag = map_yaml_error(err, &src, &name); let msg = diag.to_string(); assert!(msg.contains("Use spaces for indentation"), "message: {msg}"); } #[test] fn map_yaml_error_defaults_location_when_missing() { - let src = ":"; - let err = serde_yml::from_str::(src).expect_err("expected parse error"); - let diag = map_yaml_error(err, src, "test"); + let src = ManifestSource::from("foo: [1"); + let err = serde_saphyr::Error::Eof { + location: serde_saphyr::Location::UNKNOWN, + }; + let name = ManifestName::from("test"); + let diag = map_yaml_error(err, &src, &name); assert!(diag.to_string().contains("line 1, column 1")); } + + #[test] + fn map_yaml_error_span_skips_carriage_return() { + let src = ManifestSource::from("targets:\r\n - name: hi\r\n command echo\r\n"); + let err = serde_saphyr::from_str::(src.as_ref()) + .expect_err("expected parse error"); + let name = ManifestName::from("test"); + let diag = map_yaml_error(err, &src, &name); + let yaml_diag = (&*diag as &(dyn StdError + 'static)) + .downcast_ref::() + .expect("expected YAML diagnostic"); + let span = yaml_diag.span.expect("span present"); + let offset = span.offset(); + if let Some(byte) = src.as_ref().as_bytes().get(offset) { + assert_ne!(*byte, b'\r'); + } + } + + 
#[test] + fn location_to_index_handles_utf8() { + // café: 'é' is multi-byte + let src = ManifestSource::from("café: [\n"); + let err = serde_saphyr::from_str::(src.as_ref()) + .expect_err("expected parse error"); + let loc = err.location().expect("location present"); + let idx = location_to_index(&src, loc); + assert!(src.as_ref().is_char_boundary(idx)); + let e_idx = src.as_ref().find('é').expect("contains é"); + assert!(idx > e_idx, "index {idx} must follow é at {e_idx}"); + assert!(idx <= src.as_ref().len()); + } +} + +#[cfg(test)] +fn expected_offset(src: &str, column: u64) -> usize { + src.chars() + .take(usize::try_from(column.saturating_sub(1)).unwrap_or(usize::MAX)) + .map(char::len_utf8) + .sum() +} + +#[cfg(test)] +mod byte_index_tests { + use super::{byte_index_components, expected_offset}; + + #[test] + fn byte_index_accounts_for_multibyte_characters() { + let line = "emoji: 😀value"; + let column = 9; // just after the emoji, before the 'v'. + let offset = byte_index_components(line, 1, column); + assert_eq!(offset, expected_offset(line, column)); + } + + #[test] + fn byte_index_clamps_past_line_end() { + let line = "short"; + let column = 42; + let offset = byte_index_components(line, 1, column); + assert_eq!(offset, line.len()); + } + + #[test] + fn byte_index_advances_over_previous_lines() { + let src = "one\ntwo\nthree"; + let column = 3; // 'r' in "three" + let offset = byte_index_components(src, 3, column); + let expected = "one\ntwo\n".len() + expected_offset("three", column); + assert_eq!(offset, expected); + } + + #[test] + fn byte_index_handles_crlf_lines() { + let src = "one\r\ntwo\r\nthree"; + let column = 2; // 'w' in "two" + let offset = byte_index_components(src, 2, column); + let expected = "one\r\n".len() + expected_offset("two", column); + assert_eq!(offset, expected); + } } diff --git a/src/manifest/expand.rs b/src/manifest/expand.rs index caf882da..e6b5c45b 100644 --- a/src/manifest/expand.rs +++ b/src/manifest/expand.rs @@ -1,7 
+1,8 @@ //! Expands manifest foreach directives into concrete targets. +use super::{ManifestMap, ManifestValue}; use anyhow::{Context, Result}; use minijinja::{Environment, context, value::Value}; -use serde_yml::{Mapping as YamlMapping, Value as YamlValue}; +use serde_json::{Number as JsonNumber, map::Entry}; /// Expand manifest targets defined with the `foreach` key. /// @@ -9,15 +10,15 @@ use serde_yml::{Mapping as YamlMapping, Value as YamlValue}; /// /// Returns an error when evaluating `foreach` or `when` expressions, when /// iteration values fail to serialise, or when target metadata is malformed. -pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { - let Some(targets) = doc.get_mut("targets").and_then(|v| v.as_sequence_mut()) else { +pub fn expand_foreach(doc: &mut ManifestValue, env: &Environment) -> Result<()> { + let Some(targets) = doc.get_mut("targets").and_then(|v| v.as_array_mut()) else { return Ok(()); }; let mut expanded = Vec::new(); for target in std::mem::take(targets) { match target { - YamlValue::Mapping(map) => expanded.extend(expand_target(map, env)?), + ManifestValue::Object(map) => expanded.extend(expand_target(map, env)?), other => expanded.push(other), } } @@ -26,28 +27,27 @@ pub fn expand_foreach(doc: &mut YamlValue, env: &Environment) -> Result<()> { Ok(()) } -fn expand_target(map: YamlMapping, env: &Environment) -> Result> { - let foreach_key = YamlValue::String("foreach".into()); - if let Some(expr_val) = map.get(&foreach_key) { +fn expand_target(map: ManifestMap, env: &Environment) -> Result> { + if let Some(expr_val) = map.get("foreach") { let values = parse_foreach_values(expr_val, env)?; let mut items = Vec::new(); for (index, item) in values.into_iter().enumerate() { let mut clone = map.clone(); - clone.remove(&foreach_key); + clone.remove("foreach"); if !when_allows(&mut clone, env, &item, index)? 
{ continue; } inject_iteration_vars(&mut clone, &item, index)?; - items.push(YamlValue::Mapping(clone)); + items.push(ManifestValue::Object(clone)); } Ok(items) } else { - Ok(vec![YamlValue::Mapping(map)]) + Ok(vec![ManifestValue::Object(map)]) } } -fn parse_foreach_values(expr_val: &YamlValue, env: &Environment) -> Result> { - if let Some(seq) = expr_val.as_sequence() { +fn parse_foreach_values(expr_val: &ManifestValue, env: &Environment) -> Result> { + if let Some(seq) = expr_val.as_array() { return Ok(seq.iter().cloned().map(Value::from_serialize).collect()); } let expr = as_str(expr_val, "foreach")?; @@ -59,13 +59,12 @@ fn parse_foreach_values(expr_val: &YamlValue, env: &Environment) -> Result Result { - let when_key = YamlValue::String("when".into()); - if let Some(when_val) = map.remove(&when_key) { + if let Some(when_val) = map.remove("when") { let expr = as_str(&when_val, "when")?; let result = eval_expression(env, "when", expr, context! { item, index })?; Ok(result.is_true()) @@ -74,30 +73,35 @@ fn when_allows( } } -fn inject_iteration_vars(map: &mut YamlMapping, item: &Value, index: usize) -> Result<()> { - let vars_key = YamlValue::String("vars".into()); - let mut vars = match map.remove(&vars_key) { - None => YamlMapping::new(), - Some(YamlValue::Mapping(m)) => m, - Some(other) => { - return Err(anyhow::anyhow!( - "target.vars must be a mapping, got: {other:?}" - )); +fn inject_iteration_vars(map: &mut ManifestMap, item: &Value, index: usize) -> Result<()> { + let vars_value = match map.entry("vars") { + Entry::Vacant(slot) => slot.insert(ManifestValue::Object(ManifestMap::new())), + Entry::Occupied(slot) => { + let value = slot.into_mut(); + match value { + ManifestValue::Object(_) => value, + other => { + return Err(anyhow::anyhow!( + "target.vars must be an object, got: {other:?}" + )); + } + } } }; + + let vars = vars_value + .as_object_mut() + .expect("vars entry ensured to be an object"); vars.insert( - YamlValue::String("item".into()), - 
serde_yml::to_value(item).context("serialise item")?, - ); - vars.insert( - YamlValue::String("index".into()), - YamlValue::Number(u64::try_from(index).expect("index overflow").into()), + "item".into(), + serde_json::to_value(item).context("serialise item")?, ); - map.insert(vars_key, YamlValue::Mapping(vars)); + let index_value = ManifestValue::Number(JsonNumber::from(index as u64)); + vars.insert("index".into(), index_value); Ok(()) } -fn as_str<'a>(value: &'a YamlValue, field: &str) -> Result<&'a str> { +fn as_str<'a>(value: &'a ManifestValue, field: &str) -> Result<&'a str> { value .as_str() .ok_or_else(|| anyhow::anyhow!("{field} must be a string expression")) @@ -118,7 +122,7 @@ mod tests { #[test] fn expand_foreach_expands_sequence_values() -> Result<()> { let env = Environment::new(); - let mut doc: YamlValue = serde_yml::from_str( + let mut doc: ManifestValue = serde_saphyr::from_str( "targets: - name: literal foreach: @@ -130,25 +134,22 @@ mod tests { expand_foreach(&mut doc, &env)?; let targets = doc .get("targets") - .and_then(|v| v.as_sequence()) + .and_then(|v| v.as_array()) .expect("targets sequence"); assert_eq!(targets.len(), 2); for (idx, target) in targets.iter().enumerate() { - let map = target.as_mapping().expect("target map"); - let vars_key = YamlValue::String("vars".into()); + let map = target.as_object().expect("target map"); let vars = map - .get(&vars_key) - .and_then(|v| v.as_mapping()) + .get("vars") + .and_then(|v| v.as_object()) .expect("vars map"); - let index_key = YamlValue::String("index".into()); - let index_val = vars.get(&index_key).expect("index value"); - let item_key = YamlValue::String("item".into()); - let item_val = vars.get(&item_key).expect("item value"); - let YamlValue::Number(index_num) = index_val else { + let index_val = vars.get("index").expect("index value"); + let item_val = vars.get("item").expect("item value"); + let ManifestValue::Number(index_num) = index_val else { panic!("index should be numeric: 
{index_val:?}"); }; assert_eq!(index_num.as_u64().expect("u64"), idx as u64); - let YamlValue::Number(item_num) = item_val else { + let ManifestValue::Number(item_num) = item_val else { panic!("item should be numeric: {item_val:?}"); }; assert_eq!(item_num.as_u64().expect("u64"), (idx + 1) as u64); @@ -159,7 +160,7 @@ mod tests { #[test] fn expand_foreach_applies_when_expression() -> Result<()> { let env = Environment::new(); - let mut doc: YamlValue = serde_yml::from_str( + let mut doc: ManifestValue = serde_saphyr::from_str( "targets: - name: literal foreach: '[1, 2, 3]' @@ -168,20 +169,18 @@ mod tests { expand_foreach(&mut doc, &env)?; let targets = doc .get("targets") - .and_then(|v| v.as_sequence()) + .and_then(|v| v.as_array()) .expect("targets sequence"); assert_eq!(targets.len(), 2); let indexes: Vec = targets .iter() .map(|target| { - let map = target.as_mapping().expect("target map"); - let vars_key = YamlValue::String("vars".into()); + let map = target.as_object().expect("target map"); let vars = map - .get(&vars_key) - .and_then(|v| v.as_mapping()) + .get("vars") + .and_then(|v| v.as_object()) .expect("vars map"); - let index_key = YamlValue::String("index".into()); - let YamlValue::Number(num) = vars.get(&index_key).expect("index value") else { + let ManifestValue::Number(num) = vars.get("index").expect("index value") else { panic!("index missing"); }; num.as_u64().expect("u64") @@ -190,4 +189,36 @@ mod tests { assert_eq!(indexes, vec![1, 2]); Ok(()) } + + #[test] + fn expand_foreach_preserves_object_key_order() -> Result<()> { + let env = Environment::new(); + let yaml = r"targets: + - name: literal + vars: + existing: keep + foreach: + - 1 + - 2 + when: 'true' + after: done +"; + let mut doc: ManifestValue = serde_saphyr::from_str(yaml)?; + expand_foreach(&mut doc, &env)?; + let targets = doc + .get("targets") + .and_then(|v| v.as_array()) + .expect("targets sequence"); + assert_eq!(targets.len(), 2); + for target in targets { + let map = 
target.as_object().expect("target object"); + let keys: Vec<&str> = map.keys().map(String::as_str).collect(); + assert_eq!( + keys, + ["name", "vars", "after"], + "key order should remain stable" + ); + } + Ok(()) + } } diff --git a/src/manifest/jinja_macros.rs b/src/manifest/jinja_macros.rs index 52ae9a63..b52ffc69 100644 --- a/src/manifest/jinja_macros.rs +++ b/src/manifest/jinja_macros.rs @@ -5,13 +5,13 @@ //! rendering environment so manifest templates can invoke them like built-in //! helpers. +use super::ManifestValue; use crate::ast::MacroDefinition; use anyhow::{Context, Result}; use minijinja::{ AutoEscape, Environment, Error, ErrorKind, State, value::{Kwargs, Object, Rest, Value}, }; -use serde_yml::Value as YamlValue; use std::{ mem, ptr::NonNull, @@ -97,12 +97,12 @@ pub(crate) fn register_macro( /// /// Returns an error if the YAML shape is invalid, any macro signature is /// malformed, or template compilation fails. -pub(crate) fn register_manifest_macros(doc: &YamlValue, env: &mut Environment) -> Result<()> { +pub(crate) fn register_manifest_macros(doc: &ManifestValue, env: &mut Environment) -> Result<()> { let Some(macros) = doc.get("macros").cloned() else { return Ok(()); }; - let defs: Vec = serde_yml::from_value(macros) + let defs: Vec = serde_json::from_value(macros) .context("macros must be a sequence of mappings with string signature/body")?; for (idx, def) in defs.iter().enumerate() { diff --git a/src/manifest/render.rs b/src/manifest/render.rs index 616c4eb8..0740128e 100644 --- a/src/manifest/render.rs +++ b/src/manifest/render.rs @@ -1,8 +1,8 @@ //! Renders manifest templates using `MiniJinja` before execution. +use super::ManifestValue; use crate::ast::{NetsukeManifest, Recipe, StringOrList, Target, Vars}; use anyhow::{Context, Result}; use minijinja::Environment; -use serde_yml::Value as YamlValue; /// Render manifest targets and rules by evaluating template expressions. 
/// @@ -67,7 +67,7 @@ fn render_target(target: &mut Target, env: &Environment) -> Result<()> { fn render_vars(vars: &mut Vars, env: &Environment) -> Result<()> { let snapshot = vars.clone(); for (key, value) in vars.iter_mut() { - if let YamlValue::String(s) = value { + if let ManifestValue::String(s) = value { *s = render_str_with(env, s, &snapshot, || format!("render var '{key}'"))?; } } @@ -107,11 +107,11 @@ mod tests { fn sample_manifest() -> Result { let mut target_vars = Vars::new(); - target_vars.insert("greet".into(), YamlValue::String("hello".into())); - target_vars.insert("subject".into(), YamlValue::String("world".into())); + target_vars.insert("greet".into(), ManifestValue::String("hello".into())); + target_vars.insert("subject".into(), ManifestValue::String("world".into())); target_vars.insert( "message".into(), - YamlValue::String("{{ greet }} {{ subject }}".into()), + ManifestValue::String("{{ greet }} {{ subject }}".into()), ); let target = Target { @@ -137,7 +137,10 @@ mod tests { }; let mut manifest_vars = Vars::new(); - manifest_vars.insert("message".into(), YamlValue::String("hello world".into())); + manifest_vars.insert( + "message".into(), + ManifestValue::String("hello world".into()), + ); Ok(NetsukeManifest { netsuke_version: Version::parse("1.0.0")?, diff --git a/src/manifest/tests.rs b/src/manifest/tests.rs index dcf2b2b9..4bdaee52 100644 --- a/src/manifest/tests.rs +++ b/src/manifest/tests.rs @@ -11,7 +11,6 @@ use minijinja::{ value::{Kwargs, Value}, }; use rstest::{fixture, rstest}; -use serde_yml::value::Mapping; fn render_with(env: &Environment, template: &str) -> AnyResult { Ok(env.render_str(template, ())?) 
@@ -122,12 +121,12 @@ fn register_macro_is_reusable(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_validates_shape(mut strict_env: Environment) { - let mut mapping = Mapping::new(); + let mut mapping = ManifestMap::new(); mapping.insert( - YamlValue::from("macros"), - YamlValue::from(vec![YamlValue::from(42)]), + "macros".into(), + ManifestValue::Array(vec![ManifestValue::from(42)]), ); - let doc = YamlValue::Mapping(mapping); + let doc = ManifestValue::Object(mapping); let err = register_manifest_macros(&doc, &mut strict_env).expect_err("shape error"); assert!( err.to_string() @@ -136,14 +135,49 @@ fn register_manifest_macros_validates_shape(mut strict_env: Environment) { ); } +#[rstest] +fn register_manifest_macros_rejects_non_string_values(mut strict_env: Environment) { + let mut macro_mapping = ManifestMap::new(); + macro_mapping.insert("signature".into(), ManifestValue::from("greet(name)")); + macro_mapping.insert( + "body".into(), + ManifestValue::Number(serde_json::Number::from(42)), + ); + let macros = ManifestValue::Array(vec![ManifestValue::Object(macro_mapping)]); + let mut doc = ManifestMap::new(); + doc.insert("macros".into(), macros); + let doc = ManifestValue::Object(doc); + + let err = register_manifest_macros(&doc, &mut strict_env) + .expect_err("non-string macro body should fail"); + let msg = err.to_string(); + assert!(msg.contains("macros"), "unexpected error: {msg}"); +} + +#[test] +fn manifest_macros_with_non_string_keys_fail_to_parse() { + let yaml = r#" +macros: + - ? 
[not, string] + : signature: "greet(name)" + body: "Hello" +"#; + let err = serde_saphyr::from_str::(yaml).expect_err("expected parse failure"); + let msg = err.to_string(); + assert!( + msg.contains("expected string scalar") || msg.contains("key") || msg.contains("mapping"), + "{msg}" + ); +} + #[rstest] fn register_manifest_macros_requires_body(mut strict_env: Environment) { - let mut macro_mapping = Mapping::new(); - macro_mapping.insert(YamlValue::from("signature"), YamlValue::from("greet(name)")); - let macros = YamlValue::Sequence(vec![YamlValue::Mapping(macro_mapping)]); - let mut doc = Mapping::new(); - doc.insert(YamlValue::from("macros"), macros); - let doc = YamlValue::Mapping(doc); + let mut macro_mapping = ManifestMap::new(); + macro_mapping.insert("signature".into(), ManifestValue::from("greet(name)")); + let macros = ManifestValue::Array(vec![ManifestValue::Object(macro_mapping)]); + let mut doc = ManifestMap::new(); + doc.insert("macros".into(), macros); + let doc = ManifestValue::Object(doc); let err = register_manifest_macros(&doc, &mut strict_env).expect_err("missing macro body"); assert!(err.to_string().contains("body"), "{err}"); @@ -151,7 +185,7 @@ fn register_manifest_macros_requires_body(mut strict_env: Environment) { #[rstest] fn register_manifest_macros_supports_multiple(mut strict_env: Environment) { - let yaml = serde_yml::from_str::( + let yaml = serde_saphyr::from_str::( "macros:\n - signature: \"greet(name)\"\n body: |\n Hello {{ name }}\n - signature: \"shout(text)\"\n body: |\n {{ text | upper }}\n", ) .expect("yaml value"); diff --git a/tests/ast_tests.rs b/tests/ast_tests.rs index edc35953..80622052 100644 --- a/tests/ast_tests.rs +++ b/tests/ast_tests.rs @@ -81,6 +81,28 @@ fn unknown_fields() { assert!(parse_manifest(yaml).is_err()); } +#[test] +fn vars_section_must_be_object() { + let yaml = r#" + netsuke_version: "1.0.0" + vars: + - not: mapping + targets: + - name: hello + command: "echo hi" + "#; + let err = 
parse_manifest(yaml).expect_err("vars should be an object"); + let chain = err + .chain() + .map(ToString::to_string) + .collect::>() + .join("\n"); + assert!( + chain.contains("vars must be an object with string keys"), + "unexpected error message: {chain}" + ); +} + #[test] fn empty_lists_and_maps() { let yaml = r#" @@ -217,11 +239,33 @@ fn parses_macro_definitions() { assert_eq!(macro_def.signature, "greet(name)"); assert!(macro_def.body.contains("Hello {{ name }}")); - let serialised = serde_yml::to_string(&manifest.macros).expect("serialise macros"); + let serialised = serde_saphyr::to_string(&manifest.macros).expect("serialise macros"); assert!(serialised.contains("greet(name)")); assert!(serialised.contains("Hello {{ name }}")); } +#[test] +fn macro_serialization_with_special_characters_round_trips() { + let special_signature = "greet_special(name, emoji='😀', note=\"hi\")"; + let special_body = "Hello \"{{ name }}\"\nLine two with unicode 😀"; + + let macro_def = MacroDefinition { + signature: special_signature.to_string(), + body: special_body.to_string(), + }; + + let serialised = serde_saphyr::to_string(&vec![macro_def.clone()]).expect("serialise macros"); + assert!(serialised.contains("greet_special")); + assert!(serialised.contains("unicode 😀")); + + let deserialised: Vec = + serde_saphyr::from_str(&serialised).expect("deserialise macros"); + assert_eq!(deserialised.len(), 1); + let recovered = deserialised.first().expect("macro entry"); + assert_eq!(recovered.signature, macro_def.signature); + assert_eq!(recovered.body, macro_def.body); +} + #[test] fn macro_definition_rejects_invalid_types() { let yaml = r#" diff --git a/tests/manifest_jinja_tests.rs b/tests/manifest_jinja_tests.rs index 79f059e2..5c7d8e3e 100644 --- a/tests/manifest_jinja_tests.rs +++ b/tests/manifest_jinja_tests.rs @@ -449,7 +449,9 @@ fn foreach_vars_must_be_mapping() { let err = manifest::from_str(&yaml).expect_err("parse should fail"); assert!( - 
err.to_string().contains("target.vars must be a mapping"), + err.chain() + .map(ToString::to_string) + .any(|msg| msg.contains("target.vars must be an object")), "unexpected error: {err}" ); } diff --git a/tests/steps/manifest_steps.rs b/tests/steps/manifest_steps.rs index bfce808d..a3065abf 100644 --- a/tests/steps/manifest_steps.rs +++ b/tests/steps/manifest_steps.rs @@ -4,7 +4,7 @@ use crate::CliWorld; use cucumber::{given, then, when}; use netsuke::{ ast::{Recipe, StringOrList, Target}, - manifest, + manifest::{self, ManifestValue}, }; use std::collections::BTreeSet; use std::ffi::OsStr; @@ -296,7 +296,7 @@ fn assert_target_index(world: &CliWorld, index: usize, expected: usize) { let actual = target .vars .get(INDEX_KEY) - .and_then(serde_yml::Value::as_u64) + .and_then(ManifestValue::as_u64) .and_then(|n| usize::try_from(n).ok()) .unwrap_or_else(|| panic!("target {index} missing index")); assert_eq!(actual, expected, "unexpected index for target {index}"); diff --git a/tests/yaml_error_tests.rs b/tests/yaml_error_tests.rs index 4f1ad259..905bf230 100644 --- a/tests/yaml_error_tests.rs +++ b/tests/yaml_error_tests.rs @@ -15,13 +15,31 @@ fn normalise_report(report: &str) -> String { #[case( "targets:\n\t- name: test\n", &[ - "line 2, column 1", + "line 2, column 2", "Use spaces for indentation; tabs are invalid in YAML.", ], )] #[case( "targets:\n - name: hi\n command echo\n", - &["line 3", "expected ':'"], + &[ + "line 4, column 1", + "simple key expect ':'", + ], +)] +#[case( + concat!( + "netsuke_version: '1.0.0'\n", + "targets:\n", + " - name: root\n", + " command: echo\n", + " vars:\n", + " nested:\n", + " deeper: { key: value\n", + ), + &[ + "line 8, column 1", + "did not find expected ',' or '}'", + ], )] #[case( concat!( @@ -41,24 +59,24 @@ fn normalise_report(report: &str) -> String { "", &[ "manifest parse error", - "missing field", - "netsuke_version", + "manifest structure error", + "invalid type: null, expected struct NetsukeManifest", ], )] #[case( 
" \n ", &[ "manifest parse error", - "missing field", - "netsuke_version", + "manifest structure error", + "invalid type: null, expected struct NetsukeManifest", ], )] #[case( "# just a comment\n# another comment", &[ "manifest parse error", - "missing field", - "netsuke_version", + "manifest structure error", + "invalid type: null, expected struct NetsukeManifest", ], )] // No location information should default to the start of the file.