Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ itertools = "0.12"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["fmt"] }
serde_json = "1"
serde_json_canonicalizer = "0.3"
tempfile = "3.8.0"

[lints.clippy]
Expand Down
44 changes: 22 additions & 22 deletions docs/netsuke-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,8 @@ use std::path::{Path, PathBuf};
/// The complete, static build graph.
pub struct BuildGraph {
/// A map of all unique actions (rules) in the build.
/// The key is a hash of the action's properties to enable deduplication.
/// The key is a hash of a canonical JSON serialisation of the action's
/// properties to enable deduplication.
pub actions: HashMap<String, Action>,

/// A map of all target files to be built. The key is the output path.
Expand Down Expand Up @@ -1109,9 +1110,10 @@ action identifier and carries the `phony` and `always` flags verbatim from the
manifest. No Ninja specific placeholders are stored in the IR to keep the
representation portable.

- Actions are deduplicated using a SHA-256 hash of their recipe and metadata.
Identical commands therefore share the same identifier which keeps the IR
deterministic for snapshot tests.
- Actions are deduplicated using a SHA-256 hash of a canonical JSON
serialisation of their recipe and metadata. Identical commands therefore
share the same identifier which keeps the IR deterministic for snapshot
tests.
- Multiple rule references in a single target are not yet supported. The IR
generator reports `IrGenError::MultipleRules` when encountered.
- Duplicate output files are rejected. Attempting to define the same output
Expand Down Expand Up @@ -1261,29 +1263,27 @@ libraries.[^27]
Rust

```rust
// In src/ir.rs
use thiserror::Error;
use std::path::PathBuf;

#[derive(Debug, Error)]
pub enum IrGenError {
#[error("rule not found: {rule_name} for target {target_name}")]
RuleNotFound {
target_name: String,
rule_name: String,
},
#[error("rule '{rule_name}' referenced by target '{target_name}' was not found")]
RuleNotFound { target_name: String, rule_name: String },

#[error("multiple rules for target '{target_name}': {rules:?}")]
MultipleRules { target_name: String, rules: Vec<String> },

#[error("No rules specified for target {target_name}")]
EmptyRule { target_name: String },

#[error("duplicate target outputs: {outputs:?}")]
DuplicateOutput { outputs: Vec<String> },

#[error("circular dependency detected: {cycle:?}")]
CircularDependency {
cycle: Vec<PathBuf>,
},

#[error("dependency not found: {dependency_name} for target {target_name}")]
DependencyNotFound {
target_name: String,
dependency_name: String,
},
}
CircularDependency { cycle: Vec<PathBuf> },

#[error("failed to serialise action: {0}")]
ActionSerialisation(#[from] serde_json::Error),
}
```

- `anyhow`: This crate will be used in the main application logic (`main.rs`)
Expand Down
93 changes: 28 additions & 65 deletions src/hasher.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! Action hashing utilities.
//!
//! This module provides the [`ActionHasher`] type used to compute a stable
//! SHA-256 digest for [`Action`] definitions. The hash is used to deduplicate
//! identical actions when generating the build graph.
//! [`ActionHasher`] computes stable SHA-256 digests for [`Action`] definitions.
//! Each action is serialised to canonical JSON before hashing, ensuring
//! identical actions map to the same digest even as the struct evolves.
//!
//! # Examples
//!
Expand All @@ -23,80 +23,43 @@
//! assert!(!hash.is_empty());
//! ```

use itoa::Buffer;
use sha2::{Digest, Sha256};

use crate::ast::{Recipe, StringOrList};
use crate::ir::Action;
use serde_json_canonicalizer::to_writer;
use std::io::{self, Write};

/// Computes stable digests for [`Action`] definitions.
pub struct ActionHasher;

impl ActionHasher {
/// Calculate the hash of an [`Action`].
#[must_use]
pub fn hash(action: &Action) -> String {
let mut hasher = Sha256::new();
Self::hash_recipe(&mut hasher, &action.recipe);
Self::hash_optional_fields(&mut hasher, action);
format!("{:x}", hasher.finalize())
}

fn hash_recipe(hasher: &mut Sha256, recipe: &Recipe) {
match recipe {
Recipe::Command { command } => {
hasher.update(b"cmd");
Self::update_with_len(hasher, command.as_bytes());
}
Recipe::Script { script } => {
hasher.update(b"scr");
Self::update_with_len(hasher, script.as_bytes());
}
Recipe::Rule { rule } => {
hasher.update(b"rule");
Self::hash_rule_reference(hasher, rule);
}
}
}
/// Adapter that lets a mutably borrowed [`Digest`] act as an [`io::Write`] sink.
///
/// Every buffer handed to `write` is forwarded to the wrapped hasher's
/// `update`, so a serialiser that targets a writer can feed its output
/// directly into the digest.
struct DigestWriter<'a, D: Digest>(&'a mut D);

fn hash_optional_fields(hasher: &mut Sha256, action: &Action) {
Self::hash_optional_string(hasher, action.description.as_ref());
Self::hash_optional_string(hasher, action.depfile.as_ref());
Self::hash_optional_string(hasher, action.deps_format.as_ref());
Self::hash_optional_string(hasher, action.pool.as_ref());
hasher.update(if action.restat { b"1" } else { b"0" });
impl<D: Digest> Write for DigestWriter<'_, D> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.0.update(buf);
Ok(buf.len())
}

fn hash_rule_reference(hasher: &mut Sha256, rule: &StringOrList) {
match rule {
StringOrList::String(r) => Self::update_with_len(hasher, r.as_bytes()),
StringOrList::List(list) => {
// Preserve the original sequence so that different orders
// generate distinct hashes.
for r in list {
Self::update_with_len(hasher, r.as_bytes());
}
}
StringOrList::Empty => {}
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}

fn hash_optional_string(hasher: &mut Sha256, value: Option<&String>) {
match value {
Some(v) => {
hasher.update(b"1");
Self::update_with_len(hasher, v.as_bytes());
}
None => hasher.update(b"0"),
impl ActionHasher {
/// Calculate the hash of an [`Action`].
///
/// Returns a lowercase hex-encoded SHA-256 of the action's canonical JSON.
///
/// # Errors
///
/// Returns an error if the action cannot be serialised to JSON.
pub fn hash(action: &Action) -> Result<String, serde_json::Error> {
let mut hasher = Sha256::new();
{
// Canonical JSON: compact formatting with sorted keys.
let mut writer = DigestWriter(&mut hasher);
to_writer(action, &mut writer)?;
}
}

fn update_with_len(hasher: &mut Sha256, bytes: &[u8]) {
// Write the length prefix into a stack buffer to avoid heap allocation.
let mut buf = Buffer::new();
let len_str = buf.format(bytes.len());
hasher.update(len_str.as_bytes());
hasher.update(b":");
hasher.update(bytes);
Ok(format!("{:x}", hasher.finalize()))
}
}
26 changes: 18 additions & 8 deletions src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,18 @@ pub struct BuildGraph {
}

// Kept ahead of the doc comment so the documentation and derive attach to
// `Action` rather than to this import.
use serde::Serialize;

/// A reusable command analogous to a Ninja rule.
///
/// The [`Serialize`] derive allows an action to be written out through serde.
/// Optional fields that are `None` are omitted from the serialised output.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Action {
    /// The recipe carried by this action.
    pub recipe: Recipe,
    /// Optional description; omitted from the serialised output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Optional depfile; omitted from the serialised output when `None`.
    // NOTE(review): presumably mirrors Ninja's `depfile` setting — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub depfile: Option<String>,
    /// Optional deps format; omitted from the serialised output when `None`.
    // NOTE(review): presumably mirrors Ninja's `deps` setting — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deps_format: Option<String>,
    /// Optional pool name; omitted from the serialised output when `None`.
    // NOTE(review): presumably mirrors Ninja's `pool` setting — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pool: Option<String>,
    /// Restat flag; always present in the serialised output.
    // NOTE(review): presumably mirrors Ninja's `restat` setting — confirm.
    pub restat: bool,
}
Expand Down Expand Up @@ -96,6 +102,9 @@ pub enum IrGenError {

#[error("circular dependency detected: {cycle:?}")]
CircularDependency { cycle: Vec<PathBuf> },

#[error("failed to serialise action: {0}")]
ActionSerialisation(#[from] serde_json::Error),
}

impl BuildGraph {
Expand All @@ -109,7 +118,7 @@ impl BuildGraph {
let mut graph = Self::default();
let mut rule_map = HashMap::new();

Self::process_rules(manifest, &mut graph.actions, &mut rule_map);
Self::process_rules(manifest, &mut graph.actions, &mut rule_map)?;
Self::process_targets(manifest, &mut graph.actions, &mut graph.targets, &rule_map)?;
Self::process_defaults(manifest, &mut graph.default_targets);

Expand All @@ -122,11 +131,12 @@ impl BuildGraph {
manifest: &NetsukeManifest,
actions: &mut HashMap<String, Action>,
rule_map: &mut HashMap<String, String>,
) {
) -> Result<(), IrGenError> {
for rule in &manifest.rules {
let hash = register_action(actions, rule.recipe.clone(), rule.description.clone());
let hash = register_action(actions, rule.recipe.clone(), rule.description.clone())?;
rule_map.insert(rule.name.clone(), hash);
}
Ok(())
}

fn process_targets(
Expand All @@ -141,7 +151,7 @@ impl BuildGraph {
let action_id = match &target.recipe {
Recipe::Rule { rule } => resolve_rule(rule, rule_map, &target_name)?,
Recipe::Command { .. } | Recipe::Script { .. } => {
register_action(actions, target.recipe.clone(), None)
register_action(actions, target.recipe.clone(), None)?
}
};

Expand Down Expand Up @@ -183,7 +193,7 @@ fn register_action(
actions: &mut HashMap<String, Action>,
recipe: Recipe,
description: Option<String>,
) -> String {
) -> Result<String, IrGenError> {
let action = Action {
recipe,
description,
Expand All @@ -192,9 +202,9 @@ fn register_action(
pool: None,
restat: false,
};
let hash = ActionHasher::hash(&action);
let hash = ActionHasher::hash(&action).map_err(IrGenError::ActionSerialisation)?;
actions.entry(hash.clone()).or_insert(action);
hash
Ok(hash)
}

fn map_string_or_list<T, F>(sol: &StringOrList, f: F) -> Vec<T>
Expand Down
15 changes: 8 additions & 7 deletions tests/hasher_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use rstest::rstest;
pool: None,
restat: false,
},
"a0f6e2cd3b9b3cee0bf94a7d53bce56cf4178dfe907bb1cb7c832f47846baf38"
"0fe3670f0746dcec34768df158d814ac099e416b6045e7e213d0aabd6aa761cb"
)]
#[case(
Action {
Expand All @@ -26,7 +26,7 @@ use rstest::rstest;
pool: None,
restat: true,
},
"cf8e97357820acf6f66037dcf977ee36c88c2811d60342db30c99507d24a0d60"
"9b0289f92ea0e374eecdaf50c8c9080547635aaff38d07fe2a278af6894c3207"
)]
#[case(
Action {
Expand All @@ -37,7 +37,7 @@ use rstest::rstest;
pool: None,
restat: false,
},
"69f72afccc2aa5a709af1139a9c7ef5f4f72e57cf5376e6c043e575f68f2ef8d"
"9733343b512253e636fbacfea40ef4f5771d49409fcda026aec7c7ce2f5405ec"
)]
#[case(
Action {
Expand All @@ -48,7 +48,7 @@ use rstest::rstest;
pool: None,
restat: false,
},
"c28b5c0b7f20bf1093cbab990976b904268f173413f54b7007166b2c02f498f3"
"9b53c477668394e59eca5b34416ef7ad7fb5799ca96dd283e81d7acda6c56006"
)]
#[case(
Action {
Expand All @@ -59,7 +59,7 @@ use rstest::rstest;
pool: None,
restat: false,
},
"28adc0857704aa0c54c3bc624cb2dc70c101c3936987b20ae520a20319f591c2"
"57023b1c00f7daf410d3d2077346e38014d3612c278aadef73a8484c94bdcb77"
)]
// Order of rule names influences the digest.
#[case(
Expand All @@ -71,8 +71,9 @@ use rstest::rstest;
pool: None,
restat: true,
},
"b93ff0102089f1f1a3fe9eec082b59d5aab58271a40724ccdfdaade6a68fe340"
"d5f1a262a95b75db3a7a79a5855eb27b6b430833e7ba93538502a16ebd03f50b"
)]
/// Checks that hashing each parameterised `action` yields the pinned digest.
///
/// `ActionHasher::hash` is fallible, so the result is unwrapped before the
/// comparison; a serialisation failure fails the test with the `expect`
/// message instead of being compared against the expected string.
fn hash_action_is_stable(#[case] action: Action, #[case] expected: &str) {
    let digest = ActionHasher::hash(&action).expect("hash action");
    assert_eq!(digest, expected);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
source: tests/ninja_snapshot_tests.rs
expression: ninja_content
---
rule ca3067639652d0018b982cd2fc8262e3a02f4404f60148b8493de0f656d9b1a2
rule d3cc8be04150cb4e2d9ccbdbe94cf9f2e8ade54bb4701b8faf99cafeb456a75d
command = python3 -c 'import os,sys; open(sys.argv[1],"a").close()' $out

build out/a: ca3067639652d0018b982cd2fc8262e3a02f4404f60148b8493de0f656d9b1a2 in/a
build out/a: d3cc8be04150cb4e2d9ccbdbe94cf9f2e8ade54bb4701b8faf99cafeb456a75d in/a
Loading