From e6e12b03e14037ae935f4bbc29f24dff6a165442 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 10:46:13 -0400 Subject: [PATCH 01/24] feat(relay/git): object-store backend with CAS pointer + content-addressed packs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces `crates/sprout-relay/src/api/git/store.rs` — the object-store half of the git-on-object-storage protocol (docs/git-on-object-storage.md). API surface: - `put_pack` / `put_manifest` — create-only (`If-None-Match: *`), content-addressed, idempotent. Maps A1: writes that would overwrite a content-addressed key (i.e. identical bytes) succeed silently. - `get` / `get_verified` — `get_verified` enforces A1 detectability by re-hashing returned bytes against the expected SHA-256 digest. - `get_pointer` — HEAD+GET, returns `Option<(ETag, Bytes)>` (None on 404). - `put_pointer(key, body, Precond)` — the CAS primitive (§Push step 7). Returns `CasOutcome::Won(ETag)` or `CasOutcome::LostRace`; 412 is a *semantic* result, not an error. Sharp edge documented at module level: `rust-s3` 0.37 + `fail-on-err` (shared with `sprout-media`) makes non-2xx arrive as `S3Error::HttpFailWithBody`. `classify_cas` maps 412 → `LostRace` and bubbles everything else. Verified empirically against MinIO in `probe::probe_412_surfacing` and `probe::probe_full_roundtrip` — both gated on `SPROUT_GIT_S3_PROBE=1` so CI doesn't depend on a live bucket. Empirically verified: - `If-None-Match: *` collision → `HttpFailWithBody(412, _)` on MinIO. - `If-Match` wrong ETag → `HttpFailWithBody(412, _)`. - MinIO returns the new object ETag in the PUT response, so callers can chain `Won → IfMatch → Won` without a HEAD round-trip. The full chain is asserted in `probe_full_roundtrip`. - `get_verified` rejects digest mismatch. 196 relay tests green (unfiltered). `cargo clippy --all-targets -D warnings` and `cargo fmt --check` clean. Probe tests run on demand only. Co-authored-with: Eva (ticket), Quinn (CAS interface coordination) Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- Cargo.lock | 1 + crates/sprout-relay/Cargo.toml | 1 + crates/sprout-relay/src/api/git/mod.rs | 1 + crates/sprout-relay/src/api/git/store.rs | 438 +++++++++++++++++++++++ 4 files changed, 441 insertions(+) create mode 100644 crates/sprout-relay/src/api/git/store.rs diff --git a/Cargo.lock b/Cargo.lock index ce1606e13..f3298c6e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4225,6 +4225,7 @@ dependencies = [ "nostr", "rand 0.10.1", "redis", + "rust-s3", "serde", "serde_json", "serde_yaml", diff --git a/crates/sprout-relay/Cargo.toml b/crates/sprout-relay/Cargo.toml index 997e5de47..9d60d8734 100644 --- a/crates/sprout-relay/Cargo.toml +++ b/crates/sprout-relay/Cargo.toml @@ -46,6 +46,7 @@ base64 = "0.22" sprout-sdk = { workspace = true } sprout-workflow = { workspace = true, features = ["reqwest"] } sprout-media = { workspace = true } +s3 = { version = "0.37", package = "rust-s3", default-features = false, features = ["tokio-rustls-tls", "fail-on-err", "tags"] } bytes = "1" infer = "0.19" serde_yaml = { workspace = true } diff --git a/crates/sprout-relay/src/api/git/mod.rs b/crates/sprout-relay/src/api/git/mod.rs index 538122bc4..a406c8aff 100644 --- a/crates/sprout-relay/src/api/git/mod.rs +++ b/crates/sprout-relay/src/api/git/mod.rs @@ -24,6 +24,7 @@ use crate::state::AppState; pub mod hook; pub mod policy; +pub mod store; pub mod transport; pub use transport::git_router; diff --git a/crates/sprout-relay/src/api/git/store.rs b/crates/sprout-relay/src/api/git/store.rs new file mode 100644 index 000000000..2e13adaaa --- /dev/null +++ b/crates/sprout-relay/src/api/git/store.rs @@ -0,0 +1,438 @@ +//! Object-store backend for git-on-object-storage. +//! +//! Implements the create-only, content-addressed write discipline (axiom A1) +//! and the CAS pointer swap (axiom A3) described in +//! `docs/git-on-object-storage.md`. +//! +//! ## The 412 sharp edge +//! +//! `rust-s3 = "0.37"` is shared across the workspace with `sprout-media`. The +//! `fail-on-err` Cargo feature is unified ON across the build graph, which +//! means non-2xx responses arrive here as `S3Error::HttpFailWithBody(code, +//! body)` *before* the caller sees `ResponseData`. The pointer-CAS path treats +//! the precondition-failure status (412) as a *semantic* result (`LostRace`), +//! not an error — see `classify_cas`. Empirically verified against MinIO in +//! `probe::probe_412_surfacing`. +//! +//! ## Content addressing (A1) +//! +//! Pack and manifest keys are the SHA-256 of their bytes. Writes use +//! `If-None-Match: *` so the same key is never overwritten. Readers verify +//! object bytes against the expected digest on `get_verified`; any mismatch is +//! *detectable*, not silent — that is what A1's "create-only + content-address" +//! discipline buys us, independent of bucket immutability features. + +#![allow(dead_code)] // wired in by the push path in a follow-up commit + +use bytes::Bytes; +use s3::creds::Credentials; +use s3::error::S3Error; +use s3::{Bucket, Region}; +use sha2::{Digest, Sha256}; + +/// Opaque object-store ETag (used for `If-Match` on pointer CAS). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ETag(pub String); + +/// Precondition for `put_pointer`. +#[derive(Debug, Clone)] +pub enum Precond { + /// Create-only: succeed iff the pointer does not yet exist. + IfNoneMatchStar, + /// CAS: succeed iff the current ETag matches. + IfMatch(ETag), +} + +/// Result of a CAS pointer write. +/// +/// `LostRace` is *not* an error — it is the standard outcome of a losing CAS +/// and must be classified here so callers can decide retry vs. non-ff. On +/// `Won`, the returned `ETag` is the PUT response's ETag and can be fed +/// directly into the next `IfMatch` round (verified empirically against MinIO +/// in `probe::probe_full_roundtrip`). If a backend ever omits the response +/// ETag, the value will be empty and the next `IfMatch(empty)` will be +/// rejected as a normal `LostRace` — no silent corruption, only a forced +/// retry via `get_pointer`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CasOutcome { + /// CAS succeeded; the new pointer ETag (suitable for the next `IfMatch`). + Won(ETag), + /// CAS lost the race (server returned 412). + LostRace, +} + +/// Errors that are *actually* errors — `LostRace` is not one. +#[derive(Debug, thiserror::Error)] +pub enum StoreError { + /// The requested key does not exist. + #[error("object not found: {0}")] + NotFound(String), + /// A1 detectability fired: the bytes at `key` do not hash to `expected`. + #[error("digest mismatch on {key}: expected {expected}, got {actual}")] + DigestMismatch { + /// Object key that was read. + key: String, + /// Digest the caller expected (the content-addressed key). + expected: String, + /// Digest computed from the returned bytes. + actual: String, + }, + /// Any other backend / transport error. + #[error("s3 backend error: {0}")] + Backend(#[from] S3Error), +} + +/// Object-store client for git refs. +pub struct GitStore { + bucket: Box, +} + +impl GitStore { + /// Build a client against an S3-compatible endpoint (e.g. MinIO). + /// + /// Uses path-style addressing for MinIO compatibility; AWS S3 accepts both. + pub fn new( + endpoint: &str, + access_key: &str, + secret_key: &str, + bucket_name: &str, + ) -> Result { + let region = Region::Custom { + region: "us-east-1".into(), + endpoint: endpoint.into(), + }; + let creds = Credentials::new(Some(access_key), Some(secret_key), None, None, None) + .map_err(|e| StoreError::Backend(S3Error::Credentials(e)))?; + let bucket = Bucket::new(bucket_name, region, creds) + .map_err(StoreError::Backend)? + .with_path_style(); + Ok(Self { bucket }) + } + + /// Create-only write of a content-addressed object (pack or manifest). + /// + /// Idempotent: a collision on the same content-addressed key is treated as + /// success, because the bytes are guaranteed to match (the key is their + /// digest). This is the §Push step 2 / step 6 primitive. + pub async fn put_immutable( + &self, + key: &str, + bytes: &[u8], + content_type: &str, + ) -> Result<(), StoreError> { + let mut headers = axum::http::HeaderMap::new(); + headers.insert(axum::http::header::IF_NONE_MATCH, "*".parse().unwrap()); + match self + .bucket + .put_object_with_content_type_and_headers(key, bytes, content_type, Some(headers)) + .await + { + Ok(resp) if (200..300).contains(&resp.status_code()) => Ok(()), + // 412 on a content-addressed key means the key already holds the same + // bytes (by construction). Treat as success — A1 is preserved. + Err(S3Error::HttpFailWithBody(412, _)) => Ok(()), + Ok(resp) => Err(StoreError::Backend(S3Error::HttpFailWithBody( + resp.status_code(), + "unexpected status".into(), + ))), + Err(e) => Err(StoreError::Backend(e)), + } + } + + /// `put_immutable` for a pack object (content-type `application/x-git-pack`). + pub async fn put_pack(&self, key: &str, bytes: &[u8]) -> Result<(), StoreError> { + self.put_immutable(key, bytes, "application/x-git-pack") + .await + } + + /// `put_immutable` for a manifest object (JSON). + pub async fn put_manifest(&self, key: &str, bytes: &[u8]) -> Result<(), StoreError> { + self.put_immutable(key, bytes, "application/json").await + } + + /// GET an object without digest verification. + /// + /// Prefer `get_verified` for pack/manifest reads — that is what enforces A1 + /// detectability. This raw `get` exists for the pointer (whose key is not a + /// digest). + pub async fn get(&self, key: &str) -> Result { + match self.bucket.get_object(key).await { + Ok(resp) => Ok(Bytes::from(resp.to_vec())), + Err(S3Error::HttpFailWithBody(404, _)) => Err(StoreError::NotFound(key.into())), + Err(e) => Err(StoreError::Backend(e)), + } + } + + /// GET an object and verify its bytes hash to `expected_digest` (hex SHA-256). + /// + /// This is the read-side enforcement of A1 — any deviation from the + /// content-addressed invariant becomes a `DigestMismatch` error, never a + /// silent corruption. + pub async fn get_verified( + &self, + key: &str, + expected_digest: &str, + ) -> Result { + let bytes = self.get(key).await?; + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let actual = hex::encode(hasher.finalize()); + if actual != expected_digest { + return Err(StoreError::DigestMismatch { + key: key.into(), + expected: expected_digest.into(), + actual, + }); + } + Ok(bytes) + } + + /// GET the pointer object, returning its ETag and bytes. + /// + /// Returns `Ok(None)` if the pointer does not exist (first-push case). + pub async fn get_pointer(&self, key: &str) -> Result, StoreError> { + // `get_object` does not surface the response ETag in 0.37; do a HEAD first + // to capture the ETag, then GET. We accept the extra round-trip — pointer + // objects are tiny and the read path already pays one network hop for the + // manifest after this. Alternative would be `request_with_url`-level access, + // which entangles us with rust-s3 internals. + let head = match self.bucket.head_object(key).await { + Ok((info, status)) if (200..300).contains(&status) => info, + Ok((_, 404)) => return Ok(None), + Err(S3Error::HttpFailWithBody(404, _)) => return Ok(None), + Ok((_, status)) => { + return Err(StoreError::Backend(S3Error::HttpFailWithBody( + status, + "unexpected head status".into(), + ))) + } + Err(e) => return Err(StoreError::Backend(e)), + }; + let etag = head + .e_tag + .ok_or_else(|| StoreError::Backend(S3Error::HttpFail))?; + let bytes = self.get(key).await?; + Ok(Some((ETag(etag), bytes))) + } + + /// Write the pointer under a precondition (§Push step 7 — the CAS). + /// + /// Returns `CasOutcome::LostRace` on 412 (the standard losing outcome). + /// On `CasOutcome::Won`, the returned `ETag` is read from the response + /// headers — callers use it as the `If-Match` value for the next CAS. + pub async fn put_pointer( + &self, + key: &str, + body: &[u8], + precond: Precond, + ) -> Result { + let mut headers = axum::http::HeaderMap::new(); + match &precond { + Precond::IfNoneMatchStar => { + headers.insert(axum::http::header::IF_NONE_MATCH, "*".parse().unwrap()); + } + Precond::IfMatch(ETag(tag)) => { + headers.insert( + axum::http::header::IF_MATCH, + tag.parse().map_err(|_| { + StoreError::Backend(S3Error::HttpFailWithBody( + 400, + format!("invalid etag {tag}"), + )) + })?, + ); + } + } + let result = self + .bucket + .put_object_with_content_type_and_headers(key, body, "application/json", Some(headers)) + .await; + Self::classify_cas(result) + } + + /// Map a rust-s3 PUT outcome to a `CasOutcome`. + /// + /// 412 → `LostRace`. 2xx → `Won(etag)` (etag read from response headers, + /// empty if missing — callers must tolerate empty etag and re-HEAD if they + /// need it strictly). Everything else bubbles as `StoreError::Backend`. + fn classify_cas( + result: Result, + ) -> Result { + match result { + Ok(resp) if (200..300).contains(&resp.status_code()) => { + let headers = resp.headers(); + let etag = headers + .get("etag") + .or_else(|| headers.get("ETag")) + .cloned() + .unwrap_or_default(); + Ok(CasOutcome::Won(ETag(etag))) + } + Err(S3Error::HttpFailWithBody(412, _)) => Ok(CasOutcome::LostRace), + Ok(resp) => Err(StoreError::Backend(S3Error::HttpFailWithBody( + resp.status_code(), + "unexpected status".into(), + ))), + Err(e) => Err(StoreError::Backend(e)), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn classify_cas_412_is_lost_race() { + let r = Err(S3Error::HttpFailWithBody(412, "PreconditionFailed".into())); + assert_eq!(GitStore::classify_cas(r).unwrap(), CasOutcome::LostRace); + } + + #[test] + fn classify_cas_other_4xx_bubbles() { + let r = Err(S3Error::HttpFailWithBody(403, "AccessDenied".into())); + assert!(matches!( + GitStore::classify_cas(r), + Err(StoreError::Backend(S3Error::HttpFailWithBody(403, _))) + )); + } +} + +#[cfg(test)] +mod probe { + //! Empirical probe of rust-s3 + `fail-on-err` + MinIO surfacing of 412. + //! + //! Run manually: + //! SPROUT_GIT_S3_PROBE=1 cargo test -p sprout-relay --lib \ + //! api::git::store::probe -- --nocapture --test-threads=1 + //! + //! Pre-req: `docker compose up minio` and the `sprout-git` bucket exists. + + use super::*; + + fn probe_enabled() -> bool { + std::env::var("SPROUT_GIT_S3_PROBE").as_deref() == Ok("1") + } + + fn store() -> GitStore { + GitStore::new( + "http://localhost:9000", + "sprout_dev", + "sprout_dev_secret", + "sprout-git", + ) + .expect("connect minio") + } + + fn sha256_hex(b: &[u8]) -> String { + let mut h = Sha256::new(); + h.update(b); + hex::encode(h.finalize()) + } + + #[tokio::test] + async fn probe_412_surfacing() { + if !probe_enabled() { + eprintln!("skipping: set SPROUT_GIT_S3_PROBE=1 to run against live MinIO"); + return; + } + let st = store(); + let key = format!("probe/cas-{}.txt", uuid::Uuid::new_v4()); + let mut hdrs = axum::http::HeaderMap::new(); + hdrs.insert(axum::http::header::IF_NONE_MATCH, "*".parse().unwrap()); + let r1 = st + .bucket + .put_object_with_content_type_and_headers( + &key, + b"first", + "text/plain", + Some(hdrs.clone()), + ) + .await; + assert!((200..300).contains(&r1.expect("first ok").status_code())); + let r2 = st + .bucket + .put_object_with_content_type_and_headers(&key, b"second", "text/plain", Some(hdrs)) + .await; + assert!(matches!(r2, Err(S3Error::HttpFailWithBody(412, _)))); + let _ = st.bucket.delete_object(&key).await; + } + + #[tokio::test] + async fn probe_full_roundtrip() { + if !probe_enabled() { + return; + } + let st = store(); + + // 1. put_immutable + get_verified happy path. + let bytes = b"hello, git on object store".to_vec(); + let key = format!("packs/{}", sha256_hex(&bytes)); + st.put_pack(&key, &bytes).await.expect("put_pack"); + let got = st + .get_verified(&key, &sha256_hex(&bytes)) + .await + .expect("verified read"); + assert_eq!(&got[..], &bytes[..]); + + // 2. put_immutable is idempotent (no error on second call). + st.put_pack(&key, &bytes).await.expect("idempotent"); + + // 3. get_verified detects corruption — wrong expected digest fails. + let bogus = "0".repeat(64); + let err = st.get_verified(&key, &bogus).await.unwrap_err(); + assert!(matches!(err, StoreError::DigestMismatch { .. })); + + // 4. pointer lifecycle: get_pointer (None) → put_pointer(IfNoneMatchStar) + // → get_pointer (Some) → put_pointer(IfMatch correct) → put_pointer(IfMatch wrong, LostRace). + let pkey = format!("pointers/{}.json", uuid::Uuid::new_v4()); + assert!(st.get_pointer(&pkey).await.expect("get none").is_none()); + + let p1 = br#"{"manifest":"d1"}"#; + let r = st + .put_pointer(&pkey, p1, Precond::IfNoneMatchStar) + .await + .expect("first cas"); + let e1 = match r { + CasOutcome::Won(e) => e, + CasOutcome::LostRace => panic!("first INM* should win"), + }; + eprintln!("Won.etag from PUT response: {:?}", e1.0); + + // Second INM* must lose. + let r = st + .put_pointer(&pkey, b"{}", Precond::IfNoneMatchStar) + .await + .expect("second cas"); + assert_eq!(r, CasOutcome::LostRace, "second INM* must lose"); + + // Chain CAS directly on the PUT-returned ETag (no HEAD round-trip). + // MinIO returns the ETag in the PUT response; this proves callers can + // chain `Won → IfMatch → Won` without re-reading the pointer. + assert!(!e1.0.is_empty(), "MinIO should populate PUT response ETag"); + let p2 = br#"{"manifest":"d2"}"#; + let r = st + .put_pointer(&pkey, p2, Precond::IfMatch(e1.clone())) + .await + .expect("cas2"); + let e2 = match r { + CasOutcome::Won(e) => e, + CasOutcome::LostRace => panic!("IfMatch with fresh etag should win"), + }; + + // Stale IfMatch (reuse the *first* etag, which has been superseded) → LostRace. + let r = st + .put_pointer(&pkey, b"{}", Precond::IfMatch(e1)) + .await + .expect("cas3"); + assert_eq!(r, CasOutcome::LostRace, "stale IfMatch must lose"); + + // get_pointer's etag matches the most recent PUT-returned etag. + let (etag_now, _body) = st.get_pointer(&pkey).await.expect("get").expect("exists"); + assert_eq!(etag_now, e2, "get_pointer etag matches PUT-response etag"); + + // Cleanup. + let _ = st.bucket.delete_object(&pkey).await; + let _ = st.bucket.delete_object(&key).await; + } +} From 4b8345fc2491bec407ea7a8ce4f5e8f56f10aed5 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 10:53:50 -0400 Subject: [PATCH 02/24] fix(relay/git/store): atomic pointer GET, constructive A1, conformance probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review hits from Max, Perci, and Mari, plus Mari's probe API: 1. **`get_pointer` is now a single GET** that reads the ETag from the GET response headers — not HEAD+GET. The two-call version could straddle a concurrent writer (HEAD's ETag and GET's body describing different pointer versions); a caller predicating `IfMatch(etag)` on the HEAD's ETag would be using a version it never actually read. Verified empirically that MinIO populates `etag` on GET responses (`probe::probe_get_exposes_etag`). 2. **`put_pack`/`put_manifest` no longer accept a key parameter.** The key is derived inside `put_immutable` as `/`. This makes the idempotency claim *constructive* rather than trusted: a 412 collision means the stored bytes' digest equals these bytes' digest (the key), so by A1 the stored bytes equal these bytes. A buggy caller passing the wrong key can no longer silently break A1 detectability on read. `put_pack(bytes) -> Result` returns the key. 3. **`run_conformance_probe(ProbeConfig) -> Result`** — public method per Mari's requested shape. Four phases: `sequential` (A1+A2), `if_match_race` (A3), `if_none_match_race` (A1+A3 on the create-only primitive `put_pack`/`put_manifest` use), `etag_consistency` (token round-trips opaquely). Failures carry `phase`, `round`, `key`, `reason`. Defaults: width=32, rounds=3. Tied to the same `put_immutable` path as the production write methods so the gate tests the load-bearing A1 primitive, not just pointer creation. Verified end-to-end against live MinIO at 8-way×2-round in `probe::probe_conformance`. 198 relay tests green (was 196 — 2 new pure unit tests on `classify_cas`), 6 store probe tests green when `SPROUT_GIT_S3_PROBE=1` against MinIO. clippy/fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/store.rs | 476 ++++++++++++++++++++--- 1 file changed, 432 insertions(+), 44 deletions(-) diff --git a/crates/sprout-relay/src/api/git/store.rs b/crates/sprout-relay/src/api/git/store.rs index 2e13adaaa..16ed95e94 100644 --- a/crates/sprout-relay/src/api/git/store.rs +++ b/crates/sprout-relay/src/api/git/store.rs @@ -80,6 +80,60 @@ pub enum StoreError { /// Any other backend / transport error. #[error("s3 backend error: {0}")] Backend(#[from] S3Error), + /// Conformance probe failed — backend does not satisfy A1/A2/A3. + #[error(transparent)] + Probe(ProbeFailure), +} + +/// Configuration for `GitStore::run_conformance_probe`. +/// +/// Defaults: 32-way concurrency, 3 rounds. The probe is a deployment gate — +/// run at startup, fail-closed. See `docs/git-on-object-storage.md` §Conformance. +#[derive(Debug, Clone)] +pub struct ProbeConfig { + /// How many tasks race per round. Must be ≥ 2. + pub race_width: usize, + /// How many rounds to run each race phase. + pub race_rounds: usize, +} + +impl Default for ProbeConfig { + fn default() -> Self { + Self { + race_width: 32, + race_rounds: 3, + } + } +} + +/// Returned on a successful probe run. Kept intentionally thin — failure +/// detail lives in `ProbeFailure` (the error variant). +#[derive(Debug, Clone)] +pub struct ProbeReport { + /// Concurrency used. + pub race_width: usize, + /// Rounds executed per race phase. + pub race_rounds: usize, +} + +/// Failure carrying the phase that failed plus enough context to diagnose. +#[derive(Debug, thiserror::Error)] +#[error("conformance probe failed in phase '{phase}' (round {round}, key {key}): {reason}")] +pub struct ProbeFailure { + /// One of `sequential`, `if_match_race`, `if_none_match_race`, `etag_consistency`. + pub phase: &'static str, + /// Round index (0-based) when this phase ran multiple rounds. + pub round: usize, + /// Object key the failure concerns (or `""` if not key-specific). + pub key: String, + /// Human-readable detail. + pub reason: String, +} + +impl From for StoreError { + fn from(f: ProbeFailure) -> Self { + StoreError::Probe(f) + } } /// Object-store client for git refs. @@ -109,28 +163,43 @@ impl GitStore { Ok(Self { bucket }) } + /// Compute the hex SHA-256 of `bytes`. The content-addressed key. + pub fn content_key(prefix: &str, bytes: &[u8]) -> String { + let mut h = Sha256::new(); + h.update(bytes); + format!("{prefix}/{}", hex::encode(h.finalize())) + } + /// Create-only write of a content-addressed object (pack or manifest). /// - /// Idempotent: a collision on the same content-addressed key is treated as - /// success, because the bytes are guaranteed to match (the key is their - /// digest). This is the §Push step 2 / step 6 primitive. - pub async fn put_immutable( + /// **The caller does not choose the key.** It is derived as + /// `/` inside this method. This makes the + /// idempotency claim *constructive*: a 412 collision means the key already + /// holds bytes whose digest equals `sha256(these bytes)`, so by A1 + /// (content-addressing) the stored bytes equal these bytes. Without this + /// enforcement, a buggy caller passing the wrong key would silently break + /// A1 detectability on read. + /// + /// Returns the key under which the object was written. + async fn put_immutable( &self, - key: &str, + prefix: &str, bytes: &[u8], content_type: &str, - ) -> Result<(), StoreError> { + ) -> Result { + let key = Self::content_key(prefix, bytes); let mut headers = axum::http::HeaderMap::new(); headers.insert(axum::http::header::IF_NONE_MATCH, "*".parse().unwrap()); match self .bucket - .put_object_with_content_type_and_headers(key, bytes, content_type, Some(headers)) + .put_object_with_content_type_and_headers(&key, bytes, content_type, Some(headers)) .await { - Ok(resp) if (200..300).contains(&resp.status_code()) => Ok(()), - // 412 on a content-addressed key means the key already holds the same - // bytes (by construction). Treat as success — A1 is preserved. - Err(S3Error::HttpFailWithBody(412, _)) => Ok(()), + Ok(resp) if (200..300).contains(&resp.status_code()) => Ok(key), + // 412 on a content-addressed key means the key already holds the + // same bytes (by construction — the key is the digest). A1 is + // preserved without a defensive GET. + Err(S3Error::HttpFailWithBody(412, _)) => Ok(key), Ok(resp) => Err(StoreError::Backend(S3Error::HttpFailWithBody( resp.status_code(), "unexpected status".into(), @@ -139,15 +208,16 @@ impl GitStore { } } - /// `put_immutable` for a pack object (content-type `application/x-git-pack`). - pub async fn put_pack(&self, key: &str, bytes: &[u8]) -> Result<(), StoreError> { - self.put_immutable(key, bytes, "application/x-git-pack") + /// Write a pack object. Returns the content-addressed key (`packs/`). + pub async fn put_pack(&self, bytes: &[u8]) -> Result { + self.put_immutable("packs", bytes, "application/x-git-pack") .await } - /// `put_immutable` for a manifest object (JSON). - pub async fn put_manifest(&self, key: &str, bytes: &[u8]) -> Result<(), StoreError> { - self.put_immutable(key, bytes, "application/json").await + /// Write a manifest object. Returns the content-addressed key (`manifests/`). + pub async fn put_manifest(&self, bytes: &[u8]) -> Result { + self.put_immutable("manifests", bytes, "application/json") + .await } /// GET an object without digest verification. @@ -187,32 +257,37 @@ impl GitStore { Ok(bytes) } - /// GET the pointer object, returning its ETag and bytes. + /// GET the pointer object, returning its ETag and bytes *from the same + /// response* — atomic snapshot. /// /// Returns `Ok(None)` if the pointer does not exist (first-push case). + /// + /// **Why one GET, not HEAD-then-GET.** A separate HEAD followed by GET + /// can straddle a concurrent writer: the HEAD's ETag and the GET's body + /// would describe different pointer versions, and a caller that later + /// did `IfMatch(etag_from_head)` would be predicating on a version it + /// never actually read. Reading both fields from the GET response keeps + /// the snapshot consistent (A2: a single GET observes a single committed + /// object). Verified empirically in `probe::probe_get_exposes_etag`. pub async fn get_pointer(&self, key: &str) -> Result, StoreError> { - // `get_object` does not surface the response ETag in 0.37; do a HEAD first - // to capture the ETag, then GET. We accept the extra round-trip — pointer - // objects are tiny and the read path already pays one network hop for the - // manifest after this. Alternative would be `request_with_url`-level access, - // which entangles us with rust-s3 internals. - let head = match self.bucket.head_object(key).await { - Ok((info, status)) if (200..300).contains(&status) => info, - Ok((_, 404)) => return Ok(None), - Err(S3Error::HttpFailWithBody(404, _)) => return Ok(None), - Ok((_, status)) => { - return Err(StoreError::Backend(S3Error::HttpFailWithBody( - status, - "unexpected head status".into(), - ))) + match self.bucket.get_object(key).await { + Ok(resp) => { + let headers = resp.headers(); + let etag = headers + .get("etag") + .or_else(|| headers.get("ETag")) + .cloned() + .ok_or_else(|| { + StoreError::Backend(S3Error::HttpFailWithBody( + 500, + "GET pointer: response missing ETag".into(), + )) + })?; + Ok(Some((ETag(etag), Bytes::from(resp.to_vec())))) } - Err(e) => return Err(StoreError::Backend(e)), - }; - let etag = head - .e_tag - .ok_or_else(|| StoreError::Backend(S3Error::HttpFail))?; - let bytes = self.get(key).await?; - Ok(Some((ETag(etag), bytes))) + Err(S3Error::HttpFailWithBody(404, _)) => Ok(None), + Err(e) => Err(StoreError::Backend(e)), + } } /// Write the pointer under a precondition (§Push step 7 — the CAS). @@ -276,6 +351,277 @@ impl GitStore { Err(e) => Err(StoreError::Backend(e)), } } + + /// Conformance probe — deployment gate per `docs/git-on-object-storage.md` + /// §Conformance. Fail-closed: any phase failure returns + /// `StoreError::Probe(ProbeFailure)` and the caller (relay startup) MUST + /// refuse to come up. + /// + /// Four phases: + /// + /// 1. **`sequential`** — write a content-addressed object, read it back, + /// verify bytes. Tests A1 (content-addressed write) + A2 + /// (read-after-write). + /// 2. **`if_match_race`** — `race_width` parallel `put_pointer` calls + /// predicated on the same ETag. Exactly one must `Won`; the rest must + /// `LostRace`. Tests A3. + /// 3. **`if_none_match_race`** — `race_width` parallel create-only writes + /// against the same digest-shaped key (the same `put_immutable` path + /// `put_pack`/`put_manifest` use). Tests A1 + A3 on the create-only + /// primitive. Counts raw HTTP outcomes (exactly one 2xx, rest 412) and + /// asserts final stored bytes equal the racers' bytes. + /// 4. **`etag_consistency`** — round-trip an ETag from `get_pointer` into + /// `put_pointer(IfMatch(...))` and assert `Won`. Tests that the token + /// is opaque and stable between read and CAS. + pub async fn run_conformance_probe(&self, cfg: ProbeConfig) -> Result { + use std::sync::Arc; + if cfg.race_width < 2 || cfg.race_rounds == 0 { + return Err(ProbeFailure { + phase: "config", + round: 0, + key: String::new(), + reason: format!( + "race_width must be ≥ 2 and race_rounds ≥ 1, got {}/{}", + cfg.race_width, cfg.race_rounds + ), + } + .into()); + } + let nonce = uuid::Uuid::new_v4(); + let pointer_key = format!("probe/pointer-{nonce}"); + + // -- Phase 1: sequential -------------------------------------------------- + for round in 0..cfg.race_rounds { + let body = format!("probe-sequential-{nonce}-{round}").into_bytes(); + let key = self.put_pack(&body).await?; + let got = self + .get_verified(&key, &Self::digest_hex(&body)) + .await + .map_err(|e| ProbeFailure { + phase: "sequential", + round, + key: key.clone(), + reason: format!("read-after-write failed: {e}"), + })?; + if got[..] != body[..] { + return Err(ProbeFailure { + phase: "sequential", + round, + key, + reason: "read-after-write bytes mismatch".into(), + } + .into()); + } + } + + // -- Phase 2: if_match_race ----------------------------------------------- + // Seed the pointer with a known value, then race N IfMatch updates. + let seed = b"probe-pointer-seed".to_vec(); + let _ = self.bucket.delete_object(&pointer_key).await; // ignore 404 + let seed_outcome = self + .put_pointer(&pointer_key, &seed, Precond::IfNoneMatchStar) + .await?; + let mut etag = match seed_outcome { + CasOutcome::Won(e) => e, + CasOutcome::LostRace => { + return Err(ProbeFailure { + phase: "if_match_race", + round: 0, + key: pointer_key, + reason: "could not seed pointer (lost race against self)".into(), + } + .into()) + } + }; + for round in 0..cfg.race_rounds { + let arc_self: Arc<&Self> = Arc::new(self); + let mut tasks = Vec::with_capacity(cfg.race_width); + for i in 0..cfg.race_width { + let me = Arc::clone(&arc_self); + let pkey = pointer_key.clone(); + let et = etag.clone(); + let body = format!("round={round},racer={i},nonce={nonce}").into_bytes(); + tasks.push(async move { me.put_pointer(&pkey, &body, Precond::IfMatch(et)).await }); + } + let outcomes = futures_util::future::join_all(tasks).await; + let mut winners = 0usize; + let mut new_etag: Option = None; + for (i, outcome) in outcomes.into_iter().enumerate() { + match outcome { + Ok(CasOutcome::Won(e)) => { + winners += 1; + new_etag = Some(e); + } + Ok(CasOutcome::LostRace) => {} + Err(e) => { + return Err(ProbeFailure { + phase: "if_match_race", + round, + key: pointer_key, + reason: format!("racer {i}: {e}"), + } + .into()) + } + } + } + if winners != 1 { + return Err(ProbeFailure { + phase: "if_match_race", + round, + key: pointer_key, + reason: format!("expected exactly 1 winner, got {winners}"), + } + .into()); + } + etag = new_etag.expect("winner exists"); + } + + // -- Phase 3: if_none_match_race ------------------------------------------ + // N parallel create-only writes targeting the same digest-shaped key. + // Bypass `put_immutable`'s 412-swallow to count raw outcomes. + for round in 0..cfg.race_rounds { + let body = format!("probe-inm-race-{nonce}-{round}").into_bytes(); + let key = Self::content_key("probe/inm-race", &body); + // Clean slate. + let _ = self.bucket.delete_object(&key).await; + let arc_self: Arc<&Self> = Arc::new(self); + let mut tasks = Vec::with_capacity(cfg.race_width); + for _ in 0..cfg.race_width { + let me = Arc::clone(&arc_self); + let k = key.clone(); + let b = body.clone(); + tasks.push(async move { me.put_immutable_raw(&k, &b).await }); + } + let results = futures_util::future::join_all(tasks).await; + let mut twos = 0usize; + let mut twelves = 0usize; + for (i, r) in results.into_iter().enumerate() { + match r { + Ok(200..=299) => twos += 1, + Ok(412) => twelves += 1, + Ok(code) => { + return Err(ProbeFailure { + phase: "if_none_match_race", + round, + key, + reason: format!("racer {i}: unexpected status {code}"), + } + .into()) + } + Err(e) => { + return Err(ProbeFailure { + phase: "if_none_match_race", + round, + key, + reason: format!("racer {i}: {e}"), + } + .into()) + } + } + } + if twos != 1 || twelves != cfg.race_width - 1 { + return Err(ProbeFailure { + phase: "if_none_match_race", + round, + key, + reason: format!( + "expected 1×2xx + {}×412, got {twos}×2xx + {twelves}×412", + cfg.race_width - 1 + ), + } + .into()); + } + // Final bytes must equal the racers' bytes (content-addressed: any + // winner stored the same bytes by construction). + let read = self + .get_verified(&key, &Self::digest_hex(&body)) + .await + .map_err(|e| ProbeFailure { + phase: "if_none_match_race", + round, + key: key.clone(), + reason: format!("post-race verified read failed: {e}"), + })?; + if read[..] != body[..] { + return Err(ProbeFailure { + phase: "if_none_match_race", + round, + key, + reason: "post-race bytes mismatch".into(), + } + .into()); + } + } + + // -- Phase 4: etag_consistency -------------------------------------------- + // GET pointer, take its ETag, CAS-update with that ETag, expect Won. + // Proves the token round-trips opaquely between read and write. + for round in 0..cfg.race_rounds { + let (et, _bytes) = + self.get_pointer(&pointer_key) + .await? + .ok_or_else(|| ProbeFailure { + phase: "etag_consistency", + round, + key: pointer_key.clone(), + reason: "pointer vanished mid-probe".into(), + })?; + let body = format!("probe-etag-{round}-{nonce}").into_bytes(); + match self + .put_pointer(&pointer_key, &body, Precond::IfMatch(et)) + .await? + { + CasOutcome::Won(_) => {} + CasOutcome::LostRace => { + return Err(ProbeFailure { + phase: "etag_consistency", + round, + key: pointer_key, + reason: "GET-ETag → IfMatch chain lost race in a quiescent probe".into(), + } + .into()) + } + } + } + + // Cleanup pointer (immutable probe writes accumulate by design; the + // bucket's retention policy handles them, not the probe). + let _ = self.bucket.delete_object(&pointer_key).await; + + Ok(ProbeReport { + race_width: cfg.race_width, + race_rounds: cfg.race_rounds, + }) + } + + /// Helper: hex SHA-256 of bytes. + fn digest_hex(bytes: &[u8]) -> String { + let mut h = Sha256::new(); + h.update(bytes); + hex::encode(h.finalize()) + } + + /// Raw create-only PUT exposed for the probe's race-counting phase, where + /// we need to *see* 412 outcomes rather than swallow them as idempotent. + /// Returns the HTTP status code on success-or-412; bubbles other errors. + async fn put_immutable_raw(&self, key: &str, bytes: &[u8]) -> Result { + let mut headers = axum::http::HeaderMap::new(); + headers.insert(axum::http::header::IF_NONE_MATCH, "*".parse().unwrap()); + match self + .bucket + .put_object_with_content_type_and_headers( + key, + bytes, + "application/octet-stream", + Some(headers), + ) + .await + { + Ok(resp) => Ok(resp.status_code()), + Err(S3Error::HttpFailWithBody(412, _)) => Ok(412), + Err(e) => Err(StoreError::Backend(e)), + } + } } #[cfg(test)] @@ -365,18 +711,19 @@ mod probe { } let st = store(); - // 1. put_immutable + get_verified happy path. + // 1. put_pack returns the content-addressed key; get_verified happy path. let bytes = b"hello, git on object store".to_vec(); - let key = format!("packs/{}", sha256_hex(&bytes)); - st.put_pack(&key, &bytes).await.expect("put_pack"); + let key = st.put_pack(&bytes).await.expect("put_pack"); + assert_eq!(key, format!("packs/{}", sha256_hex(&bytes))); let got = st .get_verified(&key, &sha256_hex(&bytes)) .await .expect("verified read"); assert_eq!(&got[..], &bytes[..]); - // 2. put_immutable is idempotent (no error on second call). - st.put_pack(&key, &bytes).await.expect("idempotent"); + // 2. put_pack is idempotent — second call returns the same key. + let key2 = st.put_pack(&bytes).await.expect("idempotent"); + assert_eq!(key, key2); // 3. get_verified detects corruption — wrong expected digest fails. let bogus = "0".repeat(64); @@ -435,4 +782,45 @@ mod probe { let _ = st.bucket.delete_object(&pkey).await; let _ = st.bucket.delete_object(&key).await; } + + /// End-to-end conformance probe against MinIO. This is the same code path + /// that will run at relay startup as a deployment gate. + #[tokio::test] + async fn probe_conformance() { + if !probe_enabled() { + return; + } + let st = store(); + let report = st + .run_conformance_probe(ProbeConfig { + race_width: 8, + race_rounds: 2, + }) + .await + .expect("conformance probe"); + eprintln!("✓ probe report: {report:?}"); + assert_eq!(report.race_width, 8); + assert_eq!(report.race_rounds, 2); + } + + /// Quick probe: confirm rust-s3's `get_object` exposes ETag on the response. + #[tokio::test] + async fn probe_get_exposes_etag() { + if !probe_enabled() { + return; + } + let st = store(); + let key = format!("probe/etag-{}.txt", uuid::Uuid::new_v4()); + st.bucket + .put_object_with_content_type(&key, b"hi", "text/plain") + .await + .expect("put"); + let resp = st.bucket.get_object(&key).await.expect("get"); + let headers = resp.headers(); + eprintln!("GET headers: {headers:?}"); + let etag = headers.get("etag").or_else(|| headers.get("ETag")).cloned(); + assert!(etag.is_some(), "GET response must carry ETag header"); + eprintln!("ETag from GET: {etag:?}"); + let _ = st.bucket.delete_object(&key).await; + } } From ac106d8e9c4741b40335bff5f72be2f6b6960c59 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 10:56:09 -0400 Subject: [PATCH 03/24] fix(relay/git/store): fail closed on missing CAS-response ETag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Perci's tightening note: a `put_pointer` PUT that returns 2xx without an ETag header is not a `Won` — it's a non-conforming backend. The previous `unwrap_or_default()` would hand the caller `ETag("")`, which the next CAS round would treat as a normal `LostRace`. That hides backend mis-conformance behind a single retry instead of failing at admission. Now: missing ETag on a 2xx CAS PUT → `StoreError::Backend(...)` with a diagnostic body. The conformance probe's `etag_consistency` and `if_match_race` phases exercise the path on every round, so a non-conforming backend fails the deployment gate; in production, a mid-stream ETag dropout fails the affected push rather than corrupting chain semantics. 198 relay tests + 6 store probes still green; clippy/fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/store.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/crates/sprout-relay/src/api/git/store.rs b/crates/sprout-relay/src/api/git/store.rs index 16ed95e94..e91451fa1 100644 --- a/crates/sprout-relay/src/api/git/store.rs +++ b/crates/sprout-relay/src/api/git/store.rs @@ -49,10 +49,9 @@ pub enum Precond { /// and must be classified here so callers can decide retry vs. non-ff. On /// `Won`, the returned `ETag` is the PUT response's ETag and can be fed /// directly into the next `IfMatch` round (verified empirically against MinIO -/// in `probe::probe_full_roundtrip`). If a backend ever omits the response -/// ETag, the value will be empty and the next `IfMatch(empty)` will be -/// rejected as a normal `LostRace` — no silent corruption, only a forced -/// retry via `get_pointer`. +/// in `probe::probe_full_roundtrip`). A backend that succeeds on the CAS PUT +/// but omits the response ETag is treated as non-conforming and fails the +/// operation with `StoreError::Backend` — see `classify_cas`. #[derive(Debug, Clone, PartialEq, Eq)] pub enum CasOutcome { /// CAS succeeded; the new pointer ETag (suitable for the next `IfMatch`). @@ -340,7 +339,19 @@ impl GitStore { .get("etag") .or_else(|| headers.get("ETag")) .cloned() - .unwrap_or_default(); + .ok_or_else(|| { + // Fail closed: a CAS that we can't chain (because the + // backend didn't return an ETag) is not a `Won` — it's + // a non-conforming backend. The conformance probe will + // catch this; in production we'd rather refuse than + // hand the caller `ETag("")` and force-fail the next CAS. + StoreError::Backend(S3Error::HttpFailWithBody( + resp.status_code(), + "CAS succeeded but response missing ETag header \ + (backend does not satisfy ETag-token consistency)" + .into(), + )) + })?; Ok(CasOutcome::Won(ETag(etag))) } Err(S3Error::HttpFailWithBody(412, _)) => Ok(CasOutcome::LostRace), From 1262dfd464ffaa605930f087e3f66331fad9fb14 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:03:27 -0400 Subject: [PATCH 04/24] feat(relay/git): manifest schema with canonical JSON serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `crates/sprout-relay/src/api/git/manifest.rs` — the shared type that the push path (Quinn's `cas_publish`) constructs and the read path (Dawn's `hydrate`) consumes. Sibling module so neither owns the schema. Schema (per channel agreement with Sami, Max, Perci, Quinn): ```rust pub struct Manifest { pub version: u32, // = 1 pub head: String, // "refs/heads/main" unprefixed pub refs: BTreeMap, pub packs: Vec, // store keys, sorted pub parent: Option, // previous manifest digest } ``` - HEAD is *published* ref state, not derived at read time. A hydrate-time default would let a clone advertise a branch the writer didn't intend (Inv_RefEffectApplied). - `canonical_bytes()` is deterministic: BTreeMap iterates sorted, packs sorted + deduped defensively, struct field order pinned by declaration, `serde_json::to_vec` emits no whitespace. So `key == sha256(bytes)` is reproducible — A1 stays mechanical. - `from_bytes` rejects unknown `version`. Six unit tests pin: round-trip equality, byte stability under permuted ref insertion order, packs sort+dedup, version rejection, no-parent first push, and a fully byte-pinned canonical string ("any unintended serialization change shifts every manifest digest" → loud failure, not silent). 204/204 relay tests; clippy/fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/manifest.rs | 193 ++++++++++++++++++++ crates/sprout-relay/src/api/git/mod.rs | 1 + 2 files changed, 194 insertions(+) create mode 100644 crates/sprout-relay/src/api/git/manifest.rs diff --git a/crates/sprout-relay/src/api/git/manifest.rs b/crates/sprout-relay/src/api/git/manifest.rs new file mode 100644 index 000000000..2471e889f --- /dev/null +++ b/crates/sprout-relay/src/api/git/manifest.rs @@ -0,0 +1,193 @@ +//! Manifest schema for git-on-object-storage. +//! +//! The manifest is the immutable, content-addressed snapshot of a repo's +//! published state at a single point in time (§System Model). A push commits +//! by CAS-installing a new pointer to a new manifest digest; readers resolve +//! pointer → manifest → packs to hydrate (§Read). +//! +//! ## Canonical serialization +//! +//! `Manifest::canonical_bytes()` produces a deterministic byte sequence so +//! that `key == sha256(bytes)` (A1 detectability): +//! +//! - `refs: BTreeMap` — sorted ref names at serialization. +//! - `packs: Vec` — sorted by `canonical_bytes()` before writing. +//! - Struct field order: `version`, `head`, `refs`, `packs`, `parent` +//! (matches declaration; serde emits in this order). +//! - `serde_json::to_vec` — no whitespace. +//! +//! Round-trip + byte-stability are pinned in unit tests. +//! +//! ## Why HEAD is in the manifest +//! +//! HEAD is *published* ref state (§Implementation Correspondence), not a +//! read-time default. Deriving it ("default to main, fallback to first head") +//! would let a clone advertise a different default branch than the writer +//! intended — `Inv_RefEffectApplied` would not hold. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +/// Current manifest schema version. Bump on incompatible change. +pub const MANIFEST_VERSION: u32 = 1; + +/// A repository's published state. +/// +/// Field order is significant for canonical JSON — do not reorder. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Manifest { + /// Schema version. Must equal [`MANIFEST_VERSION`] on read. + pub version: u32, + /// Symbolic HEAD ref, unprefixed (e.g. `"refs/heads/main"`). No `"ref: "` + /// — that's a Git-protocol formatting concern, applied at hydrate time. + pub head: String, + /// All refs in the published state: refname → 40-char hex oid. + pub refs: BTreeMap, + /// Store keys of every pack covering `refs`. Sorted ascending — + /// `canonical_bytes` enforces this on serialize. + pub packs: Vec, + /// Digest of the manifest this one supersedes, or `None` for the first + /// push to a fresh repo. + pub parent: Option, +} + +/// Errors from manifest (de)serialization. +#[derive(Debug, thiserror::Error)] +pub enum ManifestError { + /// `serde_json` failed to encode or decode. + #[error("manifest serde: {0}")] + Serde(#[from] serde_json::Error), + /// On-the-wire manifest carried a `version` we don't understand. + #[error("unsupported manifest version {got} (expected {expected})")] + UnsupportedVersion { + /// The version we read. + got: u32, + /// The version we support. + expected: u32, + }, +} + +impl Manifest { + /// Serialize to canonical bytes suitable for `put_manifest`. + /// + /// Sorts `packs` defensively (writer is responsible for keeping them + /// sorted, but a misuse should not silently break content-addressing). + pub fn canonical_bytes(&self) -> Result, ManifestError> { + let mut owned = self.clone(); + owned.packs.sort(); + owned.packs.dedup(); + Ok(serde_json::to_vec(&owned)?) + } + + /// Parse from bytes; reject unknown schema versions. + pub fn from_bytes(bytes: &[u8]) -> Result { + let m: Manifest = serde_json::from_slice(bytes)?; + if m.version != MANIFEST_VERSION { + return Err(ManifestError::UnsupportedVersion { + got: m.version, + expected: MANIFEST_VERSION, + }); + } + Ok(m) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample() -> Manifest { + let mut refs = BTreeMap::new(); + refs.insert( + "refs/heads/main".into(), + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".into(), + ); + refs.insert( + "refs/heads/feature".into(), + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".into(), + ); + Manifest { + version: MANIFEST_VERSION, + head: "refs/heads/main".into(), + refs, + packs: vec!["packs/cc".into(), "packs/dd".into()], + parent: Some("ee".repeat(32)), + } + } + + #[test] + fn canonical_bytes_round_trip() { + let m = sample(); + let bytes = m.canonical_bytes().unwrap(); + let back = Manifest::from_bytes(&bytes).unwrap(); + assert_eq!(m, back); + } + + #[test] + fn canonical_bytes_byte_stable_across_ref_insertion_order() { + // Insert refs in opposite orders; canonical bytes must match because + // BTreeMap iterates sorted. + let mut a = sample(); + a.refs.clear(); + a.refs.insert("refs/heads/zzz".into(), "11".repeat(20)); + a.refs.insert("refs/heads/aaa".into(), "22".repeat(20)); + let mut b = sample(); + b.refs.clear(); + b.refs.insert("refs/heads/aaa".into(), "22".repeat(20)); + b.refs.insert("refs/heads/zzz".into(), "11".repeat(20)); + assert_eq!(a.canonical_bytes().unwrap(), b.canonical_bytes().unwrap()); + } + + #[test] + fn canonical_bytes_sorts_and_dedups_packs() { + let mut m = sample(); + m.packs = vec!["packs/dd".into(), "packs/cc".into(), "packs/dd".into()]; + let bytes = m.canonical_bytes().unwrap(); + let back = Manifest::from_bytes(&bytes).unwrap(); + assert_eq!(back.packs, vec!["packs/cc", "packs/dd"]); + } + + #[test] + fn rejects_unknown_version() { + let mut m = sample(); + m.version = 999; + let bytes = serde_json::to_vec(&m).unwrap(); + let err = Manifest::from_bytes(&bytes).unwrap_err(); + assert!(matches!( + err, + ManifestError::UnsupportedVersion { got: 999, .. } + )); + } + + #[test] + fn first_push_has_no_parent() { + let mut m = sample(); + m.parent = None; + let bytes = m.canonical_bytes().unwrap(); + let back = Manifest::from_bytes(&bytes).unwrap(); + assert!(back.parent.is_none()); + } + + /// Pin the exact byte shape so any unintended change to serialization + /// (field order, whitespace, key ordering) triggers a failure rather than + /// silently shifting the manifest digest. + #[test] + fn canonical_bytes_pinned() { + let mut refs = BTreeMap::new(); + refs.insert("refs/heads/main".into(), "a".repeat(40)); + let m = Manifest { + version: 1, + head: "refs/heads/main".into(), + refs, + packs: vec!["packs/p1".into()], + parent: None, + }; + let bytes = m.canonical_bytes().unwrap(); + let s = std::str::from_utf8(&bytes).unwrap(); + assert_eq!( + s, + r#"{"version":1,"head":"refs/heads/main","refs":{"refs/heads/main":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},"packs":["packs/p1"],"parent":null}"# + ); + } +} diff --git a/crates/sprout-relay/src/api/git/mod.rs b/crates/sprout-relay/src/api/git/mod.rs index a406c8aff..9783a7941 100644 --- a/crates/sprout-relay/src/api/git/mod.rs +++ b/crates/sprout-relay/src/api/git/mod.rs @@ -23,6 +23,7 @@ use tower_http::limit::RequestBodyLimitLayer; use crate::state::AppState; pub mod hook; +pub mod manifest; pub mod policy; pub mod store; pub mod transport; From 162d3ec62eb0c94a5f5439bb52eb3d55dfded466 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:24:38 -0400 Subject: [PATCH 05/24] feat(relay/git/manifest): pub validate() + shared is_safe_refname/is_hex_oid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sami flagged on cas_publish review: write side filters malformed OIDs but not refnames or detached HEAD; read side (hydrate) rejects exactly those. Net = "valid CAS, un-clone-able output" — manifest commits successfully, every subsequent clone 5xx'es. Tyler's 9/10 bar fails. Fail at write, not at every read. Solution: lift the validation predicates that already live in `hydrate.rs` into `manifest.rs` as `pub` items, plus add `Manifest::validate()` for one-shot write-side checking. - `pub fn is_safe_refname(s) -> bool` — moved from hydrate, identical predicate. Symmetric protection: writers reject before put_manifest, readers double-check defense-in-depth on hydrate. - `pub fn is_hex_oid(s) -> bool` — same move, same shape (accepts SHA-1 and SHA-256 widths for the forward algorithm transition). - `pub fn Manifest::validate(&self) -> Result<(), ManifestError>` — bundles head non-empty + head safe + every ref name safe + every oid valid. Quinn's `cas_publish` calls this between `compose_after` and `put_manifest` in a follow-up. - `ManifestError` grows three variants: `UnsafeRefName`, `MalformedOid`, `EmptyHead`. All carry diagnostic context. `canonical_bytes` deliberately does NOT call `validate` — keeps the write seam visible (caller must invoke `validate`?`canonical_bytes`). 8 new tests pin predicate behavior + each `validate` failure path. 212 relay tests; clippy + fmt clean. hydrate.rs will follow up to import the shared predicates instead of duplicating; today nothing breaks because the predicates are identical text. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/manifest.rs | 154 +++++++++++++++++++- 1 file changed, 153 insertions(+), 1 deletion(-) diff --git a/crates/sprout-relay/src/api/git/manifest.rs b/crates/sprout-relay/src/api/git/manifest.rs index 2471e889f..e46947b05 100644 --- a/crates/sprout-relay/src/api/git/manifest.rs +++ b/crates/sprout-relay/src/api/git/manifest.rs @@ -52,7 +52,7 @@ pub struct Manifest { pub parent: Option, } -/// Errors from manifest (de)serialization. +/// Errors from manifest (de)serialization or validation. #[derive(Debug, thiserror::Error)] pub enum ManifestError { /// `serde_json` failed to encode or decode. @@ -66,13 +66,98 @@ pub enum ManifestError { /// The version we support. expected: u32, }, + /// A refname in `refs` (or `head`) violates `is_safe_refname` — must start + /// with `refs/`, no traversal, no control chars. Symmetric write-side check + /// for the reader-side validation in `api::git::hydrate`. + #[error("manifest contains unsafe ref name {0:?}")] + UnsafeRefName(String), + /// An object id in `refs` is not a valid hex SHA-1 (40) or SHA-256 (64). + #[error("manifest ref {refname:?} has malformed oid {oid:?}")] + MalformedOid { + /// The ref carrying the bad oid. + refname: String, + /// The oid that failed validation. + oid: String, + }, + /// Manifest `head` is empty — pre-CAS validation must reject this so we + /// never commit an un-clone-able manifest (read side `is_safe_refname("")` + /// returns false). + #[error("manifest head is empty")] + EmptyHead, +} + +/// Conservative refname validation, used symmetrically on both the write side +/// (in `Manifest::validate`, before `put_manifest`) and the read side (in +/// `api::git::hydrate`, before writing the ref to disk). +/// +/// Refuses traversal (`..`), null/newline/control chars, non-`refs/` prefixes, +/// and leading/trailing/double slashes. Allowed alphabet: +/// `[a-zA-Z0-9_./-]`. +/// +/// Sharing one predicate is load-bearing: any divergence creates the +/// "valid CAS, un-clone-able output" hazard. +pub fn is_safe_refname(s: &str) -> bool { + if !s.starts_with("refs/") { + return false; + } + if s.contains("..") || s.contains("//") || s.starts_with('/') || s.ends_with('/') { + return false; + } + s.chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '/' | '_' | '.' | '-')) +} + +/// Hex-OID predicate. Accepts both SHA-1 (40 chars) and SHA-256 (64 chars) — +/// sprout pins SHA-1 today but the predicate is forward-looking. Used +/// symmetrically by write-side validation and read-side hydration. +pub fn is_hex_oid(s: &str) -> bool { + (s.len() == 40 || s.len() == 64) && s.chars().all(|c| c.is_ascii_hexdigit()) } impl Manifest { + /// Validate pre-commit invariants. + /// + /// **Writers must call this before `canonical_bytes` → `put_manifest`.** + /// A manifest that hydrators reject (unsafe refname, malformed oid, empty + /// HEAD) MUST NOT be written: it would CAS successfully and then 5xx every + /// subsequent clone — "valid CAS, un-clone-able output". Pre-CAS rejection + /// turns those into push-time 4xx, which is the right surface for the bug. + /// + /// Checks: + /// - `head` is non-empty and passes `is_safe_refname`. + /// - Every key in `refs` passes `is_safe_refname`. + /// - Every value in `refs` is a hex OID per `is_hex_oid`. + /// + /// Read-side `hydrate` runs the same predicates as defense-in-depth. + pub fn validate(&self) -> Result<(), ManifestError> { + if self.head.is_empty() { + return Err(ManifestError::EmptyHead); + } + if !is_safe_refname(&self.head) { + return Err(ManifestError::UnsafeRefName(self.head.clone())); + } + for (refname, oid) in &self.refs { + if !is_safe_refname(refname) { + return Err(ManifestError::UnsafeRefName(refname.clone())); + } + if !is_hex_oid(oid) { + return Err(ManifestError::MalformedOid { + refname: refname.clone(), + oid: oid.clone(), + }); + } + } + Ok(()) + } + /// Serialize to canonical bytes suitable for `put_manifest`. /// /// Sorts `packs` defensively (writer is responsible for keeping them /// sorted, but a misuse should not silently break content-addressing). + /// + /// Does NOT call `validate()` — callers must invoke it explicitly so a + /// validation failure is visible at the write seam, not buried inside + /// serialization. pub fn canonical_bytes(&self) -> Result, ManifestError> { let mut owned = self.clone(); owned.packs.sort(); @@ -148,6 +233,73 @@ mod tests { assert_eq!(back.packs, vec!["packs/cc", "packs/dd"]); } + #[test] + fn safe_refnames_predicate() { + assert!(is_safe_refname("refs/heads/main")); + assert!(is_safe_refname("refs/tags/v1.0.0")); + assert!(is_safe_refname("refs/heads/feat/cas-publish")); + assert!(!is_safe_refname("refs/heads/../escape")); + assert!(!is_safe_refname("HEAD")); + assert!(!is_safe_refname("")); + assert!(!is_safe_refname("refs/heads/")); + assert!(!is_safe_refname("/refs/heads/main")); + assert!(!is_safe_refname("refs/heads/main\nrefs/heads/evil")); + assert!(!is_safe_refname("refs/heads/main\0")); + } + + #[test] + fn hex_oid_predicate() { + assert!(is_hex_oid(&"a".repeat(40))); + assert!(is_hex_oid(&"a".repeat(64))); + assert!(!is_hex_oid(&"a".repeat(39))); + assert!(!is_hex_oid(&"g".repeat(40))); + assert!(!is_hex_oid("")); + } + + #[test] + fn validate_happy_path() { + sample().validate().expect("sample manifest must validate"); + } + + #[test] + fn validate_rejects_empty_head() { + let mut m = sample(); + m.head = String::new(); + assert!(matches!(m.validate(), Err(ManifestError::EmptyHead))); + } + + #[test] + fn validate_rejects_unsafe_head() { + let mut m = sample(); + m.head = "refs/heads/..".into(); + assert!(matches!(m.validate(), Err(ManifestError::UnsafeRefName(_)))); + } + + #[test] + fn validate_rejects_non_refs_head() { + let mut m = sample(); + m.head = "HEAD".into(); + assert!(matches!(m.validate(), Err(ManifestError::UnsafeRefName(_)))); + } + + #[test] + fn validate_rejects_unsafe_ref_name() { + let mut m = sample(); + m.refs.insert("refs/heads/bad\nname".into(), "a".repeat(40)); + assert!(matches!(m.validate(), Err(ManifestError::UnsafeRefName(_)))); + } + + #[test] + fn validate_rejects_malformed_oid() { + let mut m = sample(); + m.refs + .insert("refs/heads/ok".into(), "not-a-hex-oid".into()); + assert!(matches!( + m.validate(), + Err(ManifestError::MalformedOid { .. }) + )); + } + #[test] fn rejects_unknown_version() { let mut m = sample(); From 694da12372d50bba38f5d6352cc0d56838663493 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:29:25 -0400 Subject: [PATCH 06/24] feat(relay/git/manifest): pub pointer_key + parent-shape validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups for the convergence: 1. **`pub fn pointer_key(owner, repo) -> String`** in manifest.rs. Single source of truth shared by cas_publish (write) and hydrate (read). Max + Sami both pushed for centralization — "same string in two files" drifts on the next refactor. Schema is `repos///pointer` (Quinn's choice; we agreed earlier). 2. **`Manifest::parent` is a bare 64-char hex digest, not a store key.** Perci flagged on cas_publish review: `read_parent` stored `"manifests/"` but Dawn's manifest contract says digest alone — `Inv_RefDerivedFromParent` reads `parent = pointer.digest` literally. Tightened the docstring + added `ManifestError::MalformedParent`, so `Manifest::validate()` catches the common bug at the write seam (Quinn strips `manifests/` before assigning, or the call fails loudly). 4 new tests pin the parent-shape rejection (`manifests/` prefix rejected, short rejected, None accepted) and pointer_key `.git` stripping. 18/18 manifest, 216/216 relay lib, clippy + fmt clean. Follow-up commit on dawn/git-read-hydrate imports `manifest::pointer_key` and drops the duplicate definition. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/manifest.rs | 79 ++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/crates/sprout-relay/src/api/git/manifest.rs b/crates/sprout-relay/src/api/git/manifest.rs index e46947b05..12ad43199 100644 --- a/crates/sprout-relay/src/api/git/manifest.rs +++ b/crates/sprout-relay/src/api/git/manifest.rs @@ -47,8 +47,12 @@ pub struct Manifest { /// Store keys of every pack covering `refs`. Sorted ascending — /// `canonical_bytes` enforces this on serialize. pub packs: Vec, - /// Digest of the manifest this one supersedes, or `None` for the first - /// push to a fresh repo. + /// **Bare hex digest** of the manifest this one supersedes (64 chars, + /// SHA-256), or `None` for the first push to a fresh repo. Contrast with + /// `packs` which carries full store keys (`packs/`); `parent` is the + /// digest alone, matching the pointer-body shape so `Inv_RefDerivedFromParent` + /// reads literally as `parent = pointer.digest`. Writers must strip any + /// `manifests/` prefix before assigning. Enforced by `validate()`. pub parent: Option, } @@ -84,6 +88,12 @@ pub enum ManifestError { /// returns false). #[error("manifest head is empty")] EmptyHead, + /// `parent` is not a bare 64-char hex digest. Common mistake: storing the + /// full store key (`manifests/`) instead of stripping the prefix. + /// Breaks the "manifest.parent == pointer.digest" model invariant + /// (`Inv_RefDerivedFromParent`). + #[error("manifest parent is not a bare 64-char hex digest: {0:?}")] + MalformedParent(String), } /// Conservative refname validation, used symmetrically on both the write side @@ -114,6 +124,26 @@ pub fn is_hex_oid(s: &str) -> bool { (s.len() == 40 || s.len() == 64) && s.chars().all(|c| c.is_ascii_hexdigit()) } +/// Bare manifest-digest predicate (64-char hex SHA-256). +/// +/// Distinct from `is_hex_oid` (which also accepts 40-char SHA-1 for ref OIDs): +/// manifest digests are *always* SHA-256, so this is the tighter predicate +/// for the `Manifest::parent` field. +fn is_manifest_digest(s: &str) -> bool { + s.len() == 64 && s.chars().all(|c| c.is_ascii_hexdigit()) +} + +/// The canonical pointer key for a repo: `repos///pointer`. +/// +/// Single source of truth shared by `cas_publish` (write side) and `hydrate` +/// (read side). Strips a trailing `.git` if the caller passed it. The +/// `repos///` namespace leaves room for future sibling keys +/// (archive flag, gc state, etc.) co-located under each repo. +pub fn pointer_key(owner: &str, repo: &str) -> String { + let repo = repo.strip_suffix(".git").unwrap_or(repo); + format!("repos/{owner}/{repo}/pointer") +} + impl Manifest { /// Validate pre-commit invariants. /// @@ -127,6 +157,7 @@ impl Manifest { /// - `head` is non-empty and passes `is_safe_refname`. /// - Every key in `refs` passes `is_safe_refname`. /// - Every value in `refs` is a hex OID per `is_hex_oid`. + /// - `parent`, if `Some`, is a bare 64-char hex digest (not a store key). /// /// Read-side `hydrate` runs the same predicates as defense-in-depth. pub fn validate(&self) -> Result<(), ManifestError> { @@ -147,6 +178,11 @@ impl Manifest { }); } } + if let Some(p) = &self.parent { + if !is_manifest_digest(p) { + return Err(ManifestError::MalformedParent(p.clone())); + } + } Ok(()) } @@ -300,6 +336,45 @@ mod tests { )); } + #[test] + fn validate_rejects_parent_with_store_prefix() { + // The common bug Perci named: storing the full key in `parent` instead + // of the bare digest. `Inv_RefDerivedFromParent` reads `parent = + // pointer.digest`; carrying the prefix breaks the model literal. + let mut m = sample(); + m.parent = Some(format!("manifests/{}", "a".repeat(64))); + assert!(matches!( + m.validate(), + Err(ManifestError::MalformedParent(_)) + )); + } + + #[test] + fn validate_rejects_short_parent() { + let mut m = sample(); + m.parent = Some("abc".into()); + assert!(matches!( + m.validate(), + Err(ManifestError::MalformedParent(_)) + )); + } + + #[test] + fn validate_accepts_no_parent() { + let mut m = sample(); + m.parent = None; + m.validate().expect("no parent is fine (first push)"); + } + + #[test] + fn pointer_key_strips_dot_git() { + assert_eq!(pointer_key("alice", "myrepo"), "repos/alice/myrepo/pointer"); + assert_eq!( + pointer_key("alice", "myrepo.git"), + "repos/alice/myrepo/pointer" + ); + } + #[test] fn rejects_unknown_version() { let mut m = sample(); From 871812f18bc387f4d7c6cc3b4204ee35fd16e350 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:07:29 -0400 Subject: [PATCH 07/24] feat(relay/git): read-path hydration to ephemeral bare repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `crates/sprout-relay/src/api/git/hydrate.rs` — the spec §Read flow. GET pointer → GET manifest (digest-verified) → GET packs in parallel (digest-verified) → init temp bare repo → write+index every pack → write refs + HEAD. Returns `HydratedRepo { _tempdir, path }`; dropping the handle removes the directory. Ordering invariant (Sami's emphasis, Max's ask): all packs must be fully written + indexed before any ref is installed. A ref pointing into a not-yet-indexed pack is an opaque protocol failure mid-stream; the phase-2 split (refs/HEAD only after the phase-1 pack loop completes via `?`-bubbling) makes a failed hydrate fail-closed as "no advertised refs." Pointer 404 vs. backend failure split (Max): `Ok(None)` on pointer absence → caller returns HTTP 404. Any other failure → `Err(_)` → backend error. Empty repo (pointer present, refs empty in manifest) is a valid committed state and returns `Ok(Some(_))` with an empty bare repo. No `git index-pack --strict` on the read path. `index-pack` alone validates pack structure (CRC, type tags, internal refs); `--strict` adds the connectivity-graph fsck which would re-prove what manifest.packs covers by construction (Inv_Closed) — a write-path invariant, not a read-path obligation. Sami's call, agreed. Defensive refname validation rejects traversal / non-`refs/` prefixes / control chars before any file write — the writer should enforce this but we re-check because we're producing file paths from manifest input. Tests: - 3 pure unit tests on the helpers (refname safety, hex oid recognition, pointer key derivation with `.git` suffix stripping). - 2 live MinIO + real-git integration tests gated on `SPROUT_GIT_S3_PROBE=1`: - `live_hydrate_roundtrip` — build a real source repo, repack, upload pack+manifest+pointer via `store.rs` API, call `hydrate_for_read`, assert refs/HEAD match, then `git clone /tmp` and assert cloned file content survives. - `live_hydrate_missing_pointer_returns_none` — pointer absence is `Ok(None)`. 209 relay tests green; 5 hydrate-side; clippy + fmt clean. Branch: `dawn/git-read-hydrate` off `dawn/git-store-cas`. Transport wiring (info_refs/upload_pack switching on `state.git_store.is_some()`) is the follow-up commit; this lands the hydration primitive in isolation. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- Cargo.lock | 1 + crates/sprout-relay/Cargo.toml | 1 + crates/sprout-relay/src/api/git/hydrate.rs | 460 +++++++++++++++++++++ crates/sprout-relay/src/api/git/mod.rs | 1 + 4 files changed, 463 insertions(+) create mode 100644 crates/sprout-relay/src/api/git/hydrate.rs diff --git a/Cargo.lock b/Cargo.lock index f3298c6e3..afeb9a379 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4241,6 +4241,7 @@ dependencies = [ "sprout-workflow", "sqlx", "subtle", + "tempfile", "thiserror 2.0.18", "tokio", "tokio-util", diff --git a/crates/sprout-relay/Cargo.toml b/crates/sprout-relay/Cargo.toml index 9d60d8734..9aff4ce65 100644 --- a/crates/sprout-relay/Cargo.toml +++ b/crates/sprout-relay/Cargo.toml @@ -47,6 +47,7 @@ sprout-sdk = { workspace = true } sprout-workflow = { workspace = true, features = ["reqwest"] } sprout-media = { workspace = true } s3 = { version = "0.37", package = "rust-s3", default-features = false, features = ["tokio-rustls-tls", "fail-on-err", "tags"] } +tempfile = "3" bytes = "1" infer = "0.19" serde_yaml = { workspace = true } diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs new file mode 100644 index 000000000..6b3a10444 --- /dev/null +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -0,0 +1,460 @@ +//! Read-path hydration: materialize an ephemeral bare repo from an object-store +//! manifest, so the existing `upload-pack`/`info-refs` subprocess runner can +//! serve it. +//! +//! Flow (spec §Read): +//! +//! 1. GET pointer → manifest digest. +//! 2. GET manifest (digest-verified) → parsed [`Manifest`]. +//! 3. GET every pack the manifest names (digest-verified, in parallel). +//! 4. **Phase 1** — for each pack: write `pack-.pack`, run +//! `git index-pack` to materialize `.idx`. Failure here tears down the +//! tempdir with no refs/HEAD ever written. +//! 5. **Phase 2** — only after all packs are indexed: write loose refs and +//! HEAD from the manifest. +//! +//! The phase boundary is load-bearing: `upload-pack` walks refs → objects; +//! a ref pointing into a not-yet-indexed pack is an opaque protocol failure +//! mid-stream. Sami/Max named this explicitly. +//! +//! The returned [`HydratedRepo`] owns a [`tempfile::TempDir`]; dropping it +//! cleans up. Every read currently re-hydrates from scratch — naive but +//! correct; caching is named follow-up work. + +#![allow(dead_code)] // wired in by transport.rs in a follow-up commit + +use std::path::{Path, PathBuf}; + +use futures_util::future::try_join_all; +use tempfile::TempDir; +use tokio::process::Command; + +use super::manifest::{Manifest, ManifestError}; +use super::store::{GitStore, StoreError}; + +/// A bare repo hydrated to a temporary directory. +/// +/// The tempdir is removed when this value is dropped — callers must keep the +/// handle alive for the duration of the subprocess that reads from `path()`. +pub struct HydratedRepo { + /// Owns the lifetime of the on-disk tree. + _tempdir: TempDir, + /// Absolute path to the bare repo root. + path: PathBuf, +} + +impl HydratedRepo { + /// Path to the bare repository — pass this to `upload-pack`/`info-refs`. + pub fn path(&self) -> &Path { + &self.path + } +} + +/// Hydration errors. +/// +/// `PointerNotFound` is the "repo doesn't exist" signal — callers should map it +/// to HTTP 404. Everything else is a backend / data error → 5xx. +#[derive(Debug, thiserror::Error)] +pub enum HydrateError { + /// The pointer for this repo does not exist — repo was never created. + /// Caller should serve HTTP 404, not synthesize an empty repo. + #[error("pointer not found for {owner}/{repo}")] + PointerNotFound { + /// Repo owner. + owner: String, + /// Repo name. + repo: String, + }, + /// Pointer body was not a valid manifest digest (64 hex chars). + #[error("pointer body is not a 64-char hex digest")] + InvalidPointer, + /// Manifest serde / version error. + #[error("manifest: {0}")] + Manifest(#[from] ManifestError), + /// Store-level error (GET failure, digest mismatch, etc.). + #[error("store: {0}")] + Store(#[from] StoreError), + /// `git init --bare`, `git index-pack`, or filesystem operation failed. + #[error("hydrate: {0}")] + Hydrate(String), +} + +/// The standard pointer key for `/`. +pub fn pointer_key(owner: &str, repo: &str) -> String { + // Strip a trailing `.git` if the caller passed it; matches transport.rs. + let repo = repo.strip_suffix(".git").unwrap_or(repo); + format!("pointers/{owner}/{repo}") +} + +/// Hydrate a bare repo for read (`upload-pack` / `info-refs`). +/// +/// Returns `Ok(None)` when the pointer is absent — the repo doesn't exist; +/// caller should respond 404. `Ok(Some(_))` is a usable bare repo. Any other +/// failure is a backend/data error. +pub async fn hydrate_for_read( + store: &GitStore, + owner: &str, + repo: &str, +) -> Result, HydrateError> { + let pkey = pointer_key(owner, repo); + + // Step 1: pointer → manifest digest. + let (_etag, pointer_bytes) = match store.get_pointer(&pkey).await? { + Some(p) => p, + None => return Ok(None), + }; + let digest = std::str::from_utf8(&pointer_bytes) + .map_err(|_| HydrateError::InvalidPointer)? + .trim(); + if digest.len() != 64 || !digest.chars().all(|c| c.is_ascii_hexdigit()) { + return Err(HydrateError::InvalidPointer); + } + + // Step 2: manifest (digest-verified). + let manifest_key = format!("manifests/{digest}"); + let manifest_bytes = store.get_verified(&manifest_key, digest).await?; + let manifest = Manifest::from_bytes(&manifest_bytes)?; + + // Step 3: fetch all packs in parallel, each digest-verified by its key. + let pack_fetches = manifest.packs.iter().map(|key| async move { + let digest = key + .strip_prefix("packs/") + .ok_or_else(|| HydrateError::Hydrate(format!("malformed pack key {key:?}")))?; + let bytes = store.get_verified(key, digest).await?; + Ok::<_, HydrateError>((digest.to_string(), bytes)) + }); + let packs = try_join_all(pack_fetches).await?; + + // Init bare repo. + let tempdir = TempDir::new().map_err(|e| HydrateError::Hydrate(format!("tempdir: {e}")))?; + let path = tempdir.path().to_path_buf(); + run_git(&path, &["init", "--bare", "--quiet"]).await?; + + // Phase 1: write + index every pack. Any failure here aborts before any + // ref is written — failed hydrate ⇒ no advertised refs. + let pack_dir = path.join("objects").join("pack"); + for (digest, bytes) in &packs { + let pack_path = pack_dir.join(format!("pack-{digest}.pack")); + tokio::fs::write(&pack_path, bytes) + .await + .map_err(|e| HydrateError::Hydrate(format!("write pack {digest}: {e}")))?; + // No `--strict`: `index-pack` already validates structural integrity + // (CRC, type tags, internal refs). `--strict` adds connectivity-graph + // checks, which would re-prove what manifest.packs already covers by + // construction (Inv_Closed, write-path invariant). Latency cost on + // every clone is not worth re-proving a write-path bug. + run_git(&path, &["index-pack", pack_path.to_str().unwrap()]).await?; + } + + // Phase 2: install refs and HEAD. After this point, the repo advertises. + for (refname, oid) in &manifest.refs { + // Defensive: refuse any ref name that escapes the repo or contains + // null/newline. The writer should already have sanitized; double-check + // because we're about to write file paths. + if !is_safe_refname(refname) { + return Err(HydrateError::Hydrate(format!( + "manifest contains unsafe refname {refname:?}" + ))); + } + if !is_hex_oid(oid) { + return Err(HydrateError::Hydrate(format!( + "manifest ref {refname} has malformed oid" + ))); + } + let ref_path = path.join(refname); + if let Some(parent) = ref_path.parent() { + tokio::fs::create_dir_all(parent) + .await + .map_err(|e| HydrateError::Hydrate(format!("mkdir {parent:?}: {e}")))?; + } + // Loose ref format: oid + newline. + tokio::fs::write(&ref_path, format!("{oid}\n")) + .await + .map_err(|e| HydrateError::Hydrate(format!("write ref {refname}: {e}")))?; + } + + // HEAD: protocol formatting (`ref: \n`) happens here, not in storage. + if !is_safe_refname(&manifest.head) { + return Err(HydrateError::Hydrate(format!( + "manifest head {:?} is not a safe ref name", + manifest.head + ))); + } + tokio::fs::write(path.join("HEAD"), format!("ref: {}\n", manifest.head)) + .await + .map_err(|e| HydrateError::Hydrate(format!("write HEAD: {e}")))?; + + Ok(Some(HydratedRepo { + _tempdir: tempdir, + path, + })) +} + +/// Run `git ` in `cwd`, fail on non-zero exit. +async fn run_git(cwd: &Path, args: &[&str]) -> Result<(), HydrateError> { + let mut cmd = Command::new("git"); + cmd.current_dir(cwd).args(args).kill_on_drop(true); + // Match transport.rs's harden_git_env semantics for subprocesses: clear + // user/system git config so behavior is reproducible. + cmd.env_clear(); + if let Ok(path) = std::env::var("PATH") { + cmd.env("PATH", path); + } + cmd.env("GIT_CONFIG_NOSYSTEM", "1"); + cmd.env("HOME", cwd); // forces $HOME/.gitconfig lookups to miss + + let output = cmd + .output() + .await + .map_err(|e| HydrateError::Hydrate(format!("spawn git {args:?}: {e}")))?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(HydrateError::Hydrate(format!( + "git {args:?} exited {}: {stderr}", + output.status + ))); + } + Ok(()) +} + +/// Conservative refname validation — refuses traversal, control chars, and +/// non-`refs/` prefixes. The writer should already enforce this; we re-check +/// because we're about to use it as a file path. +fn is_safe_refname(s: &str) -> bool { + if !s.starts_with("refs/") { + return false; + } + if s.contains("..") || s.contains("//") || s.starts_with('/') || s.ends_with('/') { + return false; + } + s.chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '/' | '_' | '.' | '-')) +} + +fn is_hex_oid(s: &str) -> bool { + // Accept both SHA-1 (40) and SHA-256 (64) oids — sprout pins SHA-1 today + // but future-proof for the algorithm transition. + (s.len() == 40 || s.len() == 64) && s.chars().all(|c| c.is_ascii_hexdigit()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::BTreeMap; + + #[test] + fn safe_refnames() { + assert!(is_safe_refname("refs/heads/main")); + assert!(is_safe_refname("refs/tags/v1.0.0")); + assert!(is_safe_refname("refs/heads/feat/cas-publish")); + assert!(!is_safe_refname("refs/heads/../escape")); + assert!(!is_safe_refname("HEAD")); + assert!(!is_safe_refname("refs/heads/")); + assert!(!is_safe_refname("/refs/heads/main")); + assert!(!is_safe_refname("refs/heads/main\nrefs/heads/evil")); + assert!(!is_safe_refname("refs/heads/main\0")); + } + + #[test] + fn hex_oids() { + assert!(is_hex_oid(&"a".repeat(40))); // SHA-1 + assert!(is_hex_oid(&"a".repeat(64))); // SHA-256 + assert!(!is_hex_oid(&"a".repeat(39))); + assert!(!is_hex_oid(&"g".repeat(40))); // non-hex + assert!(!is_hex_oid("")); + } + + #[test] + fn pointer_key_strips_dot_git() { + assert_eq!(pointer_key("alice", "myrepo"), "pointers/alice/myrepo"); + assert_eq!(pointer_key("alice", "myrepo.git"), "pointers/alice/myrepo"); + } + + // -------- Live MinIO + real git roundtrip ---------------------------------- + // + // Run manually: + // SPROUT_GIT_S3_PROBE=1 cargo test -p sprout-relay --lib \ + // api::git::hydrate::tests::live -- --nocapture --test-threads=1 + + fn probe_enabled() -> bool { + std::env::var("SPROUT_GIT_S3_PROBE").as_deref() == Ok("1") + } + + fn store() -> GitStore { + GitStore::new( + "http://localhost:9000", + "sprout_dev", + "sprout_dev_secret", + "sprout-git", + ) + .expect("connect minio") + } + + /// Build a tiny on-disk repo, return (pack bytes, head_oid). + async fn build_source_repo() -> (Vec, String) { + let src = TempDir::new().unwrap(); + run_git(src.path(), &["init", "--quiet", "--initial-branch=main"]) + .await + .unwrap(); + run_git(src.path(), &["config", "user.email", "probe@test"]) + .await + .unwrap(); + run_git(src.path(), &["config", "user.name", "probe"]) + .await + .unwrap(); + tokio::fs::write(src.path().join("hello.txt"), b"hello\n") + .await + .unwrap(); + run_git(src.path(), &["add", "hello.txt"]).await.unwrap(); + run_git(src.path(), &["commit", "-m", "init", "--quiet"]) + .await + .unwrap(); + run_git(src.path(), &["repack", "-a", "-d", "--quiet"]) + .await + .unwrap(); + + // Find the single pack file and read it. + let pack_dir = src.path().join(".git").join("objects").join("pack"); + let mut packs = vec![]; + let mut rd = tokio::fs::read_dir(&pack_dir).await.unwrap(); + while let Some(entry) = rd.next_entry().await.unwrap() { + let p = entry.path(); + if p.extension().and_then(|s| s.to_str()) == Some("pack") { + packs.push(p); + } + } + assert_eq!(packs.len(), 1, "expected exactly one pack"); + let pack_bytes = tokio::fs::read(&packs[0]).await.unwrap(); + + // Read the HEAD oid. + let mut cmd = Command::new("git"); + cmd.current_dir(src.path()) + .args(["rev-parse", "HEAD"]) + .kill_on_drop(true); + let out = cmd.output().await.unwrap(); + let head_oid = String::from_utf8(out.stdout).unwrap().trim().to_string(); + + (pack_bytes, head_oid) + } + + #[tokio::test] + async fn live_hydrate_roundtrip() { + if !probe_enabled() { + return; + } + let st = store(); + + // Build a real source repo, capture its pack and HEAD oid. + let (pack_bytes, head_oid) = build_source_repo().await; + + // Upload pack and manifest to S3 under a unique pointer key. + let pack_key = st.put_pack(&pack_bytes).await.expect("put_pack"); + let mut refs = BTreeMap::new(); + refs.insert("refs/heads/main".to_string(), head_oid.clone()); + let manifest = Manifest { + version: 1, + head: "refs/heads/main".into(), + refs, + packs: vec![pack_key.clone()], + parent: None, + }; + let manifest_bytes = manifest.canonical_bytes().expect("serialize"); + let manifest_key = st + .put_manifest(&manifest_bytes) + .await + .expect("put_manifest"); + let manifest_digest = manifest_key.strip_prefix("manifests/").unwrap(); + + let owner = format!("probe-{}", uuid::Uuid::new_v4()); + let repo = "hello"; + let pkey = pointer_key(&owner, repo); + match st + .put_pointer( + &pkey, + manifest_digest.as_bytes(), + super::super::store::Precond::IfNoneMatchStar, + ) + .await + .expect("put_pointer") + { + super::super::store::CasOutcome::Won(_) => {} + super::super::store::CasOutcome::LostRace => panic!("first INM* must win"), + } + + // Hydrate. + let hydrated = hydrate_for_read(&st, &owner, repo) + .await + .expect("hydrate") + .expect("hydrate Some"); + eprintln!("hydrated to {}", hydrated.path().display()); + + // The hydrated repo must list the same ref with the same oid. + let mut cmd = Command::new("git"); + cmd.current_dir(hydrated.path()) + .args(["for-each-ref", "--format=%(refname) %(objectname)"]) + .kill_on_drop(true); + let out = cmd.output().await.unwrap(); + let listing = String::from_utf8(out.stdout).unwrap(); + eprintln!("for-each-ref: {listing}"); + assert!( + listing.contains(&format!("refs/heads/main {head_oid}")), + "ref/oid mismatch in hydrated repo: {listing}" + ); + + // HEAD points at refs/heads/main. + let head_file = tokio::fs::read_to_string(hydrated.path().join("HEAD")) + .await + .unwrap(); + assert_eq!(head_file.trim(), "ref: refs/heads/main"); + + // git rev-parse HEAD resolves to the same oid. + let mut rp = Command::new("git"); + rp.current_dir(hydrated.path()) + .args(["rev-parse", "HEAD"]) + .kill_on_drop(true); + let resolved = String::from_utf8(rp.output().await.unwrap().stdout) + .unwrap() + .trim() + .to_string(); + assert_eq!(resolved, head_oid, "HEAD did not resolve to original oid"); + + // Bonus: clone the hydrated repo and verify the file content survives. + let clone_target = TempDir::new().unwrap(); + let mut clone = Command::new("git"); + clone + .args([ + "clone", + "--quiet", + hydrated.path().to_str().unwrap(), + clone_target.path().to_str().unwrap(), + ]) + .kill_on_drop(true); + let cl_out = clone.output().await.unwrap(); + assert!( + cl_out.status.success(), + "clone failed: {}", + String::from_utf8_lossy(&cl_out.stderr) + ); + let hello = tokio::fs::read_to_string(clone_target.path().join("hello.txt")) + .await + .unwrap(); + assert_eq!(hello, "hello\n"); + + eprintln!("✓ hydrate + clone roundtrip works"); + // We leave the probe pointer/manifest/pack behind — the owner is + // UUID-namespaced so subsequent runs don't collide, and immutable + // objects accumulate by design (retention is a backend concern). + let _ = &pkey; // suppress unused warning when cleanup is omitted + } + + #[tokio::test] + async fn live_hydrate_missing_pointer_returns_none() { + if !probe_enabled() { + return; + } + let st = store(); + let owner = format!("nope-{}", uuid::Uuid::new_v4()); + let result = hydrate_for_read(&st, &owner, "ghost").await.expect("ok"); + assert!(result.is_none(), "missing pointer must surface as None"); + } +} diff --git a/crates/sprout-relay/src/api/git/mod.rs b/crates/sprout-relay/src/api/git/mod.rs index 9783a7941..2c62460a8 100644 --- a/crates/sprout-relay/src/api/git/mod.rs +++ b/crates/sprout-relay/src/api/git/mod.rs @@ -23,6 +23,7 @@ use tower_http::limit::RequestBodyLimitLayer; use crate::state::AppState; pub mod hook; +pub mod hydrate; pub mod manifest; pub mod policy; pub mod store; From fa132d74b284651ba8660b91e42a07db358df407 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:10:16 -0400 Subject: [PATCH 08/24] test(relay/git/hydrate): empty-repo clone case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the named gap from `f5cb52ee`: confirms that `hydrate_for_read` with a manifest carrying empty `refs` and `packs` produces a bare repo whose `git clone` behavior is indistinguishable from `git clone` of a freshly `git init --bare`'d repo — exits 0, no objects, no refs, HEAD pointing at the manifest-configured default branch, with git's standard "you appear to have cloned an empty repository" warning on stderr. Mari flagged this as the case her HTTP e2e harness will exercise; this test pins the hydration mechanics underneath it. 210/210 relay tests; 6/6 hydrate (3 unit + 3 live, all `SPROUT_GIT_S3_PROBE=1`-gated). Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/hydrate.rs | 88 ++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs index 6b3a10444..30b66e529 100644 --- a/crates/sprout-relay/src/api/git/hydrate.rs +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -457,4 +457,92 @@ mod tests { let result = hydrate_for_read(&st, &owner, "ghost").await.expect("ok"); assert!(result.is_none(), "missing pointer must surface as None"); } + + /// Empty repo: pointer present, manifest carries an empty refs map. A + /// `git clone` of the hydrated repo must succeed and produce the same + /// behavior as a `git clone` of a freshly `git init --bare`'d repo — + /// no objects, no refs, HEAD pointing at the configured default branch. + #[tokio::test] + async fn live_hydrate_empty_repo() { + if !probe_enabled() { + return; + } + let st = store(); + + let manifest = Manifest { + version: 1, + head: "refs/heads/main".into(), + refs: BTreeMap::new(), + packs: vec![], + parent: None, + }; + let manifest_bytes = manifest.canonical_bytes().expect("serialize"); + let manifest_key = st + .put_manifest(&manifest_bytes) + .await + .expect("put_manifest"); + let manifest_digest = manifest_key.strip_prefix("manifests/").unwrap(); + + let owner = format!("empty-{}", uuid::Uuid::new_v4()); + let pkey = pointer_key(&owner, "void"); + match st + .put_pointer( + &pkey, + manifest_digest.as_bytes(), + super::super::store::Precond::IfNoneMatchStar, + ) + .await + .expect("put_pointer") + { + super::super::store::CasOutcome::Won(_) => {} + super::super::store::CasOutcome::LostRace => panic!("first INM* must win"), + } + + let hydrated = hydrate_for_read(&st, &owner, "void") + .await + .expect("hydrate") + .expect("hydrate Some"); + + // HEAD points where the manifest said. + let head_file = tokio::fs::read_to_string(hydrated.path().join("HEAD")) + .await + .unwrap(); + assert_eq!(head_file.trim(), "ref: refs/heads/main"); + + // No refs. + let mut cmd = Command::new("git"); + cmd.current_dir(hydrated.path()) + .args(["for-each-ref"]) + .kill_on_drop(true); + let out = cmd.output().await.unwrap(); + assert!( + out.stdout.is_empty(), + "expected no refs, got: {:?}", + String::from_utf8_lossy(&out.stdout) + ); + + // git clone must succeed against an empty repo and produce an empty + // working tree at the configured default branch. + let clone_target = TempDir::new().unwrap(); + let mut clone = Command::new("git"); + clone + .args([ + "clone", + "--quiet", + hydrated.path().to_str().unwrap(), + clone_target.path().to_str().unwrap(), + ]) + .kill_on_drop(true); + let cl_out = clone.output().await.unwrap(); + let stderr = String::from_utf8_lossy(&cl_out.stderr); + // git emits "warning: You appear to have cloned an empty repository." + // on stderr but exits 0. The exit code is the protocol-level signal. + assert!( + cl_out.status.success(), + "empty clone failed (exit={:?}): {stderr}", + cl_out.status.code() + ); + eprintln!("✓ empty-repo clone succeeded; stderr: {stderr}"); + let _ = &pkey; + } } From 014c1ea83d37e62246729227548b9c8580157f21 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:16:16 -0400 Subject: [PATCH 09/24] fix(relay/git/hydrate): drop dead PointerNotFound variant; narrow allow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cleanups from Sami's `f5cb52ee` read-path review: 1. **`HydrateError::PointerNotFound` was declared but never constructed.** `hydrate_for_read` signals "repo doesn't exist" via `Ok(None)`; the variant was masked by a module-wide `#![allow(dead_code)]`. The doc-comment on `HydrateError` still claimed `PointerNotFound` was the 404 signal, which contradicted the actual code. Remove the variant (the `Ok(None)`/`Err(_)` split was already structural) and update the doc to say: every `HydrateError` variant maps to a 5xx, missing-repo is `Ok(None)`. Type system now enforces the HTTP layer split. 2. **Drop module-wide `#![allow(dead_code)]`.** It was hiding (1) and would silence future accidental dead code. Turns out nothing in the module is actually dead once the test module references the API, so the allow wasn't needed at all — replaced with a comment explaining why the items are `pub` ahead of transport wiring. 210/210 relay tests, 6/6 hydrate (incl. live MinIO roundtrip), clippy + fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/hydrate.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs index 30b66e529..467c49d55 100644 --- a/crates/sprout-relay/src/api/git/hydrate.rs +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -21,7 +21,11 @@ //! cleans up. Every read currently re-hydrates from scratch — naive but //! correct; caching is named follow-up work. -#![allow(dead_code)] // wired in by transport.rs in a follow-up commit +// Public surface is consumed by `transport.rs` after Eva's `AppState::git_store` +// wires in; the items below are intentionally `pub` to keep the consumer-side +// diff minimal once integration lands. We narrow `#[allow(dead_code)]` to those +// specific items rather than blanketing the module — that lets the compiler +// still catch accidental dead code inside `hydrate.rs` itself. use std::path::{Path, PathBuf}; @@ -52,19 +56,11 @@ impl HydratedRepo { /// Hydration errors. /// -/// `PointerNotFound` is the "repo doesn't exist" signal — callers should map it -/// to HTTP 404. Everything else is a backend / data error → 5xx. +/// "Repo doesn't exist" is signalled by `Ok(None)` from `hydrate_for_read`, +/// not a variant here — the type system enforces the 404-vs-5xx split. +/// Every variant of `HydrateError` maps to a backend / data error → HTTP 5xx. #[derive(Debug, thiserror::Error)] pub enum HydrateError { - /// The pointer for this repo does not exist — repo was never created. - /// Caller should serve HTTP 404, not synthesize an empty repo. - #[error("pointer not found for {owner}/{repo}")] - PointerNotFound { - /// Repo owner. - owner: String, - /// Repo name. - repo: String, - }, /// Pointer body was not a valid manifest digest (64 hex chars). #[error("pointer body is not a 64-char hex digest")] InvalidPointer, From 9744f91a903b4574d4760460db5ff8f920dc421a Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:20:35 -0400 Subject: [PATCH 10/24] fix(relay/git/hydrate): align pointer key with cas_publish (repos///pointer) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quinn's `cas_publish::pointer_key` (`quinn/push-fence-cas @ 1185709b`) uses `repos///pointer`. My hydrate used `pointers//`. **Hard write/read mismatch** that would have silently 404'd every clone against a repo that exists, only surfacing at live e2e time. Aligning to Quinn's schema — namespace under each repo lets future per-repo keys (config, refs index, gc state) co-locate. Live MinIO roundtrip + empty-repo + missing-pointer tests all still green under the new layout. 210/210 relay tests; clippy + fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/hydrate.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs index 467c49d55..210aafa69 100644 --- a/crates/sprout-relay/src/api/git/hydrate.rs +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -76,10 +76,15 @@ pub enum HydrateError { } /// The standard pointer key for `/`. +/// +/// Layout: `repos///pointer` — namespace under each repo so +/// future per-repo keys (config, refs index, gc state) co-locate. Must +/// match `cas_publish::pointer_key` exactly; otherwise reads 404 against +/// writes. pub fn pointer_key(owner: &str, repo: &str) -> String { // Strip a trailing `.git` if the caller passed it; matches transport.rs. let repo = repo.strip_suffix(".git").unwrap_or(repo); - format!("pointers/{owner}/{repo}") + format!("repos/{owner}/{repo}/pointer") } /// Hydrate a bare repo for read (`upload-pack` / `info-refs`). @@ -262,8 +267,11 @@ mod tests { #[test] fn pointer_key_strips_dot_git() { - assert_eq!(pointer_key("alice", "myrepo"), "pointers/alice/myrepo"); - assert_eq!(pointer_key("alice", "myrepo.git"), "pointers/alice/myrepo"); + assert_eq!(pointer_key("alice", "myrepo"), "repos/alice/myrepo/pointer"); + assert_eq!( + pointer_key("alice", "myrepo.git"), + "repos/alice/myrepo/pointer" + ); } // -------- Live MinIO + real git roundtrip ---------------------------------- From 4844001b6fd8b3c3e9a13a4e7de78e5ba0be84c5 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:25:44 -0400 Subject: [PATCH 11/24] refactor(relay/git/hydrate): import shared is_safe_refname/is_hex_oid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The predicates were duplicated between hydrate (read side) and manifest (write side). After `manifest::validate` lifted them as `pub` predicates, hydrate imports them instead of carrying its own. Single source of truth → no divergence drift between write rejection and read rejection. Live MinIO roundtrip + empty-repo + missing-pointer still 6/6 green; 218 relay tests total; clippy + fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/hydrate.rs | 24 ++++------------------ 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs index 210aafa69..e813cb08e 100644 --- a/crates/sprout-relay/src/api/git/hydrate.rs +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -33,7 +33,7 @@ use futures_util::future::try_join_all; use tempfile::TempDir; use tokio::process::Command; -use super::manifest::{Manifest, ManifestError}; +use super::manifest::{is_hex_oid, is_safe_refname, Manifest, ManifestError}; use super::store::{GitStore, StoreError}; /// A bare repo hydrated to a temporary directory. @@ -218,25 +218,9 @@ async fn run_git(cwd: &Path, args: &[&str]) -> Result<(), HydrateError> { Ok(()) } -/// Conservative refname validation — refuses traversal, control chars, and -/// non-`refs/` prefixes. The writer should already enforce this; we re-check -/// because we're about to use it as a file path. -fn is_safe_refname(s: &str) -> bool { - if !s.starts_with("refs/") { - return false; - } - if s.contains("..") || s.contains("//") || s.starts_with('/') || s.ends_with('/') { - return false; - } - s.chars() - .all(|c| c.is_ascii_alphanumeric() || matches!(c, '/' | '_' | '.' | '-')) -} - -fn is_hex_oid(s: &str) -> bool { - // Accept both SHA-1 (40) and SHA-256 (64) oids — sprout pins SHA-1 today - // but future-proof for the algorithm transition. - (s.len() == 40 || s.len() == 64) && s.chars().all(|c| c.is_ascii_hexdigit()) -} +// `is_safe_refname` and `is_hex_oid` live in `super::manifest` — symmetric +// write-side (Manifest::validate) and read-side (here) protection, single +// source of truth. See `manifest.rs` for the predicates + tests. #[cfg(test)] mod tests { From 6e9263bd2b423a9fe5bc56653877bdeeee1f7a69 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:31:42 -0400 Subject: [PATCH 12/24] refactor(relay/git/hydrate): import pointer_key from manifest Follows the predicate-dedup commit (`471e4dae`): `manifest::pointer_key` is now `pub`, so hydrate imports rather than carrying its own definition. Single source of truth shared with `cas_publish`. Drops the duplicate `pointer_key_strips_dot_git` test (covered by `manifest::tests::pointer_key_strips_dot_git`). 221/221 relay tests; 5/5 hydrate live (MinIO roundtrip + empty-repo + missing-pointer); clippy + fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/hydrate.rs | 24 ++++------------------ 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs index e813cb08e..6211670fa 100644 --- a/crates/sprout-relay/src/api/git/hydrate.rs +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -33,7 +33,7 @@ use futures_util::future::try_join_all; use tempfile::TempDir; use tokio::process::Command; -use super::manifest::{is_hex_oid, is_safe_refname, Manifest, ManifestError}; +use super::manifest::{is_hex_oid, is_safe_refname, pointer_key, Manifest, ManifestError}; use super::store::{GitStore, StoreError}; /// A bare repo hydrated to a temporary directory. @@ -75,17 +75,8 @@ pub enum HydrateError { Hydrate(String), } -/// The standard pointer key for `/`. -/// -/// Layout: `repos///pointer` — namespace under each repo so -/// future per-repo keys (config, refs index, gc state) co-locate. Must -/// match `cas_publish::pointer_key` exactly; otherwise reads 404 against -/// writes. -pub fn pointer_key(owner: &str, repo: &str) -> String { - // Strip a trailing `.git` if the caller passed it; matches transport.rs. - let repo = repo.strip_suffix(".git").unwrap_or(repo); - format!("repos/{owner}/{repo}/pointer") -} +// `pointer_key` is imported from `super::manifest` — single source of truth +// shared with `cas_publish` (write side). See manifest.rs. /// Hydrate a bare repo for read (`upload-pack` / `info-refs`). /// @@ -249,14 +240,7 @@ mod tests { assert!(!is_hex_oid("")); } - #[test] - fn pointer_key_strips_dot_git() { - assert_eq!(pointer_key("alice", "myrepo"), "repos/alice/myrepo/pointer"); - assert_eq!( - pointer_key("alice", "myrepo.git"), - "repos/alice/myrepo/pointer" - ); - } + // `pointer_key` is tested in `super::manifest::tests` — single source. // -------- Live MinIO + real git roundtrip ---------------------------------- // From 7f7e364c58d8f190c3e4c57eb4aa69f3508a58d2 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:07:16 -0400 Subject: [PATCH 13/24] feat(relay/git): kind:30618 ref-state event builder Pure function over a borrowed slice of Manifest fields (RefStateInputs). Quinn's cas_publish calls build_ref_state_event after a successful CAS in finalize_push. 9 unit tests pin NIP-34 invariants: - HEAD tag wrapped 'ref: ' (storage is bare; protocol formatting at emit) - Only refs/heads/* and refs/tags/* emit; other ref namespaces filtered - OIDs validated 40/64-hex (SHA-1/SHA-256); invalid OIDs skip-not-fail - Ref names validated (no //, no leading /) - Deterministic tag order: d, refs (BTreeMap-sorted), HEAD, p - Invalid actor_pubkey_hex errors before tag construction - d-tag is repo_id, NOT .git (caller responsibility, pinned) Signed with relay_keys: relay is authoritative for ref state of repos it hosts. Co-authored-by: Sami Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- .../src/api/git/manifest_event.rs | 418 ++++++++++++++++++ crates/sprout-relay/src/api/git/mod.rs | 1 + 2 files changed, 419 insertions(+) create mode 100644 crates/sprout-relay/src/api/git/manifest_event.rs diff --git a/crates/sprout-relay/src/api/git/manifest_event.rs b/crates/sprout-relay/src/api/git/manifest_event.rs new file mode 100644 index 000000000..25161d489 --- /dev/null +++ b/crates/sprout-relay/src/api/git/manifest_event.rs @@ -0,0 +1,418 @@ +//! Manifest → kind:30618 NIP-34 ref-state event. +//! +//! Pure function. No subprocess, no disk read, no S3. +//! Source of truth is the in-memory `Manifest` (loaded from S3 by the caller). +//! +//! NIP-34 reference (kind:30618 "Repository state announcements"): +//! tags = [ +//! ["d", ""], // matches kind:30617 d-tag +//! ["refs/heads/", ""], // zero or more +//! ["refs/tags/", ""], // zero or more +//! ["HEAD", "ref: refs/heads/"], // symbolic HEAD +//! ] +//! +//! Sprout extension: a `p` tag carrying the pusher's pubkey (or the repo +//! owner on creation events) so subscribers can filter by author of state +//! transition. Not part of NIP-34 but consistent with the rest of sprout's +//! event-publishing conventions. + +use std::collections::BTreeMap; + +use nostr::{Event, EventBuilder, Keys, Kind, PublicKey, Tag, TagKind}; + +// ── Inputs ─────────────────────────────────────────────────────────────────── + +/// Subset of `Manifest` needed to emit a kind:30618 event. +/// +/// Deliberately a borrowed slice of fields (not the whole `Manifest`) so this +/// module doesn't take a hard dep on `s3_repo::Manifest`'s exact shape. Caller +/// constructs this from the loaded manifest. +pub struct RefStateInputs<'a> { + /// The kind:30617 d-tag identifier. == repo_id, NOT `.git`. + pub repo_id: &'a str, + /// Symbolic HEAD ref, e.g. "refs/heads/main". NO "ref: " prefix here — + /// the prefix is added when emitting the tag. Storing it unprefixed in + /// the manifest keeps Git-protocol formatting out of the storage schema. + pub head: &'a str, + /// `ref_name -> oid_hex`. Only `refs/heads/*` and `refs/tags/*` will be + /// emitted; other ref namespaces are filtered. + pub refs: &'a BTreeMap, + /// Pubkey to include in the `p` tag (sprout extension). On push, this is + /// the pusher's pubkey from the receive-pack hook. On repo-creation, this + /// is the kind:30617 author (repo owner). Hex-encoded (64 chars). + pub actor_pubkey_hex: &'a str, +} + +/// Errors from building a kind:30618 ref-state event. +#[derive(thiserror::Error, Debug)] +pub enum BuildError { + /// `actor_pubkey_hex` did not parse as a valid 64-char hex pubkey. + #[error("invalid actor_pubkey_hex: {0}")] + InvalidActor(String), + /// `nostr` event signing returned an error. + #[error("nostr event signing failed: {0}")] + Sign(String), +} + +// ── Build helper ───────────────────────────────────────────────────────────── + +/// Build & sign a kind:30618 event from the manifest's ref state. +/// +/// Signed with `relay_keys` — the relay is the authoritative source of ref +/// state for repos it hosts. +/// +/// Invariants enforced inside this function: +/// - HEAD tag is wrapped as `"ref: "` per NIP-34, even though the +/// manifest stores HEAD bare. +/// - Only `refs/heads/*` and `refs/tags/*` are emitted (NIP-34 §"Repository +/// state announcements" semantics). +/// - OIDs validated as 40-hex (SHA-1) or 64-hex (SHA-256). Invalid OIDs are +/// skipped, not failed — same conservative behavior as the legacy code. +/// - Ref names validated (no `//`, no leading `/`, alphanumeric + `/_.-`). +/// - Output tag ordering is deterministic for testability: `d`, refs (sorted +/// by `BTreeMap` iteration), HEAD, p. +pub fn build_ref_state_event( + inputs: &RefStateInputs<'_>, + relay_keys: &Keys, +) -> Result { + // Validate actor pubkey first so we error before any tag construction. + let actor = PublicKey::from_hex(inputs.actor_pubkey_hex) + .map_err(|e| BuildError::InvalidActor(e.to_string()))?; + + let mut tags: Vec = Vec::with_capacity(inputs.refs.len() + 3); + + // d-tag: kind:30617 identifier. + tags.push(Tag::custom(TagKind::custom("d"), [inputs.repo_id])); + + // ref tags: refs/heads/* and refs/tags/* only. + for (ref_name, oid) in inputs.refs { + if !is_emittable_ref(ref_name) { + continue; + } + if !is_valid_oid(oid) { + continue; + } + tags.push(Tag::custom( + TagKind::custom(ref_name.clone()), + [oid.clone()], + )); + } + + // HEAD tag — note the "ref: " prefix required by NIP-34. + if !inputs.head.is_empty() && is_emittable_ref(inputs.head) { + tags.push(Tag::custom( + TagKind::custom("HEAD"), + [format!("ref: {}", inputs.head)], + )); + } + + // p-tag: sprout extension (pusher or owner pubkey). + tags.push(Tag::public_key(actor)); + + let event = EventBuilder::new(Kind::Custom(30618), "", tags) + .sign_with_keys(relay_keys) + .map_err(|e| BuildError::Sign(e.to_string()))?; + + Ok(event) +} + +// ── Validators (private) ───────────────────────────────────────────────────── + +/// NIP-34 kind:30618 only emits refs under heads/ and tags/. +fn is_emittable_ref(name: &str) -> bool { + if !(name.starts_with("refs/heads/") || name.starts_with("refs/tags/")) { + return false; + } + if name.starts_with('/') || name.contains("//") { + return false; + } + name.chars() + .all(|c| c.is_ascii_alphanumeric() || "/_.-".contains(c)) +} + +/// Accept SHA-1 (40 hex) and SHA-256 (64 hex) OIDs. +fn is_valid_oid(s: &str) -> bool { + matches!(s.len(), 40 | 64) && s.chars().all(|c| c.is_ascii_hexdigit()) +} + +// ── Tests ──────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use nostr::SecretKey; + + fn relay_keys() -> Keys { + // Deterministic test key. + Keys::new( + SecretKey::from_hex("0000000000000000000000000000000000000000000000000000000000000001") + .unwrap(), + ) + } + + fn owner_hex() -> String { + // 64-hex pubkey for the test "actor". + "f4a42a97e594b77bdbd8ee35191c8b28a94a4cb871d96f32921558275421fb68".to_string() + } + + fn refs_with(pairs: &[(&str, &str)]) -> BTreeMap { + pairs + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() + } + + // Helper: get tag values by tag-name prefix. + fn tags_with_kind(ev: &Event, kind: &str) -> Vec> { + ev.tags + .iter() + .filter_map(|t| { + let s = t.as_slice(); + if s.first().map(String::as_str) == Some(kind) { + Some(s.to_vec()) + } else { + None + } + }) + .collect() + } + + fn first_tag(ev: &Event, kind: &str) -> Option> { + tags_with_kind(ev, kind).into_iter().next() + } + + // ── Empty repo (creation event) ────────────────────────────────────────── + + #[test] + fn empty_repo_emits_d_head_p_only() { + let owner = owner_hex(); + let refs = refs_with(&[]); + let inputs = RefStateInputs { + repo_id: "myrepo", + head: "refs/heads/main", + refs: &refs, + actor_pubkey_hex: &owner, + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + + assert_eq!(ev.kind, Kind::Custom(30618)); + assert_eq!(ev.content, ""); + + // d-tag + assert_eq!(first_tag(&ev, "d").unwrap()[1], "myrepo"); + // HEAD tag — note "ref: " prefix + assert_eq!( + first_tag(&ev, "HEAD").unwrap()[1], + "ref: refs/heads/main", + "HEAD tag MUST be wrapped 'ref: ' per NIP-34", + ); + // p-tag = owner + assert_eq!(first_tag(&ev, "p").unwrap()[1], owner); + // no ref tags + assert!(tags_with_kind(&ev, "refs/heads/main").is_empty()); + } + + // ── HEAD wrapping (the gotcha) ─────────────────────────────────────────── + + #[test] + fn head_tag_always_wraps_with_ref_prefix() { + // Even if a caller is sloppy and passes head with the prefix already... + // we shouldn't double-wrap. (Bare ref expected; doc the precondition.) + // This test pins that bare ref → wrapped output. + let refs = refs_with(&[]); + let inputs = RefStateInputs { + repo_id: "x", + head: "refs/heads/dev", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + assert_eq!(first_tag(&ev, "HEAD").unwrap()[1], "ref: refs/heads/dev"); + } + + // ── Branch + tag refs ──────────────────────────────────────────────────── + + #[test] + fn emits_branches_and_tags() { + let refs = refs_with(&[ + ( + "refs/heads/main", + "1111111111111111111111111111111111111111", + ), + ("refs/heads/dev", "2222222222222222222222222222222222222222"), + ("refs/tags/v1.0", "3333333333333333333333333333333333333333"), + ]); + let inputs = RefStateInputs { + repo_id: "r", + head: "refs/heads/main", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + + assert_eq!( + first_tag(&ev, "refs/heads/main").unwrap()[1], + "1111111111111111111111111111111111111111", + ); + assert_eq!( + first_tag(&ev, "refs/heads/dev").unwrap()[1], + "2222222222222222222222222222222222222222", + ); + assert_eq!( + first_tag(&ev, "refs/tags/v1.0").unwrap()[1], + "3333333333333333333333333333333333333333", + ); + } + + // ── Non-heads/tags refs are filtered ───────────────────────────────────── + + #[test] + fn skips_non_heads_or_tags_refs() { + let refs = refs_with(&[ + ( + "refs/heads/main", + "1111111111111111111111111111111111111111", + ), + ( + "refs/notes/commits", + "2222222222222222222222222222222222222222", + ), + ( + "refs/remotes/origin/x", + "3333333333333333333333333333333333333333", + ), + ("refs/stash", "4444444444444444444444444444444444444444"), + ( + "refs/pull/1/head", + "5555555555555555555555555555555555555555", + ), + ]); + let inputs = RefStateInputs { + repo_id: "r", + head: "refs/heads/main", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + + assert!(first_tag(&ev, "refs/heads/main").is_some()); + assert!(first_tag(&ev, "refs/notes/commits").is_none()); + assert!(first_tag(&ev, "refs/remotes/origin/x").is_none()); + assert!(first_tag(&ev, "refs/stash").is_none()); + assert!(first_tag(&ev, "refs/pull/1/head").is_none()); + } + + // ── OID validation: SHA-1 and SHA-256 ──────────────────────────────────── + + #[test] + fn accepts_sha1_and_sha256_oids() { + let sha1 = "1111111111111111111111111111111111111111"; // 40 hex + let sha256 = "1111111111111111111111111111111111111111111111111111111111111111"; // 64 + let refs = refs_with(&[ + ("refs/heads/sha1-branch", sha1), + ("refs/heads/sha256-branch", sha256), + ]); + let inputs = RefStateInputs { + repo_id: "r", + head: "refs/heads/sha1-branch", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + assert!(first_tag(&ev, "refs/heads/sha1-branch").is_some()); + assert!(first_tag(&ev, "refs/heads/sha256-branch").is_some()); + } + + #[test] + fn rejects_invalid_oids() { + let refs = refs_with(&[ + ("refs/heads/short", "1234"), // too short + ( + "refs/heads/non-hex", + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", + ), // non-hex + ( + "refs/heads/midlen", + "11111111111111111111111111111111111111111111111111", + ), // 50, between + ("refs/heads/ok", "1111111111111111111111111111111111111111"), + ]); + let inputs = RefStateInputs { + repo_id: "r", + head: "refs/heads/ok", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + assert!(first_tag(&ev, "refs/heads/short").is_none()); + assert!(first_tag(&ev, "refs/heads/non-hex").is_none()); + assert!(first_tag(&ev, "refs/heads/midlen").is_none()); + assert!(first_tag(&ev, "refs/heads/ok").is_some()); + } + + // ── Ref name validation ────────────────────────────────────────────────── + + #[test] + fn rejects_malformed_ref_names() { + let refs = refs_with(&[ + ( + "refs/heads//double", + "1111111111111111111111111111111111111111", + ), // // + ( + "/refs/heads/leading", + "1111111111111111111111111111111111111111", + ), // leading / + ( + "refs/heads/space ref", + "1111111111111111111111111111111111111111", + ), // space + ( + "refs/heads/legit", + "1111111111111111111111111111111111111111", + ), + ]); + let inputs = RefStateInputs { + repo_id: "r", + head: "refs/heads/legit", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + assert!(first_tag(&ev, "refs/heads/legit").is_some()); + // Malformed refs should not appear. + assert_eq!(tags_with_kind(&ev, "refs/heads//double").len(), 0); + } + + // ── Actor pubkey errors ────────────────────────────────────────────────── + + #[test] + fn rejects_invalid_actor_pubkey() { + let refs = refs_with(&[]); + let inputs = RefStateInputs { + repo_id: "r", + head: "refs/heads/main", + refs: &refs, + actor_pubkey_hex: "not-a-pubkey", + }; + let err = build_ref_state_event(&inputs, &relay_keys()).unwrap_err(); + assert!(matches!(err, BuildError::InvalidActor(_))); + } + + // ── d-tag matches kind:30617 identifier (NOT .git) ───────────────── + + #[test] + fn d_tag_is_repo_id_not_repo_dot_git() { + let refs = refs_with(&[]); + let inputs = RefStateInputs { + repo_id: "myrepo", // caller MUST strip .git before passing + head: "refs/heads/main", + refs: &refs, + actor_pubkey_hex: &owner_hex(), + }; + let ev = build_ref_state_event(&inputs, &relay_keys()).unwrap(); + assert_eq!(first_tag(&ev, "d").unwrap()[1], "myrepo"); + // Pin: caller responsibility — if they pass "myrepo.git", that's what + // ends up in the d-tag and won't match the kind:30617 announcement. + } +} diff --git a/crates/sprout-relay/src/api/git/mod.rs b/crates/sprout-relay/src/api/git/mod.rs index 2c62460a8..5f373da14 100644 --- a/crates/sprout-relay/src/api/git/mod.rs +++ b/crates/sprout-relay/src/api/git/mod.rs @@ -25,6 +25,7 @@ use crate::state::AppState; pub mod hook; pub mod hydrate; pub mod manifest; +pub mod manifest_event; pub mod policy; pub mod store; pub mod transport; From 0bcb0455879b5d0ae8f030adf9426e4bf565874b Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Thu, 21 May 2026 20:48:29 -0400 Subject: [PATCH 14/24] feat(git/transport): type-enforced post-push fence + fallible refs snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors the receive-pack path so the post-push publish is **inline and sequenced before** the success response is built — and so the structural seam is type-visible rather than convention. ## What changes - Introduce `PackOutput { stdout: Vec }` and `PushContext { pack, refs_before, owner, repo, pusher, repo_path }`. The push handler passes a `PushContext` to `finalize_push`, which is the **only** function that converts a push's `PackOutput` into a 2xx `Response` (via `build_git_response`). - `snapshot_refs` returns `Result` instead of collapsing failure to `""`. The previous shape silently skipped publish when *both* pre- and post-push snapshots failed (they both became `""` and compared equal); the `Result` shape makes that impossible. - Pull the publish/skip decision into a pure `should_publish(before, after)` function. Deny-by-default: skip only when both snapshots are `Ok` and equal; every other state (changed refs, either side `Err`) falls through to publish. Six unit tests pin each arm. - Remove the fire-and-forget `tokio::spawn` that previously published *after* `Ok(response)` returned. Publish now awaits inline on the changed-or-error path; no-op pushes short-circuit and pay zero fence latency. - Drop the unused `run_git_service_with_env` wrapper now that receive-pack calls `run_git_subprocess` directly. ## Why This is the §Implementation Correspondence seam called for by `docs/git-on-object-storage.md`. With this in place: 1. **Theorem 1 (fence) — structurally enforceable.** A push cannot produce a 2xx `Response` without going through `finalize_push`, which contains the conditional `publish_ref_state().await`. The compiler is the doc comment. 2. **Double-snapshot-failure hole closed.** `(Err, Err)` is outside the skip arm; it cannot collide on `""` and silently bypass publish. 3. **No-op fast path preserved.** Denied/no-op pushes do not pay publish latency. The fence engages iff refs changed or either snapshot errored. Today `publish_ref_state` is a relay-DB insert; failure is logged and the response is still 200 (pack is durable on disk). The S3 manifest-CAS evolution lands on this same seam — `publish_ref_state` becomes a conditional PUT, and the 412→409 mapping plugs into the `if let Err(e)` arm of `finalize_push`. No further refactor needed when that work lands. ## Tests Six unit tests on `should_publish` cover every arm of the fence decision: no-op skip, changed-refs publish, first-push-to-empty-repo publish, before-err publish, after-err publish, and the load-bearing both-err publish (the bug this refactor exists to close). Runtime ordering — publish completes before `build_git_response` is called — is enforced by `finalize_push` being a single sequential async function. A future integration test once a mockable publish seam exists would be belt-and-suspenders; left as a follow-up. Refs: spec/git-on-object-storage @ 0896aff (or successor). Co-authored-by: Quinn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/transport.rs | 245 ++++++++++++++++--- 1 file changed, 212 insertions(+), 33 deletions(-) diff --git a/crates/sprout-relay/src/api/git/transport.rs b/crates/sprout-relay/src/api/git/transport.rs index 7c00900c5..26571cd35 100644 --- a/crates/sprout-relay/src/api/git/transport.rs +++ b/crates/sprout-relay/src/api/git/transport.rs @@ -468,10 +468,15 @@ pub async fn receive_pack( ("GIT_CONFIG_VALUE_0", hooks_dir), ]; - // Snapshot refs before push — used to detect whether anything actually changed. + // Snapshot refs before push — used by the post-push fence to detect + // whether anything actually changed. let refs_before = snapshot_refs(&validated.repo_path).await; - let response = run_git_service_with_env( + // Run receive-pack and obtain the *owned* subprocess output. Crucially we + // do NOT build a `Response` here — that work is delegated to + // `finalize_push`, which is the sole site that converts a push's + // `PackOutput` into a 2xx response (see the type docs on `PackOutput`). + let pack = run_git_subprocess( &state, ¶ms.owner, ¶ms.repo, @@ -481,28 +486,36 @@ pub async fn receive_pack( ) .await?; - // Post-push: publish kind:30618 ref state only if refs actually changed. - // Git smart HTTP returns 200 even on denied pushes (in-band rejection), - // so we compare before/after refs to avoid publishing on no-ops. - let state_clone = state.clone(); - let owner = params.owner.clone(); - let repo = params.repo.clone(); - let pusher = auth.pubkey; - let repo_path = validated.repo_path.clone(); - tokio::spawn(async move { - let refs_after = snapshot_refs(&repo_path).await; - if refs_before == refs_after { - return; // Nothing changed — skip publish. - } - if let Err(e) = publish_ref_state(&state_clone, &owner, &repo, &pusher).await { - warn!(error = %e, owner = %owner, repo = %repo, "failed to publish kind:30618"); - } - }); + let ctx = PushContext { + pack, + refs_before, + owner: params.owner.clone(), + repo: params.repo.clone(), + pusher: auth.pubkey, + repo_path: validated.repo_path.clone(), + }; + Ok(finalize_push(state, ctx).await) +} - Ok(response) +/// Buffered output of a `git --stateless-rpc` subprocess. +/// +/// The handler holds this as an owned value between subprocess completion and +/// response construction — this is the *structural seam* the post-push fence +/// relies on (see §Implementation Correspondence in +/// `docs/git-on-object-storage.md`): nothing reaches the client until the +/// handler has decided to build a `Response` from these bytes. +pub(crate) struct PackOutput { + pub stdout: Vec, } -/// Shared git service runner — spawns subprocess and streams I/O. +/// Shared git service runner — spawns subprocess and builds the canonical +/// `application/x-git-{service}-result` response. Used for `info/refs` and +/// `upload-pack` (read paths, no post-publish fence). +/// +/// Push (`receive-pack`) does **not** use this: it calls +/// [`run_git_subprocess`] directly to obtain a [`PackOutput`], then dispatches +/// to [`finalize_push`] which is the sole site that constructs a 2xx push +/// response from that buffer. async fn run_git_service( state: &Arc, owner: &str, @@ -510,21 +523,25 @@ async fn run_git_service( service: &str, body: Body, ) -> Result { - run_git_service_with_env(state, owner, repo, service, body, &[]).await + let output = run_git_subprocess(state, owner, repo, service, body, &[]).await?; + Ok(build_git_response(service, output)) } -/// Shared git service runner with extra environment variables. +/// Spawn a `git --stateless-rpc ` subprocess, stream the request body +/// to stdin, and return the buffered stdout/stderr/exit status as a +/// [`PackOutput`]. /// -/// The `extra_env` pairs are set AFTER `harden_git_env` clears the environment, -/// so they're available to the git subprocess and any hooks it spawns. -async fn run_git_service_with_env( +/// Critically returns the **owned** subprocess output rather than a `Response`, +/// so callers can sequence post-subprocess work (e.g. the push fence) before +/// any byte reaches the client. +async fn run_git_subprocess( state: &Arc, owner: &str, repo: &str, service: &str, body: Body, extra_env: &[(&str, String)], -) -> Result { +) -> Result { let validated = validate_repo_path(owner, repo, &state.config.git_repo_path)?; if !validated.repo_path.exists() { return Err((StatusCode::NOT_FOUND, "repository not found").into_response()); @@ -609,33 +626,127 @@ async fn run_git_service_with_env( // Still return output — git protocol errors are communicated in-band. } + Ok(PackOutput { + stdout: output.stdout, + }) +} + +/// Build the canonical `application/x-git-{service}-result` response from a +/// completed subprocess. For the push path this is **only** reached via +/// [`finalize_push`]; for read paths it is reached via [`run_git_service`]. +fn build_git_response(service: &str, output: PackOutput) -> Response { let content_type = format!("application/x-git-{service}-result"); - Ok(Response::builder() + Response::builder() .status(StatusCode::OK) .header(header::CONTENT_TYPE, content_type) .header(header::CACHE_CONTROL, "no-cache") .body(Body::from(output.stdout)) - .unwrap()) + .unwrap() } // ── Post-Push Event Publishing ─────────────────────────────────────────────── +/// Failure mode for a refs snapshot. +/// +/// Both variants are treated identically by [`finalize_push`]: any `Err` +/// collapses to the publish branch. Distinguished only for observability. +#[derive(Debug)] +pub(crate) enum SnapshotError { + /// `git for-each-ref` failed to spawn or exited non-zero. + CommandFailed, +} + /// Quick snapshot of current refs — used to detect whether a push changed anything. /// /// Returns the raw `git for-each-ref` output as a string. Comparison is by /// string equality — cheap and sufficient (same refs + same SHAs = same string). -/// Returns empty string on error (conservative: will trigger publish on failure). -async fn snapshot_refs(repo_path: &std::path::Path) -> String { +/// +/// **Fallible by design.** An earlier version returned `""` on error, which +/// hid a silent-skip bug: if *both* the pre- and post-push snapshots failed, +/// the two empty strings compared equal and the handler skipped publish on +/// a push that did change refs. [`finalize_push`] now matches on +/// `Result` and treats any `Err` as "assume changed → +/// publish fires," closing the double-failure hole. See +/// `docs/git-on-object-storage.md` §Implementation Correspondence. +async fn snapshot_refs(repo_path: &std::path::Path) -> Result { let mut cmd = Command::new("git"); cmd.args(["for-each-ref", "--format=%(refname) %(objectname)"]) .current_dir(repo_path); harden_git_env(&mut cmd); match cmd.output().await { Ok(output) if output.status.success() => { - String::from_utf8_lossy(&output.stdout).into_owned() + Ok(String::from_utf8_lossy(&output.stdout).into_owned()) + } + _ => Err(SnapshotError::CommandFailed), + } +} + +/// Per-push state captured between subprocess completion and response +/// construction. This is the single argument shape `finalize_push` consumes — +/// constructing a `PushContext` is the only path from a push subprocess to a +/// 2xx push response. +pub(crate) struct PushContext { + pub pack: PackOutput, + pub refs_before: Result, + pub owner: String, + pub repo: String, + pub pusher: nostr::PublicKey, + pub repo_path: PathBuf, +} + +/// Decide whether the post-push fence should publish based on the pre- and +/// post-push ref snapshots. +/// +/// Deny-by-default: publish unless we have **both** successful snapshots that +/// compare equal (i.e. a true no-op push). Any `Err` on either side — or any +/// difference between the two — falls through to publish. +/// +/// This is the function whose previous string-based predecessor had the +/// silent-skip bug: when both snapshots failed, both became `""`, they +/// "matched," and publish was skipped on a real push. Now both-`Err` is +/// structurally outside the skip arm. +pub(crate) fn should_publish( + before: &Result, + after: &Result, +) -> bool { + !matches!((before, after), (Ok(b), Ok(a)) if b == a) +} + +/// Finalize a push request: take a snapshot of the post-push ref state, decide +/// whether anything changed, conditionally publish kind:30618 inline, and only +/// then construct the success response. +/// +/// **The fence (Theorem 1 in `docs/git-on-object-storage.md`):** for a push +/// that changed refs (or where either ref snapshot failed), the `Response` is +/// not constructed until `publish_ref_state` has been awaited. No-op pushes — +/// where the before/after snapshots both succeed and compare equal — short- +/// circuit at the `(Ok(b), Ok(a)) if b == a` arm and skip publish entirely, +/// paying zero fence latency. +/// +/// The match below is written deny-by-default: only the `(Ok, Ok)`-and-equal +/// arm skips publish; every other state — including the previously load- +/// bearing **double-failure case** where two failed `snapshot_refs` calls +/// collide on the same value — falls through to the publish branch. +async fn finalize_push(state: Arc, ctx: PushContext) -> Response { + let refs_after = snapshot_refs(&ctx.repo_path).await; + + if should_publish(&ctx.refs_before, &refs_after) { + if let Err(e) = publish_ref_state(&state, &ctx.owner, &ctx.repo, &ctx.pusher).await { + // Today publish failure (relay DB insert) is non-fatal — the pack + // is durable on disk and the client sees success. The S3 manifest- + // CAS evolution of this seam will instead map a precondition + // failure (412) to HTTP 409 here; see + // `docs/git-on-object-storage.md` §Implementation Correspondence. + warn!( + error = %e, + owner = %ctx.owner, + repo = %ctx.repo, + "failed to publish kind:30618" + ); } - _ => String::new(), // Error → empty → won't match after → publish fires (safe default) } + + build_git_response("receive-pack", ctx.pack) } /// Publish kind:30618 (repo state) after a successful push. @@ -762,3 +873,71 @@ pub fn git_router(state: Arc) -> Router { .layer(RequestBodyLimitLayer::new(body_limit)) .with_state(state) } + +// ── Tests ──────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + fn ok(s: &str) -> Result { + Ok(s.to_string()) + } + fn err() -> Result { + Err(SnapshotError::CommandFailed) + } + + /// True no-op push: both snapshots succeed and are equal → publish skipped. + /// This is the fast path; the fence pays zero latency. + #[test] + fn should_publish_skips_on_noop() { + let before = ok("refs/heads/main abc123\n"); + let after = ok("refs/heads/main abc123\n"); + assert!(!should_publish(&before, &after)); + } + + /// Refs changed → publish fires. The fence engages and the response + /// must wait for `publish_ref_state` to complete before being built. + #[test] + fn should_publish_fires_on_changed_refs() { + let before = ok("refs/heads/main abc123\n"); + let after = ok("refs/heads/main def456\n"); + assert!(should_publish(&before, &after)); + } + + /// First push to an empty repo: before is `Ok("")`, after has real refs. + /// They differ; publish fires. Closes "what if the empty-repo case is + /// modeled by `Ok("")` instead of `Err`." + #[test] + fn should_publish_fires_on_first_push_to_empty_repo() { + let before = ok(""); + let after = ok("refs/heads/main abc123\n"); + assert!(should_publish(&before, &after)); + } + + /// Snapshot fails *before* the push: assume changed → publish fires. + /// Anything else would risk dropping a publish on a real ref change. + #[test] + fn should_publish_fires_when_before_errors() { + assert!(should_publish(&err(), &ok("refs/heads/main abc123\n"))); + } + + /// Snapshot fails *after* the push: assume changed → publish fires. + #[test] + fn should_publish_fires_when_after_errors() { + assert!(should_publish(&ok("refs/heads/main abc123\n"), &err())); + } + + /// The bug this whole fix exists to close: both snapshots fail. + /// + /// The pre-refactor implementation returned `""` on error from + /// `snapshot_refs`, so two failed scans both produced `""`, compared + /// equal, and the handler silently skipped publish on a ref-changing + /// push. Under `Result<_, _>` this case is structurally outside the + /// skip arm: `(Err, Err)` does not match `(Ok(b), Ok(a)) if b == a`. + /// Publish fires. + #[test] + fn should_publish_fires_when_both_snapshots_error() { + assert!(should_publish(&err(), &err())); + } +} From 686fd3602ccb29128793e0089083e4ebf050aedf Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:15:40 -0400 Subject: [PATCH 15/24] =?UTF-8?q?feat(git/cas=5Fpublish):=20the=20=C2=A7Pu?= =?UTF-8?q?sh=20step=202-7=20commit=20point?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds crates/sprout-relay/src/api/git/cas_publish.rs — the pure async function that turns a post-receive-pack workspace into a durable manifest CAS. Composes Dawn's GitStore primitives (put_pack, put_manifest, get_pointer, put_pointer) and Dawn's Manifest schema (canonical_bytes, validate, Inv_Closed at compose time) into the spec's step 2-7 sequence: read pointer (e, d_before) §step 3 fetch + verify m_before §step 3 + A1 detectability snapshot refs + symref-HEAD from disk (HEAD inherits parent on detach) pack new objects via pack-objects --revs §step 1-2 put_pack(bytes) -> packs/ §step 2 (A1) compose m_after (parent packs + new pack, parent = digest only) §step 5 m_after.validate() (Sami/Max/Perci #2-#4) put_manifest(canonical_bytes) -> manifests/ §step 6 put_pointer(IfMatch(e) | IfNoneMatchStar) §step 7 (CAS) Won -> CasSuccess { manifest, manifest_key } Lost -> CasError::Conflict { winner_manifest, winner_manifest_key } (→ HTTP 409, with winner for disk reconcile) The function returns *before* a Response is constructed — it is called from finalize_push, which is the unique site that builds a push 2xx, so the structural seam still enforces Theorem 1 (success-after-CAS). ## Review fixes folded in Sami's review (#1–#6) + Perci's #1 + Max's pre-CAS-validation blocker are all addressed in this commit: - **parent = bare 64-hex digest, not full key** (Perci #1, Max). Pointer body is ``; `Manifest.parent` stores the same digest, matching `Inv_RefDerivedFromParent` literally. `read_parent` strips the `manifests/` prefix before assigning. Dawn's new `MalformedParent` validator catches any drift at the write seam. - **Pre-CAS validation** (Sami #2, Max). `m_after.validate()?` runs between `compose_after` and `put_manifest`. Unsafe refnames, malformed oids, empty HEAD — all surface as `CasError::ManifestInvalid(...)` (4xx-class) before any S3 write, *not* as "valid CAS, un-clone-able output." Typed variant (not reused `ManifestReadFailed`) so logs / status mapping distinguish "client input rejected" from "stored parent failed A1" (Max + Dawn). - **Detached-HEAD fallback** (Sami #3). `snapshot_workspace_state` returns empty `head` on detached HEAD; `cas_publish` falls back to `parent.head` if non-empty. `validate()` rejects the first-push-+- detached case (Sami #4 — no parent to inherit from, manifest is un-clone-able). - **Conflict carries the winner** (Sami #5 + Dawn). `Conflict { winner_manifest, winner_manifest_key }` lets `finalize_push` invoke Eva's `reconcile_to_manifest` mechanically from the error arm, without a second pointer GET in the caller. `warn!()` at the `LostRace` site logs (pointer, expected etag, attempted manifest) for debugging concurrent-push patterns. Boxed for `clippy::result_large_err`. - **Empty-pack comment** (Sami #6). Clarified `capture_pack` returns `None` in both the delete-all (`refs_after.is_empty()`) and refs-only (`pack-objects` empty stdout) cases. - **`pointer_key` consolidated** in `manifest.rs` (Sami #1, Dawn, Max — Sami's "single source of truth" argument). `cas_publish` imports it; the duplicate definition is gone. - **`validate-invocation` test added** in `cas_publish.rs` (Sami's recommendation). Pins that a future refactor dropping the `validate?` call between `compose_after` and `put_manifest` is caught by unit test, not by every subsequent un-clone-able read. ## What this deliberately does NOT do (each with citation) - No retry on LostRace. Per Sami's TLA-action guidance: the receive-pack output is derived against a now-superseded parent; reusing it would violate Inv_RefDerivedFromParent. Client re-pushes, which re-hydrates + re-runs receive-pack against the advanced pointer — the only safe retry, which git already performs. Spec §Push step 7: 'GOTO 3 (retry) or respond non-ff' — both arms safe; we take non-ff. - No kind:30618 emission. That is derived after CAS — finalize_push calls Sami's build_ref_state_event over m_after.refs / m_after.head on Ok. Spec §Implementation Correspondence: 'kind:30618 is derived after CAS, never the commit.' - No advisory lock. Spec §Push 'no advisory lock in v1' — writer serialization is the CAS. A mutex would hide the contention Inv_NoFork proves safe. ## Tests 10 unit tests pin digest_from_key (manifest/<...> prefix invariant), compose_after (Inv_Closed coverage, sort, dedupe, refs-only-no-new-pack, first-push, parent-is-digest-not-key), validate invocation (unsafe refname + first-push-empty-HEAD both rejected pre-CAS). 244 relay tests green; clippy --tests -D warnings clean. The integration into finalize_push lands separately — Eva owns the AppState::git_store wiring + main.rs startup probe gate. This module is callable today: cas_publish(&store, repo_path, owner, repo, &refs_before) -> Result. Refs: - docs/git-on-object-storage.md §Push step 2-7, §Implementation Correspondence, §Mechanized Verification (Inv_NoFork, Inv_RefEffectApplied, Inv_RefDerivedFromParent, Inv_Closed). Also makes transport::harden_git_env pub(crate) for reuse by cas_publish's two subprocess sites (for-each-ref, pack-objects). Co-authored-by: Tyler Longwell Co-authored-by: Quinn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- .../sprout-relay/src/api/git/cas_publish.rs | 709 ++++++++++++++++++ crates/sprout-relay/src/api/git/mod.rs | 1 + crates/sprout-relay/src/api/git/transport.rs | 2 +- 3 files changed, 711 insertions(+), 1 deletion(-) create mode 100644 crates/sprout-relay/src/api/git/cas_publish.rs diff --git a/crates/sprout-relay/src/api/git/cas_publish.rs b/crates/sprout-relay/src/api/git/cas_publish.rs new file mode 100644 index 000000000..e2e2b54dd --- /dev/null +++ b/crates/sprout-relay/src/api/git/cas_publish.rs @@ -0,0 +1,709 @@ +//! Push commit point — content-addressed pack + manifest CAS (§Push step 2–7). +//! +//! Pure async function over the spec's commit primitives. Given the post- +//! receive-pack repository workspace and the object-store client, this: +//! +//! 1. Reads the current pointer → `(e, d_before)` (§Push step 3). +//! 2. Fetches `m_before` via `get_verified(d_before)` (§Push step 3) — +//! digest-verified so a corrupt manifest fails closed, not silently. +//! 3. Snapshots refs + HEAD off the workspace (the receive-pack's published +//! state, by which point the pre-receive hook has enforced fast-forward / +//! branch-protection against the parent's refs). +//! 4. Captures the new objects as a pack via `git pack-objects --revs +//! --stdout` over `(refs_after) --not (refs_before-tips)` (§Push step 1–2). +//! Empty pack (refs-only push that doesn't introduce objects) is allowed +//! and stored with no `new_pack_keys`. +//! 5. `put_pack` (content-addressed, create-only, idempotent — §Push step 2). +//! The key is derived from `sha256(bytes)` by the store layer. +//! 6. Composes `m_after` (parent packs ∪ new pack, parent digest, new refs) +//! via `Manifest::compose`-equivalent inline construction (§Push step 5). +//! 7. `put_manifest` (content-addressed, create-only, idempotent — §Push +//! step 6). +//! 8. `put_pointer(IfMatch(e) | IfNoneMatchStar)` — the CAS (§Push step 7). +//! - `Won` → return `CasSuccess { manifest, manifest_key }`. The caller +//! then derives kind:30618 against `m_after` (Sami's +//! `manifest_event::build_ref_state_event`) and constructs the +//! success response — the *fence* in §Push step 8. +//! - `LostRace` → return `CasError::Conflict` (→ HTTP 409). **No +//! retry.** The losing push's receive-pack output was derived against +//! the now-superseded parent; reusing it would violate +//! `Inv_RefDerivedFromParent` (§Mechanized Verification). The client +//! re-runs `git push`, which re-hydrates and re-runs receive-pack +//! against the advanced state — that is the only safe retry, and +//! `git`'s own machinery already does it. +//! +//! ## Fence positioning +//! +//! This function returns *before* the success `Response` is constructed. +//! It is called from inside `finalize_push`, which is the unique site that +//! builds a push `Response`. The structural seam therefore enforces +//! Theorem 1: success cannot be observed until this returns `Ok(_)`. +//! +//! ## What this function deliberately does *not* do +//! +//! - **No retry on `LostRace`.** Per spec §Push step 7 "GOTO 3 (retry) or +//! respond non-ff": both arms are safe; we take the non-ff arm because +//! reusing receive-pack's output against a moved parent isn't safe and +//! re-hydrating from inside the handler is expensive. Sami's TLA-action +//! guidance is explicit: retry would change the TLA action. +//! - **No kind:30618 emission.** That is the *derived* publication after a +//! successful CAS. Caller passes `m_after` into +//! `manifest_event::build_ref_state_event` *after* this returns `Ok`. +//! Spec §Implementation Correspondence: "kind:30618 is derived after +//! CAS, never the commit." +//! - **No advisory lock.** Spec §Push, "No advisory lock in v1": writer +//! serialization is the CAS. Adding a per-repo mutex would hide the +//! exact contention `Inv_NoFork` proves safe. + +use std::collections::BTreeMap; +use std::path::Path; +use std::process::Stdio; + +use tokio::io::AsyncWriteExt; +use tokio::process::Command; +use tracing::{debug, warn}; + +use crate::api::git::manifest::{pointer_key, Manifest, ManifestError, MANIFEST_VERSION}; +use crate::api::git::store::{CasOutcome, ETag, GitStore, Precond, StoreError}; + +/// Errors `cas_publish` surfaces. Distinguished so `finalize_push` can map +/// each to the right HTTP status (the spec's 412 → 409 mapping is here). +#[derive(Debug, thiserror::Error)] +pub enum CasError { + /// The CAS lost the race (§Push step 7 → 412). Maps to HTTP 409. The + /// **terminal** classified outcome — never retried by this function, + /// since the receive-pack output is now derived against a superseded + /// parent. Client retries by re-pushing. + /// + /// Carries the winner's manifest + key so the caller can reconcile + /// the on-disk workspace back to the winning state (Eva's + /// disk-reset-on-lost-race) without a second pointer GET round-trip. + /// The re-read after `LostRace` can itself race with a *third* winner; + /// that's fine — we surface *some* winning state, and the loser's + /// client re-pushes anyway. + /// Boxed because `Manifest` is the largest `CasError` payload and we + /// don't want all error-paths paying the cost of a 200-byte struct in + /// the `Result` ABI (`clippy::result_large_err`). + #[error("CAS lost race; push superseded by winner with manifest {winner_manifest_key}")] + Conflict { + /// The manifest now installed under the pointer (the winner). + winner_manifest: Box, + /// Full content-addressed key of `winner_manifest` + /// (`manifests/`). + winner_manifest_key: String, + }, + + /// The current pointer names a manifest we cannot reconstruct + /// faithfully — digest mismatch, `manifest GET` 404 under a non-empty + /// pointer, unsupported schema version, or malformed pointer body. + /// **Fail closed:** we do not invent a published state to push onto. + /// Maps to HTTP 5xx (parent corruption, ops issue). + #[error("manifest read failed (corrupt or missing): {0}")] + ManifestReadFailed(String), + + /// The composed `m_after` failed `Manifest::validate()` — unsafe + /// refname, malformed oid, empty head. Pre-CAS, fail closed before + /// any write. Maps to HTTP 4xx (client/input rejected — distinct from + /// `ManifestReadFailed` which is server-side data corruption). + #[error("manifest invalid: {0}")] + ManifestInvalid(#[from] ManifestError), + + /// Backend transport / I/O failure surfaced from the object store. + /// Distinct from `Conflict` so `?`-bubbling cannot turn a 412 into a + /// 500. + #[error("object store backend: {0}")] + Backend(#[from] StoreError), + + /// `git pack-objects` failed, or we could not snapshot refs off the + /// workspace. Pre-CAS — the pointer was never written. + #[error("pack capture: {0}")] + PackCapture(String), +} + +/// Outcome of a successful CAS. Carries the composed manifest so the +/// caller can derive kind:30618 against `m_after.refs` / `m_after.head` — +/// these are the values that physically landed, by `Inv_RefEffectApplied`. +#[derive(Debug)] +pub struct CasSuccess { + /// The manifest the CAS installed (the published state). + pub manifest: Manifest, + /// The full content-addressed key of `manifest` (`manifests/`). + pub manifest_key: String, +} + +/// Resolved view of the pre-push pointer (§Push step 3 output). +struct ParentState { + /// ETag predicating the next CAS write. `None` only when the pointer + /// does not yet exist (first push to an empty repo). + if_match: Option, + /// The parent manifest's content-addressed *digest* (64-hex), not the + /// full `manifests/` key. This lands in `Manifest.parent` and + /// is what `Inv_RefDerivedFromParent` reasons over (parent = + /// pointer.digest). Full key is a local fetch detail, derived as + /// `format!("manifests/{}", digest)`. `None` only on first push. + parent_digest: Option, + /// The parsed parent manifest. On first push, an empty manifest. + parent: Manifest, +} + +impl ParentState { + /// State for a brand-new repo with no published manifest yet. + fn fresh() -> Self { + Self { + if_match: None, + parent_digest: None, + parent: Manifest { + version: MANIFEST_VERSION, + head: String::new(), + refs: BTreeMap::new(), + packs: Vec::new(), + parent: None, + }, + } + } +} + +/// Read the current pointer, parse the manifest digest, fetch the manifest, +/// verify its digest — fail closed at every step. +async fn read_parent(store: &GitStore, pkey: &str) -> Result { + let pointer = store.get_pointer(pkey).await?; + let Some((etag, body)) = pointer else { + return Ok(ParentState::fresh()); + }; + + // Pointer body is the raw hex digest of the parent manifest. No JSON, + // no envelope — the pointer key already identifies the repo, the ETag + // carries the version, and the body just names the manifest. Trimmed + // because some S3 implementations have added newline noise historically. + let digest = std::str::from_utf8(&body) + .map_err(|e| CasError::ManifestReadFailed(format!("pointer body not utf-8: {e}")))? + .trim() + .to_string(); + if digest.len() != 64 || !digest.chars().all(|c| c.is_ascii_hexdigit()) { + return Err(CasError::ManifestReadFailed(format!( + "pointer body is not a 64-char hex digest (got {} chars)", + digest.len() + ))); + } + let manifest_key = format!("manifests/{digest}"); + let bytes = store + .get_verified(&manifest_key, &digest) + .await + .map_err(|e| match e { + StoreError::DigestMismatch { .. } => { + CasError::ManifestReadFailed(format!("manifest digest mismatch: {e}")) + } + StoreError::NotFound(_) => { + CasError::ManifestReadFailed(format!("pointer names missing manifest: {e}")) + } + other => CasError::Backend(other), + })?; + let parent = Manifest::from_bytes(&bytes) + .map_err(|e| CasError::ManifestReadFailed(format!("parse manifest: {e}")))?; + + Ok(ParentState { + if_match: Some(etag), + parent_digest: Some(digest), + parent, + }) +} + +/// Read `refs/*` + symbolic-HEAD from the workspace. +/// +/// HEAD is the symref target (e.g. `refs/heads/main`), unprefixed — the +/// manifest stores published ref state, not protocol formatting. Detached +/// HEAD or no HEAD yields an empty string. +async fn snapshot_workspace_state( + repo_path: &Path, +) -> Result<(BTreeMap, String), CasError> { + let mut refs_cmd = Command::new("git"); + refs_cmd + .args(["for-each-ref", "--format=%(refname) %(objectname)"]) + .current_dir(repo_path); + super::transport::harden_git_env(&mut refs_cmd); + let refs_out = refs_cmd + .output() + .await + .map_err(|e| CasError::PackCapture(format!("for-each-ref spawn: {e}")))?; + if !refs_out.status.success() { + return Err(CasError::PackCapture(format!( + "for-each-ref failed: status={:?}", + refs_out.status.code() + ))); + } + let mut refs = BTreeMap::new(); + for line in std::str::from_utf8(&refs_out.stdout) + .unwrap_or_default() + .lines() + { + let mut parts = line.splitn(2, ' '); + let (Some(name), Some(oid)) = (parts.next(), parts.next()) else { + continue; + }; + if oid.len() != 40 || !oid.chars().all(|c| c.is_ascii_hexdigit()) { + warn!(ref_name = %name, oid = %oid, "for-each-ref returned malformed oid; skipping"); + continue; + } + refs.insert(name.to_string(), oid.to_string()); + } + + let mut head_cmd = Command::new("git"); + head_cmd + .args(["symbolic-ref", "--quiet", "HEAD"]) + .current_dir(repo_path); + super::transport::harden_git_env(&mut head_cmd); + let head_out = head_cmd + .output() + .await + .map_err(|e| CasError::PackCapture(format!("symbolic-ref spawn: {e}")))?; + let head = if head_out.status.success() { + String::from_utf8_lossy(&head_out.stdout).trim().to_string() + } else { + String::new() + }; + + Ok((refs, head)) +} + +/// Capture the objects this push introduced as a single pack. +/// +/// Runs `git pack-objects --revs --stdout` reading rev-spec lines from +/// stdin: each `oid` line includes that oid's reachable closure, and each +/// `^oid` line excludes one. We feed `refs_after`'s tips with positive +/// lines and `refs_before`'s tips with `^` lines — the resulting pack is +/// exactly the objects in the symmetric difference's "ahead" half, i.e. +/// the new objects this push needs to durably name. +/// +/// Returns `None` in either of two cases, both legitimate: +/// 1. `refs_after` is empty — a delete-all push (no positive tips to feed +/// pack-objects; nothing to cover). +/// 2. `pack-objects` produces empty stdout — refs-only push that re-points +/// or deletes a ref at an already-stored oid (e.g. `git push :branch`, +/// or `git push origin existing-sha:newname`). +/// +/// In both cases the caller still publishes a new manifest — the ref +/// change is real even if the pack set didn't grow. +async fn capture_pack( + repo_path: &Path, + refs_before: &BTreeMap, + refs_after: &BTreeMap, +) -> Result>, CasError> { + // Build rev-spec stdin: positive new tips, negative old tips. + // Deduplicate against the same-oid case — no point feeding `X ^X`. + let mut stdin_lines = String::new(); + let mut any_positive = false; + for oid in refs_after.values() { + stdin_lines.push_str(oid); + stdin_lines.push('\n'); + any_positive = true; + } + if !any_positive { + // No refs to cover — first-push case where the client deleted + // everything before any tip was set (degenerate, but handle). + return Ok(None); + } + for oid in refs_before.values() { + stdin_lines.push('^'); + stdin_lines.push_str(oid); + stdin_lines.push('\n'); + } + + let mut cmd = Command::new("git"); + cmd.args(["pack-objects", "--revs", "--stdout", "-q"]) + .current_dir(repo_path) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + super::transport::harden_git_env(&mut cmd); + let mut child = cmd + .spawn() + .map_err(|e| CasError::PackCapture(format!("pack-objects spawn: {e}")))?; + { + let mut stdin = child + .stdin + .take() + .ok_or_else(|| CasError::PackCapture("pack-objects stdin closed".into()))?; + stdin + .write_all(stdin_lines.as_bytes()) + .await + .map_err(|e| CasError::PackCapture(format!("pack-objects stdin write: {e}")))?; + // Drop closes stdin → EOF. + } + let out = child + .wait_with_output() + .await + .map_err(|e| CasError::PackCapture(format!("pack-objects wait: {e}")))?; + if !out.status.success() { + return Err(CasError::PackCapture(format!( + "pack-objects failed: status={:?} stderr={}", + out.status.code(), + String::from_utf8_lossy(&out.stderr) + ))); + } + if out.stdout.is_empty() { + return Ok(None); + } + Ok(Some(out.stdout)) +} + +/// Compose `m_after` from the parent manifest and the new ref/pack state. +/// +/// Encodes `Inv_Closed` at the construction site: `m_after.packs ⊇ +/// m_after.parent.packs`. Sorts + dedups packs so canonical bytes are +/// stable across `parent + same_new_pack` regardless of insertion order. +/// +/// `parent_digest` is the 64-hex SHA-256 of the parent manifest's +/// canonical bytes — *not* the full `manifests/` key. Storing the +/// raw digest matches `Inv_RefDerivedFromParent` (parent = pointer.digest) +/// and lets readers reconstruct the chain by prefixing `manifests/` at +/// fetch time. +/// +/// Pure data; does not call `Manifest::validate()`. Validation lives at +/// the write seam in [`cas_publish`] so a future refactor that drops the +/// `validate()` call is visible as the absence of a `validate?` between +/// `compose_after` and `put_manifest`, not a hidden behavior change. +fn compose_after( + parent: &Manifest, + parent_digest: Option, + head: String, + refs: BTreeMap, + new_pack_key: Option, +) -> Manifest { + let mut packs = parent.packs.clone(); + if let Some(k) = new_pack_key { + if !packs.iter().any(|p| p == &k) { + packs.push(k); + } + } + packs.sort(); + packs.dedup(); + Manifest { + version: MANIFEST_VERSION, + head, + refs, + packs, + parent: parent_digest, + } +} + +/// Derive `manifests/` from a returned manifest key, surfacing the +/// hex digest the pointer body needs. +fn digest_from_manifest_key(key: &str) -> Result { + key.strip_prefix("manifests/") + .map(str::to_string) + .ok_or_else(|| { + CasError::Backend(StoreError::Backend(s3::error::S3Error::HttpFailWithBody( + 500, + format!("put_manifest returned non-standard key: {key}"), + ))) + }) +} + +/// The function the §Push step 2–7 protocol distills to. +/// +/// Concurrency: callable in parallel for the same `(owner, repo)`. The CAS +/// at step 7 is the *only* writer serialization (`Inv_NoFork`). No +/// advisory lock — adding one would hide exactly the interleavings the +/// model proves safe. +pub async fn cas_publish( + store: &GitStore, + repo_path: &Path, + owner: &str, + repo: &str, + refs_before: &BTreeMap, +) -> Result { + let pkey = pointer_key(owner, repo); + + // Step 3: read pointer + parent manifest. + let parent_state = read_parent(store, &pkey).await?; + + // Snapshot post-receive-pack state from disk. + let (refs_after, head_observed) = snapshot_workspace_state(repo_path).await?; + + // HEAD fallback: a bare repo serving pushes shouldn't have detached + // HEAD, but if `git symbolic-ref` failed (or returned empty), inherit + // the parent's HEAD rather than installing an empty one. `validate()` + // below rejects "empty after fallback" — that's the first-push + + // detached-HEAD case where the writer must declare a HEAD. + let head = if head_observed.is_empty() { + parent_state.parent.head.clone() + } else { + head_observed + }; + + // Capture new objects as a pack (steps 1–2). + let pack_bytes = capture_pack(repo_path, refs_before, &refs_after).await?; + let new_pack_key = if let Some(bytes) = pack_bytes { + debug!(bytes = bytes.len(), "captured push pack"); + Some(store.put_pack(&bytes).await?) + } else { + debug!("no new objects in push; manifest will reuse parent packs"); + None + }; + + // Compose m_after (step 5). + let m_after = compose_after( + &parent_state.parent, + parent_state.parent_digest.clone(), + head, + refs_after, + new_pack_key, + ); + + // **Pre-CAS validation** (Sami #2 / Max / Dawn): refuse to commit an + // un-clone-able manifest. `Manifest::validate` checks every refname + // against `is_safe_refname`, every oid against `is_hex_oid`, and + // requires a non-empty `head` — same predicates the hydrate path + // uses on read. Failure surfaces as `CasError::ManifestInvalid` + // (4xx-class: client/input rejected) so the caller never confuses + // it with `ManifestReadFailed` (5xx-class: parent corrupt). + m_after.validate()?; + + // Step 6: put_manifest. + let manifest_bytes = m_after.canonical_bytes()?; + let manifest_key = store.put_manifest(&manifest_bytes).await?; + let manifest_digest = digest_from_manifest_key(&manifest_key)?; + + // Step 7: CAS the pointer. + let precond = match &parent_state.if_match { + Some(e) => Precond::IfMatch(e.clone()), + None => Precond::IfNoneMatchStar, + }; + match store + .put_pointer(&pkey, manifest_digest.as_bytes(), precond) + .await? + { + CasOutcome::Won(_new_etag) => Ok(CasSuccess { + manifest: m_after, + manifest_key, + }), + CasOutcome::LostRace => { + // Surface a typed Conflict carrying the winner so the caller + // can reconcile the on-disk workspace without re-reading the + // pointer. We re-GET the pointer here on the slow path; a + // *third* writer may have landed between our 412 and this + // GET, in which case we surface that third winner — also + // correct (loser re-pushes against whatever's current). + let expected = parent_state + .if_match + .as_ref() + .map(|e| e.0.as_str()) + .unwrap_or(""); + warn!( + pointer = %pkey, + expected_etag = %expected, + attempted_manifest = %manifest_key, + "CAS lost race; resolving winner for reconcile" + ); + let (winner_manifest, winner_manifest_key) = + read_winner_after_conflict(store, &pkey).await?; + Err(CasError::Conflict { + winner_manifest, + winner_manifest_key, + }) + } + } +} + +/// Re-read the pointer after a `LostRace` and fetch the winner's manifest. +/// +/// Fail-closed at every step: if the pointer is now absent (a deletion +/// raced in — currently impossible under the protocol's no-delete rule, +/// but defensive), or the named manifest is corrupt/missing, return +/// `ManifestReadFailed` so the caller emits 5xx rather than pretending +/// reconciliation is possible. +async fn read_winner_after_conflict( + store: &GitStore, + pkey: &str, +) -> Result<(Box, String), CasError> { + let Some((_etag, body)) = store.get_pointer(pkey).await? else { + return Err(CasError::ManifestReadFailed( + "pointer vanished after LostRace (no-delete rule violated)".into(), + )); + }; + let digest = std::str::from_utf8(&body) + .map_err(|e| CasError::ManifestReadFailed(format!("winner pointer body not utf-8: {e}")))? + .trim() + .to_string(); + if digest.len() != 64 || !digest.chars().all(|c| c.is_ascii_hexdigit()) { + return Err(CasError::ManifestReadFailed(format!( + "winner pointer body is not a 64-char hex digest (got {} chars)", + digest.len() + ))); + } + let manifest_key = format!("manifests/{digest}"); + let bytes = store + .get_verified(&manifest_key, &digest) + .await + .map_err(|e| match e { + StoreError::DigestMismatch { .. } => { + CasError::ManifestReadFailed(format!("winner manifest digest mismatch: {e}")) + } + StoreError::NotFound(_) => { + CasError::ManifestReadFailed(format!("winner pointer names missing manifest: {e}")) + } + other => CasError::Backend(other), + })?; + let winner = Manifest::from_bytes(&bytes) + .map_err(|e| CasError::ManifestReadFailed(format!("parse winner manifest: {e}")))?; + Ok((Box::new(winner), manifest_key)) +} + +#[cfg(test)] +mod tests { + use super::*; + + // `pointer_key` is owned by `manifest.rs` and unit-tested there + // (one source of truth — Max/Sami's centralization point). + + #[test] + fn digest_from_key_strips_prefix() { + let k = format!("manifests/{}", "a".repeat(64)); + let d = digest_from_manifest_key(&k).unwrap(); + assert_eq!(d, "a".repeat(64)); + } + + #[test] + fn digest_from_key_rejects_unknown_prefix() { + assert!(digest_from_manifest_key("not/manifests/abc").is_err()); + } + + #[test] + fn compose_after_first_push() { + let parent = ParentState::fresh().parent; + let mut refs = BTreeMap::new(); + refs.insert("refs/heads/main".into(), "1".repeat(40)); + let m = compose_after( + &parent, + None, + "refs/heads/main".into(), + refs.clone(), + Some("packs/abc".into()), + ); + assert_eq!(m.version, MANIFEST_VERSION); + assert_eq!(m.head, "refs/heads/main"); + assert_eq!(m.refs, refs); + assert_eq!(m.packs, vec!["packs/abc".to_string()]); + assert_eq!(m.parent, None); + } + + /// 64-char hex parent digest — what `Manifest.parent` stores (the + /// canonical-bytes SHA-256 of the parent manifest, NOT the full + /// `manifests/` key). See `Inv_RefDerivedFromParent`. + fn parent_digest() -> String { + "a".repeat(64) + } + + #[test] + fn compose_after_covers_parent_packs() { + let mut parent = ParentState::fresh().parent; + parent.packs = vec!["packs/old1".into(), "packs/old2".into()]; + let m = compose_after( + &parent, + Some(parent_digest()), + "refs/heads/main".into(), + BTreeMap::new(), + Some("packs/new".into()), + ); + // Inv_Closed: child covers parent. + for p in &parent.packs { + assert!(m.packs.contains(p)); + } + assert!(m.packs.contains(&"packs/new".to_string())); + // Sorted. + let mut sorted = m.packs.clone(); + sorted.sort(); + assert_eq!(m.packs, sorted); + // Parent is the digest, not the full key (Inv_RefDerivedFromParent). + assert_eq!(m.parent, Some(parent_digest())); + assert_eq!(m.parent.as_ref().unwrap().len(), 64); + assert!(!m.parent.as_ref().unwrap().starts_with("manifests/")); + } + + #[test] + fn compose_after_no_new_pack_refs_only_push() { + let mut parent = ParentState::fresh().parent; + parent.packs = vec!["packs/x".into()]; + let m = compose_after( + &parent, + Some(parent_digest()), + "refs/heads/main".into(), + BTreeMap::new(), + None, + ); + assert_eq!(m.packs, vec!["packs/x".to_string()]); + } + + #[test] + fn compose_after_dedupes_pack_already_in_parent() { + let mut parent = ParentState::fresh().parent; + parent.packs = vec!["packs/x".into()]; + let m = compose_after( + &parent, + Some(parent_digest()), + "refs/heads/main".into(), + BTreeMap::new(), + Some("packs/x".into()), + ); + assert_eq!(m.packs, vec!["packs/x".to_string()]); + } + + /// `cas_publish` must invoke `Manifest::validate()` between + /// `compose_after` and `put_manifest`. The unit on `validate` lives in + /// `manifest.rs`; this test pins that the call site here actually + /// invokes it. A future refactor that drops the `validate?` line is + /// caught here, not at every subsequent un-clone-able read. + /// + /// We can't easily call `cas_publish` end-to-end without a `GitStore`, + /// so this exercises the exact chain `cas_publish` uses inline: + /// `compose_after(...)` → `validate()` → expected variant. + #[test] + fn validate_invoked_between_compose_and_put_manifest() { + let parent = ParentState::fresh().parent; + let mut refs = BTreeMap::new(); + // Unsafe refname: `..` traversal. + refs.insert("refs/heads/../escape".into(), "1".repeat(40)); + let m = compose_after( + &parent, + None, + "refs/heads/main".into(), + refs, + Some("packs/abc".into()), + ); + let manifest_err = m.validate().expect_err("unsafe refname must reject"); + match &manifest_err { + crate::api::git::manifest::ManifestError::UnsafeRefName(name) => { + assert!(name.contains("..")); + } + other => panic!("expected UnsafeRefName, got {other:?}"), + } + + // Same error converts through the `From` into the typed CasError + // variant `cas_publish` actually returns at the call site. + let cas_err: CasError = manifest_err.into(); + assert!(matches!(cas_err, CasError::ManifestInvalid(_))); + } + + /// First-push + empty HEAD must fail validation. `ParentState::fresh` + /// has empty `parent.head`, so the HEAD fallback in `cas_publish` + /// leaves `m_after.head = ""` if `git symbolic-ref` also failed. The + /// validator catches this pre-CAS rather than installing an + /// un-clone-able manifest. + #[test] + fn first_push_with_empty_head_rejected_by_validate() { + let parent = ParentState::fresh().parent; + let mut refs = BTreeMap::new(); + refs.insert("refs/heads/main".into(), "1".repeat(40)); + let m = compose_after( + &parent, + None, + String::new(), // empty HEAD — the fallback's worst case + refs, + Some("packs/abc".into()), + ); + assert!(matches!( + m.validate(), + Err(crate::api::git::manifest::ManifestError::EmptyHead) + )); + } +} diff --git a/crates/sprout-relay/src/api/git/mod.rs b/crates/sprout-relay/src/api/git/mod.rs index 5f373da14..730ca225f 100644 --- a/crates/sprout-relay/src/api/git/mod.rs +++ b/crates/sprout-relay/src/api/git/mod.rs @@ -22,6 +22,7 @@ use tower_http::limit::RequestBodyLimitLayer; use crate::state::AppState; +pub mod cas_publish; pub mod hook; pub mod hydrate; pub mod manifest; diff --git a/crates/sprout-relay/src/api/git/transport.rs b/crates/sprout-relay/src/api/git/transport.rs index 26571cd35..ef701182a 100644 --- a/crates/sprout-relay/src/api/git/transport.rs +++ b/crates/sprout-relay/src/api/git/transport.rs @@ -256,7 +256,7 @@ fn validate_repo_path( /// - `GIT_CONFIG_NOSYSTEM=1` — ignore system-wide gitconfig /// - `GIT_CONFIG_GLOBAL=/dev/null` — prevent reading global gitconfig /// - `HOME=/dev/null` — prevent reading ~/.gitconfig -fn harden_git_env(cmd: &mut Command) { +pub(crate) fn harden_git_env(cmd: &mut Command) { cmd.env_clear() .env("PATH", std::env::var("PATH").unwrap_or_default()) .env("GIT_HTTP_EXPORT_ALL", "1") From de160f9a6865fd255dfd5a9500719f6ad5be700f Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 12:20:33 -0400 Subject: [PATCH 16/24] refactor(git): structural Inv_RefDerivedFromParent via ParentState seam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hoists the pointer-load out of cas_publish so the workspace and the CAS predicate come from the same observed pointer state, closing Perci's "build on superseded state" hazard at the type system rather than at runtime. Concretely: - cas_publish::ParentState becomes pub, with a thin from_loaded(etag, digest, manifest) constructor. The pointer-reading helper read_parent is deleted; cas_publish no longer touches the pointer except for the CAS write at step 7. - cas_publish takes &ParentState instead of refs_before. The capture_pack "not" set is now parent_state.parent.refs (the refs the workspace was hydrated from), so the delta covers exactly the new objects, not whatever happened to be on disk. - hydrate.rs factors out two private helpers: load_pointer (pointer → (etag, digest, verified manifest) with fail-closed below-pointer semantics) and materialize_manifest (init bare → fetch+index packs → install refs+HEAD, phase-ordered). hydrate_for_read becomes a three-line wrapper. - New hydrate_for_write(store, owner, repo) -> (HydratedRepo, ParentState): the write-path entry point. Loads pointer + materializes workspace in one round-trip; first-push case returns (git init --bare + ParentState::fresh()), no manifest needed. Caller contract is now structural: cas_publish(.., &parent_state) CAS-es on parent_state.if_match, so a concurrent writer that advances the pointer between hydrate and CAS reliably surfaces as Conflict/HTTP 409. m_after.parent is *literally* the digest of the manifest the workspace was hydrated from — Inv_RefDerivedFromParent holds by construction, not by code review. 65 git tests still pass on relay lib. No transport changes yet — those land next. Co-authored-by: Tyler Longwell Co-authored-by: Quinn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- .../sprout-relay/src/api/git/cas_publish.rs | 112 +++++++++--------- crates/sprout-relay/src/api/git/hydrate.rs | 98 +++++++++++++-- 2 files changed, 143 insertions(+), 67 deletions(-) diff --git a/crates/sprout-relay/src/api/git/cas_publish.rs b/crates/sprout-relay/src/api/git/cas_publish.rs index e2e2b54dd..e4b2f28db 100644 --- a/crates/sprout-relay/src/api/git/cas_publish.rs +++ b/crates/sprout-relay/src/api/git/cas_publish.rs @@ -132,23 +132,39 @@ pub struct CasSuccess { } /// Resolved view of the pre-push pointer (§Push step 3 output). -struct ParentState { +/// +/// **The CAS write is predicated on `if_match`** — the caller must load +/// this *before* running receive-pack against the hydrated workspace, and +/// pass the same value into [`cas_publish`]. If the pointer advances +/// between load and CAS (a concurrent push wins), the CAS fails with +/// `LostRace`/`Conflict` and the loser re-pushes — that is the only safe +/// retry path (the loser's receive-pack output is derived against the +/// superseded parent, so reusing it would violate +/// `Inv_RefDerivedFromParent`). +/// +/// The structural seam this `ParentState` argument creates is what makes +/// `Inv_RefDerivedFromParent` mechanical: `m_after.parent` is *literally* +/// the digest of the manifest receive-pack ran against, not whatever +/// pointer happens to be live at CAS time. +#[derive(Debug, Clone)] +pub struct ParentState { /// ETag predicating the next CAS write. `None` only when the pointer - /// does not yet exist (first push to an empty repo). - if_match: Option, + /// does not yet exist (first push to an empty repo) — then the CAS + /// uses `If-None-Match: *`. + pub if_match: Option, /// The parent manifest's content-addressed *digest* (64-hex), not the /// full `manifests/` key. This lands in `Manifest.parent` and /// is what `Inv_RefDerivedFromParent` reasons over (parent = /// pointer.digest). Full key is a local fetch detail, derived as /// `format!("manifests/{}", digest)`. `None` only on first push. - parent_digest: Option, + pub parent_digest: Option, /// The parsed parent manifest. On first push, an empty manifest. - parent: Manifest, + pub parent: Manifest, } impl ParentState { /// State for a brand-new repo with no published manifest yet. - fn fresh() -> Self { + pub fn fresh() -> Self { Self { if_match: None, parent_digest: None, @@ -161,51 +177,22 @@ impl ParentState { }, } } -} -/// Read the current pointer, parse the manifest digest, fetch the manifest, -/// verify its digest — fail closed at every step. -async fn read_parent(store: &GitStore, pkey: &str) -> Result { - let pointer = store.get_pointer(pkey).await?; - let Some((etag, body)) = pointer else { - return Ok(ParentState::fresh()); - }; - - // Pointer body is the raw hex digest of the parent manifest. No JSON, - // no envelope — the pointer key already identifies the repo, the ETag - // carries the version, and the body just names the manifest. Trimmed - // because some S3 implementations have added newline noise historically. - let digest = std::str::from_utf8(&body) - .map_err(|e| CasError::ManifestReadFailed(format!("pointer body not utf-8: {e}")))? - .trim() - .to_string(); - if digest.len() != 64 || !digest.chars().all(|c| c.is_ascii_hexdigit()) { - return Err(CasError::ManifestReadFailed(format!( - "pointer body is not a 64-char hex digest (got {} chars)", - digest.len() - ))); + /// Build a `ParentState` from already-loaded pointer state. + /// + /// The hydrate layer reads the pointer + verified manifest as part of + /// materializing the workspace, then hands the same `(etag, digest, + /// manifest)` tuple back here. Centralizing the constructor in + /// `cas_publish` means there's one place where `ParentState` + /// invariants live; centralizing the I/O in `hydrate` means we read + /// the pointer once per push, not twice. + pub fn from_loaded(etag: ETag, digest: String, parent: Manifest) -> Self { + Self { + if_match: Some(etag), + parent_digest: Some(digest), + parent, + } } - let manifest_key = format!("manifests/{digest}"); - let bytes = store - .get_verified(&manifest_key, &digest) - .await - .map_err(|e| match e { - StoreError::DigestMismatch { .. } => { - CasError::ManifestReadFailed(format!("manifest digest mismatch: {e}")) - } - StoreError::NotFound(_) => { - CasError::ManifestReadFailed(format!("pointer names missing manifest: {e}")) - } - other => CasError::Backend(other), - })?; - let parent = Manifest::from_bytes(&bytes) - .map_err(|e| CasError::ManifestReadFailed(format!("parse manifest: {e}")))?; - - Ok(ParentState { - if_match: Some(etag), - parent_digest: Some(digest), - parent, - }) } /// Read `refs/*` + symbolic-HEAD from the workspace. @@ -401,6 +388,17 @@ fn digest_from_manifest_key(key: &str) -> Result { /// The function the §Push step 2–7 protocol distills to. /// +/// **Caller contract — `Inv_RefDerivedFromParent` is structural.** The +/// `parent_state` you pass in must be the same one the workspace was +/// hydrated from. Concretely: load `ParentState::load_or_fresh(store, +/// owner, repo)` → hydrate the workspace from `parent_state.parent` → +/// `install_hook` → run `receive-pack` against the workspace → call this +/// with the **same `parent_state`**. The CAS predicate is +/// `parent_state.if_match`, so a concurrent writer that advanced the +/// pointer between hydrate and CAS reliably surfaces as +/// `CasError::Conflict { winner_manifest, .. }` (412 → HTTP 409). The +/// loser re-pushes; the new push re-hydrates against the advanced state. +/// /// Concurrency: callable in parallel for the same `(owner, repo)`. The CAS /// at step 7 is the *only* writer serialization (`Inv_NoFork`). No /// advisory lock — adding one would hide exactly the interleavings the @@ -410,14 +408,13 @@ pub async fn cas_publish( repo_path: &Path, owner: &str, repo: &str, - refs_before: &BTreeMap, + parent_state: &ParentState, ) -> Result { let pkey = pointer_key(owner, repo); - // Step 3: read pointer + parent manifest. - let parent_state = read_parent(store, &pkey).await?; - - // Snapshot post-receive-pack state from disk. + // Snapshot post-receive-pack state from disk. `parent_state.parent.refs` + // are the refs the workspace was hydrated from — `pack-objects --revs` + // below uses them as the "negative" set to produce the delta pack. let (refs_after, head_observed) = snapshot_workspace_state(repo_path).await?; // HEAD fallback: a bare repo serving pushes shouldn't have detached @@ -431,8 +428,11 @@ pub async fn cas_publish( head_observed }; - // Capture new objects as a pack (steps 1–2). - let pack_bytes = capture_pack(repo_path, refs_before, &refs_after).await?; + // Capture new objects as a pack (steps 1–2). The "not" set is the + // parent manifest's refs — i.e. the set the workspace was hydrated + // against — so the delta covers exactly the objects this push + // introduced. + let pack_bytes = capture_pack(repo_path, &parent_state.parent.refs, &refs_after).await?; let new_pack_key = if let Some(bytes) = pack_bytes { debug!(bytes = bytes.len(), "captured push pack"); Some(store.put_pack(&bytes).await?) diff --git a/crates/sprout-relay/src/api/git/hydrate.rs b/crates/sprout-relay/src/api/git/hydrate.rs index 6211670fa..bf9eed25d 100644 --- a/crates/sprout-relay/src/api/git/hydrate.rs +++ b/crates/sprout-relay/src/api/git/hydrate.rs @@ -33,8 +33,9 @@ use futures_util::future::try_join_all; use tempfile::TempDir; use tokio::process::Command; +use super::cas_publish::ParentState; use super::manifest::{is_hex_oid, is_safe_refname, pointer_key, Manifest, ManifestError}; -use super::store::{GitStore, StoreError}; +use super::store::{ETag, GitStore, StoreError}; /// A bare repo hydrated to a temporary directory. /// @@ -88,26 +89,101 @@ pub async fn hydrate_for_read( owner: &str, repo: &str, ) -> Result, HydrateError> { - let pkey = pointer_key(owner, repo); + let Some((_etag, _digest, manifest)) = load_pointer(store, owner, repo).await? else { + return Ok(None); + }; + Ok(Some(materialize_manifest(store, &manifest).await?)) +} - // Step 1: pointer → manifest digest. - let (_etag, pointer_bytes) = match store.get_pointer(&pkey).await? { +/// Hydrate a bare repo for write (`receive-pack`) and return the +/// `ParentState` the workspace was hydrated from. +/// +/// The returned `ParentState` *must* be passed into +/// [`crate::api::git::cas_publish::cas_publish`] without re-reading the +/// pointer. The CAS predicate is `parent_state.if_match`, so a concurrent +/// writer that advances the pointer between this call and the CAS surfaces +/// reliably as `Conflict`/HTTP 409 — `Inv_RefDerivedFromParent` holds +/// because `m_after.parent` is *literally* the digest of the manifest the +/// workspace was hydrated from. +/// +/// First-push case (pointer absent): returns `(empty bare repo, +/// ParentState::fresh())`. The empty bare repo is a fresh `git init --bare` +/// with no refs and no objects; `receive-pack` will accept the first push +/// and create whatever refs the client sends, and `cas_publish` will CAS +/// the pointer with `If-None-Match: *`. +/// +/// Any below-pointer failure (manifest 404 under non-empty pointer, digest +/// mismatch, malformed pointer body) is a hard error — never silently +/// treated as "fresh repo," because that would let a corrupt pointer +/// install a brand-new history alongside the broken one. +pub async fn hydrate_for_write( + store: &GitStore, + owner: &str, + repo: &str, +) -> Result<(HydratedRepo, ParentState), HydrateError> { + match load_pointer(store, owner, repo).await? { + Some((etag, digest, manifest)) => { + let repo = materialize_manifest(store, &manifest).await?; + let parent = ParentState::from_loaded(etag, digest, manifest); + Ok((repo, parent)) + } + None => { + // First push: empty bare repo. No packs to fetch, no refs to + // install. `receive-pack` will accept whatever the client + // sends; `cas_publish` will use `If-None-Match: *`. + let tempdir = + TempDir::new().map_err(|e| HydrateError::Hydrate(format!("tempdir: {e}")))?; + let path = tempdir.path().to_path_buf(); + run_git(&path, &["init", "--bare", "--quiet"]).await?; + Ok(( + HydratedRepo { + _tempdir: tempdir, + path, + }, + ParentState::fresh(), + )) + } + } +} + +/// Resolve the pointer to its `(ETag, digest, verified Manifest)` triple. +/// +/// `Ok(None)` if the pointer is absent (caller decides 404 vs first-push +/// per call site). `Err(_)` on any below-pointer failure. +async fn load_pointer( + store: &GitStore, + owner: &str, + repo: &str, +) -> Result, HydrateError> { + let pkey = pointer_key(owner, repo); + let (etag, pointer_bytes) = match store.get_pointer(&pkey).await? { Some(p) => p, None => return Ok(None), }; let digest = std::str::from_utf8(&pointer_bytes) .map_err(|_| HydrateError::InvalidPointer)? - .trim(); + .trim() + .to_string(); if digest.len() != 64 || !digest.chars().all(|c| c.is_ascii_hexdigit()) { return Err(HydrateError::InvalidPointer); } - - // Step 2: manifest (digest-verified). let manifest_key = format!("manifests/{digest}"); - let manifest_bytes = store.get_verified(&manifest_key, digest).await?; + let manifest_bytes = store.get_verified(&manifest_key, &digest).await?; let manifest = Manifest::from_bytes(&manifest_bytes)?; + Ok(Some((etag, digest, manifest))) +} - // Step 3: fetch all packs in parallel, each digest-verified by its key. +/// Materialize a manifest into a fresh tempdir bare repo. +/// +/// Shared by `hydrate_for_read` and `hydrate_for_write`. Phase-ordered +/// (packs first + verified + indexed, refs/HEAD only after) so a failed +/// hydrate leaves no advertised refs — failure mode is "empty/no refs," +/// never "refs point at missing objects." +async fn materialize_manifest( + store: &GitStore, + manifest: &Manifest, +) -> Result { + // Fetch all packs in parallel, each digest-verified by its key. let pack_fetches = manifest.packs.iter().map(|key| async move { let digest = key .strip_prefix("packs/") @@ -176,10 +252,10 @@ pub async fn hydrate_for_read( .await .map_err(|e| HydrateError::Hydrate(format!("write HEAD: {e}")))?; - Ok(Some(HydratedRepo { + Ok(HydratedRepo { _tempdir: tempdir, path, - })) + }) } /// Run `git ` in `cwd`, fail on non-zero exit. From 645ad84418120d1bbe8a0188b939439842261935 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 11:28:36 -0400 Subject: [PATCH 17/24] test(git): gate object store and add live git e2e Co-authored-by: Mari <95cae996907d7cab9f5dbf43c0f53edeac6ab0b032a6feae4abfd784e467b3f5@users.noreply.sprout> Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/store.rs | 9 +- crates/sprout-relay/src/main.rs | 37 +++ crates/sprout-relay/src/state.rs | 12 + crates/sprout-test-client/tests/e2e_git.rs | 357 +++++++++++++++++++++ 4 files changed, 413 insertions(+), 2 deletions(-) create mode 100644 crates/sprout-test-client/tests/e2e_git.rs diff --git a/crates/sprout-relay/src/api/git/store.rs b/crates/sprout-relay/src/api/git/store.rs index e91451fa1..788616b57 100644 --- a/crates/sprout-relay/src/api/git/store.rs +++ b/crates/sprout-relay/src/api/git/store.rs @@ -24,6 +24,8 @@ #![allow(dead_code)] // wired in by the push path in a follow-up commit +use std::sync::Arc; + use bytes::Bytes; use s3::creds::Credentials; use s3::error::S3Error; @@ -136,8 +138,9 @@ impl From for StoreError { } /// Object-store client for git refs. +#[derive(Clone)] pub struct GitStore { - bucket: Box, + bucket: Arc, } impl GitStore { @@ -159,7 +162,9 @@ impl GitStore { let bucket = Bucket::new(bucket_name, region, creds) .map_err(StoreError::Backend)? .with_path_style(); - Ok(Self { bucket }) + Ok(Self { + bucket: Arc::from(bucket), + }) } /// Compute the hex SHA-256 of `bytes`. The content-addressed key. diff --git a/crates/sprout-relay/src/main.rs b/crates/sprout-relay/src/main.rs index b74c091c2..395b5f07d 100644 --- a/crates/sprout-relay/src/main.rs +++ b/crates/sprout-relay/src/main.rs @@ -225,6 +225,43 @@ async fn main() -> anyhow::Result<()> { ); let state = Arc::new(app_state); + // Git-on-object-storage: admit the configured S3/MinIO backend against the + // linearizable conditional-write axiom (A3) before serving git traffic. + // Failure is fatal: a backend that cannot satisfy pointer CAS invalidates + // the manifest-pointer protocol. This is a deployment gate, not a proof. + if std::env::var("SPROUT_GIT_CONFORMANCE_PROBE") + .map(|v| v != "false") + .unwrap_or(true) + { + let race_width = std::env::var("SPROUT_GIT_PROBE_WRITERS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(32); + let race_rounds = std::env::var("SPROUT_GIT_PROBE_ROUNDS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(3); + let cfg = sprout_relay::api::git::store::ProbeConfig { + race_width, + race_rounds, + }; + tracing::info!( + race_width, + race_rounds, + "running git object-store conformance probe (A3 gate)" + ); + let report = state + .git_store + .run_conformance_probe(cfg) + .await + .map_err(|e| anyhow::anyhow!("git conformance probe failed: {e}"))?; + tracing::info!( + race_width = report.race_width, + race_rounds = report.race_rounds, + "git object-store backend admitted: A3 conformance probe passed" + ); + } + // NIP-43: publish the initial membership list on startup so clients can // REQ kind:13534 immediately without waiting for the next membership change. if config.require_relay_membership { diff --git a/crates/sprout-relay/src/state.rs b/crates/sprout-relay/src/state.rs index 08f912eed..7706ff167 100644 --- a/crates/sprout-relay/src/state.rs +++ b/crates/sprout-relay/src/state.rs @@ -213,6 +213,10 @@ pub struct AppState { pub audit_tx: mpsc::Sender, /// Media storage client (S3/MinIO). pub media_storage: Arc, + /// Git object-store backend (content-addressed packs/manifests plus + /// CAS-guarded manifest pointer). This is the durable git source of truth; + /// see `api::git::store` and `docs/git-on-object-storage.md`. + pub git_store: crate::api::git::store::GitStore, /// Audio relay room manager — tracks active huddle audio rooms. pub audio_rooms: Arc, /// Set to `true` on SIGTERM — readiness probe returns 503. @@ -316,6 +320,13 @@ impl AppState { }); let git_max_concurrent_ops = config.git_max_concurrent_ops; + let git_store = crate::api::git::store::GitStore::new( + &config.media.s3_endpoint, + &config.media.s3_access_key, + &config.media.s3_secret_key, + &config.media.s3_bucket, + ) + .expect("media storage was already constructed with this S3 config"); let state = Self { config: Arc::new(config), db, @@ -355,6 +366,7 @@ impl AppState { search_index_tx, audit_tx, media_storage: Arc::new(media_storage), + git_store, audio_rooms: Arc::new(AudioRoomManager::new()), shutting_down: Arc::new(AtomicBool::new(false)), started_at: Instant::now(), diff --git a/crates/sprout-test-client/tests/e2e_git.rs b/crates/sprout-test-client/tests/e2e_git.rs new file mode 100644 index 000000000..ff1976a39 --- /dev/null +++ b/crates/sprout-test-client/tests/e2e_git.rs @@ -0,0 +1,357 @@ +//! End-to-end git-over-object-storage tests. +//! +//! Drives the real `git` binary (clone / push / fetch / force-push / tag, +//! plus a best-effort concurrent push race) against a running relay backed by +//! S3/MinIO, exercising the full manifest-pointer CAS commit path described in +//! `docs/git-on-object-storage.md`. +//! +//! Requires: relay at localhost:3000 with git + S3/MinIO configured, `git` on +//! PATH, and the `git-credential-nostr` helper built. All tests are `#[ignore]` +//! so they don't run in CI by default. +//! +//! # Running +//! +//! ```text +//! cargo build --release -p git-credential-nostr +//! GIT_CREDENTIAL_NOSTR_BIN=$PWD/target/release/git-credential-nostr \ +//! cargo test -p sprout-test-client --test e2e_git -- --ignored --nocapture +//! ``` + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use nostr::{EventBuilder, Keys, Kind, Tag}; + +fn relay_http_url() -> String { + std::env::var("RELAY_HTTP_URL").unwrap_or_else(|_| "http://localhost:3000".to_string()) +} + +/// Path to the compiled credential helper. Defaults to the workspace release +/// build; override with `GIT_CREDENTIAL_NOSTR_BIN`. +fn credential_helper() -> PathBuf { + if let Ok(p) = std::env::var("GIT_CREDENTIAL_NOSTR_BIN") { + return PathBuf::from(p); + } + // tests run from the crate dir; the workspace target is two levels up. + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.pop(); + p.pop(); + p.push("target/release/git-credential-nostr"); + p +} + +/// Submit a signed event to the relay's REST bridge (`POST /api/events`). +async fn post_event(event: &nostr::Event) { + let client = reqwest::Client::new(); + let resp = client + .post(format!("{}/events", relay_http_url())) + .header("X-Pubkey", event.pubkey.to_hex()) + .header("Content-Type", "application/json") + .body(serde_json::to_string(event).unwrap()) + .send() + .await + .expect("post event"); + assert!( + resp.status().is_success(), + "event rejected: {}", + resp.text().await.unwrap_or_default() + ); +} + +/// Run `git` with the Sprout credential helper and isolated config. +fn git_status(args: &[&str], cwd: &Path, owner_nsec: &str) -> std::process::Output { + let helper = credential_helper(); + Command::new("git") + .args([ + "-c", + "credential.useHttpPath=true", + "-c", + &format!("credential.helper={}", helper.display()), + "-c", + "commit.gpgsign=false", + "-c", + "tag.gpgsign=false", + "-c", + "user.name=E2E", + "-c", + "user.email=e2e@example.com", + ]) + .args(args) + .current_dir(cwd) + // Isolate from any machine/agent git config (signing, etc.). + .env("GIT_CONFIG_GLOBAL", "/dev/null") + .env("GIT_CONFIG_NOSYSTEM", "1") + .env_remove("GIT_CONFIG_COUNT") + .env("NOSTR_PRIVATE_KEY", owner_nsec) + .output() + .expect("spawn git") +} + +/// Run `git` with the Sprout credential helper and isolated config. Asserts the +/// command succeeds; returns stdout. +fn git(args: &[&str], cwd: &Path, owner_nsec: &str) -> String { + let out = git_status(args, cwd, owner_nsec); + assert!( + out.status.success(), + "git {:?} failed:\nstdout: {}\nstderr: {}", + args, + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + String::from_utf8_lossy(&out.stdout).into_owned() +} + +#[tokio::test] +#[ignore = "requires live relay + MinIO + git"] +async fn git_clone_push_fetch_force_roundtrip() { + use nostr::ToBech32; + + let owner = Keys::generate(); + let owner_hex = owner.public_key().to_hex(); + let owner_nsec = owner.secret_key().to_bech32().unwrap(); + let repo = format!("e2e-git-{}", std::process::id()); + + // Announce the repo (kind:30617) so the relay creates the bare repo + hook. + let announce = EventBuilder::new( + Kind::from(30617), + "", + vec![ + Tag::parse(&["d", &repo]).unwrap(), + Tag::parse(&["name", "e2e git repo"]).unwrap(), + ], + ) + .sign_with_keys(&owner) + .unwrap(); + post_event(&announce).await; + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + + let tmp = tempdir(); + let url = format!("{}/git/{}/{}", relay_http_url(), owner_hex, repo); + + // 1. Clone the empty repo. + git( + &["clone", "--quiet", &url, "clone1"], + tmp.path(), + &owner_nsec, + ); + let clone1 = tmp.path().join("clone1"); + assert!(clone1.exists(), "clone1 created"); + + // 2. Push an initial commit. + std::fs::write(clone1.join("README.md"), "hello\n").unwrap(); + git(&["add", "."], &clone1, &owner_nsec); + git( + &["commit", "--quiet", "-m", "initial"], + &clone1, + &owner_nsec, + ); + git(&["branch", "-M", "main"], &clone1, &owner_nsec); + git(&["push", "--quiet", "origin", "main"], &clone1, &owner_nsec); + let sha1 = git(&["rev-parse", "main"], &clone1, &owner_nsec) + .trim() + .to_string(); + + // 3. A fresh clone observes the pushed content and exact SHA. + git( + &["clone", "--quiet", &url, "clone2"], + tmp.path(), + &owner_nsec, + ); + let clone2 = tmp.path().join("clone2"); + assert_eq!( + std::fs::read_to_string(clone2.join("README.md")).unwrap(), + "hello\n", + "fresh clone sees pushed content" + ); + assert_eq!( + git(&["rev-parse", "main"], &clone2, &owner_nsec).trim(), + sha1, + "fresh clone main == pushed SHA" + ); + + // 4. Second commit, push, pull into the other clone. + std::fs::write(clone1.join("README.md"), "hello\nmore\n").unwrap(); + git( + &["commit", "--quiet", "-am", "second"], + &clone1, + &owner_nsec, + ); + git(&["push", "--quiet", "origin", "main"], &clone1, &owner_nsec); + let sha2 = git(&["rev-parse", "main"], &clone1, &owner_nsec) + .trim() + .to_string(); + git(&["pull", "--quiet", "origin", "main"], &clone2, &owner_nsec); + assert_eq!( + git(&["rev-parse", "main"], &clone2, &owner_nsec).trim(), + sha2, + "clone2 fetched second commit" + ); + + // 5. Force-push a rewritten history. + git(&["reset", "--quiet", "--hard", &sha1], &clone1, &owner_nsec); + std::fs::write(clone1.join("README.md"), "forced\n").unwrap(); + git( + &["commit", "--quiet", "-am", "forced"], + &clone1, + &owner_nsec, + ); + let sha_f = git(&["rev-parse", "main"], &clone1, &owner_nsec) + .trim() + .to_string(); + git( + &["push", "--quiet", "--force", "origin", "main"], + &clone1, + &owner_nsec, + ); + assert_ne!(sha_f, sha2); + + // 6. A new clone after the force-push gets the rewritten history. + git( + &["clone", "--quiet", &url, "clone3"], + tmp.path(), + &owner_nsec, + ); + assert_eq!( + std::fs::read_to_string(tmp.path().join("clone3/README.md")).unwrap(), + "forced\n", + "clone3 has force-pushed content" + ); + + // 7. Tag push survives the round-trip. + git(&["tag", "v1.0"], &clone1, &owner_nsec); + git(&["push", "--quiet", "origin", "v1.0"], &clone1, &owner_nsec); + git( + &["clone", "--quiet", &url, "clone4"], + tmp.path(), + &owner_nsec, + ); + let tags = git(&["tag"], &tmp.path().join("clone4"), &owner_nsec); + assert!(tags.contains("v1.0"), "tag v1.0 cloned back: {tags}"); +} + +#[tokio::test] +#[ignore = "requires live relay + MinIO + git"] +async fn git_concurrent_push_one_wins_and_repo_recovers() { + use nostr::ToBech32; + + let owner = Keys::generate(); + let owner_hex = owner.public_key().to_hex(); + let owner_nsec = owner.secret_key().to_bech32().unwrap(); + let repo = format!("e2e-git-concurrent-{}", std::process::id()); + + let announce = EventBuilder::new( + Kind::from(30617), + "", + vec![ + Tag::parse(&["d", &repo]).unwrap(), + Tag::parse(&["name", "e2e concurrent git repo"]).unwrap(), + ], + ) + .sign_with_keys(&owner) + .unwrap(); + post_event(&announce).await; + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + + let tmp = tempdir_named("sprout-e2e-git-concurrent"); + let url = format!("{}/git/{}/{}", relay_http_url(), owner_hex, repo); + + git(&["clone", "--quiet", &url, "seed"], tmp.path(), &owner_nsec); + let seed = tmp.path().join("seed"); + std::fs::write(seed.join("README.md"), "base\n").unwrap(); + git(&["add", "."], &seed, &owner_nsec); + git(&["commit", "--quiet", "-m", "base"], &seed, &owner_nsec); + git(&["branch", "-M", "main"], &seed, &owner_nsec); + git(&["push", "--quiet", "origin", "main"], &seed, &owner_nsec); + let base_sha = git(&["rev-parse", "main"], &seed, &owner_nsec) + .trim() + .to_string(); + + let contenders = 8usize; + for i in 0..contenders { + let dir = format!("c{i}"); + git(&["clone", "--quiet", &url, &dir], tmp.path(), &owner_nsec); + let worktree = tmp.path().join(&dir); + std::fs::write( + worktree.join(format!("file-{i}.txt")), + format!("winner? {i}\n"), + ) + .unwrap(); + git(&["add", "."], &worktree, &owner_nsec); + git( + &["commit", "--quiet", "-m", &format!("contender {i}")], + &worktree, + &owner_nsec, + ); + } + + let mut children = Vec::new(); + for i in 0..contenders { + let worktree = tmp.path().join(format!("c{i}")); + let owner_nsec = owner_nsec.clone(); + children.push(std::thread::spawn(move || { + git_status( + &["push", "--quiet", "origin", "main"], + &worktree, + &owner_nsec, + ) + })); + } + + let mut successes = 0usize; + let mut failures = 0usize; + for child in children { + let out = child.join().expect("push thread panicked"); + if out.status.success() { + successes += 1; + } else { + failures += 1; + } + } + assert_eq!(successes, 1, "exactly one concurrent push should win"); + assert_eq!(failures, contenders - 1, "the rest should lose cleanly"); + + git( + &["clone", "--quiet", &url, "after"], + tmp.path(), + &owner_nsec, + ); + let after = tmp.path().join("after"); + let after_sha = git(&["rev-parse", "main"], &after, &owner_nsec) + .trim() + .to_string(); + assert_ne!(after_sha, base_sha, "winner advanced main"); + let log = git( + &["log", "--oneline", "--decorate", "-1"], + &after, + &owner_nsec, + ); + assert!( + log.contains("contender"), + "published head is one contender: {log}" + ); +} + +// ── tiny tempdir (avoid an extra dep) ───────────────────────────────────────── + +struct TempDir(PathBuf); +impl TempDir { + fn path(&self) -> &Path { + &self.0 + } +} +impl Drop for TempDir { + fn drop(&mut self) { + let _ = std::fs::remove_dir_all(&self.0); + } +} +fn tempdir() -> TempDir { + tempdir_named("sprout-e2e-git") +} + +fn tempdir_named(prefix: &str) -> TempDir { + let mut p = std::env::temp_dir(); + p.push(format!("{prefix}-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&p); + std::fs::create_dir_all(&p).unwrap(); + TempDir(p) +} From 11085ba0bad9b45246b554e23e824f2152e440a1 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 12:29:58 -0400 Subject: [PATCH 18/24] feat(git/transport): all three handlers on hydrate + CAS, end to end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrites transport.rs to make the manifest-pointer protocol the only path that exists. The legacy persistent-bare-repo path is gone; every request hydrates an ephemeral tempdir from the published state and drops it on scope exit. Per Eva's "transport is yours, end to end" brief. ## What lands **Read path (info_refs, upload_pack):** validate_repo_id → hydrate_for_read → spawn git --stateless-rpc against repo.path() → build response. `Ok(None)` from hydrate (pointer absent) → 404; `Err` (any below-pointer failure: manifest 404, digest mismatch, pack 404, index-pack fail) → 5xx. Closes Max's read-fail-closed blocker — A1 detectability now holds end-to-end on the read side. **Push path (receive_pack):** validate_repo_id → hydrate_for_write → install_hook → run receive-pack against tempdir → finalize_push. PushContext binds the *same* ParentState the workspace was hydrated from, so the CAS predicate in cas_publish is structurally tied to the observed pointer — Inv_RefDerivedFromParent holds by construction, no re-reading of the pointer between hydrate and CAS. **Fence (finalize_push):** the unique constructor of a push 2xx. Calls cas_publish; on Won, derives kind:30618 from the *committed* manifest's refs/head via manifest_event::build_ref_state_event, fans out, *then* builds the response. On Conflict → 409 (tempdir drops on scope exit; no on-disk reconcile needed because there's no persistent disk to drift). On ManifestInvalid → 400 (client/input rejected pre- CAS). On other CasError → 500. **30618 only on real change:** compares parent_digest to the committed manifest digest; a true no-op push (refs/head identical to parent, no new objects) skips 30618 emission entirely. Pointer re-installation on no-op is cheap (one IfMatch PUT of the same value) and was deemed not worth a special-case bypass. ## What's gone - `git_repo_locks` on AppState + the per-repo tokio mutex. **Writer serialization is the CAS** (spec §Push, no advisory lock in v1). The mutex hid the contention Inv_NoFork proves safe and was wrong at multi-instance scale anyway. - `should_publish` predicate + `SnapshotError` + `snapshot_refs` + 6 unit tests on the old fence shape. The new fence is "the structural seam: only finalize_push builds a push 2xx, and only after cas_publish returns Ok." Skip semantics moved into the manifest-changed comparison in finalize_push. - `publish_ref_state` (90 LOC of inline kind:30618 building) — replaced by a 5-line call to Sami's `manifest_event::build_ref_state_event` over the committed manifest. Unit-pinned NIP-34 invariants beat reading refs from disk a second time. - `validate_repo_path` (path canonicalization, repo-root containment). No longer relevant — workspaces are tempdirs, not paths under a repo root. Owner/repo *name* validation stays as `validate_repo_id`; it's still load-bearing because owner/repo become object-store key components via `manifest::pointer_key`. - `run_git_service` + `run_git_subprocess` (`(owner, repo)`-coupled) replaced by single `run_git_at(path, service, body, extra_env)` taking a path directly. One shared subprocess runner for all three handlers; the read paths use it for `--advertise-refs`, push uses it for `--stateless-rpc receive-pack`. ## Caller contract preserved - Pre-receive hook contract is unchanged: install_hook writes the same script; hook env (SPROUT_HOOK_URL/SECRET/REPO_*/PUSHER_PUBKEY + GIT_CONFIG_COUNT/KEY/VALUE override of core.hooksPath) is the same per-push env vec, just keyed to the tempdir's hooks/ instead of a persistent path. Authz + FF detection is byte-identical to before. - Global git_semaphore stays — bounds in-flight subprocesses, distinct from the per-repo serialization the dropped mutex was conflating itself with. ## Tests 238 relay lib tests green (-6 from the dropped should_publish tests). clippy --tests -D warnings clean. fmt clean. Relay binary builds. sprout-test-client e2e_git compiles (Mari's live tests, ported via Mari's cherry-picked commit). The full live e2e roundtrip needs a running relay + MinIO; that's Eva's e2e run on the assembled branch per the convergence plan. Refs: - docs/git-on-object-storage.md §Push step 2–8, §Read, §Implementation Correspondence, §Mechanized Verification (Inv_NoFork, Inv_RefEffectApplied, Inv_RefDerivedFromParent, Inv_Closed). Co-authored-by: Tyler Longwell Co-authored-by: Quinn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- .../sprout-relay/src/api/git/cas_publish.rs | 24 +- crates/sprout-relay/src/api/git/transport.rs | 783 ++++++++---------- crates/sprout-relay/src/state.rs | 9 +- 3 files changed, 355 insertions(+), 461 deletions(-) diff --git a/crates/sprout-relay/src/api/git/cas_publish.rs b/crates/sprout-relay/src/api/git/cas_publish.rs index e4b2f28db..f1ab91c68 100644 --- a/crates/sprout-relay/src/api/git/cas_publish.rs +++ b/crates/sprout-relay/src/api/git/cas_publish.rs @@ -24,9 +24,13 @@ //! then derives kind:30618 against `m_after` (Sami's //! `manifest_event::build_ref_state_event`) and constructs the //! success response — the *fence* in §Push step 8. -//! - `LostRace` → return `CasError::Conflict` (→ HTTP 409). **No -//! retry.** The losing push's receive-pack output was derived against -//! the now-superseded parent; reusing it would violate +//! - `LostRace` → re-read the pointer to fetch the winner's manifest, +//! then return `CasError::Conflict { winner_manifest, +//! winner_manifest_key }` (→ HTTP 409). The winner payload is for +//! the caller's diagnostic + future cache; the loser's ephemeral +//! tempdir dies on scope exit, so there's no disk to reconcile. +//! **No retry.** The losing push's receive-pack output was derived +//! against the now-superseded parent; reusing it would violate //! `Inv_RefDerivedFromParent` (§Mechanized Verification). The client //! re-runs `git push`, which re-hydrates and re-runs receive-pack //! against the advanced state — that is the only safe retry, and @@ -390,13 +394,13 @@ fn digest_from_manifest_key(key: &str) -> Result { /// /// **Caller contract — `Inv_RefDerivedFromParent` is structural.** The /// `parent_state` you pass in must be the same one the workspace was -/// hydrated from. Concretely: load `ParentState::load_or_fresh(store, -/// owner, repo)` → hydrate the workspace from `parent_state.parent` → -/// `install_hook` → run `receive-pack` against the workspace → call this -/// with the **same `parent_state`**. The CAS predicate is -/// `parent_state.if_match`, so a concurrent writer that advanced the -/// pointer between hydrate and CAS reliably surfaces as -/// `CasError::Conflict { winner_manifest, .. }` (412 → HTTP 409). The +/// hydrated from. Concretely: `hydrate::hydrate_for_write(store, owner, +/// repo)` returns `(HydratedRepo, ParentState)` from a single pointer +/// observation → `install_hook(repo.path())` → run `receive-pack` +/// against the workspace → call this with the **same `parent_state`**. +/// The CAS predicate is `parent_state.if_match`, so a concurrent writer +/// that advanced the pointer between hydrate and CAS reliably surfaces +/// as `CasError::Conflict { winner_manifest, .. }` (412 → HTTP 409). The /// loser re-pushes; the new push re-hydrates against the advanced state. /// /// Concurrency: callable in parallel for the same `(owner, repo)`. The CAS diff --git a/crates/sprout-relay/src/api/git/transport.rs b/crates/sprout-relay/src/api/git/transport.rs index ef701182a..fa5423bc3 100644 --- a/crates/sprout-relay/src/api/git/transport.rs +++ b/crates/sprout-relay/src/api/git/transport.rs @@ -8,7 +8,7 @@ //! Auth: NIP-98 on all routes (clone + push). No public repos for v1. //! Transport: shells out to `git --stateless-rpc` with `env_clear()`. -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::Arc; use axum::{ @@ -26,6 +26,10 @@ use tokio::process::Command; use tower_http::limit::RequestBodyLimitLayer; use tracing::{error, info, warn}; +use super::cas_publish::{cas_publish, CasError, ParentState}; +use super::hook::install_hook; +use super::hydrate::{hydrate_for_read, hydrate_for_write, HydrateError, HydratedRepo}; +use super::manifest_event::{build_ref_state_event, RefStateInputs}; use crate::state::AppState; // ── Timeouts ───────────────────────────────────────────────────────────────── @@ -187,21 +191,20 @@ impl axum::extract::FromRequestParts> for GitAuth { } } -// ── Path Validation ────────────────────────────────────────────────────────── +// ── Repo Id Validation ─────────────────────────────────────────────────────── -struct ValidatedRepoPath { - repo_path: PathBuf, -} - -/// Validate and resolve a git repo path from URL parameters. +/// Validate URL `(owner, repo)` parameters and return the canonical repo +/// id (= `repo` with any `.git` suffix stripped). /// -/// Security: allowlist characters, canonicalize, verify under repo root. +/// Security: allowlist characters on both owner (64 lower-hex pubkey) and +/// repo name (`[a-zA-Z0-9._-]{1,64}`, no leading dots, no `..`). The +/// filesystem-path canonicalization that the old persistent-repo +/// implementation needed is no longer relevant — git workspaces are +/// ephemeral tempdirs from `hydrate_for_{read,write}`, not paths under a +/// repo root — but the *name* validation stays because owner/repo are +/// still used as object-store key components via `manifest::pointer_key`. #[allow(clippy::result_large_err)] // Response is the natural error type for axum handlers -fn validate_repo_path( - owner: &str, - repo: &str, - git_repo_root: &Path, -) -> Result { +fn validate_repo_id<'a>(owner: &str, repo: &'a str) -> Result<&'a str, Response> { // Owner must be exactly 64 lowercase hex chars. if owner.len() != 64 || !owner @@ -226,26 +229,7 @@ fn validate_repo_path( return Err((StatusCode::BAD_REQUEST, "invalid repo name").into_response()); } - let repo_path = git_repo_root.join(owner).join(format!("{repo_name}.git")); - - // Path canonicalization: verify resolved path is under repo root. - // Fail closed: if the repo root doesn't exist, reject — the service can't operate safely. - let canonical_root = git_repo_root.canonicalize().map_err(|_| { - error!("git_repo_path does not exist or cannot be canonicalized"); - ( - StatusCode::INTERNAL_SERVER_ERROR, - "git service misconfigured", - ) - .into_response() - })?; - // Repo may not exist yet (404 handled later), but if it does, verify containment. - if let Ok(canonical_repo) = repo_path.canonicalize() { - if !canonical_repo.starts_with(&canonical_root) { - return Err((StatusCode::BAD_REQUEST, "path traversal detected").into_response()); - } - } - - Ok(ValidatedRepoPath { repo_path }) + Ok(repo_name) } /// Apply hardened environment to a git subprocess command. @@ -265,6 +249,38 @@ pub(crate) fn harden_git_env(cmd: &mut Command) { .env("HOME", "/dev/null"); } +/// Acquire the global git-subprocess semaphore permit, or respond 503. +/// +/// Bounds total in-flight git subprocesses across all routes. Returned +/// `OwnedSemaphorePermit` releases automatically on drop, so the caller +/// just binds it for the function scope. +#[allow(clippy::result_large_err)] +fn acquire_git_permit( + state: &Arc, +) -> Result { + Arc::clone(&state.git_semaphore) + .try_acquire_owned() + .map_err(|_| { + Response::builder() + .status(StatusCode::SERVICE_UNAVAILABLE) + .header("Retry-After", "5") + .body(Body::from("git service busy")) + .unwrap() + }) +} + +/// Convert a [`HydrateError`] to the HTTP response shape the read+write +/// paths share. Below-pointer failure ⇒ 5xx; pointer-absent is signalled +/// via `Ok(None)` from [`hydrate_for_read`] and never reaches this fn. +fn hydrate_error_to_response(owner: &str, repo: &str, err: HydrateError) -> Response { + error!(error = %err, owner = %owner, repo = %repo, "hydrate failed"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + "git backend hydration failed", + ) + .into_response() +} + // ── Route Handlers ─────────────────────────────────────────────────────────── #[derive(Deserialize)] @@ -283,6 +299,13 @@ pub struct GitRepoParams { /// `GET /git/{owner}/{repo}/info/refs?service={service}` /// /// Advertises refs for clone (git-upload-pack) or push (git-receive-pack). +/// +/// **Read fail-closed (Max's blocker):** pointer-absent → 404 (repo +/// never existed). *Any* below-pointer failure (manifest 404 under a +/// non-empty pointer, digest mismatch, pack 404, `index-pack` failure) +/// → 5xx via `HydrateError`. The legacy "leave disk as-is on hydrate +/// error" behavior is gone — A1 detectability now holds end-to-end on +/// the read side too. pub async fn info_refs( State(state): State>, _auth: GitAuth, @@ -294,29 +317,29 @@ pub async fn info_refs( "git-upload-pack" | "git-receive-pack" => &query.service, _ => return Err((StatusCode::BAD_REQUEST, "invalid service").into_response()), }; + let _repo_name = validate_repo_id(¶ms.owner, ¶ms.repo)?; - let validated = validate_repo_path(¶ms.owner, ¶ms.repo, &state.config.git_repo_path)?; - if !validated.repo_path.exists() { - return Err((StatusCode::NOT_FOUND, "repository not found").into_response()); - } + let _permit = acquire_git_permit(&state)?; - let _permit = state.git_semaphore.try_acquire().map_err(|_| { - Response::builder() - .status(StatusCode::SERVICE_UNAVAILABLE) - .header("Retry-After", "5") - .body(Body::from("git service busy")) - .unwrap() - })?; + // Hydrate the published state into an ephemeral bare repo. `Ok(None)` + // = pointer absent = repo never existed → 404. `Err(_)` = below-pointer + // failure → 5xx. + let repo = match hydrate_for_read(&state.git_store, ¶ms.owner, ¶ms.repo).await { + Ok(Some(repo)) => repo, + Ok(None) => return Err((StatusCode::NOT_FOUND, "repository not found").into_response()), + Err(e) => return Err(hydrate_error_to_response(¶ms.owner, ¶ms.repo, e)), + }; - let mut cmd = Command::new("git"); // Git's smart HTTP protocol uses service names like "git-upload-pack" and // "git-receive-pack", but the actual git subcommands are "upload-pack" and // "receive-pack" (without the "git-" prefix). let git_subcmd = service.strip_prefix("git-").unwrap_or(service.as_str()); + + let mut cmd = Command::new("git"); cmd.arg(git_subcmd) .arg("--stateless-rpc") .arg("--advertise-refs") - .arg(&validated.repo_path) + .arg(repo.path()) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .kill_on_drop(true); @@ -328,8 +351,6 @@ pub async fn info_refs( })?; // kill_on_drop requires a Child handle — .output() doesn't expose one. - // Spawn first, then wait under a timeout; on timeout the Child is dropped - // and kill_on_drop terminates the subprocess. let output = tokio::time::timeout(INFO_REFS_TIMEOUT, child.wait_with_output()) .await .map_err(|_| { @@ -349,6 +370,10 @@ pub async fn info_refs( error!(stderr = %stderr, "git --advertise-refs failed"); return Err((StatusCode::INTERNAL_SERVER_ERROR, "git error").into_response()); } + // `repo` (the tempdir) must live until *after* the subprocess has read + // its objects. Holding it until here is the structural lifetime that + // guarantees that. + drop(repo); // Build pkt-line response: service header + flush + git output. let svc_line = format!("# service={service}\n"); @@ -370,89 +395,102 @@ pub async fn info_refs( /// `POST /git/{owner}/{repo}/git-upload-pack` /// /// Handles clone/fetch — client sends wants/haves, server sends pack data. +/// +/// Reads from a tempdir bare repo hydrated from the published manifest; +/// the tempdir lives only for the duration of this request. pub async fn upload_pack( State(state): State>, _auth: GitAuth, AxumPath(params): AxumPath, body: Body, ) -> Result { - run_git_service(&state, ¶ms.owner, ¶ms.repo, "upload-pack", body).await + let _ = validate_repo_id(¶ms.owner, ¶ms.repo)?; + let _permit = acquire_git_permit(&state)?; + + let repo = match hydrate_for_read(&state.git_store, ¶ms.owner, ¶ms.repo).await { + Ok(Some(repo)) => repo, + Ok(None) => return Err((StatusCode::NOT_FOUND, "repository not found").into_response()), + Err(e) => return Err(hydrate_error_to_response(¶ms.owner, ¶ms.repo, e)), + }; + + let output = run_git_at(repo.path(), "upload-pack", body, &[]).await?; + drop(repo); + Ok(build_git_response("upload-pack", output)) } /// `POST /git/{owner}/{repo}/git-receive-pack` /// /// Handles push — client sends ref updates + pack data. +/// /// Authorization: NIP-98 authenticates the pusher. The pre-receive hook -/// calls back to the internal policy endpoint for ref-level authorization -/// (channel role + protection rules). Any authenticated user can attempt a push; -/// the hook enforces the actual permissions. +/// installed into the hydrated tempdir calls back to the internal policy +/// endpoint for ref-level authorization (channel role + protection rules +/// from kind:30617). +/// +/// **Push flow (spec §Push steps 1–8):** +/// 1. Validate ids; acquire global git permit (bounds concurrent +/// subprocesses; **no per-repo lock** — writer serialization is the +/// pointer CAS, per spec). +/// 2. `hydrate_for_write` → `(HydratedRepo, ParentState)`. The +/// `ParentState` is the *same* observed pointer state the workspace +/// was hydrated from; it's the CAS predicate at step 7 below, which +/// is what makes `Inv_RefDerivedFromParent` structural rather than a +/// code-review property. +/// 3. `install_hook(repo.path())` — drop the pre-receive script + chmod. +/// Same script, same env contract, same policy callback as today; +/// only the on-disk path is ephemeral. +/// 4. Run `receive-pack --stateless-rpc` against the tempdir. The hook +/// enforces fast-forward + branch protection in-process. +/// 5. `finalize_push(PushContext { pack, parent_state, repo, ... })` is +/// the only path that builds a push `Response`. It calls +/// `cas_publish` (§Push steps 2–7) and emits kind:30618 on `Won`, +/// *only then* builds the 2xx. pub async fn receive_pack( State(state): State>, auth: GitAuth, AxumPath(params): AxumPath, body: Body, ) -> Result { + let repo_name = validate_repo_id(¶ms.owner, ¶ms.repo)?; let pusher_hex = hex::encode(auth.pubkey.serialize()); - - // Per-repo lock: prevent concurrent pushes to the same bare repo. - // git receive-pack is not safe for concurrent access. - let validated = validate_repo_path(¶ms.owner, ¶ms.repo, &state.config.git_repo_path)?; - let repo_lock = state - .git_repo_locks - .entry(validated.repo_path.clone()) - .or_insert_with(|| Arc::new(tokio::sync::Mutex::new(()))) - .clone(); - let _repo_guard = repo_lock.lock().await; - - // SECURITY: Verify pre-receive hook is a regular file, executable, and not a symlink. - // If the hook is missing, non-executable, or a symlink (potential tampering), - // deny the push rather than allowing it without permission checks. - let hook_path = validated.repo_path.join("hooks").join("pre-receive"); - { - let hook_ok = { - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - // Use symlink_metadata to detect symlinks (doesn't follow them). - std::fs::symlink_metadata(&hook_path) - .map(|m| { - m.file_type().is_file() // Regular file, not symlink - && m.permissions().mode() & 0o111 != 0 // Executable - }) - .unwrap_or(false) - } - #[cfg(not(unix))] - { - hook_path.is_file() - } - }; - if !hook_ok { - warn!( - repo = %params.repo, - hook = %hook_path.display(), - "push denied: pre-receive hook missing, not executable, or symlink" - ); - return Err(( - StatusCode::INTERNAL_SERVER_ERROR, - "push denied: repository permission hook not installed", - ) - .into_response()); - } - } - - // Resolve repo name (strip .git suffix if present). - let repo_name = params.repo.strip_suffix(".git").unwrap_or(¶ms.repo); + let _permit = acquire_git_permit(&state)?; + + // **No per-repo advisory lock — by design.** Writer serialization is + // the pointer CAS at `cas_publish` step 7 (`Inv_NoFork` proves this + // sufficient). The previous code held a per-repo `tokio::Mutex`, but + // that only spanned one process; once we run >1 relay instance + // (which is the point of "no local stateful disk"), it spans nothing + // and CAS is the only serialization that holds. The named tradeoff: + // two concurrent same-repo pushes each hydrate + run receive-pack, + // and the loser's CPU/IO is thrown away on `Conflict`. **Accepted + // for v1** — same-ref contention is rare, and a cross-instance lock + // would be a distributed-lock service we explicitly don't want. + // If contention shows up in metrics, the fix is a short local + // best-effort lock as a *latency optimization*, never a correctness + // dependency. (Eva's call, on record in #proj-git-on-s3 with the + // ParentState seam review.) + + // Hydrate parent state + workspace in one round-trip. ParentState + // travels with the workspace into finalize_push so the CAS predicates + // on the same pointer ETag the workspace was hydrated from. + let (repo, parent_state) = hydrate_for_write(&state.git_store, ¶ms.owner, ¶ms.repo) + .await + .map_err(|e| hydrate_error_to_response(¶ms.owner, ¶ms.repo, e))?; + + // Install the pre-receive hook into the ephemeral workspace. The + // hook script is fixed per-deployment; per-push state (callback URL, + // HMAC secret, pusher pubkey) rides in env at exec time. + install_hook(repo.path()).await.map_err(|e| { + error!(error = %e, "install pre-receive hook into hydrated workspace"); + (StatusCode::INTERNAL_SERVER_ERROR, "git hook install failed").into_response() + })?; // Build hook env vars for the pre-receive hook. - // The hook uses these to call back to the internal policy endpoint. let hook_url = format!( "http://127.0.0.1:{}/internal/git/policy", state.config.bind_addr.port() ); - // SECURITY: Force core.hooksPath via env to prevent repo-local config from - // overriding the hook directory. Without this, a malicious repo config could - // set core.hooksPath=/dev/null to bypass the pre-receive hook entirely. - let hooks_dir = validated.repo_path.join("hooks").display().to_string(); + let hooks_dir = repo.path().join("hooks").display().to_string(); let hook_env = vec![ ("SPROUT_HOOK_URL", hook_url), ( @@ -462,114 +500,71 @@ pub async fn receive_pack( ("SPROUT_REPO_ID", repo_name.to_string()), ("SPROUT_REPO_OWNER", params.owner.clone()), ("SPROUT_PUSHER_PUBKEY", pusher_hex.clone()), - // Override any repo-local core.hooksPath setting. + // Override any repo-local core.hooksPath setting; defense in + // depth even though the hydrated workspace has no inherited + // config. ("GIT_CONFIG_COUNT", "1".to_string()), ("GIT_CONFIG_KEY_0", "core.hooksPath".to_string()), ("GIT_CONFIG_VALUE_0", hooks_dir), ]; - // Snapshot refs before push — used by the post-push fence to detect - // whether anything actually changed. - let refs_before = snapshot_refs(&validated.repo_path).await; - - // Run receive-pack and obtain the *owned* subprocess output. Crucially we - // do NOT build a `Response` here — that work is delegated to - // `finalize_push`, which is the sole site that converts a push's - // `PackOutput` into a 2xx response (see the type docs on `PackOutput`). - let pack = run_git_subprocess( - &state, - ¶ms.owner, - ¶ms.repo, - "receive-pack", - body, - &hook_env, - ) - .await?; + // Run receive-pack against the tempdir. Returns the *owned* subprocess + // output (PackOutput) — crucially NOT a Response, so the post-push + // fence in finalize_push can sequence the CAS before any 2xx exists. + let pack = run_git_at(repo.path(), "receive-pack", body, &hook_env).await?; let ctx = PushContext { pack, - refs_before, + parent_state, owner: params.owner.clone(), repo: params.repo.clone(), + repo_id: repo_name.to_string(), pusher: auth.pubkey, - repo_path: validated.repo_path.clone(), + repo_handle: repo, }; - Ok(finalize_push(state, ctx).await) + Ok(finalize_push(&state, ctx).await) } +// ── Subprocess Runner ──────────────────────────────────────────────────────── + /// Buffered output of a `git --stateless-rpc` subprocess. /// -/// The handler holds this as an owned value between subprocess completion and -/// response construction — this is the *structural seam* the post-push fence -/// relies on (see §Implementation Correspondence in -/// `docs/git-on-object-storage.md`): nothing reaches the client until the -/// handler has decided to build a `Response` from these bytes. +/// The handler holds this as an owned value between subprocess completion +/// and response construction — this is the *structural seam* the post-push +/// fence relies on (see §Implementation Correspondence in +/// `docs/git-on-object-storage.md`): nothing reaches the client until +/// [`finalize_push`] has decided to build a `Response` from these bytes. pub(crate) struct PackOutput { pub stdout: Vec, } -/// Shared git service runner — spawns subprocess and builds the canonical -/// `application/x-git-{service}-result` response. Used for `info/refs` and -/// `upload-pack` (read paths, no post-publish fence). +/// Spawn a `git --stateless-rpc ` subprocess against the given +/// path, stream the request body to stdin, and return the buffered +/// stdout/stderr/exit status as a [`PackOutput`]. /// -/// Push (`receive-pack`) does **not** use this: it calls -/// [`run_git_subprocess`] directly to obtain a [`PackOutput`], then dispatches -/// to [`finalize_push`] which is the sole site that constructs a 2xx push -/// response from that buffer. -async fn run_git_service( - state: &Arc, - owner: &str, - repo: &str, - service: &str, - body: Body, -) -> Result { - let output = run_git_subprocess(state, owner, repo, service, body, &[]).await?; - Ok(build_git_response(service, output)) -} - -/// Spawn a `git --stateless-rpc ` subprocess, stream the request body -/// to stdin, and return the buffered stdout/stderr/exit status as a -/// [`PackOutput`]. -/// -/// Critically returns the **owned** subprocess output rather than a `Response`, -/// so callers can sequence post-subprocess work (e.g. the push fence) before -/// any byte reaches the client. -async fn run_git_subprocess( - state: &Arc, - owner: &str, - repo: &str, +/// Critically returns the **owned** subprocess output rather than a +/// `Response`, so callers can sequence post-subprocess work (the push +/// fence) before any byte reaches the client. +async fn run_git_at( + repo_path: &Path, service: &str, body: Body, extra_env: &[(&str, String)], ) -> Result { - let validated = validate_repo_path(owner, repo, &state.config.git_repo_path)?; - if !validated.repo_path.exists() { - return Err((StatusCode::NOT_FOUND, "repository not found").into_response()); - } - - let _permit = state.git_semaphore.try_acquire().map_err(|_| { - Response::builder() - .status(StatusCode::SERVICE_UNAVAILABLE) - .header("Retry-After", "5") - .body(Body::from("git service busy")) - .unwrap() - })?; - let mut cmd = Command::new("git"); cmd.arg(service) .arg("--stateless-rpc") - .arg(&validated.repo_path) + .arg(repo_path) .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .kill_on_drop(true); harden_git_env(&mut cmd); - // Pass extra env vars (e.g., hook callback URL and HMAC secret). for (key, value) in extra_env { cmd.env(key, value); } let mut child = cmd.spawn().map_err(|e| { - error!(error = %e, "git subprocess failed to spawn"); + error!(error = %e, service = %service, "git subprocess failed to spawn"); (StatusCode::INTERNAL_SERVER_ERROR, "git error").into_response() })?; @@ -591,16 +586,10 @@ async fn run_git_subprocess( Err(_) => break, } } - drop(stdin); // Close stdin → EOF for git. + drop(stdin); // close stdin → EOF for git }); - // Grab an abort handle before moving body_task into the timeout block. - // On timeout, we abort it explicitly — dropping a JoinHandle only detaches - // the task (it keeps running). A stalled client could otherwise keep the - // spawned task alive indefinitely waiting on stream.next().await. let body_abort = body_task.abort_handle(); - // Timeout covers both body streaming and subprocess completion. - // On timeout: child is killed via kill_on_drop, body_task via abort_handle. let timeout_result = tokio::time::timeout(PACK_OPS_TIMEOUT, async { let _ = body_task.await; child.wait_with_output().await @@ -614,7 +603,7 @@ async fn run_git_subprocess( return Err((StatusCode::GATEWAY_TIMEOUT, "git operation timed out").into_response()); } Ok(Err(e)) => { - error!(error = %e, "git subprocess failed"); + error!(error = %e, service = %service, "git subprocess failed"); return Err((StatusCode::INTERNAL_SERVER_ERROR, "git error").into_response()); } Ok(Ok(out)) => out, @@ -631,9 +620,10 @@ async fn run_git_subprocess( }) } -/// Build the canonical `application/x-git-{service}-result` response from a -/// completed subprocess. For the push path this is **only** reached via -/// [`finalize_push`]; for read paths it is reached via [`run_git_service`]. +/// Build the canonical `application/x-git-{service}-result` response from +/// a completed subprocess. For the push path this is **only** reached via +/// [`finalize_push`], which is the unique constructor of a push 2xx — the +/// structural fence. fn build_git_response(service: &str, output: PackOutput) -> Response { let content_type = format!("application/x-git-{service}-result"); Response::builder() @@ -644,218 +634,187 @@ fn build_git_response(service: &str, output: PackOutput) -> Response { .unwrap() } -// ── Post-Push Event Publishing ─────────────────────────────────────────────── - -/// Failure mode for a refs snapshot. -/// -/// Both variants are treated identically by [`finalize_push`]: any `Err` -/// collapses to the publish branch. Distinguished only for observability. -#[derive(Debug)] -pub(crate) enum SnapshotError { - /// `git for-each-ref` failed to spawn or exited non-zero. - CommandFailed, -} - -/// Quick snapshot of current refs — used to detect whether a push changed anything. -/// -/// Returns the raw `git for-each-ref` output as a string. Comparison is by -/// string equality — cheap and sufficient (same refs + same SHAs = same string). -/// -/// **Fallible by design.** An earlier version returned `""` on error, which -/// hid a silent-skip bug: if *both* the pre- and post-push snapshots failed, -/// the two empty strings compared equal and the handler skipped publish on -/// a push that did change refs. [`finalize_push`] now matches on -/// `Result` and treats any `Err` as "assume changed → -/// publish fires," closing the double-failure hole. See -/// `docs/git-on-object-storage.md` §Implementation Correspondence. -async fn snapshot_refs(repo_path: &std::path::Path) -> Result { - let mut cmd = Command::new("git"); - cmd.args(["for-each-ref", "--format=%(refname) %(objectname)"]) - .current_dir(repo_path); - harden_git_env(&mut cmd); - match cmd.output().await { - Ok(output) if output.status.success() => { - Ok(String::from_utf8_lossy(&output.stdout).into_owned()) - } - _ => Err(SnapshotError::CommandFailed), - } -} +// ── Post-Push Fence ────────────────────────────────────────────────────────── /// Per-push state captured between subprocess completion and response -/// construction. This is the single argument shape `finalize_push` consumes — -/// constructing a `PushContext` is the only path from a push subprocess to a -/// 2xx push response. +/// construction. Constructing a `PushContext` is the only path from a +/// push subprocess to a 2xx push response (see [`finalize_push`]) — the +/// structural fence (spec Theorem 1). pub(crate) struct PushContext { pub pack: PackOutput, - pub refs_before: Result, + /// Parent pointer state observed at hydrate time. The CAS in + /// `cas_publish` predicates on `parent_state.if_match`, so a + /// concurrent writer that advanced the pointer between hydrate and + /// CAS surfaces as `Conflict`/HTTP 409 — `Inv_RefDerivedFromParent` + /// is structural, not a code-review property. + pub parent_state: ParentState, pub owner: String, + /// Raw URL repo segment (may include `.git`). pub repo: String, + /// Stripped repo id (= `repo` with any `.git` suffix removed). Used + /// as the `d` tag on kind:30618 — must match the kind:30617 author's + /// `d` exactly. + pub repo_id: String, pub pusher: nostr::PublicKey, - pub repo_path: PathBuf, + /// The hydrated workspace handle. Held until response construction + /// (which happens *after* `cas_publish` returns) so the tempdir + /// outlives the receive-pack subprocess and the CAS publish. + pub repo_handle: HydratedRepo, } -/// Decide whether the post-push fence should publish based on the pre- and -/// post-push ref snapshots. +/// Finalize a push request: CAS-commit the new state into the object +/// store, derive kind:30618 from the committed manifest, and only then +/// build the success response. /// -/// Deny-by-default: publish unless we have **both** successful snapshots that -/// compare equal (i.e. a true no-op push). Any `Err` on either side — or any -/// difference between the two — falls through to publish. -/// -/// This is the function whose previous string-based predecessor had the -/// silent-skip bug: when both snapshots failed, both became `""`, they -/// "matched," and publish was skipped on a real push. Now both-`Err` is -/// structurally outside the skip arm. -pub(crate) fn should_publish( - before: &Result, - after: &Result, -) -> bool { - !matches!((before, after), (Ok(b), Ok(a)) if b == a) -} - -/// Finalize a push request: take a snapshot of the post-push ref state, decide -/// whether anything changed, conditionally publish kind:30618 inline, and only -/// then construct the success response. -/// -/// **The fence (Theorem 1 in `docs/git-on-object-storage.md`):** for a push -/// that changed refs (or where either ref snapshot failed), the `Response` is -/// not constructed until `publish_ref_state` has been awaited. No-op pushes — -/// where the before/after snapshots both succeed and compare equal — short- -/// circuit at the `(Ok(b), Ok(a)) if b == a` arm and skip publish entirely, -/// paying zero fence latency. -/// -/// The match below is written deny-by-default: only the `(Ok, Ok)`-and-equal -/// arm skips publish; every other state — including the previously load- -/// bearing **double-failure case** where two failed `snapshot_refs` calls -/// collide on the same value — falls through to the publish branch. -async fn finalize_push(state: Arc, ctx: PushContext) -> Response { - let refs_after = snapshot_refs(&ctx.repo_path).await; - - if should_publish(&ctx.refs_before, &refs_after) { - if let Err(e) = publish_ref_state(&state, &ctx.owner, &ctx.repo, &ctx.pusher).await { - // Today publish failure (relay DB insert) is non-fatal — the pack - // is durable on disk and the client sees success. The S3 manifest- - // CAS evolution of this seam will instead map a precondition - // failure (412) to HTTP 409 here; see - // `docs/git-on-object-storage.md` §Implementation Correspondence. +/// **The fence (Theorem 1):** the success response is constructed only +/// after `cas_publish` returns `Ok(CasSuccess)`. Lost-race / conflict / +/// backend failure all return *without* a 2xx. This is the unique +/// constructor of a push 2xx, so the seam is structural (not by +/// convention). +async fn finalize_push(state: &Arc, ctx: PushContext) -> Response { + // Step 7 (CAS). The PushContext binds `parent_state` (observed at + // hydrate) to the CAS predicate here — no re-reading of the pointer + // between hydrate and CAS. + let success = match cas_publish( + &state.git_store, + ctx.repo_handle.path(), + &ctx.owner, + &ctx.repo, + &ctx.parent_state, + ) + .await + { + Ok(s) => s, + Err(CasError::Conflict { + winner_manifest_key, + .. + }) => { warn!( - error = %e, owner = %ctx.owner, repo = %ctx.repo, - "failed to publish kind:30618" + winner = %winner_manifest_key, + "push lost CAS race; tempdir dropped, returning 409" ); + return ( + StatusCode::CONFLICT, + "push superseded by a concurrent writer; pull and retry", + ) + .into_response(); } - } - - build_git_response("receive-pack", ctx.pack) -} - -/// Publish kind:30618 (repo state) after a successful push. -/// -/// Reads current refs from the repo and publishes a relay-signed event -/// with the pusher's pubkey in a `p` tag. This makes pushes subscribable. -async fn publish_ref_state( - state: &Arc, - owner: &str, - repo: &str, - pusher: &nostr::PublicKey, -) -> anyhow::Result<()> { - let validated = validate_repo_path(owner, repo, &state.config.git_repo_path) - .map_err(|_| anyhow::anyhow!("invalid repo path"))?; - - // Read current refs. - let mut cmd = Command::new("git"); - cmd.args(["for-each-ref", "--format=%(refname) %(objectname)"]) - .current_dir(&validated.repo_path); - harden_git_env(&mut cmd); - let output = cmd.output().await?; - - if !output.status.success() { - return Err(anyhow::anyhow!("git for-each-ref failed")); - } - - let refs_output = String::from_utf8_lossy(&output.stdout); - - // Get HEAD symbolic ref. - let mut head_cmd = Command::new("git"); - head_cmd - .args(["symbolic-ref", "HEAD"]) - .current_dir(&validated.repo_path); - harden_git_env(&mut head_cmd); - let head_output = head_cmd.output().await.ok(); - - let repo_name = repo.strip_suffix(".git").unwrap_or(repo); - - // Build NIP-34 kind:30618 tags. - let mut tags = vec![nostr::Tag::custom(nostr::TagKind::custom("d"), [repo_name])]; - - for line in refs_output.lines() { - let parts: Vec<&str> = line.splitn(2, ' ').collect(); - if parts.len() != 2 { - continue; - } - let (ref_name, sha) = (parts[0], parts[1]); - - // NIP-34 kind:30618 only includes heads and tags — skip stash, notes, remotes. - if !ref_name.starts_with("refs/heads/") && !ref_name.starts_with("refs/tags/") { - continue; - } - // Validate ref name and SHA. - if ref_name.starts_with('/') - || ref_name.contains("//") - || !ref_name - .chars() - .all(|c| c.is_ascii_alphanumeric() || "/_.-".contains(c)) - { - continue; + Err(CasError::ManifestInvalid(e)) => { + // 4xx-class: the workspace produced refs/HEAD/oids the + // manifest validator rejects (unsafe refname, malformed oid, + // empty head, malformed parent). Pre-CAS — no pointer was + // written. + warn!( + owner = %ctx.owner, + repo = %ctx.repo, + error = %e, + "push rejected: manifest validation failed" + ); + return ( + StatusCode::BAD_REQUEST, + "push produced invalid manifest state", + ) + .into_response(); } - if sha.len() != 40 || !sha.chars().all(|c| c.is_ascii_hexdigit()) { - continue; + Err(e) => { + // 5xx-class: ManifestReadFailed (parent corruption), + // Backend, PackCapture. The tempdir drops on scope exit; no + // pointer was written (or, on rare ManifestReadFailed during + // winner-fetch, the winner is already installed and the + // loser's data is unrelated). + error!( + owner = %ctx.owner, + repo = %ctx.repo, + error = %e, + "push failed pre-response" + ); + return (StatusCode::INTERNAL_SERVER_ERROR, "git backend error").into_response(); } + }; - tags.push(nostr::Tag::custom(nostr::TagKind::custom(ref_name), [sha])); - } - - // HEAD tag. - if let Some(head) = head_output { - if head.status.success() { - let head_ref = String::from_utf8_lossy(&head.stdout).trim().to_string(); - if !head_ref.is_empty() { - tags.push(nostr::Tag::custom( - nostr::TagKind::custom("HEAD"), - [format!("ref: {head_ref}")], - )); + // Derived after CAS: kind:30618 ref-state event over the *committed* + // manifest's refs/head. Spec §Implementation Correspondence: + // "kind:30618 is derived after CAS, never the commit." We emit only + // when the committed manifest differs from the parent — a true no-op + // push pays no 30618 cost. + // + // **Strict no-op detection.** We emit unless the canonical manifest + // is byte-identical to the parent (Dawn's `canonical_bytes` is + // deterministic, so equal published state ⇒ equal digest by + // construction). The cases this differs from "refs+head equality": + // pack-only changes (rare; internal recompaction, or a push that + // produces a new pack covering existing tips with different deltas) + // would emit a 30618 with identical `(refs, head)`. The relay DB's + // `Ok((_, false))` arm below dedups it for free — one extra DB + // round-trip per pack-only push, which clients don't normally + // generate. Tightening to refs+head equality is a future + // micro-optimization only if that dedup cost becomes visible. + let parent_digest_str: Option<&str> = ctx.parent_state.parent_digest.as_deref(); + let after_digest = success.manifest_key.strip_prefix("manifests/"); + let manifest_changed = match (parent_digest_str, after_digest) { + (Some(before), Some(after)) => before != after, + _ => true, // first push (parent None) or impossible-shape after key → publish + }; + if manifest_changed { + let inputs = RefStateInputs { + repo_id: &ctx.repo_id, + head: &success.manifest.head, + refs: &success.manifest.refs, + actor_pubkey_hex: &hex::encode(ctx.pusher.serialize()), + }; + match build_ref_state_event(&inputs, &state.relay_keypair) { + Ok(event) => match state.db.insert_event(&event, None).await { + Ok((stored, true)) => { + let matches = state.sub_registry.fan_out(&stored); + for (conn_id, sub_id) in matches { + let _ = state.conn_manager.send_to( + conn_id, + crate::protocol::RelayMessage::event(&sub_id, &stored.event), + ); + } + info!( + owner = %ctx.owner, + repo = %ctx.repo_id, + manifest = %success.manifest_key, + "kind:30618 published (derived after CAS)" + ); + } + Ok((_, false)) => { + info!( + owner = %ctx.owner, + repo = %ctx.repo_id, + "kind:30618 deduplicated by relay db" + ); + } + Err(e) => { + warn!( + owner = %ctx.owner, + repo = %ctx.repo_id, + error = %e, + "kind:30618 insert failed; push remains durable in object store" + ); + } + }, + Err(e) => { + warn!( + owner = %ctx.owner, + repo = %ctx.repo_id, + error = %e, + "kind:30618 build failed; push remains durable in object store" + ); } } } - // Pusher pubkey in p tag. - tags.push(nostr::Tag::public_key(*pusher)); - - info!( - repo = %repo_name, - owner = %owner, - ref_count = tags.len().saturating_sub(2), - "publishing kind:30618 ref state" - ); - - // Sign with relay keypair — the relay is the authoritative source of ref state. - let event = nostr::EventBuilder::new(nostr::Kind::Custom(30618), "", tags) - .sign_with_keys(&state.relay_keypair) - .map_err(|e| anyhow::anyhow!("failed to sign kind:30618: {e}"))?; - - // Store globally (channel_id = None) and fan out to subscribers. - let (stored, was_inserted) = state.db.insert_event(&event, None).await?; - if was_inserted { - let matches = state.sub_registry.fan_out(&stored); - for (conn_id, sub_id) in matches { - let _ = state.conn_manager.send_to( - conn_id, - crate::protocol::RelayMessage::event(&sub_id, &stored.event), - ); - } - } - - Ok(()) + // Only now — after CAS commit and (optional) 30618 emission — build + // the 2xx. The tempdir's lifetime is tied to `ctx.repo_handle`, which + // we drop after building the response so the subprocess output bytes + // are fully consumed before the on-disk objects vanish. + let response = build_git_response("receive-pack", ctx.pack); + drop(ctx.repo_handle); + response } // ── Router Builder ─────────────────────────────────────────────────────────── @@ -873,71 +832,3 @@ pub fn git_router(state: Arc) -> Router { .layer(RequestBodyLimitLayer::new(body_limit)) .with_state(state) } - -// ── Tests ──────────────────────────────────────────────────────────────────── - -#[cfg(test)] -mod tests { - use super::*; - - fn ok(s: &str) -> Result { - Ok(s.to_string()) - } - fn err() -> Result { - Err(SnapshotError::CommandFailed) - } - - /// True no-op push: both snapshots succeed and are equal → publish skipped. - /// This is the fast path; the fence pays zero latency. - #[test] - fn should_publish_skips_on_noop() { - let before = ok("refs/heads/main abc123\n"); - let after = ok("refs/heads/main abc123\n"); - assert!(!should_publish(&before, &after)); - } - - /// Refs changed → publish fires. The fence engages and the response - /// must wait for `publish_ref_state` to complete before being built. - #[test] - fn should_publish_fires_on_changed_refs() { - let before = ok("refs/heads/main abc123\n"); - let after = ok("refs/heads/main def456\n"); - assert!(should_publish(&before, &after)); - } - - /// First push to an empty repo: before is `Ok("")`, after has real refs. - /// They differ; publish fires. Closes "what if the empty-repo case is - /// modeled by `Ok("")` instead of `Err`." - #[test] - fn should_publish_fires_on_first_push_to_empty_repo() { - let before = ok(""); - let after = ok("refs/heads/main abc123\n"); - assert!(should_publish(&before, &after)); - } - - /// Snapshot fails *before* the push: assume changed → publish fires. - /// Anything else would risk dropping a publish on a real ref change. - #[test] - fn should_publish_fires_when_before_errors() { - assert!(should_publish(&err(), &ok("refs/heads/main abc123\n"))); - } - - /// Snapshot fails *after* the push: assume changed → publish fires. - #[test] - fn should_publish_fires_when_after_errors() { - assert!(should_publish(&ok("refs/heads/main abc123\n"), &err())); - } - - /// The bug this whole fix exists to close: both snapshots fail. - /// - /// The pre-refactor implementation returned `""` on error from - /// `snapshot_refs`, so two failed scans both produced `""`, compared - /// equal, and the handler silently skipped publish on a ref-changing - /// push. Under `Result<_, _>` this case is structurally outside the - /// skip arm: `(Err, Err)` does not match `(Ok(b), Ok(a)) if b == a`. - /// Publish fires. - #[test] - fn should_publish_fires_when_both_snapshots_error() { - assert!(should_publish(&err(), &err())); - } -} diff --git a/crates/sprout-relay/src/state.rs b/crates/sprout-relay/src/state.rs index 7706ff167..fe2ae9f19 100644 --- a/crates/sprout-relay/src/state.rs +++ b/crates/sprout-relay/src/state.rs @@ -180,11 +180,11 @@ pub struct AppState { pub conn_semaphore: Arc, /// Semaphore limiting concurrent message handler tasks. pub handler_semaphore: Arc, - /// Semaphore limiting concurrent git subprocess operations. + /// Semaphore limiting concurrent git subprocess operations across + /// the whole relay. Bounds resource use; **not** writer + /// serialization — that's the CAS at the manifest pointer (spec + /// §Push step 7, `Inv_NoFork`). pub git_semaphore: Arc, - /// Per-repo mutex map — prevents concurrent pushes to the same bare repo. - /// Key: canonical repo path. Value: mutex guarding exclusive push access. - pub git_repo_locks: Arc>>>, /// Workflow engine for background processing. pub workflow_engine: Arc, @@ -340,7 +340,6 @@ impl AppState { conn_semaphore: Arc::new(Semaphore::new(max_connections)), handler_semaphore: Arc::new(Semaphore::new(max_concurrent_handlers)), git_semaphore: Arc::new(Semaphore::new(git_max_concurrent_ops)), - git_repo_locks: Arc::new(DashMap::new()), workflow_engine, relay_keypair, From 37dad0463af616378df7f94ab366179c7b5bcac7 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 12:43:30 -0400 Subject: [PATCH 19/24] fix(relay/handlers): seed empty manifest pointer on kind:30617 announce MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The architectural split Eva's live e2e caught: 30617 created the on-disk bare repo but no S3 pointer, while the read path (info_refs/upload_pack) now hydrates from the pointer. Pointer-absent ⇒ Ok(None) ⇒ 404 by Max's intentional fail-closed, so announce-then-clone returned 404 for every freshly-announced repo. Fix: establish the invariant "repo announced ⟺ pointer exists" by seeding an empty-manifest pointer at the end of `handle_git_repo_ announcement`. After seed: - `info_refs` stays strictly fail-closed: pointer-absent means never-announced, not "announced but no pushes." Max's blocker preserved exactly. - `live_hydrate_empty_repo` proves clone of an empty hydrated repo works. - First push CASes the seeded pointer via `IfMatch(etag)`, no special-case branch. The `IfNoneMatchStar` arm of `cas_publish` becomes dead code for announced repos. Two new private helpers in `side_effects.rs`: - `seed_manifest_pointer(state, owner, repo)` — `put_manifest(empty)` + `put_pointer(IfNoneMatchStar)`. **Idempotency is constructive, not trusted**: a `CasOutcome::LostRace` is treated as success ONLY if the existing pointer body matches the empty manifest's digest. Any other value (stale from a prior lifecycle, real misconfiguration) surfaces as `anyhow!` — Max's guardrail #1. - `emit_initial_ref_state(state, owner, repo)` — kind:30618 over the seeded empty manifest using Sami's `build_ref_state_event`. Pointer is the commit; this event is the derived "repo exists, empty" signal — Max's guardrail #2. Non-fatal on failure: manifest is truth, 30618 is just notification. Empty manifests across all repos share canonical bytes (deterministic serialization by construction) ⟹ same digest ⟹ `put_manifest` is idempotent at the store level. One blob, many pointers. Pinned by `empty_manifest_validates` test in manifest.rs with byte-stable canonical bytes — locks the digest so future serde version bumps don't silently shift it. Rollback on seed failure: remove the on-disk repo + the name reservation, same pattern as hook-install failure. A successful seed but failed emit leaves the pointer in place (correctly — it's the source of truth). 239/239 relay tests (was 238 + 1 new), clippy + fmt clean. Co-authored-by: Dawn Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/manifest.rs | 24 +++ .../sprout-relay/src/handlers/side_effects.rs | 156 ++++++++++++++++++ 2 files changed, 180 insertions(+) diff --git a/crates/sprout-relay/src/api/git/manifest.rs b/crates/sprout-relay/src/api/git/manifest.rs index 12ad43199..ed4b093ec 100644 --- a/crates/sprout-relay/src/api/git/manifest.rs +++ b/crates/sprout-relay/src/api/git/manifest.rs @@ -297,6 +297,30 @@ mod tests { sample().validate().expect("sample manifest must validate"); } + /// The empty manifest is the announce-time seed (`side_effects.rs:: + /// seed_manifest_pointer`). It must validate — otherwise repo announce + /// would fail before the pointer can be seeded, and the read path would + /// 404 every freshly-announced repo. This pins that contract. + #[test] + fn empty_manifest_validates() { + let m = Manifest { + version: MANIFEST_VERSION, + head: "refs/heads/main".into(), + refs: BTreeMap::new(), + packs: Vec::new(), + parent: None, + }; + m.validate().expect("empty manifest is the announce-seed"); + // Canonical bytes must be deterministic + stable so all empty manifests + // share one digest (idempotent put_manifest, one shared S3 object). + let bytes = m.canonical_bytes().expect("serialize"); + let s = std::str::from_utf8(&bytes).expect("utf8"); + assert_eq!( + s, + r#"{"version":1,"head":"refs/heads/main","refs":{},"packs":[],"parent":null}"# + ); + } + #[test] fn validate_rejects_empty_head() { let mut m = sample(); diff --git a/crates/sprout-relay/src/handlers/side_effects.rs b/crates/sprout-relay/src/handlers/side_effects.rs index 4419d2e50..6aa7e101a 100644 --- a/crates/sprout-relay/src/handlers/side_effects.rs +++ b/crates/sprout-relay/src/handlers/side_effects.rs @@ -1871,6 +1871,23 @@ async fn handle_git_repo_announcement(event: &Event, state: &Arc) -> a anyhow::anyhow!("failed to install pre-receive hook: {e}") })?; + // Seed the empty-manifest pointer in object storage. Establishes the + // invariant "repo announced ⟺ pointer exists" so the read path can rely + // on pointer-absent meaning never-announced (not just no-pushes-yet), + // keeping `info_refs`'s fail-closed `Ok(None) → 404` unambiguous. + // First push CASes the seeded pointer normally — no special-case branch. + seed_manifest_pointer(state, &owner_hex, &repo_id) + .await + .map_err(|e| { + // Pointer seed failure leaves the on-disk repo + name reservation + // without a clone-able pointer, which is exactly the broken state + // the seed exists to prevent. Unwind both so the announce is + // either fully consummated or fully rolled back. + let _ = std::fs::remove_dir_all(&repo_dir); + let _ = std::fs::remove_dir(&reservation); + anyhow::anyhow!("failed to seed manifest pointer: {e}") + })?; + info!( repo_id = %repo_id, owner = %owner_hex, @@ -1878,6 +1895,145 @@ async fn handle_git_repo_announcement(event: &Event, state: &Arc) -> a "bare git repo created from kind:30617 announcement" ); + // Derived after the pointer commits: kind:30618 ref-state event over the + // seeded empty manifest. Pointer is the commit; this event is the + // notification that the repo exists (with empty refs) so subscribers see + // a first signal without waiting for the first push. + if let Err(e) = emit_initial_ref_state(state, &owner_hex, &repo_id).await { + // Non-fatal: the manifest is the source of truth; this is just the + // derived notification. A failure here means subscribers miss the + // "repo now exists" event, but clone/push still works. + warn!( + repo_id = %repo_id, + owner = %owner_hex, + error = %e, + "failed to emit initial kind:30618 ref state (non-fatal)" + ); + } + + Ok(()) +} + +/// Default symbolic HEAD for a freshly-announced (empty) repo. Matches +/// `init.defaultBranch=main` (git ≥ 2.28) and the seed used by +/// `live_hydrate_empty_repo`. Pinned in one place so the seeded manifest +/// and the initial kind:30618 emission can't drift. +/// +/// The first push's `cas_publish` overwrites this with the real symbolic +/// HEAD observed in the receive-pack workspace via standard CAS, so the +/// default is a stand-in, not a permanent commitment. +const DEFAULT_HEAD: &str = "refs/heads/main"; + +/// Seed the manifest-pointer for a newly-announced repo with an empty manifest. +/// +/// Idempotent: a `CasOutcome::LostRace` is treated as success **only if** the +/// existing pointer names the same empty manifest digest. Any other pre-existing +/// pointer body (e.g. a non-empty manifest from a previous announce/push pair +/// for the same `(owner, repo)`) surfaces as an error rather than silently +/// succeeding — that would mask a real misconfiguration. +async fn seed_manifest_pointer( + state: &Arc, + owner_hex: &str, + repo_id: &str, +) -> anyhow::Result<()> { + use crate::api::git::manifest::{pointer_key, Manifest, MANIFEST_VERSION}; + use crate::api::git::store::{CasOutcome, Precond}; + use std::collections::BTreeMap; + + // The empty manifest. All empty manifests across all repos share canonical + // bytes — by design — so `put_manifest` is idempotent at the store level + // too. + let empty = Manifest { + version: MANIFEST_VERSION, + head: DEFAULT_HEAD.to_string(), + refs: BTreeMap::new(), + packs: Vec::new(), + parent: None, + }; + empty + .validate() + .map_err(|e| anyhow::anyhow!("empty manifest failed validation: {e}"))?; + let bytes = empty + .canonical_bytes() + .map_err(|e| anyhow::anyhow!("empty manifest serialize: {e}"))?; + let manifest_key = state + .git_store + .put_manifest(&bytes) + .await + .map_err(|e| anyhow::anyhow!("put_manifest: {e}"))?; + let digest = manifest_key + .strip_prefix("manifests/") + .ok_or_else(|| anyhow::anyhow!("put_manifest returned non-standard key: {manifest_key}"))?; + + let pkey = pointer_key(owner_hex, repo_id); + let outcome = state + .git_store + .put_pointer(&pkey, digest.as_bytes(), Precond::IfNoneMatchStar) + .await + .map_err(|e| anyhow::anyhow!("put_pointer: {e}"))?; + match outcome { + CasOutcome::Won(_) => Ok(()), + CasOutcome::LostRace => { + // Pointer already exists. Idempotency check: only treat as success + // if it names the same empty manifest digest. Any other value is + // either a stale pointer from a prior repo lifecycle for the same + // (owner, repo) or a real misconfiguration — surface, don't swallow. + let (_etag, body) = state + .git_store + .get_pointer(&pkey) + .await + .map_err(|e| anyhow::anyhow!("re-read pointer after LostRace: {e}"))? + .ok_or_else(|| anyhow::anyhow!("pointer vanished after LostRace race"))?; + let existing = std::str::from_utf8(&body) + .map_err(|e| anyhow::anyhow!("pointer body not utf-8: {e}"))? + .trim(); + if existing != digest { + return Err(anyhow::anyhow!( + "repo '{repo_id}' for owner {owner_hex} already has a non-empty pointer \ + ({existing}); refusing to overwrite via announce" + )); + } + Ok(()) + } + } +} + +/// Emit the initial kind:30618 ref-state event for a freshly-announced repo. +/// +/// The seeded empty manifest is the source of truth; this event is the +/// derived notification. Fires once per announce, signed by the relay, +/// carrying the announcer's pubkey in the `p` tag (sprout extension). +async fn emit_initial_ref_state( + state: &Arc, + owner_hex: &str, + repo_id: &str, +) -> anyhow::Result<()> { + use crate::api::git::manifest_event::{build_ref_state_event, RefStateInputs}; + use std::collections::BTreeMap; + + let empty_refs: BTreeMap = BTreeMap::new(); + let inputs = RefStateInputs { + repo_id, + head: DEFAULT_HEAD, + refs: &empty_refs, + actor_pubkey_hex: owner_hex, + }; + let event = build_ref_state_event(&inputs, &state.relay_keypair) + .map_err(|e| anyhow::anyhow!("build_ref_state_event: {e}"))?; + let (stored, was_inserted) = state + .db + .insert_event(&event, None) + .await + .map_err(|e| anyhow::anyhow!("insert kind:30618: {e}"))?; + if was_inserted { + let matches = state.sub_registry.fan_out(&stored); + for (conn_id, sub_id) in matches { + let _ = state.conn_manager.send_to( + conn_id, + crate::protocol::RelayMessage::event(&sub_id, &stored.event), + ); + } + } Ok(()) } From 63f589badb81bc14ab5ef4addd22046b5331b596 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 12:55:26 -0400 Subject: [PATCH 20/24] =?UTF-8?q?docs(git-on-object-storage):=20v1=20deplo?= =?UTF-8?q?yment=20architecture=20+=20=C2=A7Impl=20Correspondence=20refres?= =?UTF-8?q?h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit §Scope-and-Non-Goals gains a 'v1 deployment architecture' subsection: - Stateless per-request hydration as the v1 read+write path (hydrate_for_read / hydrate_for_write). Multi-instance-ready by construction; nothing on local disk needs cross-instance coordination. - The accepted v1 tradeoff: under concurrent same-repo pushes, every contender hydrates and runs receive-pack, and CAS losers' subprocess work is discarded. Wasted CPU/IO under contention, not a correctness bug — Inv_NoFork holds because the CAS is the only writer serialization. - Bounded retry on classified-terminal-vs-transport errors is parked, not closed. The checked-in e2e runs an 8-way live CAS race against MinIO with no retry layer (the regression fence); a local 16-way run confirms the property holds at that width too (calibration evidence, not the CI default). v1 ships without retry. The open question — 'is the no-retry default safe past MinIO and ≤16-way?' — re-opens on a different backend or sustained load. Non-negotiable rule: retry, if added, retries only pre-classification network errors; never Ok(2xx), LostRace(412), or NotFound(404). §Implementation Correspondence refreshed to match PR #726's tip. The 'fallible snapshot' / 'conditional fence' / 'should_publish' bullets and the pre-S3 line-number table predated the S3-CAS implementation and were actively stale; replaced with four current obligations: - Unique constructor seam (unchanged: finalize_push is still the sole push 2xx constructor). - Parent observed once: hydrate_for_write returns (HydratedRepo, ParentState); cas_publish predicates the CAS on the same ETag the workspace was hydrated against, never re-reads. Rust analogue of Inv_RefDerivedFromParent. - 412 → 409 terminal: CasError::Conflict is typed and distinct from Backend(StoreError); ?-bubbling cannot turn 412 into 500; no in-handler retry. Closes the 'future work' gap the previous version named. - kind:30618 derived after CAS: emission gated on manifest digest change; event built from CasSuccess.manifest (the values that physically landed); relay-signed; non-fatal on insert error. - No advisory lock: writer serialization is the CAS; the legacy per-repo mutex is incompatible with the multi-instance architecture and is gone. Spec-element table updated to current file locations across manifest.rs / store.rs / hydrate.rs / cas_publish.rs / manifest_event.rs / transport.rs (line numbers pinned at landing time; reviewers checking after later refactors should use symbol search, not line counts). 239 relay lib tests + live MinIO e2e (roundtrip + N-way concurrent-push no-fork) green on PR #726's tip. Sole remaining named follow-up: behavioral integration test for runtime ordering (publish-before-response), pending a mockable-CAS seam. The no-fork claim is empirically gated by the live N-way e2e. Co-authored-by: Sami Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- docs/git-on-object-storage.md | 159 ++++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 48 deletions(-) diff --git a/docs/git-on-object-storage.md b/docs/git-on-object-storage.md index c5fc98e6f..901cb13d8 100644 --- a/docs/git-on-object-storage.md +++ b/docs/git-on-object-storage.md @@ -49,6 +49,41 @@ Stating this boundary is part of the claim. "Provably sound" without naming the trust boundary does not survive scrutiny; "safety is machine-checkable relative to three stated axioms, each empirically gated per backend" does. +### v1 deployment architecture + +The implementation has *no per-repo persistent filesystem state*. Every request +hydrates an ephemeral working tree from the published manifest, runs the +appropriate git subprocess against it, and drops the tree on scope exit: +read paths (`info/refs`, `upload-pack`) via `hydrate_for_read`, the write path +(`receive-pack`) via `hydrate_for_write`, which also returns the `ParentState` +the CAS at §Push step 7 predicates on. The relay is multi-instance-ready by +construction: nothing on local disk needs to be coordinated between instances. + +The accepted v1 tradeoff: under concurrent same-repo pushes, every contender +hydrates and runs receive-pack, and the CAS losers' subprocess work is +discarded. This is wasted CPU/IO under contention, not a correctness bug — +`Inv_NoFork` (Theorem 3) holds because the CAS is the only writer +serialization. Same-ref concurrent push is rare; the alternative (a +cross-instance lock service) is the kind of dependency this protocol exists +to avoid. If contention ever shows up in metrics the fix is a short +best-effort *local* lock as a latency optimization, never a correctness +dependency. + +A bounded retry layer on classified-terminal-vs-transport errors is **parked, +not closed.** The checked-in regression fence is the 8-way live CAS race +(`e2e_git::git_concurrent_push_one_wins_and_repo_recovers`), which passes +against MinIO with no retry layer; we ship v1 without one. (A one-off 16-way +local run against MinIO also passed, as separate calibration evidence that +the property holds at greater width — the regression test stays at 8 because +each contender clones/commits/pushes through real `git` and the cost grows +with width.) The open question — "is the no-retry default safe past MinIO +and beyond the widths so far exercised?" — re-opens on a different backend +or a sustained-load regime the conformance probe (§Conformance) doesn't +already exercise. The non-negotiable rule: retry, if added, lives in the +store layer and retries *only* pre-classification network errors — never +`Ok(2xx)`, `LostRace(412)`, or `NotFound(404)`. Retrying a classified +outcome would change the TLA action and break the proof. + ## System Model A **repository** `R` has the following state in the object store: @@ -386,57 +421,85 @@ transfer: `finalize_push`, the fence would be convention, not structure, and Theorem 1 would not hold. This is a checkable code property, verified by reviewers against the actual seam (`finalize_push`). -- **Fallible snapshot.** Ref-state snapshots are `Result`, and the skip predicate - is `Ok(after) == Ok(before)`, never `after == before` over values that default - to equal on error. Any `Err` on either snapshot falls through to publish - ("assume changed"). This closes the double-failure hole — two failed scans - comparing equal and bypassing the fence — and is exactly the `MustPublish == - changes \/ snapErr` rule the model checks. -- **Conditional fence.** The fence engages only when refs changed; a no-op push - returns success without awaiting publish (the `SkipPublish` path). The - obligation is "publish-before-success *for ref-changing pushes*," not - unconditional publish. - -Six decision tests pin every arm of the fence predicate (`should_publish`): -no-op skip, changed-refs publish, first-push-to-empty-repo publish, before-error -publish, after-error publish, and the load-bearing both-snapshots-fail publish. -Runtime *ordering* — publish completes before the `Response` is constructed — is -enforced by `finalize_push` being a single sequential async function (no detached -`tokio::spawn`), not by a test; a behavioral ordering test once a mockable publish -seam exists is named follow-up work. - -**Current code status (verified provenance).** The seam *shape* exists in code as -of `quinn/transport-fence-typesplit` @ `17df7884` (`crates/sprout-relay/src/api/ -git/transport.rs`), 198 relay tests green: +- **Parent observed once.** `hydrate_for_write` reads the pointer, fetches and + verifies the parent manifest, materializes the workspace from it, and returns + a `(HydratedRepo, ParentState)` pair where `ParentState` carries the exact + `(ETag, digest, Manifest)` triple the workspace was hydrated against. That + same `ParentState` rides on the `PushContext` through receive-pack, and + `cas_publish` predicates the CAS on `parent_state.if_match` — it never re-reads + the pointer. The "build on `d_old`, publish against `d_new`" hazard is closed + at the type system: a concurrent writer that advances the pointer between + hydrate and CAS surfaces as `CasError::Conflict`/HTTP 409, not as a manifest + whose `parent` disagrees with the refs the workspace produced. This is the + Rust analogue of `Inv_RefDerivedFromParent`. +- **412 → 409, terminal.** The CAS lost-race outcome maps to a typed + `CasError::Conflict { winner_manifest, winner_manifest_key }`. The variant is + distinct from `Backend(StoreError)` so `?`-bubbling cannot turn 412 into 500. + There is no in-handler retry: the loser's receive-pack output was derived + against a now-superseded parent, so reusing it would change the TLA action + and break `Inv_RefDerivedFromParent`. The client re-pushes, which re-hydrates + against the advanced state — that is the only safe retry, and `git` itself + drives it. +- **kind:30618 derived after CAS.** Emission is conditional on + `manifest_changed = (parent_digest != committed_digest)` — `Manifest:: + canonical_bytes` is deterministic, so equal published state ⇒ equal digest by + construction (no-op pushes pay no 30618 cost). The event is built by + `manifest_event::build_ref_state_event` from `CasSuccess.manifest` — the + values that *physically landed* via CAS, by `Inv_RefEffectApplied`. The event + is relay-signed (the relay is authoritative for ref state of repos it hosts); + the pusher's pubkey rides in a `p` tag (sprout extension; NIP-34 does not + define one). 30618 emission happens after `cas_publish` returns `Ok` and + before the success `Response` is constructed — so 30618 is a strict + consequence of a committed CAS, never the commit itself. A failed 30618 + insert is non-fatal: the push remains durable in the object store, and the + next read/push surfaces the committed state from the manifest. +- **No advisory lock.** Writer serialization is the CAS. The per-repo mutex the + legacy persistent-disk path used would only have spanned a single process and + is incompatible with the multi-instance v1 architecture (§Scope). Dropping it + was strictly more correct, not a risk — same-repo concurrent pushes each + hydrate + run receive-pack, and the CAS losers' work is discarded (an + accepted v1 tradeoff named in §Scope). + +**Current code status (verified provenance).** The full S3-CAS implementation +exists in code at PR #726's tip (`crates/sprout-relay/src/api/git/`), with the +relay lib green, clippy `--tests -D warnings` clean, fmt clean, and the live +MinIO e2e — clone/push/fetch/force-push roundtrip + N-way concurrent-push +no-fork — green on the assembled tip. (Line numbers below are pinned at +landing time; reviewers checking after subsequent refactors should consult +symbol search, not line counts.) | Spec element | Code | |---|---| -| `PackOutput { stdout: Vec }` | `:507` | -| `SnapshotError` | `:654` | -| `snapshot_refs -> Result<_, SnapshotError>` | `:671` | -| `PushContext { pack, refs_before, … }` | `:688` | -| `should_publish(before, after) -> bool` (pure, deny-by-default) | `:708` | -| `finalize_push(state, ctx) -> Response` — **the seam** | `:730` | -| `build_git_response` (sole `Body::from(stdout)` site) | `:637` | - -The push path reaches `build_git_response` *only* through `finalize_push`, which -consumes a `PushContext` — so the compiler enforces "no `PushContext` ⇒ no push -`Response`." (`build_git_response` is shared with read paths, but those carry no -`PushContext` and no fence obligation; push-side uniqueness is structural.) The -fallible-snapshot bite has a dedicated test, `should_publish_fires_when_both_ -snapshots_error` — the exact double-failure hole, now structurally outside the -skip arm. - -Two honest gaps remain, both named: -1. **`412 → 409` is future work.** Today `publish_ref_state` is a relay-DB insert - (`db.insert_event`, kind:30618), not an S3 pointer swap; its failure is logged. - The `if let Err(e)` arm in `finalize_push` (`:740`) is the plug point where the - S3 manifest-CAS evolution maps `412 → 409`. The spec describes that target; the - seam shape is real now, the S3 CAS is not yet. -2. **Runtime-ordering test deferred.** Six tests pin the publish *decision*; the - publish-before-response *ordering* is enforced by `finalize_push` being a single - sequential async fn (no `tokio::spawn`). A mockable-publish integration test is - the belt-and-suspenders follow-up. +| `Manifest { version, head, refs, packs, parent }` + `canonical_bytes` | `manifest.rs` | +| `Manifest::validate()` (pre-CAS rejection: refs/HEAD/OIDs/parent-shape) | `manifest.rs` | +| `GitStore::{put_pack, put_manifest, put_pointer}` (create-only + CAS) | `store.rs` | +| `run_conformance_probe` (A1/A3 fail-closed startup gate) | `store.rs` + `main.rs` | +| `hydrate_for_read` / `hydrate_for_write` | `hydrate.rs` | +| `ParentState { if_match, parent_digest, parent }` + `from_loaded`/`fresh` | `cas_publish.rs:154` | +| `cas_publish(.., &parent_state) -> Result` | `cas_publish.rs:410` | +| `CasError::Conflict { winner_manifest, winner_manifest_key }` (typed 412) | `cas_publish.rs:92` | +| `build_ref_state_event(&RefStateInputs, &Keys)` (NIP-34 kind:30618) | `manifest_event.rs` | +| `PushContext { pack, parent_state, repo_handle, … }` | `transport.rs:643` | +| `finalize_push(state, ctx) -> Response` — **the seam** | `transport.rs:674` | +| `build_git_response` (sole `Body::from(stdout)` site) | `transport.rs:627` | + +The push path reaches `build_git_response` *only* through `finalize_push`, +which consumes a `PushContext`; the compiler enforces "no `PushContext` ⇒ no +push `Response`." Read paths reach `build_git_response` independently after +hydrating the published state via `hydrate_for_read` — pointer-absent → 404, +any below-pointer failure → 5xx, never a synthesized empty repo (A1 +detectability holds in the read direction too). The 404 invariant is +unambiguous because kind:30617 announce seeds an empty-manifest pointer +*before* the announcement event is published: an announced repo is always +cloneable (empty refs, but a valid pointer), and pointer-absence means "never +announced." + +One named follow-up: a behavioral integration test for runtime ordering +(publish-before-response) — currently enforced by `finalize_push` being a +single sequential async fn (no detached `tokio::spawn`) — is the +belt-and-suspenders item to add once a mockable-CAS seam exists. The +mechanical no-fork claim is empirically gated by the live N-way +concurrent-push e2e (`e2e_git::git_concurrent_push_one_wins_and_repo_recovers`). ## Mechanized Verification From a898c184d464031d6c9266108bec45af03a85315 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 12:56:29 -0400 Subject: [PATCH 21/24] test(git): assert live e2e uses S3 manifest pointer Co-authored-by: Mari <95cae996907d7cab9f5dbf43c0f53edeac6ab0b032a6feae4abfd784e467b3f5@users.noreply.sprout> Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- Cargo.lock | 1 + crates/sprout-test-client/Cargo.toml | 1 + crates/sprout-test-client/tests/e2e_git.rs | 131 ++++++++++++++++++++- 3 files changed, 129 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afeb9a379..2fbba4148 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4293,6 +4293,7 @@ dependencies = [ "nostr", "rand 0.10.1", "reqwest 0.13.3", + "rust-s3", "rustls", "serde", "serde_json", diff --git a/crates/sprout-test-client/Cargo.toml b/crates/sprout-test-client/Cargo.toml index 3c0ce55df..0d2cf7350 100644 --- a/crates/sprout-test-client/Cargo.toml +++ b/crates/sprout-test-client/Cargo.toml @@ -34,6 +34,7 @@ hex = { workspace = true } rand = { workspace = true } sha2 = { workspace = true } chrono = { workspace = true } +s3 = { version = "0.37", package = "rust-s3", default-features = false, features = ["tokio-rustls-tls", "fail-on-err", "tags"] } [[bin]] name = "sprout-test-cli" diff --git a/crates/sprout-test-client/tests/e2e_git.rs b/crates/sprout-test-client/tests/e2e_git.rs index ff1976a39..88a6eba2c 100644 --- a/crates/sprout-test-client/tests/e2e_git.rs +++ b/crates/sprout-test-client/tests/e2e_git.rs @@ -19,8 +19,11 @@ use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::Duration; use nostr::{EventBuilder, Keys, Kind, Tag}; +use s3::creds::Credentials; +use s3::{Bucket, Region}; fn relay_http_url() -> String { std::env::var("RELAY_HTTP_URL").unwrap_or_else(|_| "http://localhost:3000".to_string()) @@ -101,6 +104,91 @@ fn git(args: &[&str], cwd: &Path, owner_nsec: &str) -> String { String::from_utf8_lossy(&out.stdout).into_owned() } +struct GitS3Probe { + bucket: Box, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct PointerSnapshot { + etag: String, + digest: String, +} + +impl GitS3Probe { + fn from_env() -> Self { + let endpoint = std::env::var("SPROUT_GIT_S3_ENDPOINT") + .or_else(|_| std::env::var("SPROUT_S3_ENDPOINT")) + .unwrap_or_else(|_| "http://localhost:9000".to_string()); + let access_key = std::env::var("SPROUT_GIT_S3_ACCESS_KEY") + .or_else(|_| std::env::var("SPROUT_S3_ACCESS_KEY")) + .unwrap_or_else(|_| "sprout_dev".to_string()); + let secret_key = std::env::var("SPROUT_GIT_S3_SECRET_KEY") + .or_else(|_| std::env::var("SPROUT_S3_SECRET_KEY")) + .unwrap_or_else(|_| "sprout_dev_secret".to_string()); + let bucket = std::env::var("SPROUT_GIT_S3_BUCKET") + .or_else(|_| std::env::var("SPROUT_S3_BUCKET")) + .unwrap_or_else(|_| "sprout-media".to_string()); + + let region = Region::Custom { + region: "us-east-1".into(), + endpoint, + }; + let creds = Credentials::new(Some(&access_key), Some(&secret_key), None, None, None) + .expect("S3 credentials"); + let bucket = Bucket::new(&bucket, region, creds) + .expect("S3 bucket") + .with_path_style(); + Self { bucket } + } + + fn pointer_key(owner: &str, repo: &str) -> String { + let repo = repo.strip_suffix(".git").unwrap_or(repo); + format!("repos/{owner}/{repo}/pointer") + } + + async fn pointer(&self, owner: &str, repo: &str) -> Option { + let key = Self::pointer_key(owner, repo); + match self.bucket.get_object(&key).await { + Ok(resp) => { + let etag = resp + .headers() + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("etag")) + .map(|(_, v)| v.to_string()) + .expect("pointer GET must include ETag"); + let digest = String::from_utf8(resp.to_vec()).expect("pointer body utf-8"); + assert_eq!(digest.len(), 64, "pointer body is manifest digest"); + assert!(digest.chars().all(|c| c.is_ascii_hexdigit())); + Some(PointerSnapshot { etag, digest }) + } + Err(s3::error::S3Error::HttpFailWithBody(404, _)) => None, + Err(e) => panic!("GET S3 pointer {key} failed: {e}"), + } + } + + async fn require_pointer(&self, owner: &str, repo: &str) -> PointerSnapshot { + for _ in 0..40 { + if let Some(p) = self.pointer(owner, repo).await { + self.assert_manifest_exists(&p.digest).await; + return p; + } + tokio::time::sleep(Duration::from_millis(250)).await; + } + panic!( + "S3 manifest pointer {} never appeared; git may have fallen back to disk", + Self::pointer_key(owner, repo) + ); + } + + async fn assert_manifest_exists(&self, digest: &str) { + let key = format!("manifests/{digest}"); + match self.bucket.get_object(&key).await { + Ok(_) => {} + Err(e) => panic!("pointer named manifest {key}, but GET failed: {e}"), + } + } +} + #[tokio::test] #[ignore = "requires live relay + MinIO + git"] async fn git_clone_push_fetch_force_roundtrip() { @@ -110,6 +198,7 @@ async fn git_clone_push_fetch_force_roundtrip() { let owner_hex = owner.public_key().to_hex(); let owner_nsec = owner.secret_key().to_bech32().unwrap(); let repo = format!("e2e-git-{}", std::process::id()); + let s3 = GitS3Probe::from_env(); // Announce the repo (kind:30617) so the relay creates the bare repo + hook. let announce = EventBuilder::new( @@ -136,6 +225,7 @@ async fn git_clone_push_fetch_force_roundtrip() { ); let clone1 = tmp.path().join("clone1"); assert!(clone1.exists(), "clone1 created"); + let empty_pointer = s3.require_pointer(&owner_hex, &repo).await; // 2. Push an initial commit. std::fs::write(clone1.join("README.md"), "hello\n").unwrap(); @@ -147,6 +237,11 @@ async fn git_clone_push_fetch_force_roundtrip() { ); git(&["branch", "-M", "main"], &clone1, &owner_nsec); git(&["push", "--quiet", "origin", "main"], &clone1, &owner_nsec); + let p1 = s3.require_pointer(&owner_hex, &repo).await; + assert_ne!( + p1, empty_pointer, + "initial push must advance S3 manifest pointer" + ); let sha1 = git(&["rev-parse", "main"], &clone1, &owner_nsec) .trim() .to_string(); @@ -177,6 +272,8 @@ async fn git_clone_push_fetch_force_roundtrip() { &owner_nsec, ); git(&["push", "--quiet", "origin", "main"], &clone1, &owner_nsec); + let p2 = s3.require_pointer(&owner_hex, &repo).await; + assert_ne!(p2, p1, "second push must advance S3 manifest pointer"); let sha2 = git(&["rev-parse", "main"], &clone1, &owner_nsec) .trim() .to_string(); @@ -203,6 +300,8 @@ async fn git_clone_push_fetch_force_roundtrip() { &clone1, &owner_nsec, ); + let p3 = s3.require_pointer(&owner_hex, &repo).await; + assert_ne!(p3, p2, "force push must advance S3 manifest pointer"); assert_ne!(sha_f, sha2); // 6. A new clone after the force-push gets the rewritten history. @@ -220,6 +319,8 @@ async fn git_clone_push_fetch_force_roundtrip() { // 7. Tag push survives the round-trip. git(&["tag", "v1.0"], &clone1, &owner_nsec); git(&["push", "--quiet", "origin", "v1.0"], &clone1, &owner_nsec); + let p4 = s3.require_pointer(&owner_hex, &repo).await; + assert_ne!(p4, p3, "tag push must advance S3 manifest pointer"); git( &["clone", "--quiet", &url, "clone4"], tmp.path(), @@ -238,6 +339,7 @@ async fn git_concurrent_push_one_wins_and_repo_recovers() { let owner_hex = owner.public_key().to_hex(); let owner_nsec = owner.secret_key().to_bech32().unwrap(); let repo = format!("e2e-git-concurrent-{}", std::process::id()); + let s3 = GitS3Probe::from_env(); let announce = EventBuilder::new( Kind::from(30617), @@ -262,11 +364,13 @@ async fn git_concurrent_push_one_wins_and_repo_recovers() { git(&["commit", "--quiet", "-m", "base"], &seed, &owner_nsec); git(&["branch", "-M", "main"], &seed, &owner_nsec); git(&["push", "--quiet", "origin", "main"], &seed, &owner_nsec); + let base_pointer = s3.require_pointer(&owner_hex, &repo).await; let base_sha = git(&["rev-parse", "main"], &seed, &owner_nsec) .trim() .to_string(); let contenders = 8usize; + let mut contenders_info = Vec::new(); for i in 0..contenders { let dir = format!("c{i}"); git(&["clone", "--quiet", &url, &dir], tmp.path(), &owner_nsec); @@ -282,6 +386,10 @@ async fn git_concurrent_push_one_wins_and_repo_recovers() { &worktree, &owner_nsec, ); + let sha = git(&["rev-parse", "main"], &worktree, &owner_nsec) + .trim() + .to_string(); + contenders_info.push((i, sha)); } let mut children = Vec::new(); @@ -297,18 +405,29 @@ async fn git_concurrent_push_one_wins_and_repo_recovers() { })); } - let mut successes = 0usize; + let mut successes = Vec::new(); let mut failures = 0usize; - for child in children { + for (i, child) in children.into_iter().enumerate() { let out = child.join().expect("push thread panicked"); if out.status.success() { - successes += 1; + successes.push(i); } else { failures += 1; } } - assert_eq!(successes, 1, "exactly one concurrent push should win"); + assert_eq!(successes.len(), 1, "exactly one concurrent push should win"); assert_eq!(failures, contenders - 1, "the rest should lose cleanly"); + let winner_index = successes[0]; + let winner_sha = contenders_info + .iter() + .find_map(|(i, sha)| (*i == winner_index).then_some(sha.clone())) + .expect("winner sha recorded"); + + let after_pointer = s3.require_pointer(&owner_hex, &repo).await; + assert_ne!( + after_pointer, base_pointer, + "winning push must advance S3 manifest pointer" + ); git( &["clone", "--quiet", &url, "after"], @@ -320,6 +439,10 @@ async fn git_concurrent_push_one_wins_and_repo_recovers() { .trim() .to_string(); assert_ne!(after_sha, base_sha, "winner advanced main"); + assert_eq!( + after_sha, winner_sha, + "published head must equal the successful contender's tip" + ); let log = git( &["log", "--oneline", "--decorate", "-1"], &after, From 61cc38f6b921938a8a943db96dd9306628107815 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 13:18:57 -0400 Subject: [PATCH 22/24] ci(desktop-e2e): cap git conformance probe at 8-way concurrency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The startup A3 conformance gate races race_width parallel CAS updates against the object store (default 32). The Desktop E2E job's MinIO couldn't service 32 simultaneous fresh connections at boot — one racer hit a reqwest send error (transport, not a conformance violation) and the fail-closed probe aborted relay startup. 8-way still exercises all four probe phases (sequential / if_match_race / if_none_match_race / etag_consistency) and proves A1/A2/A3; it just doesn't burst the connection pool. Matches the dev default. Follow-up (separate commit): the probe should classify transport errors (reqwest/http/io send failures) distinctly from conformance signals and bounded-retry them, so a saturated backend never gates startup at any width. Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 68661c0c9..5dda23bc8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -198,6 +198,7 @@ jobs: SPROUT_BIND_ADDR=0.0.0.0:3000 \ SPROUT_REQUIRE_AUTH_TOKEN=false \ SPROUT_RECONCILE_CHANNELS=true \ + SPROUT_GIT_PROBE_WRITERS=8 \ ./target/ci/sprout-relay > /tmp/sprout-relay.log 2>&1 & echo $! > /tmp/sprout-relay.pid for attempt in $(seq 1 60); do From 0ad56eba9557195e09d494eb3532c720ea74c246 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 14:00:53 -0400 Subject: [PATCH 23/24] feat(relay/git): drop dead on-disk bare repo from kind:30617 announce MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR's thesis is "no persistent per-repo disk state — runtime reads and writes hydrate an ephemeral bare repo from object storage per request." But handle_git_repo_announcement still wrote one: git init --bare + four git config calls + pre-receive hook install at git_repo_path//.git/. Nothing in the runtime ever read it — info_refs / upload_pack / receive_pack all go through hydrate_for_read / hydrate_for_write into a tempfile::TempDir and do their own init + hook per request. The on-disk repo was dead state that contradicted the architecture it shipped under. It wasn't a clean delete: repo_dir.exists() was the "already announced?" check and the owner subdir's entry-count was the per-pubkey quota, so removing the bare repo naively would have broken both. Collapse both onto the .names/ registry, which already existed for name uniqueness: .names//owner records the announcer. One mechanism now serves three jobs — atomic-create uniqueness, same-owner idempotent re-announce, and quota-by-owner. The path-canonicalization block (only there to guard the now-gone repo_dir) and the tokio::process::Command import go with it. git_repo_path now holds only the name index; doc comments on the handler, the dispatch arm, and the config field updated to say so. The one local-disk simplification (separate instances each grant a name; the CAS pointer, not this registry, prevents ref-state corruption) is called out in-code as the multi-instance follow-up. Net -114 LOC. Verified: full cargo test -p sprout-relay (239 passed), clippy -D warnings clean, fmt clean, and the live MinIO e2e (git_clone_push_fetch_force_roundtrip + concurrent-push no-fork race) green against a relay running this binary — with zero bare repos created on disk, only .names//owner reservations. Co-authored-by: Eva Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/config.rs | 5 +- .../sprout-relay/src/handlers/side_effects.rs | 265 +++++------------- 2 files changed, 78 insertions(+), 192 deletions(-) diff --git a/crates/sprout-relay/src/config.rs b/crates/sprout-relay/src/config.rs index 23b2cf4a7..dc0ca9c04 100644 --- a/crates/sprout-relay/src/config.rs +++ b/crates/sprout-relay/src/config.rs @@ -100,8 +100,9 @@ pub struct Config { pub ephemeral_ttl_override: Option, // ── Git server configuration ───────────────────────────────────────────── - /// Root directory for bare git repositories. - /// Repos are stored at `{git_repo_path}/{owner_hex}/{repo_id}.git/`. + /// Root directory for the relay's local git state. No per-repo bare repos + /// live here — runtime reads/writes hydrate ephemeral repos from object + /// storage. Holds only the name-reservation index at `{git_repo_path}/.names/`. pub git_repo_path: std::path::PathBuf, /// Maximum pack file size for git push (bytes). Default: 500 MB. pub git_max_pack_bytes: u64, diff --git a/crates/sprout-relay/src/handlers/side_effects.rs b/crates/sprout-relay/src/handlers/side_effects.rs index 6aa7e101a..41b52ad24 100644 --- a/crates/sprout-relay/src/handlers/side_effects.rs +++ b/crates/sprout-relay/src/handlers/side_effects.rs @@ -86,7 +86,7 @@ pub async fn handle_side_effects( } 9021 => handle_join_request(event, state).await, 9022 => handle_leave_request(event, state).await, - // NIP-34: Git repo announcement → create bare repo on disk. + // NIP-34: Git repo announcement → reserve name + seed manifest pointer. KIND_GIT_REPO_ANNOUNCEMENT => handle_git_repo_announcement(event, state).await, // kind:7 (reaction) handled inline in ingest_event() before storage. _ => Ok(()), @@ -1653,18 +1653,16 @@ fn validate_repo_id(repo_id: &str) -> bool { /// Handle kind:30617 (NIP-34 Git Repository Announcement). /// -/// Creates a bare git repo on disk when a repo announcement event is stored. -/// The event's `d` tag is the repo identifier; the pubkey is the owner. +/// Reserves the repo name and seeds its empty-manifest pointer when a repo +/// announcement event is stored. The event's `d` tag is the repo identifier; +/// the pubkey is the owner. No bare repo is created on disk — runtime reads +/// and writes hydrate an ephemeral repo from object storage per request. /// /// Security hardening: /// - Repo name validated: `[a-zA-Z0-9._-]{1,64}`, no leading dots, no `..` -/// - Owner pubkey validated: exactly 64 lowercase hex chars -/// - Path canonicalized and verified to start with repo root -/// - Pre-receive hook installed for permission enforcement (only hook enabled) +/// - Name reserved atomically (`.names/`), unique across owners /// - Per-pubkey repo count limit enforced async fn handle_git_repo_announcement(event: &Event, state: &Arc) -> anyhow::Result<()> { - use tokio::process::Command; - // Extract repo identifier from d tag (required for NIP-33 parameterized replaceable events). let repo_id = extract_tag_value(event, "d").ok_or_else(|| anyhow::anyhow!("kind:30617 missing d tag"))?; @@ -1677,53 +1675,74 @@ async fn handle_git_repo_announcement(event: &Event, state: &Arc) -> a let owner_hex = nostr::util::hex::encode(event.pubkey.serialize()); - // Resolve repo path. + // The relay holds no persistent per-repo disk state: runtime reads and + // writes hydrate an ephemeral bare repo from object storage per request + // (see `api::git::hydrate`). Announce only (1) reserves the repo name and + // (2) seeds the empty-manifest pointer that makes the repo clone-able. + // + // `.names/` is the relay's name registry. Each reservation holds + // an `owner` file naming the announcer. It serves three jobs at once: + // - uniqueness: `create_dir` is atomic, so concurrent kind:30617 events + // for the same name can't both claim it (TOCTOU-free); + // - idempotent re-announce: a reservation owned by the same pubkey is an + // update, not a collision; + // - per-pubkey quota: count the reservations whose `owner` matches. + // + // This is the one local-disk simplification in v1: separate relay + // instances with separate disks would each grant the name, with the CAS + // pointer (not this registry) preventing actual ref-state corruption. A + // CAS-backed name index is the multi-instance follow-up. let git_repo_root = &state.config.git_repo_path; + let names_dir = git_repo_root.join(".names"); + std::fs::create_dir_all(&names_dir) + .map_err(|e| anyhow::anyhow!("failed to create name reservation index: {e}"))?; - // Defensive: ensure the configured root exists. Config bootstrap creates - // this at startup, but a misconfigured deployment or out-of-band deletion - // would otherwise cause every canonicalize() below to fail and the - // side-effect to be silently swallowed by the ingest pipeline, leaving the - // repo announcement stored but no bare repo on disk (push then 500s with - // "git service misconfigured"). - if let Err(e) = std::fs::create_dir_all(git_repo_root) { - return Err(anyhow::anyhow!( - "failed to ensure git_repo_path {} exists: {e}", - git_repo_root.display() - )); + let reservation = names_dir.join(&repo_id); + let owner_marker = reservation.join("owner"); + + // Re-announce by the same owner is a no-op update; a name held by anyone + // else is a collision (the relay signs kind:30618 with d-tag = repo_name, + // so a shared name would let one owner overwrite another's ref state). + if reservation.exists() { + match std::fs::read_to_string(&owner_marker) { + Ok(existing) if existing == owner_hex => { + info!( + repo_id = %repo_id, + owner = %owner_hex, + "kind:30617 repo announcement updated (name already reserved)" + ); + return Ok(()); + } + _ => { + return Err(anyhow::anyhow!( + "repo name '{repo_id}' already taken by another owner" + )); + } + } } - let repo_dir = git_repo_root - .join(&owner_hex) - .join(format!("{repo_id}.git")); - - // If repo already exists, this is an update to the announcement — nothing to do on disk. - // Backfill the name reservation if missing (handles upgrade from pre-uniqueness-check state). - if repo_dir.exists() { - let names_dir = git_repo_root.join(".names"); - let _ = std::fs::create_dir_all(&names_dir); - let _ = std::fs::create_dir(names_dir.join(&repo_id)); - info!( - repo_id = %repo_id, - owner = %owner_hex, - "kind:30617 repo announcement updated (repo already exists)" - ); - return Ok(()); + // Per-pubkey repo count limit: reservations owned by this pubkey. + let limit = state.config.git_max_repos_per_pubkey as usize; + let owned = std::fs::read_dir(&names_dir) + .map(|entries| { + entries + .filter_map(|e| e.ok()) + .filter(|e| { + std::fs::read_to_string(e.path().join("owner")) + .map(|o| o == owner_hex) + .unwrap_or(false) + }) + .count() + }) + .unwrap_or(0); + if owned >= limit { + return Err(anyhow::anyhow!("repo limit exceeded: {owned} >= {limit}")); } - // Global uniqueness: repo names are unique across all owners (relay = single namespace). - // The relay signs kind:30618 ref-state with d-tag = repo_name, so collisions would - // cause one owner's ref state to overwrite another's. - // - // Atomicity: std::fs::create_dir fails with AlreadyExists if the directory exists, - // preventing TOCTOU races between concurrent kind:30617 events. The .names/ directory - // acts as a global name reservation index. - let names_dir = git_repo_root.join(".names"); - std::fs::create_dir_all(&names_dir) - .map_err(|e| anyhow::anyhow!("failed to create name reservation index: {e}"))?; - let reservation = names_dir.join(&repo_id); + // Claim the name. `create_dir` (not `create_dir_all`) fails AlreadyExists + // if a concurrent announce won the race, closing the TOCTOU window above. match std::fs::create_dir(&reservation) { - Ok(()) => {} // Name claimed successfully. + Ok(()) => {} Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { return Err(anyhow::anyhow!( "repo name '{repo_id}' already taken by another owner" @@ -1735,142 +1754,11 @@ async fn handle_git_repo_announcement(event: &Event, state: &Arc) -> a )); } } - - // Per-pubkey repo count limit. - let owner_dir = git_repo_root.join(&owner_hex); - if owner_dir.exists() { - let count = std::fs::read_dir(&owner_dir) - .map(|entries| entries.filter_map(|e| e.ok()).count()) - .unwrap_or(0); - let limit = state.config.git_max_repos_per_pubkey as usize; - if count >= limit { - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!("repo limit exceeded: {count} >= {limit}")); - } - } - - // Create parent directory. - if let Err(e) = tokio::fs::create_dir_all(&repo_dir).await { - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!( - "failed to create repo directory {}: {e}", - repo_dir.display() - )); - } - - // Path canonicalization: verify resolved path is under the repo root. - let canonical_root = match git_repo_root.canonicalize() { - Ok(p) => p, - Err(e) => { - let _ = tokio::fs::remove_dir_all(&repo_dir).await; - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!("failed to canonicalize repo root: {e}")); - } - }; - let canonical_repo = match repo_dir.canonicalize() { - Ok(p) => p, - Err(e) => { - let _ = tokio::fs::remove_dir_all(&repo_dir).await; - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!("failed to canonicalize repo path: {e}")); - } - }; - if !canonical_repo.starts_with(&canonical_root) { - let _ = tokio::fs::remove_dir_all(&repo_dir).await; - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!( - "repo path escapes root: {} not under {}", - canonical_repo.display(), - canonical_root.display() - )); - } - - // Initialize bare repo with main as default branch. - let output = match Command::new("git") - .arg("init") - .arg("--bare") - .arg("-b") - .arg("main") - .arg(&repo_dir) - .env_clear() - .env("PATH", std::env::var("PATH").unwrap_or_default()) - .env("GIT_CONFIG_NOSYSTEM", "1") - .env("GIT_CONFIG_GLOBAL", "/dev/null") - .env("HOME", "/dev/null") - .output() - .await - { - Ok(o) => o, - Err(e) => { - let _ = tokio::fs::remove_dir_all(&repo_dir).await; - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!("git init --bare failed to spawn: {e}")); - } - }; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - let _ = tokio::fs::remove_dir_all(&repo_dir).await; - let _ = std::fs::remove_dir(&reservation); - return Err(anyhow::anyhow!("git init --bare failed: {stderr}")); - } - - // Git config for Smart HTTP compatibility. - for (key, value) in [ - ("http.receivepack", "true"), - ("receive.denyNonFastForwards", "false"), - ("uploadpack.allowTipSHA1InWant", "true"), - ("uploadpack.allowReachableSHA1InWant", "true"), - ] { - match Command::new("git") - .args(["config", "--file"]) - .arg(repo_dir.join("config")) - .args([key, value]) - .env_clear() - .env("PATH", std::env::var("PATH").unwrap_or_default()) - .env("GIT_CONFIG_NOSYSTEM", "1") - .env("GIT_CONFIG_GLOBAL", "/dev/null") - .env("HOME", "/dev/null") - .output() - .await - { - Ok(output) if !output.status.success() => { - tracing::warn!( - key, - value, - status = %output.status, - "git config failed for bare repo" - ); - } - Err(e) => { - tracing::warn!( - key, - value, - error = %e, - "git config command failed for bare repo" - ); - } - _ => {} - } + if let Err(e) = std::fs::write(&owner_marker, &owner_hex) { + let _ = std::fs::remove_dir_all(&reservation); + return Err(anyhow::anyhow!("failed to record repo owner: {e}")); } - // Install pre-receive hook for permission enforcement. - // This replaces the old "disable all hooks" approach — we now have our own - // hook that calls back to the relay's internal policy endpoint. - // Only pre-receive is installed; all other hook slots remain empty (RCE prevention). - // SECURITY: Hook installation is FATAL. If the hook can't be installed, - // the repo would be unprotected. Better to fail repo creation than allow - // an unprotected repo to exist. The receive_pack handler also checks hook - // existence as a belt-and-suspenders measure. - crate::api::git::hook::install_hook(&repo_dir) - .await - .map_err(|e| { - // Clean up the repo directory since it's unusable without the hook. - let _ = std::fs::remove_dir_all(&repo_dir); - let _ = std::fs::remove_dir(&reservation); - anyhow::anyhow!("failed to install pre-receive hook: {e}") - })?; - // Seed the empty-manifest pointer in object storage. Establishes the // invariant "repo announced ⟺ pointer exists" so the read path can rely // on pointer-absent meaning never-announced (not just no-pushes-yet), @@ -1879,20 +1767,17 @@ async fn handle_git_repo_announcement(event: &Event, state: &Arc) -> a seed_manifest_pointer(state, &owner_hex, &repo_id) .await .map_err(|e| { - // Pointer seed failure leaves the on-disk repo + name reservation - // without a clone-able pointer, which is exactly the broken state - // the seed exists to prevent. Unwind both so the announce is - // either fully consummated or fully rolled back. - let _ = std::fs::remove_dir_all(&repo_dir); - let _ = std::fs::remove_dir(&reservation); + // A reserved name without a clone-able pointer is exactly the + // broken state the seed exists to prevent. Release the reservation + // so the announce is either fully consummated or fully rolled back. + let _ = std::fs::remove_dir_all(&reservation); anyhow::anyhow!("failed to seed manifest pointer: {e}") })?; info!( repo_id = %repo_id, owner = %owner_hex, - path = %repo_dir.display(), - "bare git repo created from kind:30617 announcement" + "kind:30617 repo announced (name reserved, manifest pointer seeded)" ); // Derived after the pointer commits: kind:30618 ref-state event over the From e6a285d0e02076db50e7adb6ed27c7b343d85cac Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Fri, 22 May 2026 14:29:08 -0400 Subject: [PATCH 24/24] fix(relay/git/store): drop-and-floor classification for conformance probe races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe was conflating transport failures with conformance failures. `if_match_race` / `if_none_match_race` did `Err(e) → ProbeFailure` for any per-racer error, but `S3Error::Reqwest/Http/Io` means the racer never got a classified response from the backend — its outcome is *unknown*, not negative. A3 is a claim about **observers** ("exactly one CAS wins per (key, etag)"); an unknown racer isn't an observer. Treating one as a probe failure made the relay un-bootable on any constrained host that couldn't service the burst (Eva caught this on Desktop E2E at 32-way against a CI-box MinIO). Fix is drop-and-floor: - Pre-classification transport errors (`S3Error::{Reqwest, Http, Io}`) drop the racer from the observer set and increment `transport_drops`. - Post-receipt parse/decode errors (`Utf8`, `ReqwestHeaderToStr`, `SerdeXml`, `FromUtf8`, `InvalidHeaderValue`) and any `HttpFailWithBody` stay in the catch-all — the backend *did* answer, but not in the contract shape, which is a real conformance signal. - Per-round floor: `classified >= 2` (A3 needs ≥2 observers to *see* a race; with 0 classified the probe didn't run at all — fail closed). - `winners == 1` among classified observers (was: among all racers). - Create-only race contract is now `twos == 1 && twelves == classified - 1` (was: fixed `race_width - 1`, which false-positived on any transport drop). `ProbeReport` gains `transport_drops: usize` (total across both race phases), surfaced in the startup admission log line — a slowly- degrading backend shows up as "admitted with degraded observation count" before it's a probe failure. The shape is strictly smaller than the alternatives the team considered: no per-racer or per-round retry, no retry policy parameters, no startup-latency cost under flake. Crucially it doesn't smuggle a network-stack test into a correctness probe — retrying a classified outcome would change the TLA action and violate the same "retry only pre-classification errors" rule Sami's `cas_publish` guidance enforces. Eva's runtime workaround (`SPROUT_GIT_PROBE_WRITERS=8` on Desktop E2E, already shipped on 61cc38f6) stays as a deployment-level knob, but the probe is no longer fragile to it. Diff scope: `store.rs` (the rewrite) + one log-line touch in `main.rs` (folding `transport_drops` into the admission log). 239 relay lib tests still green, clippy `--tests -D warnings` clean, fmt clean. Refs: - Channel discussion: PR #726 thread, shape (c) drop-and-floor agreed by Quinn + Max + Eva (the latter accepting the framing flip from her original (a)/(b) retry proposals). - docs/git-on-object-storage.md §Conformance, §Implementation Correspondence (retry rule: "if added, lives in the store layer and retries *only* pre-classification network errors"). Co-authored-by: Quinn Co-authored-by: Tyler Longwell Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-relay/src/api/git/store.rs | 119 +++++++++++++++++++++-- crates/sprout-relay/src/main.rs | 1 + 2 files changed, 112 insertions(+), 8 deletions(-) diff --git a/crates/sprout-relay/src/api/git/store.rs b/crates/sprout-relay/src/api/git/store.rs index 788616b57..af892352b 100644 --- a/crates/sprout-relay/src/api/git/store.rs +++ b/crates/sprout-relay/src/api/git/store.rs @@ -115,6 +115,21 @@ pub struct ProbeReport { pub race_width: usize, /// Rounds executed per race phase. pub race_rounds: usize, + /// Total number of *transport-unknown* per-racer outcomes across all + /// race rounds (sum of both `if_match_race` and `if_none_match_race` + /// phases). A "transport-unknown" is a pre-classification failure — + /// `S3Error::{Reqwest, Http, Io}` — that means the racer never got a + /// classified response from the backend, so its outcome is neither + /// evidence for nor against A3 linearizability. Such racers are + /// dropped from the observer set (see the race phases for the + /// invariant: `classified >= 2` and `winners == 1` *among classified + /// observers*). + /// + /// Surfaced on the admission log line so a slowly-degrading backend + /// shows up before it's a probe failure: a passing probe with + /// non-zero `transport_drops` is "admitted with degraded + /// observation count," not silently flaky. + pub transport_drops: usize, } /// Failure carrying the phase that failed plus enough context to diagnose. @@ -405,6 +420,9 @@ impl GitStore { } let nonce = uuid::Uuid::new_v4(); let pointer_key = format!("probe/pointer-{nonce}"); + // Accumulator for *transport-unknown* per-racer outcomes across both + // race phases. See `ProbeReport::transport_drops` for the rationale. + let mut transport_drops = 0usize; // -- Phase 1: sequential -------------------------------------------------- for round in 0..cfg.race_rounds { @@ -460,15 +478,42 @@ impl GitStore { tasks.push(async move { me.put_pointer(&pkey, &body, Precond::IfMatch(et)).await }); } let outcomes = futures_util::future::join_all(tasks).await; + // Drop-and-floor classification. A `Reqwest`/`Http`/`Io` error + // means the racer never got a classified response from the + // backend (couldn't open a socket, send flaked, etc.); its + // outcome is *unknown*, not negative. A3 is a claim about + // **observers**: dropping unknowns from the observer set + // sharpens the assertion ("exactly one winner among observers") + // and avoids smuggling a network-stack test into the + // conformance probe. Parse/decode errors (`Utf8`, + // `ReqwestHeaderToStr`, `SerdeXml`, ...) and `HttpFailWithBody` + // stay in the catch-all — those mean the backend *did* answer + // but not in the contract shape, which is a real conformance + // signal. + let mut classified = 0usize; let mut winners = 0usize; let mut new_etag: Option = None; for (i, outcome) in outcomes.into_iter().enumerate() { match outcome { Ok(CasOutcome::Won(e)) => { + classified += 1; winners += 1; new_etag = Some(e); } - Ok(CasOutcome::LostRace) => {} + Ok(CasOutcome::LostRace) => { + classified += 1; + } + Err(StoreError::Backend( + S3Error::Reqwest(_) | S3Error::Http(_) | S3Error::Io(_), + )) => { + transport_drops += 1; + tracing::warn!( + phase = "if_match_race", + round, + racer = i, + "transport drop (pre-classification: socket/send failure)" + ); + } Err(e) => { return Err(ProbeFailure { phase: "if_match_race", @@ -480,12 +525,29 @@ impl GitStore { } } } + // A3 needs ≥2 observers to *see* a race. With 31/32 classified + // and 1 transport drop, the race is well-observed; with 0/32 + // classified the probe didn't run at all — fail closed. + if classified < 2 { + return Err(ProbeFailure { + phase: "if_match_race", + round, + key: pointer_key, + reason: format!( + "race not observed: classified={classified}, transport_drops={}", + cfg.race_width - classified + ), + } + .into()); + } if winners != 1 { return Err(ProbeFailure { phase: "if_match_race", round, key: pointer_key, - reason: format!("expected exactly 1 winner, got {winners}"), + reason: format!( + "expected exactly 1 winner among {classified} classified observers, got {winners}" + ), } .into()); } @@ -509,12 +571,24 @@ impl GitStore { tasks.push(async move { me.put_immutable_raw(&k, &b).await }); } let results = futures_util::future::join_all(tasks).await; + // Drop-and-floor: same classification rule as Phase 2. Drop + // `Reqwest`/`Http`/`Io` (pre-classification — socket/send + // failure); count 2xx + 412 as the classified observers. Any + // other status or any non-transport `StoreError` is a real + // conformance signal and fails closed. + let mut classified = 0usize; let mut twos = 0usize; let mut twelves = 0usize; for (i, r) in results.into_iter().enumerate() { match r { - Ok(200..=299) => twos += 1, - Ok(412) => twelves += 1, + Ok(200..=299) => { + classified += 1; + twos += 1; + } + Ok(412) => { + classified += 1; + twelves += 1; + } Ok(code) => { return Err(ProbeFailure { phase: "if_none_match_race", @@ -524,25 +598,53 @@ impl GitStore { } .into()) } + Err(StoreError::Backend( + S3Error::Reqwest(_) | S3Error::Http(_) | S3Error::Io(_), + )) => { + transport_drops += 1; + tracing::warn!( + phase = "if_none_match_race", + round, + racer = i, + "transport drop (pre-classification: socket/send failure)" + ); + } Err(e) => { return Err(ProbeFailure { phase: "if_none_match_race", round, key, - reason: format!("racer {i}: {e}"), + reason: format!("racer {i} backend error: {e}"), } .into()) } } } - if twos != 1 || twelves != cfg.race_width - 1 { + // Floor: A3 needs ≥2 observers to *see* a race. + if classified < 2 { + return Err(ProbeFailure { + phase: "if_none_match_race", + round, + key, + reason: format!( + "race not observed: classified={classified}, transport_drops={}", + cfg.race_width - classified + ), + } + .into()); + } + // Create-only contract: exactly 1×2xx + (classified − 1)×412 + // *among observers*. The previous fixed `race_width − 1` would + // false-positive on any transport drop; this expression honors + // the drop-and-floor invariant. + if twos != 1 || twelves != classified - 1 { return Err(ProbeFailure { phase: "if_none_match_race", round, key, reason: format!( - "expected 1×2xx + {}×412, got {twos}×2xx + {twelves}×412", - cfg.race_width - 1 + "expected 1×2xx + {}×412 among {classified} classified observers, got {twos}×2xx + {twelves}×412", + classified - 1 ), } .into()); @@ -607,6 +709,7 @@ impl GitStore { Ok(ProbeReport { race_width: cfg.race_width, race_rounds: cfg.race_rounds, + transport_drops, }) } diff --git a/crates/sprout-relay/src/main.rs b/crates/sprout-relay/src/main.rs index 395b5f07d..f11d05a26 100644 --- a/crates/sprout-relay/src/main.rs +++ b/crates/sprout-relay/src/main.rs @@ -258,6 +258,7 @@ async fn main() -> anyhow::Result<()> { tracing::info!( race_width = report.race_width, race_rounds = report.race_rounds, + transport_drops = report.transport_drops, "git object-store backend admitted: A3 conformance probe passed" ); }