From 6d001cc731df2027052d4e18cdb3152f8bc95b06 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 08:07:53 +0000 Subject: [PATCH 1/4] spec: relay capability document (NIP-11-style HTTP sidecar) Adds docs/specs/2026-04-24-relay-capability-doc.md describing a plain- HTTP GET /.well-known/willow endpoint that Willow relays serve so clients can negotiate protocol version, discover limits, and learn about auth/payment/invite gates before opening a TCP or WebSocket connection. Covers: endpoint path + Content-Type rationale, full WillowRelayInfo Rust schema with Limitation and Retention sub-structs, two-axis version negotiation (wire framing + event schema), CORS + caching (ETag, 60 s max-age), error modes including degraded/read-only/404, operator-controlled security posture, and a three-tier test plan (serde unit, relay integration, browser fetch). Six open questions on signing, multi-tenancy, relay discovery, payment proofs, feature registry, and utilisation signalling. Co-authored-by: Claude --- docs/specs/2026-04-24-relay-capability-doc.md | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 docs/specs/2026-04-24-relay-capability-doc.md diff --git a/docs/specs/2026-04-24-relay-capability-doc.md b/docs/specs/2026-04-24-relay-capability-doc.md new file mode 100644 index 00000000..95be143f --- /dev/null +++ b/docs/specs/2026-04-24-relay-capability-doc.md @@ -0,0 +1,243 @@ +# Relay Capability Document + +> **One-sentence summary:** Willow relays expose a plain-HTTP +> `/.well-known/willow` JSON document — a NIP-11-style capability +> sidecar — that clients fetch *before* connecting so they can discover +> the relay's protocol versions, limits, auth/payment requirements, and +> operator metadata without a failed-connection round-trip. 
+ +## Motivation + +Today a Willow client opens a TCP or WebSocket connection to the relay +listener in `crates/relay/src/main.rs:128` and *only then* discovers +whether the relay supports its wire version, whether it gates access on +`SyncProvider` permission, whether it happens to be storage-degraded, +or whether its topic cap has been reached. Failure is silent or shows +up as a confusing disconnect — exactly the "why did that connection +fail?" problem that NIP-11 was designed to solve for Nostr. + +A sidecar capability document lets clients pick the right wire +version before the handshake (see `PROTOCOL_VERSION` in +`crates/transport/src/lib.rs:30`); decide whether the user needs an +invite or payment proof before dialling; surface a "degraded / full" +banner; display operator name, contact, and ToS in a settings sheet; +and filter a relay directory without connecting to each candidate. + +## Endpoint + +| Property | Value | +|---|---| +| Path | `/.well-known/willow` | +| Method | `GET` (plus `OPTIONS` for preflight) | +| Response Content-Type | `application/willow+json; charset=utf-8` | +| CORS | required — `Access-Control-Allow-Origin: *` | +| Served on | the public relay HTTP port (default `3340`, configurable via `--relay-port`; see `crates/relay/src/main.rs:87`) | + +**Why `/.well-known/willow` over `/willow-info`?** The relay proxy in +`crates/relay/src/lib.rs:186` already dispatches on request path. +`/.well-known/*` (RFC 8615) gives a stable namespace for future +sidecars (e.g. `/.well-known/willow-payment`) without top-level +collisions. + +**Why a dedicated content type over `application/json`?** Nostr chose +`Accept: application/nostr+json` because relay and info share a path. +Willow's proxy multiplexes by path, so a distinct path plus a distinct +media type beats Accept-based disambiguation; the `+json` structured +suffix still opts us into generic JSON tooling. + +## Field schema + +The document is a single JSON object. 
All fields are optional except +`protocol_versions`; a minimal compliant document is +`{"protocol_versions":[1]}`. **Clients MUST ignore unknown top-level +fields** so additions remain forward-compatible. + +```rust +/// Capability document served at GET /.well-known/willow. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct WillowRelayInfo { + // Operator metadata (all optional, display-only). + pub name: Option, // ≤ 60 UTF-8 bytes + pub description: Option, // plain text, no markup + pub contact: Option, // mailto: / https: / matrix: + pub admin_pubkey: Option, // hex Ed25519, operator DM key + pub pubkey: Option, // hex Ed25519, relay's own key + pub software: Option, // project URL + pub version: Option, // semver or git SHA + pub terms_of_service: Option, + pub privacy_policy: Option, + pub icon: Option, // square, ≥ 64×64 + + /// REQUIRED. Wire-protocol versions the relay accepts, highest- + /// preference first. Mirrors `willow_transport::PROTOCOL_VERSION`. + pub protocol_versions: Vec, + + /// Short string feature tags. Initial set: "gossip", "history", + /// "blobs", "voice-signal", "invite-gate", "payment-gate". + #[serde(default)] + pub supported_features: Vec, + + /// Supported `EventKind` schema range `[min, max]` from + /// `crates/state/src/event.rs`. Absent = assume `[1, 1]`. + pub event_schema_range: Option<[u16; 2]>, + + pub limitation: Option, + pub retention: Option, + + pub payments_url: Option, // required iff payment_required + pub invites_url: Option, // required iff invite_required + + /// "ok" | "degraded" | "read_only". "degraded" = up but a worker + /// (e.g. storage) is offline; clients SHOULD still connect. 
+ pub status: Option, + pub status_detail: Option, // human-readable, plain text +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct Limitation { + pub max_message_bytes: Option, // mirrors MAX_DESER_SIZE, + // `crates/transport/src/lib.rs:36` + pub max_topic_len: Option, // MAX_TOPIC_LEN, + // `crates/relay/src/lib.rs:84` + pub max_topics: Option, // MAX_TOPICS, + // `crates/relay/src/lib.rs:80` + pub max_connections: Option, // MAX_CONCURRENT_BOOTSTRAP_ + // CONNECTIONS, lib.rs:59 + pub max_blob_bytes: Option, // 0 = blob pinning off + #[serde(default)] pub invite_required: bool, + #[serde(default)] pub payment_required: bool, + /// Relay drops traffic whose author isn't in its SyncProvider + /// allowlist. The relay CAN'T enforce the state-level grant (it + /// has no DAG), so this is a best-effort operator allowlist. + #[serde(default)] pub sync_provider_only: bool, + pub hlc_lower_limit: Option, // oldest accepted HLC ms + pub min_client_version: Option, // reject older handshakes +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct Retention { + /// "replay" (in-memory, 1000-event cap per server) or "storage" + /// (SQLite). See `docs/specs/2026-03-27-worker-nodes-design.md`. + pub mode: String, + pub max_events_per_server: Option, // null = unbounded + pub max_age_seconds: Option, // null = keep everything + /// Willow default: false. Sealed channel keys stay peer-to-peer. + #[serde(default)] pub channel_key_escrow: bool, +} +``` + +## Versioning + +Two independent version axes travel in this document: + +| Axis | Field | Negotiation rule | +|---|---|---| +| Wire framing (`Envelope`) | `protocol_versions: Vec` | Client picks the highest integer in both its list and the relay's. Empty intersection → refuse to connect, surface a "version mismatch" error. | +| Event schema | `event_schema_range: [min, max]` | Client's active schema MUST lie within `[min, max]`. 
Outside the range the client either upgrades or treats the relay as a byte-forwarder and disables state replay through it. | + +The capability document itself is unversioned: compatibility comes from +"add fields, never repurpose" plus the mandatory ignore-unknown-fields +rule. A deprecated field graduates to a fresh name; peers that only +understand the old name keep working. + +## CORS + +WASM clients in `crates/web` fetch this endpoint cross-origin, so the +relay MUST respond with the following on both `GET` and `OPTIONS` +preflight: + +``` +Access-Control-Allow-Origin: * +Access-Control-Allow-Methods: GET, OPTIONS +Access-Control-Allow-Headers: Accept, Content-Type, If-None-Match +``` + +This mirrors the pattern already in `handle_bootstrap_connection` +(`crates/relay/src/lib.rs:114`). + +## Caching + +- Response SHOULD carry a weak `ETag` derived from SHA-256 over the + canonical JSON serialisation, and SHOULD honour `If-None-Match` + with `304 Not Modified`. +- Default `Cache-Control: public, max-age=60` — low enough that + operational-status transitions propagate within a minute, high + enough for a relay directory to fan out cheaply. +- Clients MUST NOT cache across `pubkey` changes; the relay's key + is part of the cache identity. + +## Error modes + +| Condition | Status | Body | +|---|---|---| +| OK | `200` | Full `WillowRelayInfo` JSON | +| Storage worker offline | `200` | `status: "degraded"` + `status_detail` | +| Relay shutting down | `503` | `{"status":"read_only","status_detail":"..."}` | +| Older relay without the sidecar | `404` | plain text | +| CORS preflight | `204` | empty, with ACAO/ACAM/ACAH | + +Clients MUST treat `404` as "older relay; assume +`protocol_versions:[1]`, no advertised limits, proceed at your own +risk" so the endpoint is purely additive. + +## Security considerations + +- Every field is operator-controlled. 
The relay MUST NOT populate + fields from peer-supplied metadata; otherwise a hostile peer can + rewrite the sidecar via injection. +- `admin_pubkey` and `pubkey` are *hints*. Trust is established by + the owner via `GrantPermission { permission: SyncProvider }` (see + `docs/specs/2026-04-12-state-authority-and-mutations.md`). Merely + appearing in the sidecar grants no authority. +- The endpoint is unauthenticated and MUST NOT expose connected-peer + lists, traffic counts, or anything that fingerprints users. +- `status_detail` and `description` are rendered in the web UI — the + client MUST escape them as text, never HTML. +- Rate-limit the endpoint by reusing the existing + `MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS` semaphore + (`crates/relay/src/lib.rs:59`). + +## Tests + +**Unit (serde, in `crates/relay/src/`):** (1) round-trip a fully +populated `WillowRelayInfo` through `serde_json` with byte-for-byte +equality after canonical re-serialisation; (2) parse the minimum +document `{"protocol_versions":[1]}` and assert all other fields are +`None`/empty; (3) parse a document with an unknown top-level key and +assert it is ignored; (4) reject a document missing +`protocol_versions` with a typed error. + +**Integration** (new `crates/relay/tests/capability_endpoint.rs`, +alongside `bootstrap_endpoint.rs`): (1) `GET /.well-known/willow` → +`200`, `Content-Type: application/willow+json`, ACAO/ACAM/ACAH; +(2) `OPTIONS` preflight → `204` + CORS headers; (3) simulate +storage-worker offline → `status == "degraded"`; (4) replay `GET` with +the previous `ETag` via `If-None-Match` → `304`. + +**Browser (`crates/web/tests/browser.rs`):** stub `fetch` with a +document whose `protocol_versions` does not intersect the client's; +mount the connect flow; assert the "connect" button is disabled and a +mismatch banner is rendered. + +## Open questions + +1. **Signed documents.** Ship an Ed25519 signature over the canonical + JSON so clients can pin a relay by key across CDN proxies? 
Sibling + `/.well-known/willow.sig`, HTTP header, or inline `signature` field + over a canonicalised hash? +2. **Multi-tenant relays.** One host can serve many Willow servers; + per-server document at `/.well-known/willow/{server_id}`, or one + shared document since the relay is topic-agnostic? +3. **Relay discovery.** Advertise `suggested_relays: Vec` so + clients discover siblings after connecting, à la Nostr relay + exchange? +4. **Payment proof format.** Willow has no payments primitive yet — if + we keep `payment_required`, either spec the token format now or + gate the field behind a build flag so operators don't advertise + something no client can satisfy. +5. **`supported_features` registry.** Strings are friendlier than + NIP-11 integers but drift; pin the tag set in `crates/transport` + as an enum, or keep it free-form? +6. **Utilisation signalling.** Advertise current load (e.g. "9 998 / + 10 000 topics used") for client load balancing, or omit to avoid + telemetry leakage? Worth a follow-up spec either way. From e03a1a90272ccf7724bdb9155a9104d8a582922d Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 25 Apr 2026 07:08:54 +0000 Subject: [PATCH 2/4] spec(#215): resolve review feedback - audit pass Apply review decisions to the relay capability document spec: - Promote signing to v1 MUST (inline signature, RFC 8785 JCS canonical bytes, signature field excluded from canonicalisation). - Specify dispatch surgery: explicit branch in dispatch_connection for /.well-known/willow plus OPTIONS preflight; reuse BOOTSTRAP_IO_TIMEOUT and MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS; extend (not mirror) the handle_bootstrap_connection pattern. - Drop event_schema_range (no EVENT_SCHEMA_VERSION exists in willow-state); list as future work. - Resolve multi-tenant question: one shared doc per host, relay is topic-agnostic. - Soften operator-metadata leakage: version is coarse semver, software is project name, both MAY be omitted. 
- Two-tier caching by status: ok=300s, degraded/read_only=5s with must-revalidate. - Recommend WS clients also send Sec-WebSocket-Protocol; JSON is advisory pre-connect. - Fix port framing: relay binds one port multiplexing TCP+WS, not two. - Drop sync_provider_only (operator vibes without a concrete pre-handshake check). - Add Cross-spec coordination table pinning feature tags for #214, #216, #217, #218, #219, #220, #221. - Rewrite Open Questions to keep only genuinely-open items (paid-relay semantics, utilisation telemetry, relay discovery, feature registry). https://claude.ai/code/session_01XmbVXWnKTRVjPp9kmKRSBn --- docs/specs/2026-04-24-relay-capability-doc.md | 251 ++++++++++++++---- 1 file changed, 197 insertions(+), 54 deletions(-) diff --git a/docs/specs/2026-04-24-relay-capability-doc.md b/docs/specs/2026-04-24-relay-capability-doc.md index 95be143f..5fd5e054 100644 --- a/docs/specs/2026-04-24-relay-capability-doc.md +++ b/docs/specs/2026-04-24-relay-capability-doc.md @@ -8,13 +8,16 @@ ## Motivation -Today a Willow client opens a TCP or WebSocket connection to the relay -listener in `crates/relay/src/main.rs:128` and *only then* discovers -whether the relay supports its wire version, whether it gates access on -`SyncProvider` permission, whether it happens to be storage-degraded, -or whether its topic cap has been reached. Failure is silent or shows -up as a confusing disconnect — exactly the "why did that connection -fail?" problem that NIP-11 was designed to solve for Nostr. +Today a Willow client opens a connection to the relay listener in +`crates/relay/src/main.rs:128` — a single public TCP port (default +`3340`) that multiplexes `/bootstrap-id` plus an HTTP/WebSocket-upgrade +proxy to the loopback iroh-relay — and *only then* discovers whether +the relay supports its wire version, whether it happens to be +storage-degraded, or whether its topic cap has been reached. 
Failure +is silent or shows up as a confusing disconnect — exactly the "why +did that connection fail?" problem that NIP-11 was designed to solve +for Nostr. The capability document is served on the **same** port as +the relay handshake, not a sidecar port. A sidecar capability document lets clients pick the right wire version before the handshake (see `PROTOCOL_VERSION` in @@ -23,6 +26,30 @@ invite or payment proof before dialling; surface a "degraded / full" banner; display operator name, contact, and ToS in a settings sheet; and filter a relay directory without connecting to each candidate. +## Dispatch surgery (relay) + +Today `dispatch_connection` in `crates/relay/src/lib.rs` carves out +exactly one path — `BOOTSTRAP_ID_PATH = "/bootstrap-id"` — and +forwards everything else to the loopback iroh-relay. As written, a +naive `GET /.well-known/willow` would land in the upstream and 404 +(or be misread as a relay handshake and dropped). Implementation +MUST add: + +1. An explicit branch in `dispatch_connection` matching + `GET /.well-known/willow` and `OPTIONS /.well-known/willow` + *before* the iroh-relay fallthrough. +2. A new handler analogous to `handle_bootstrap_request_after_line` + that emits the JSON body, ETag, and CORS headers (GET) or returns + `204` with full ACAO/ACAM/ACAH (OPTIONS preflight). +3. Reuse of `BOOTSTRAP_IO_TIMEOUT` for socket reads/writes and the + existing `MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS` semaphore for + admission control. No new tuning knobs in v1. + +This is an **extension** of the `handle_bootstrap_connection` pattern, +not a mirror: that handler currently emits ACAO only (no ACAM/ACAH) +and does not respond to `OPTIONS` preflights. Both gaps are closed +here. 
+ ## Endpoint | Property | Value | @@ -62,24 +89,33 @@ pub struct WillowRelayInfo { pub contact: Option, // mailto: / https: / matrix: pub admin_pubkey: Option, // hex Ed25519, operator DM key pub pubkey: Option, // hex Ed25519, relay's own key - pub software: Option, // project URL - pub version: Option, // semver or git SHA + pub software: Option, // project name; operators MAY omit + pub version: Option, // coarse semver (e.g. "0.3.x"); + // operators MAY omit; never a git SHA pub terms_of_service: Option, pub privacy_policy: Option, pub icon: Option, // square, ≥ 64×64 - /// REQUIRED. Wire-protocol versions the relay accepts, highest- - /// preference first. Mirrors `willow_transport::PROTOCOL_VERSION`. + /// REQUIRED. Wire-protocol versions the relay accepts, sorted + /// highest-first, no duplicates. Mirrors + /// `willow_transport::PROTOCOL_VERSION`. pub protocol_versions: Vec, /// Short string feature tags. Initial set: "gossip", "history", - /// "blobs", "voice-signal", "invite-gate", "payment-gate". + /// "blobs", "voice-signal", "invite-gate", "payment-gate". See + /// "Cross-spec coordination" below for the canonical tag table. #[serde(default)] pub supported_features: Vec, - /// Supported `EventKind` schema range `[min, max]` from - /// `crates/state/src/event.rs`. Absent = assume `[1, 1]`. - pub event_schema_range: Option<[u16; 2]>, + /// REQUIRED. Detached Ed25519 signature over the canonical JSON + /// (RFC 8785 JCS) of this object with the `signature` field + /// removed. Encoded as lowercase hex. Signed with the relay's + /// own Ed25519 key (the same `identity` constructed in + /// `crates/relay/src/main.rs:104`); the public half is published + /// in `pubkey`. Closes the "Clients MUST NOT cache across + /// `pubkey` changes" gap and prevents on-path rewrites of + /// `payment_required`, `min_client_version`, etc. 
+ pub signature: String, pub limitation: Option, pub retention: Option, @@ -106,10 +142,6 @@ pub struct Limitation { pub max_blob_bytes: Option, // 0 = blob pinning off #[serde(default)] pub invite_required: bool, #[serde(default)] pub payment_required: bool, - /// Relay drops traffic whose author isn't in its SyncProvider - /// allowlist. The relay CAN'T enforce the state-level grant (it - /// has no DAG), so this is a best-effort operator allowlist. - #[serde(default)] pub sync_provider_only: bool, pub hlc_lower_limit: Option, // oldest accepted HLC ms pub min_client_version: Option, // reject older handshakes } @@ -128,18 +160,58 @@ pub struct Retention { ## Versioning -Two independent version axes travel in this document: - -| Axis | Field | Negotiation rule | -|---|---|---| -| Wire framing (`Envelope`) | `protocol_versions: Vec` | Client picks the highest integer in both its list and the relay's. Empty intersection → refuse to connect, surface a "version mismatch" error. | -| Event schema | `event_schema_range: [min, max]` | Client's active schema MUST lie within `[min, max]`. Outside the range the client either upgrades or treats the relay as a byte-forwarder and disables state replay through it. | +`protocol_versions: Vec` is the sole negotiated axis in v1. +Client picks the highest integer in both its list and the relay's. +Empty intersection → refuse to connect, surface a "version mismatch" +error. The list MUST be sorted highest-first and MUST NOT contain +duplicates so the negotiation rule is unambiguous. + +**WebSocket clients SHOULD also send `Sec-WebSocket-Protocol`** (e.g. +`willow.v2, willow.v1`) in the WS opening handshake. The JSON +document is *advisory* — useful for pre-connect filtering and +directory listings — but version selection at handshake time via +RFC 6455 subprotocol negotiation is authoritative, and gracefully +handles the case where a sidecar doc and the relay binary drift +(operator forgot to redeploy the JSON). 
+ +Event-schema versioning is **deferred**: `willow-state` defines +`EventKind` as a Rust enum with no numeric tag and no +`EVENT_SCHEMA_VERSION` constant. Advertising a range here would be +vapor. Listed as future work below. The capability document itself is unversioned: compatibility comes from "add fields, never repurpose" plus the mandatory ignore-unknown-fields rule. A deprecated field graduates to a fresh name; peers that only understand the old name keep working. +## Signing + +The capability document MUST be signed. Without a signature, an +on-path attacker (or hostile CDN/reverse proxy fronting the relay) +could flip `payment_required: true`, downgrade `protocol_versions` +to `[1]`, or rewrite `pubkey` to a key the attacker controls. The +relay already has an Ed25519 key (`identity` in +`crates/relay/src/main.rs:104`); a detached signature is ~15 lines +of code and 128 hex characters in the document. + +- **Algorithm:** Ed25519 over the canonical JSON serialisation of + the document with the `signature` field removed. +- **Canonicalisation:** RFC 8785 JSON Canonicalization Scheme (JCS). + Two relays running the same software with the same metadata MUST + produce byte-identical canonical bytes — this is what makes + cross-relay caching possible (see "Caching" below). +- **Encoding:** lowercase hex in the `signature` field. +- **Verification:** clients verify against the `pubkey` field. A + document whose signature does not verify MUST be treated as if + the endpoint returned `404`. Clients MUST NOT cache an + unverified document — this preserves the XEP-0115 → XEP-0390 + lesson that an unverified capability blob can poison cache + entries keyed by content hash. +- **Key rotation:** clients MUST NOT cache a document across a + `pubkey` change. The signature plus the publication of + `pubkey` together let clients pin a relay's identity across + CDN proxies and infrastructure migrations. 
+ ## CORS WASM clients in `crates/web` fetch this endpoint cross-origin, so the @@ -152,19 +224,32 @@ Access-Control-Allow-Methods: GET, OPTIONS Access-Control-Allow-Headers: Accept, Content-Type, If-None-Match ``` -This mirrors the pattern already in `handle_bootstrap_connection` -(`crates/relay/src/lib.rs:114`). +This **extends** the pattern in `handle_bootstrap_connection` +(`crates/relay/src/lib.rs:102`, ACAO emitted at line 116) — that +handler currently sends ACAO only and does not respond to `OPTIONS` +preflights at all, so the new dispatch branch must add ACAM/ACAH +*and* an explicit `OPTIONS → 204` path in `dispatch_connection`. ## Caching -- Response SHOULD carry a weak `ETag` derived from SHA-256 over the - canonical JSON serialisation, and SHOULD honour `If-None-Match` - with `304 Not Modified`. -- Default `Cache-Control: public, max-age=60` — low enough that - operational-status transitions propagate within a minute, high - enough for a relay directory to fan out cheaply. +- Response SHOULD carry a strong `ETag` derived from SHA-256 over + the RFC 8785 canonical JSON serialisation (the same bytes the + signature covers, with `signature` included). The ETag is strong, + not weak, because canonical JSON gives byte-equality semantics — + this is what enables cross-relay caching keyed by content hash, + XEP-0115/0390 style. Honour `If-None-Match` with + `304 Not Modified`. +- **Two-tier `Cache-Control` keyed on `status`:** + - Steady-state (`status == "ok"` or absent): + `Cache-Control: public, max-age=300` — directories and clients + appreciate the longer TTL. + - Transitional (`status == "degraded"` or `"read_only"`): + `Cache-Control: public, max-age=5, must-revalidate` — clients + see the recovery quickly. The relay knows its own status and + varies the header per response. - Clients MUST NOT cache across `pubkey` changes; the relay's key - is part of the cache identity. + is part of the cache identity. 
Combined with the mandatory + signature this lets clients pin a relay across CDN proxies. ## Error modes @@ -197,6 +282,23 @@ risk" so the endpoint is purely additive. `MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS` semaphore (`crates/relay/src/lib.rs:59`). +## Cross-spec coordination + +This document is the natural advertising surface for sibling specs in +the #214–#221 set. To prevent tag-name drift (`hist-eose` here vs. +`history_eose` somewhere else), the canonical `supported_features` +strings are pinned here: + +| Sibling | Feature tag | Notes | +|---|---|---| +| #214 (history EOSE) | `history-eose` | Set when relay emits an end-of-stored-events sentinel. | +| #216 (machine-readable rejections) | `rejection-codes-v1` | Bumped tag if/when codes evolve. May also bump `protocol_versions`. | +| #217 (bech32 pubkey HRP) | bech32-pubkey-format (no tag yet) | Coordinate `pubkey` / `admin_pubkey` encoding with #217; v1 here keeps hex but #217 may extend the schema. | +| #218 (gift-wrap DM) | `gift-wrap-dm` | Informational only — relays cannot tell whether traffic is gift-wrapped, so the tag advertises operator intent rather than a checked capability. | +| #219 (sync algorithm) | `negentropy` or `seq-vector-sync` | One tag per algorithm the relay implements; client picks. | +| #220 (epoch key rotation) | (none) | No relay impact — omit. | +| #221 (outbox / no `EventKind::RelayList`) | (none) | No relay impact in this doc; `suggested_relays` (future) overlaps and should be resolved jointly with #221. | + ## Tests **Unit (serde, in `crates/relay/src/`):** (1) round-trip a fully @@ -219,25 +321,66 @@ document whose `protocol_versions` does not intersect the client's; mount the connect flow; assert the "connect" button is disabled and a mismatch banner is rendered. +## Resolved during review + +- **Signing.** Resolved in favour of MUST-sign in v1 with an inline + `signature` field excluded from the canonical bytes. See "Signing" + above. 
+- **Multi-tenant relays.** Resolved in favour of **one shared + document per host**. The relay in this codebase is topic-agnostic + (`crates/relay/src/lib.rs:8-23`: "All routines in this crate + operate at the transport layer") — it does not know what servers + it relays for, only `TopicAnnounce` strings. Per-server + `/.well-known/willow/{server_id}` would require teaching the + relay to enumerate servers it has no semantic knowledge of, which + contradicts the trust-model layering. +- **Operator-metadata leakage.** `version` and `software` softened to + coarse semver and project name respectively; both MAY be omitted. +- **`sync_provider_only`.** Dropped from v1. The relay has no DAG + and the field reduces to "operator vibes" without a concrete + pre-handshake check. If resurrected later, it must be tied to a + typed-error pre-handshake rejection so a client reading the field + can do something actionable. +- **`event_schema_range`.** Dropped from v1; see "Future work". + ## Open questions -1. **Signed documents.** Ship an Ed25519 signature over the canonical - JSON so clients can pin a relay by key across CDN proxies? Sibling - `/.well-known/willow.sig`, HTTP header, or inline `signature` field - over a canonicalised hash? -2. **Multi-tenant relays.** One host can serve many Willow servers; - per-server document at `/.well-known/willow/{server_id}`, or one - shared document since the relay is topic-agnostic? -3. **Relay discovery.** Advertise `suggested_relays: Vec` so - clients discover siblings after connecting, à la Nostr relay - exchange? -4. **Payment proof format.** Willow has no payments primitive yet — if - we keep `payment_required`, either spec the token format now or - gate the field behind a build flag so operators don't advertise - something no client can satisfy. -5. **`supported_features` registry.** Strings are friendlier than - NIP-11 integers but drift; pin the tag set in `crates/transport` - as an enum, or keep it free-form? -6. 
**Utilisation signalling.** Advertise current load (e.g. "9 998 / - 10 000 topics used") for client load balancing, or omit to avoid - telemetry leakage? Worth a follow-up spec either way. +1. **Payment proof format.** Willow has no payments primitive yet. + `payment_required` ships as a boolean hint with no token format + — clients can surface it but cannot satisfy it. Either spec the + token format in a sibling doc or gate the field behind a build + flag in a follow-up. +2. **Utilisation telemetry.** Advertise current load (e.g. + `served_topics: u32`, "9998 / 10000 topics used") for client + load balancing, or omit to avoid fingerprinting? A counted + number (not list) avoids leaking server IDs but still gives + clients something to balance on. Worth a follow-up spec either + way. +3. **Relay discovery / `suggested_relays`.** Advertise sibling + relays so clients discover alternates after connecting, à la + Nostr relay exchange? Resolve jointly with #221 (outbox), since + the shapes overlap. +4. **`supported_features` registry.** The cross-spec table above + pins the v1 tags, but should the set be promoted to a Rust enum + in `crates/transport` so unknown tags fail to compile, or stay + free-form to allow out-of-tree operators to advertise local + features? + +## Future work + +- **Event schema versioning.** Once `willow-state` introduces an + `EVENT_SCHEMA_VERSION: u16` constant and a documented bump rule + (additive variants vs. breaking changes), add an + `event_schema_range: [min, max]` field to this document. Until + then, advertising a range would be vapor. +- **DNS SVCB/HTTPS hints (RFC 9460).** A `willow-versions=1,2` + SvcParam would let clients decide whether to dial at all with + zero HTTP round-trips. Complementary to this document, not a + replacement: SVCB cannot carry `terms_of_service`, + `description`, or `status_detail`. 
+- **Per-peer capabilities.** Matrix split `/versions` (public, + cacheable) from `/capabilities` (authenticated, per-user) at + v1.10. If/when Willow grows per-peer capability answers, route + them at a *new* path (e.g. `/willow/peer-capabilities`) rather + than overloading `/.well-known/willow` — Matrix's v1.10 retrofit + is a cautionary tale. From 543ba37149c544e9019b7b4224658eef7ef1785b Mon Sep 17 00:00:00 2001 From: Noah Date: Sat, 25 Apr 2026 01:59:13 -0700 Subject: [PATCH 3/4] spec(#215): apply audit findings - round 1 - Update Motivation to cite main.rs:129 (bind) and :202 (spawn) instead of stale :128 - Pin Dispatch surgery to handle_bootstrap_request_after_line (active prod path) while still acknowledging the test-only handle_bootstrap_connection - Note that MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS is misnamed (gates the public proxy semaphore) and SHOULD be renamed alongside this endpoint - Fix Retention.mode doc: cap is per-author per server (default 1000), not per server; rename field to max_events_per_author and cite role.rs:49,64 - Update CORS section to reference both proxy handlers; clarify both lack ACAM/ACAH and OPTIONS preflight - Add a "Two canonical forms" callout under Signing naming CANON_SIGNED (excludes signature) vs CANON_ETAG (includes signature) and recommend a shared helper - Mirror the canonical-form naming in the Caching section - Tighten the multi-tenant citation from lib.rs:8-23 to 8-22 (line 10) Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/specs/2026-04-24-relay-capability-doc.md | 92 +++++++++++++------ 1 file changed, 63 insertions(+), 29 deletions(-) diff --git a/docs/specs/2026-04-24-relay-capability-doc.md b/docs/specs/2026-04-24-relay-capability-doc.md index 5fd5e054..237a10bf 100644 --- a/docs/specs/2026-04-24-relay-capability-doc.md +++ b/docs/specs/2026-04-24-relay-capability-doc.md @@ -8,8 +8,9 @@ ## Motivation -Today a Willow client opens a connection to the relay listener in -`crates/relay/src/main.rs:128` — a 
single public TCP port (default +Today a Willow client opens a connection to the relay listener bound +in `crates/relay/src/main.rs:129` and spawned via `run_proxy_listener` +at `crates/relay/src/main.rs:202` — a single public TCP port (default `3340`) that multiplexes `/bootstrap-id` plus an HTTP/WebSocket-upgrade proxy to the loopback iroh-relay — and *only then* discovers whether the relay supports its wire version, whether it happens to be @@ -30,9 +31,15 @@ and filter a relay directory without connecting to each candidate. Today `dispatch_connection` in `crates/relay/src/lib.rs` carves out exactly one path — `BOOTSTRAP_ID_PATH = "/bootstrap-id"` — and -forwards everything else to the loopback iroh-relay. As written, a -naive `GET /.well-known/willow` would land in the upstream and 404 -(or be misread as a relay handshake and dropped). Implementation +forwards everything else to the loopback iroh-relay. The active +production handler is `handle_bootstrap_request_after_line` +(`crates/relay/src/lib.rs:266-314`), reached through +`run_proxy_listener` → `dispatch_connection`; the older +`handle_bootstrap_connection` (`crates/relay/src/lib.rs:102`) is now +only exercised by the test-only `run_bootstrap_listener` path used in +`crates/relay/tests/bootstrap_endpoint.rs`. As written, a naive +`GET /.well-known/willow` would land in the upstream iroh-relay and +404 (or be misread as a relay handshake and dropped). Implementation MUST add: 1. An explicit branch in `dispatch_connection` matching @@ -43,12 +50,17 @@ MUST add: `204` with full ACAO/ACAM/ACAH (OPTIONS preflight). 3. Reuse of `BOOTSTRAP_IO_TIMEOUT` for socket reads/writes and the existing `MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS` semaphore for - admission control. No new tuning knobs in v1. - -This is an **extension** of the `handle_bootstrap_connection` pattern, -not a mirror: that handler currently emits ACAO only (no ACAM/ACAH) -and does not respond to `OPTIONS` preflights. Both gaps are closed -here. 
+ admission control. No new tuning knobs in v1. Note: the constant + name is already stale — it gates the public proxy semaphore in + `crates/relay/src/main.rs:201-207`, not just bootstrap-id traffic + — and SHOULD be renamed (e.g. `MAX_CONCURRENT_PROXY_CONNECTIONS`) + in the same change that introduces this endpoint. + +This is an **extension** of the existing proxy-handler pattern (both +`handle_bootstrap_request_after_line` in production and +`handle_bootstrap_connection` in tests), not a mirror: both handlers +currently emit ACAO only (no ACAM/ACAH) and neither responds to +`OPTIONS` preflights. Both gaps are closed here. ## Endpoint @@ -138,7 +150,11 @@ pub struct Limitation { pub max_topics: Option, // MAX_TOPICS, // `crates/relay/src/lib.rs:80` pub max_connections: Option, // MAX_CONCURRENT_BOOTSTRAP_ - // CONNECTIONS, lib.rs:59 + // CONNECTIONS, lib.rs:59 — + // misnamed; gates the + // public proxy semaphore. + // Rename in the same change + // (see "Dispatch surgery"). pub max_blob_bytes: Option, // 0 = blob pinning off #[serde(default)] pub invite_required: bool, #[serde(default)] pub payment_required: bool, @@ -148,10 +164,14 @@ pub struct Limitation { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct Retention { - /// "replay" (in-memory, 1000-event cap per server) or "storage" - /// (SQLite). See `docs/specs/2026-03-27-worker-nodes-design.md`. + /// "replay" (in-memory; per-author cap, default 1000 events per + /// author per server — see `ReplayConfig::max_events_per_author` + /// in `crates/replay/src/role.rs:49,64`) or "storage" (SQLite). + /// See `docs/specs/2026-03-27-worker-nodes-design.md`. pub mode: String, - pub max_events_per_server: Option, // null = unbounded + pub max_events_per_author: Option, // null = unbounded; + // mirrors the replay + // role's per-author cap pub max_age_seconds: Option, // null = keep everything /// Willow default: false. Sealed channel keys stay peer-to-peer. 
#[serde(default)] pub channel_key_escrow: bool, @@ -200,6 +220,13 @@ of code and ~88 hex characters in the document. Two relays running the same software with the same metadata MUST produce byte-identical canonical bytes — this is what makes cross-relay caching possible (see "Caching" below). +- **Two canonical forms.** Be careful: the bytes the *signature* + covers exclude the `signature` field (`CANON_SIGNED`), but the + bytes the `ETag` covers include it (`CANON_ETAG`). Both subsections + must be implemented in lockstep — sign first, then re-canonicalise + the document with `signature` populated to derive the ETag. A + shared helper that takes an `include_signature: bool` parameter + keeps the two paths from drifting. - **Encoding:** lowercase hex in the `signature` field. - **Verification:** clients verify against the `pubkey` field. A document whose signature does not verify MUST be treated as if @@ -224,21 +251,25 @@ Access-Control-Allow-Methods: GET, OPTIONS Access-Control-Allow-Headers: Accept, Content-Type, If-None-Match ``` -This **extends** the pattern in `handle_bootstrap_connection` -(`crates/relay/src/lib.rs:102`, ACAO emitted at line 116) — that -handler currently sends ACAO only and does not respond to `OPTIONS` -preflights at all, so the new dispatch branch must add ACAM/ACAH -*and* an explicit `OPTIONS → 204` path in `dispatch_connection`. +This **extends** the pattern used by both proxy handlers — the +production-active `handle_bootstrap_request_after_line` +(`crates/relay/src/lib.rs:266-314`, ACAO emitted at line 298) and the +test-only `handle_bootstrap_connection` +(`crates/relay/src/lib.rs:102`, ACAO emitted at line 116). Both +currently send ACAO only and neither responds to `OPTIONS` preflights, +so the new dispatch branch must add ACAM/ACAH *and* an explicit +`OPTIONS → 204` path in `dispatch_connection`. 
## Caching - Response SHOULD carry a strong `ETag` derived from SHA-256 over - the RFC 8785 canonical JSON serialisation (the same bytes the - signature covers, with `signature` included). The ETag is strong, - not weak, because canonical JSON gives byte-equality semantics — - this is what enables cross-relay caching keyed by content hash, - XEP-0115/0390 style. Honour `If-None-Match` with - `304 Not Modified`. + the `CANON_ETAG` form: the RFC 8785 canonical JSON serialisation + with `signature` **included**. (The signature itself covers + `CANON_SIGNED`, the same canonicalisation with `signature` + **removed**; see "Signing".) The ETag is strong, not weak, + because canonical JSON gives byte-equality semantics — this is + what enables cross-relay caching keyed by content hash, XEP-0115 + / 0390 style. Honour `If-None-Match` with `304 Not Modified`. - **Two-tier `Cache-Control` keyed on `status`:** - Steady-state (`status == "ok"` or absent): `Cache-Control: public, max-age=300` — directories and clients @@ -280,7 +311,10 @@ risk" so the endpoint is purely additive. client MUST escape them as text, never HTML. - Rate-limit the endpoint by reusing the existing `MAX_CONCURRENT_BOOTSTRAP_CONNECTIONS` semaphore - (`crates/relay/src/lib.rs:59`). + (`crates/relay/src/lib.rs:59`), which already gates every + connection accepted by `run_proxy_listener`. The constant's name + is stale — see "Dispatch surgery" — and SHOULD be renamed + alongside the new endpoint. ## Cross-spec coordination @@ -328,8 +362,8 @@ mismatch banner is rendered. above. - **Multi-tenant relays.** Resolved in favour of **one shared document per host**. 
The relay in this codebase is topic-agnostic - (`crates/relay/src/lib.rs:8-23`: "All routines in this crate - operate at the transport layer") — it does not know what servers + (`crates/relay/src/lib.rs:8-22`: "All routines in this crate + operate at the transport layer", line 10) — it does not know what servers it relays for, only `TopicAnnounce` strings. Per-server `/.well-known/willow/{server_id}` would require teaching the relay to enumerate servers it has no semantic knowledge of, which From 1002ff4384227f1d626510c3c844cd96981ff187 Mon Sep 17 00:00:00 2001 From: Noah Date: Sat, 25 Apr 2026 02:06:20 -0700 Subject: [PATCH 4/4] spec(#215): fix internal schema contradictions - round 3 - signature: prose now matches schema (required); minimal-doc example updated - pubkey: now required (Option dropped) since v1 signing is mandatory - --relay-port: cite main.rs:87-88 (attribute + field) Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/specs/2026-04-24-relay-capability-doc.md | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/specs/2026-04-24-relay-capability-doc.md b/docs/specs/2026-04-24-relay-capability-doc.md index 237a10bf..8bfa9508 100644 --- a/docs/specs/2026-04-24-relay-capability-doc.md +++ b/docs/specs/2026-04-24-relay-capability-doc.md @@ -70,7 +70,7 @@ currently emit ACAO only (no ACAM/ACAH) and neither responds to | Method | `GET` (plus `OPTIONS` for preflight) | | Response Content-Type | `application/willow+json; charset=utf-8` | | CORS | required — `Access-Control-Allow-Origin: *` | -| Served on | the public relay HTTP port (default `3340`, configurable via `--relay-port`; see `crates/relay/src/main.rs:87`) | +| Served on | the public relay HTTP port (default `3340`, configurable via `--relay-port`; see `crates/relay/src/main.rs:87-88`) | **Why `/.well-known/willow` over `/willow-info`?** The relay proxy in `crates/relay/src/lib.rs:186` already dispatches on request path. 
@@ -87,9 +87,13 @@ suffix still opts us into generic JSON tooling. ## Field schema The document is a single JSON object. All fields are optional except -`protocol_versions`; a minimal compliant document is -`{"protocol_versions":[1]}`. **Clients MUST ignore unknown top-level -fields** so additions remain forward-compatible. +`protocol_versions`, `pubkey`, and `signature` (the last two are required +because v1 mandates signing — see "Signing" below). The unsigned core of +a minimal compliant document is `{"protocol_versions":[1]}`; the on-wire +form additionally carries `pubkey` and `signature`, e.g. +`{"protocol_versions":[1],"pubkey":"<hex>","signature":"<hex>"}`. +**Clients MUST ignore unknown top-level fields** so additions remain +forward-compatible. ```rust /// Capability document served at GET /.well-known/willow. @@ -100,7 +104,10 @@ pub struct WillowRelayInfo { pub description: Option<String>, // plain text, no markup pub contact: Option<String>, // mailto: / https: / matrix: pub admin_pubkey: Option<String>, // hex Ed25519, operator DM key - pub pubkey: Option<String>, // hex Ed25519, relay's own key + /// REQUIRED. Hex Ed25519 public half of the relay's own key — + /// the verifier for `signature`. Mandatory in v1 because signing + /// is mandatory; see "Signing" below. + pub pubkey: String, pub software: Option<String>, // project name; operators MAY omit pub version: Option<String>, // coarse semver (e.g. "0.3.x"); // operators MAY omit; never a git SHA @@ -338,10 +345,12 @@ strings are pinned here: **Unit (serde, in `crates/relay/src/`):** (1) round-trip a fully populated `WillowRelayInfo` through `serde_json` with byte-for-byte equality after canonical re-serialisation; (2) parse the minimum -document `{"protocol_versions":[1]}` and assert all other fields are -`None`/empty; (3) parse a document with an unknown top-level key and -assert it is ignored; (4) reject a document missing -`protocol_versions` with a typed error. 
+on-wire document +`{"protocol_versions":[1],"pubkey":"<hex>","signature":"<hex>"}` and +assert all other fields are `None`/empty; (3) parse a document with an +unknown top-level key and assert it is ignored; (4) reject a document +missing any of `protocol_versions`, `pubkey`, or `signature` with a +typed error. **Integration** (new `crates/relay/tests/capability_endpoint.rs`, alongside `bootstrap_endpoint.rs`): (1) `GET /.well-known/willow` →