diff --git a/.changeset/fresh-alarms-remember-names.md b/.changeset/fresh-alarms-remember-names.md new file mode 100644 index 00000000..b74e655d --- /dev/null +++ b/.changeset/fresh-alarms-remember-names.md @@ -0,0 +1,5 @@ +--- +"partyserver": patch +--- + +Persist a `__ps_name` fallback for name-based Durable Objects during initialization. This lets alarm handlers recover `this.name` even when firing on a stale on-disk alarm record that was scheduled by an older workerd version that didn't yet persist `name` into the alarm record. See cloudflare/partykit#390. diff --git a/fixtures/alarm-restart-e2e/README.md b/fixtures/alarm-restart-e2e/README.md new file mode 100644 index 00000000..6a98479d --- /dev/null +++ b/fixtures/alarm-restart-e2e/README.md @@ -0,0 +1,88 @@ +# `alarm-restart-e2e` + +Reproducer for the runtime contract that motivates partyserver's +`__ps_name` fallback record. Pins down behavior reported in +[cloudflare/partykit#390](https://github.com/cloudflare/partykit/issues/390) +across three Durable Objects in the same Worker: + +| DO | Class | Extends | +| ------------ | --------------------------------- | -------------------------------------------------------------------------- | +| `RawAlarm` | `RawAlarm` | `DurableObject` (no PartyServer) | +| `StockAlarm` | `StockAlarm` (built from a mixin) | `Server` from `partyserver@0.5.3` (aliased as `partyserver-stock`) | +| `FixedAlarm` | `FixedAlarm` (built from a mixin) | `Server` from this workspace's local `partyserver` (with the fallback fix) | + +Each DO records an observation (`{source, ctxIdName, storedPsName, +partyName, partyNameError, at}`) to its own SQLite-backed storage each +time it schedules an alarm (`source: "fetch"`) or runs `alarm()`; `?snapshot` reads are not recorded. Observations accumulate +across dev-server restarts. + +## Run the experiment + +```bash +npm install +npm run start +``` + +In a second shell, schedule an alarm into a fresh room and observe: + +```bash +ROOM="cold-strict-$(date +%s)" + +# Session A: schedule into a fresh room. 
This is the only entry into +# the DO instances during session A. After this, the alarm record on +# disk is what carries the DO across the restart. +curl -s "http://localhost:5173/raw/$ROOM?schedule=45" +curl -s "http://localhost:5173/parties/stock-alarm/$ROOM?schedule=45" +curl -s "http://localhost:5173/parties/fixed-alarm/$ROOM?schedule=45" +``` + +Then kill `vite dev` (Ctrl-C), restart it (`npm run start`), and +**don't touch the room** until well past the 45-second mark. Then: + +```bash +curl -s "http://localhost:5173/raw/$ROOM?snapshot=1" | jq +curl -s -i "http://localhost:5173/parties/stock-alarm/$ROOM?snapshot=1" | head -n 12 +curl -s "http://localhost:5173/parties/fixed-alarm/$ROOM?snapshot=1" | jq +``` + +Observed behavior on `workerd@1.20260424.1`, +`compatibility_date: "2026-01-28"`: + +- `RawAlarm`: alarm observation has no `ctxIdName` (i.e. `ctx.id.name` + is `undefined`). Subsequent fetches via `idFromName(...)` ALSO see + `ctx.id.name === undefined` for the lifetime of that DO instance — + the instance is "born nameless" and stays that way. + +- `StockAlarm`: `Server.fetch` returns 500 with the "Cannot determine + the name" error. Reproduces the failure reported in cloudflare/partykit#390. + +- `FixedAlarm`: `alarm()` runs successfully. `ctx.id.name` is + `undefined` in the observation, but `this.name` resolves from the + on-disk `__ps_name` record that PartyServer wrote during session + A's fetch. `partyserver` recovers the name; the DO continues + working normally. + +## Why three DOs + +`RawAlarm` pins down what workerd actually does, free of any +framework. `StockAlarm` reproduces the user-reported bug under +`partyserver@0.5.3`. `FixedAlarm` validates that the workspace fix +restores normal operation under the same conditions. + +## Critical: don't warm the DOs before the alarm fires + +Any HTTP fetch or websocket message sent to a DO between session B +startup and the alarm firing time will wake the DO via that entry +point first. 
workerd captures `ctx.id.name` from the first entry +point and that value persists for the instance's lifetime. So a +pre-alarm fetch silently warms `ctx.id.name` and masks the bug. The +critical window is from `vite dev` starting back up until the +expected alarm fire time. Don't open the page in a browser, don't +curl `?snapshot`, don't let any client reconnect to the room. Just +wait. + +The frontend `index.html` exists for manual exploration but is +deliberately separate from the cold-DO experiment so a developer +running the page won't accidentally warm a different room. To run +the cold experiment, drive everything from `curl` against rooms the +frontend isn't subscribed to. diff --git a/fixtures/alarm-restart-e2e/env.d.ts b/fixtures/alarm-restart-e2e/env.d.ts new file mode 100644 index 00000000..90b90bd0 --- /dev/null +++ b/fixtures/alarm-restart-e2e/env.d.ts @@ -0,0 +1,10 @@ +/* eslint-disable */ +// Generated by Wrangler by running `wrangler types env.d.ts --include-runtime false` (hash: 9d5eb238d4dbfdedf1bf7b0674d6a12c) +declare namespace Cloudflare { + interface Env { + RawAlarm: DurableObjectNamespace /* RawAlarm */; + StockAlarm: DurableObjectNamespace /* StockAlarm */; + FixedAlarm: DurableObjectNamespace /* FixedAlarm */; + } +} +interface Env extends Cloudflare.Env {} diff --git a/fixtures/alarm-restart-e2e/index.html b/fixtures/alarm-restart-e2e/index.html new file mode 100644 index 00000000..21722872 --- /dev/null +++ b/fixtures/alarm-restart-e2e/index.html @@ -0,0 +1,137 @@ + + + + + partyserver alarm-restart e2e + + + +

partyserver alarm-restart e2e

+
+ Room name: + + Schedule alarm in + s + + +
+
+
+

RawAlarm ?

+
+ +
+
(no events yet)
+
+
+

+ StockAlarm (partyserver@0.5.3) + ? +

+
+ +
+
(no events yet)
+
+
+

+ FixedAlarm (workspace partyserver) + ? +

+
+ +
+
(no events yet)
+
+
+ + + diff --git a/fixtures/alarm-restart-e2e/package.json b/fixtures/alarm-restart-e2e/package.json new file mode 100644 index 00000000..78f7c317 --- /dev/null +++ b/fixtures/alarm-restart-e2e/package.json @@ -0,0 +1,21 @@ +{ + "name": "@partyserver/fixture-alarm-restart-e2e", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "start": "vite dev", + "types": "wrangler types env.d.ts --include-runtime false" + }, + "dependencies": { + "partyserver": "*", + "partyserver-stock": "npm:partyserver@0.5.3", + "partysocket": "^1.1.18" + }, + "devDependencies": { + "@cloudflare/vite-plugin": "^1.33.2", + "@cloudflare/workers-types": "^4.20260424.1", + "vite": "^8.0.10", + "wrangler": "^4.85.0" + } +} diff --git a/fixtures/alarm-restart-e2e/src/client.ts b/fixtures/alarm-restart-e2e/src/client.ts new file mode 100644 index 00000000..5bc2bcdd --- /dev/null +++ b/fixtures/alarm-restart-e2e/src/client.ts @@ -0,0 +1,207 @@ +import PartySocket from "partysocket"; + +type CardId = "raw" | "stock" | "fixed"; + +const cards: Record< + CardId, + { + el: HTMLElement; + log: HTMLElement; + status: HTMLElement; + summary: HTMLElement; + } +> = { + raw: getCard("raw"), + stock: getCard("stock"), + fixed: getCard("fixed") +}; + +function getCard(id: CardId) { + const el = document.getElementById(`card-${id}`)!; + return { + el, + log: el.querySelector("[data-log]") as HTMLElement, + status: el.querySelector("[data-status]") as HTMLElement, + summary: el.querySelector("[data-summary]") as HTMLElement + }; +} + +function append(id: CardId, line: string) { + const ts = new Date().toISOString().slice(11, 23); + const card = cards[id]; + if (card.log.textContent === "(no events yet)") { + card.log.textContent = ""; + } + card.log.textContent += `${ts} ${line}\n`; + card.log.scrollTop = card.log.scrollHeight; +} + +function setStatus(id: CardId, label: string, kind: "ok" | "warn" | "err") { + cards[id].status.textContent = label; + cards[id].status.className = `badge 
${kind}`; +} + +function setSummary(id: CardId, text: string) { + cards[id].summary.textContent = text; +} + +const roomInput = document.getElementById("room") as HTMLInputElement; +const secondsInput = document.getElementById("seconds") as HTMLInputElement; + +const sockets: Record = { + raw: null, + stock: null, + fixed: null +}; + +// RawAlarm uses a plain HTTP fetch (no websocket). We poll its +// snapshot endpoint to surface what alarm() observed across the +// dev-server restart. This is closer to how a "raw" DO would be +// observed in practice and avoids re-implementing partyserver's +// hibernation handshake by hand. +function getRoom() { + return roomInput.value || "default-room"; +} + +function connect(id: "stock" | "fixed", party: string, room: string) { + if (sockets[id]) { + sockets[id]!.close(); + } + setStatus(id, "connecting", "warn"); + const ws = new PartySocket({ + host: location.host, + party, + room, + minUptime: 1000, + maxRetries: Infinity, + maxReconnectionDelay: 2000 + }); + ws.addEventListener("open", () => { + setStatus(id, "open", "ok"); + append(id, "ws open"); + }); + ws.addEventListener("close", (e) => { + setStatus(id, "closed", "warn"); + append(id, `ws close (${e.code} ${e.reason || ""})`); + }); + ws.addEventListener("error", (e) => { + setStatus(id, "error", "err"); + append(id, `ws error ${(e as ErrorEvent).message ?? ""}`); + }); + ws.addEventListener("message", (e) => { + let msg: Record | null = null; + try { + msg = JSON.parse(e.data) as Record; + } catch { + append(id, `← ${String(e.data).slice(0, 200)}`); + return; + } + append(id, `← ${JSON.stringify(msg)}`); + if (msg.type === "snapshot") { + setSummary(id, summarizeSnapshot(msg)); + } else if (msg.type === "connected") { + const ctxName = msg.ctxIdName as string | undefined; + const partyName = msg.partyName as string | null; + setSummary( + id, + `ctx.id.name=${ctxName ?? "undefined"}, this.name=${partyName ?? 
"(throws)"}` + ); + } + }); + sockets[id] = ws; +} + +function summarizeSnapshot(msg: Record): string { + const obs = + (msg.observations as { source: string; ctxIdName?: string }[]) ?? []; + const fetches = obs.filter((o) => o.source === "fetch").length; + const alarms = obs.filter((o) => o.source === "alarm").length; + const lastAlarm = [...obs].reverse().find((o) => o.source === "alarm"); + const lastFetch = [...obs].reverse().find((o) => o.source === "fetch"); + return ( + `obs=${obs.length} (fetch=${fetches}, alarm=${alarms})` + + (lastFetch + ? ` | lastFetch.ctxIdName=${lastFetch.ctxIdName ?? "undefined"}` + : "") + + (lastAlarm + ? ` | lastAlarm.ctxIdName=${lastAlarm.ctxIdName ?? "undefined"}` + : "") + + ` | storedPsName=${(msg.storedPsName as string | undefined) ?? "undefined"}` + ); +} + +async function refreshRaw() { + const room = getRoom(); + setStatus("raw", "fetching", "warn"); + try { + const res = await fetch(`/raw/${encodeURIComponent(room)}?snapshot=1`); + if (!res.ok) { + setStatus("raw", `http ${res.status}`, "err"); + return; + } + setStatus("raw", "ok", "ok"); + const data = (await res.json()) as { + ctxIdName: string | undefined; + observations: { + source: string; + ctxIdName?: string; + storedPsName?: string; + }[]; + }; + append("raw", `← ${JSON.stringify(data).slice(0, 400)}`); + const fetches = data.observations.filter( + (o) => o.source === "fetch" + ).length; + const alarms = data.observations.filter((o) => o.source === "alarm").length; + const lastAlarm = [...data.observations] + .reverse() + .find((o) => o.source === "alarm"); + setSummary( + "raw", + `ctx.id.name(now)=${data.ctxIdName ?? "undefined"}, obs=${data.observations.length} ` + + `(fetch=${fetches}, alarm=${alarms})` + + (lastAlarm + ? ` | lastAlarm.ctxIdName=${lastAlarm.ctxIdName ?? 
"undefined"}` + : "") + ); + } catch (e) { + setStatus("raw", "fetch failed", "err"); + append("raw", `error ${(e as Error).message}`); + } +} + +async function scheduleRaw(inSeconds: number) { + const room = getRoom(); + await fetch( + `/raw/${encodeURIComponent(room)}?schedule=${encodeURIComponent(String(inSeconds))}` + ); + append("raw", `→ schedule(${inSeconds}s)`); + await refreshRaw(); +} + +document.getElementById("schedule-all")!.addEventListener("click", async () => { + const seconds = Number(secondsInput.value || "0"); + await scheduleRaw(seconds); + for (const id of ["stock", "fixed"] as const) { + sockets[id]?.send(JSON.stringify({ type: "schedule", inSeconds: seconds })); + append(id, `→ schedule(${seconds}s)`); + } +}); + +document.getElementById("snapshot-all")!.addEventListener("click", async () => { + await refreshRaw(); + for (const id of ["stock", "fixed"] as const) { + sockets[id]?.send(JSON.stringify({ type: "snapshot" })); + append(id, `→ snapshot`); + } +}); + +function reconnectAll() { + const room = getRoom(); + connect("stock", "stock-alarm", room); + connect("fixed", "fixed-alarm", room); + refreshRaw(); +} + +roomInput.addEventListener("change", () => reconnectAll()); +reconnectAll(); diff --git a/fixtures/alarm-restart-e2e/src/server.ts b/fixtures/alarm-restart-e2e/src/server.ts new file mode 100644 index 00000000..624a1395 --- /dev/null +++ b/fixtures/alarm-restart-e2e/src/server.ts @@ -0,0 +1,267 @@ +/** + * Three Durable Objects side-by-side that all do the same thing + * (schedule an alarm a few seconds out, record what they observe in + * `alarm()`) so we can disentangle three layers of behavior: + * + * - RawAlarm: plain `DurableObject`, no PartyServer at all. + * Pins the workerd contract for `ctx.id.name` in + * alarm handlers. + * + * - StockAlarm: extends Server from the unmodified, npm-published + * `partyserver@0.5.3` (aliased here as + * `partyserver-stock`). 
Reproduces the failure mode + * reported in cloudflare/partykit#390 if the runtime + * truly drops `ctx.id.name` on alarm wake. + * + * - FixedAlarm: extends Server from this workspace's local + * partyserver, which now persists a `__ps_name` + * fallback during initialization. Verifies that the + * fix recovers `this.name` even when `ctx.id.name` + * is absent in the alarm handler. + * + * Each DO appends an "observation" to its own SQLite-backed storage + * every time `alarm()` runs. Observations are never overwritten so + * we can read them all back across a dev-server restart and see what + * fired before / after. + */ + +import { DurableObject, env } from "cloudflare:workers"; +import { Server as FixedServer, routePartykitRequest } from "partyserver"; +import { Server as StockServer } from "partyserver-stock"; + +import type { Connection, WSMessage } from "partyserver"; + +type Observation = { + /** Wall clock time when the entry point ran. */ + at: number; + /** Which entry point recorded this observation. */ + source: "fetch" | "alarm"; + /** Whatever the runtime told us at observation time. */ + ctxIdName: string | undefined; + /** Snapshot of the legacy `__ps_name` fallback record on disk. */ + storedPsName: string | undefined; + /** + * For PartyServer-based DOs only: the value of `this.name`, + * captured behind a try/catch since the getter throws when it + * can't resolve. + */ + partyName: string | null; + /** + * For PartyServer-based DOs only: the message thrown by the + * `this.name` getter, if it threw. + */ + partyNameError: string | null; +}; + +const OBSERVATIONS_KEY = "__observations"; + +async function appendObservation( + storage: DurableObjectStorage, + obs: Observation +): Promise { + const existing = (await storage.get(OBSERVATIONS_KEY)) ?? 
[]; + existing.push(obs); + await storage.put(OBSERVATIONS_KEY, existing); +} + +async function readObservations( + storage: DurableObjectStorage +): Promise { + return (await storage.get(OBSERVATIONS_KEY)) ?? []; +} + +/** Read partyserver's legacy fallback record without coupling to internals. */ +async function readStoredPsName( + storage: DurableObjectStorage +): Promise { + return storage.get("__ps_name"); +} + +/** + * Raw runtime probe. No PartyServer, just a `DurableObject`. Captures + * `ctx.id.name` from both `fetch()` and `alarm()` so we can see exactly + * what workerd hands us in each entry point, without any framework + * fallback in between. + */ +export class RawAlarm extends DurableObject { + async fetch(request: Request): Promise { + const url = new URL(request.url); + if (url.searchParams.has("schedule")) { + const inSeconds = Number(url.searchParams.get("schedule") ?? "0"); + await this.ctx.storage.setAlarm(Date.now() + inSeconds * 1000); + await appendObservation(this.ctx.storage, { + at: Date.now(), + source: "fetch", + ctxIdName: this.ctx.id.name, + storedPsName: await readStoredPsName(this.ctx.storage), + partyName: null, + partyNameError: null + }); + return Response.json({ scheduled: true }); + } + if (url.searchParams.has("snapshot")) { + return Response.json({ + ctxIdName: this.ctx.id.name, + observations: await readObservations(this.ctx.storage) + }); + } + return new Response("RawAlarm"); + } + + async alarm(): Promise { + await appendObservation(this.ctx.storage, { + at: Date.now(), + source: "alarm", + ctxIdName: this.ctx.id.name, + storedPsName: await readStoredPsName(this.ctx.storage), + partyName: null, + partyNameError: null + }); + } +} + +/** + * Mixin that gives a PartyServer-based DO the same observation shape as + * RawAlarm. 
Defined as a function so we can apply identical behavior to + * the stock and the fixed PartyServer subclasses without duplicating + * the code or relying on inheritance gymnastics across the two + * versions. + */ +function definePartyServerAlarm< + S extends typeof FixedServer | typeof StockServer +>(Base: S) { + // The two `Server` types resolve to incompatible class types because + // they come from different package versions, so we cast through `unknown` + // to the workspace `Server` type; both variants are typed as that class. + return class extends (Base as unknown as typeof FixedServer) { + static options = { hibernate: true }; + + /** + * HTTP-only entry points for the cold-DO experiment. We keep this + * separate from the websocket flow so we can exercise scenarios + * where no client is subscribed at all — meaning the alarm is + * the very first entry into a cold DO instance. + */ + async onRequest(request: Request): Promise { + const url = new URL(request.url); + if (url.searchParams.has("schedule")) { + const inSeconds = Number(url.searchParams.get("schedule") ?? 
"0"); + await this.ctx.storage.setAlarm(Date.now() + inSeconds * 1000); + await this.#appendObs("fetch"); + return Response.json({ scheduled: true }); + } + if (url.searchParams.has("snapshot")) { + const obs = await readObservations(this.ctx.storage); + const { name, error } = this.#readNameSafely(); + return Response.json({ + ctxIdName: this.ctx.id.name, + partyName: name, + partyNameError: error, + storedPsName: await readStoredPsName(this.ctx.storage), + observations: obs + }); + } + return new Response("ok"); + } + + onConnect(connection: Connection): void { + const { name, error } = this.#readNameSafely(); + connection.send( + JSON.stringify({ + type: "connected", + ctxIdName: this.ctx.id.name, + partyName: name, + partyNameError: error + }) + ); + } + + async onMessage(connection: Connection, message: WSMessage): Promise { + type Incoming = { type?: string; inSeconds?: number }; + let parsed: Incoming | null = null; + try { + parsed = JSON.parse(String(message)) as Incoming; + } catch { + parsed = null; + } + const incoming = parsed; + if (incoming?.type === "schedule") { + const seconds = incoming.inSeconds ?? 
0; + await this.ctx.storage.setAlarm(Date.now() + seconds * 1000); + await this.#appendObs("fetch"); + connection.send( + JSON.stringify({ type: "scheduled", inSeconds: seconds }) + ); + } else if (incoming?.type === "snapshot") { + const obs = await readObservations(this.ctx.storage); + const { name, error } = this.#readNameSafely(); + connection.send( + JSON.stringify({ + type: "snapshot", + ctxIdName: this.ctx.id.name, + partyName: name, + partyNameError: error, + storedPsName: await readStoredPsName(this.ctx.storage), + observations: obs + }) + ); + } + } + + async onAlarm(): Promise { + await this.#appendObs("alarm"); + } + + async #appendObs(source: "fetch" | "alarm"): Promise { + const { name, error } = this.#readNameSafely(); + await appendObservation(this.ctx.storage, { + at: Date.now(), + source, + ctxIdName: this.ctx.id.name, + storedPsName: await readStoredPsName(this.ctx.storage), + partyName: name, + partyNameError: error + }); + } + + #readNameSafely(): { name: string | null; error: string | null } { + try { + return { name: this.name, error: null }; + } catch (e) { + return { + name: null, + error: e instanceof Error ? e.message : String(e) + }; + } + } + }; +} + +export const StockAlarm = definePartyServerAlarm(StockServer); +export const FixedAlarm = definePartyServerAlarm(FixedServer); + +/** + * Single fetch entry point that fans out to whichever DO the URL + * targeted. We use partyserver's `routePartykitRequest` for the + * PartyServer-backed DOs (since that's the realistic path) and a + * direct `idFromName` lookup for `RawAlarm`. + */ +export default { + async fetch(request: Request): Promise { + const url = new URL(request.url); + + // /raw/?schedule=N | ?snapshot=1 + if (url.pathname.startsWith("/raw/")) { + const name = url.pathname.slice("/raw/".length).split("/")[0] ?? 
""; + if (!name) return new Response("missing name", { status: 400 }); + const id = env.RawAlarm.idFromName(name); + const stub = env.RawAlarm.get(id); + return stub.fetch(request); + } + + const partykit = await routePartykitRequest(request, env); + if (partykit) return partykit; + + return new Response("Not Found", { status: 404 }); + } +} satisfies ExportedHandler; diff --git a/fixtures/alarm-restart-e2e/tsconfig.json b/fixtures/alarm-restart-e2e/tsconfig.json new file mode 100644 index 00000000..51b070ef --- /dev/null +++ b/fixtures/alarm-restart-e2e/tsconfig.json @@ -0,0 +1,7 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "types": ["@cloudflare/workers-types", "vite/client"], + "lib": ["DOM", "DOM.Iterable", "ESNext"] + } +} diff --git a/fixtures/alarm-restart-e2e/vite.config.ts b/fixtures/alarm-restart-e2e/vite.config.ts new file mode 100644 index 00000000..f3057167 --- /dev/null +++ b/fixtures/alarm-restart-e2e/vite.config.ts @@ -0,0 +1,6 @@ +import { cloudflare } from "@cloudflare/vite-plugin"; +import { defineConfig } from "vite"; + +export default defineConfig({ + plugins: [cloudflare()] +}); diff --git a/fixtures/alarm-restart-e2e/wrangler.jsonc b/fixtures/alarm-restart-e2e/wrangler.jsonc new file mode 100644 index 00000000..6b77e00c --- /dev/null +++ b/fixtures/alarm-restart-e2e/wrangler.jsonc @@ -0,0 +1,18 @@ +{ + "name": "partyserver-fixture-alarm-restart-e2e", + "main": "src/server.ts", + "compatibility_date": "2026-01-28", + "durable_objects": { + "bindings": [ + { "name": "RawAlarm", "class_name": "RawAlarm" }, + { "name": "StockAlarm", "class_name": "StockAlarm" }, + { "name": "FixedAlarm", "class_name": "FixedAlarm" } + ] + }, + "migrations": [ + { + "tag": "v1", + "new_sqlite_classes": ["RawAlarm", "StockAlarm", "FixedAlarm"] + } + ] +} diff --git a/package-lock.json b/package-lock.json index 2e6d9c5d..b26d2a1a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -36,6 +36,21 @@ "wrangler": "^4.85.0" } }, + 
"fixtures/alarm-restart-e2e": { + "name": "@partyserver/fixture-alarm-restart-e2e", + "version": "0.0.0", + "dependencies": { + "partyserver": "*", + "partyserver-stock": "npm:partyserver@0.5.3", + "partysocket": "^1.1.18" + }, + "devDependencies": { + "@cloudflare/vite-plugin": "^1.33.2", + "@cloudflare/workers-types": "^4.20260424.1", + "vite": "^8.0.10", + "wrangler": "^4.85.0" + } + }, "fixtures/chat": { "name": "@partyserver/fixture-chat", "version": "0.0.11", @@ -915,7 +930,6 @@ "version": "4.20260424.1", "resolved": "https://registry.npmjs.org/@cloudflare/workers-types/-/workers-types-4.20260424.1.tgz", "integrity": "sha512-0DLJ9yEk1KKzPbqop80Gw/P1wkKKzawmipULiJWdBXIBCoMvE0OVWms3IrL/Q/G7tfmPop9yF4XlZ69k9JLYng==", - "dev": true, "license": "MIT OR Apache-2.0" }, "node_modules/@cspotcode/source-map-support": { @@ -3635,6 +3649,10 @@ "node": "^20.19.0 || >=22.12.0" } }, + "node_modules/@partyserver/fixture-alarm-restart-e2e": { + "resolved": "fixtures/alarm-restart-e2e", + "link": true + }, "node_modules/@partyserver/fixture-chat": { "resolved": "fixtures/chat", "link": true @@ -8936,6 +8954,37 @@ "resolved": "packages/partyserver", "link": true }, + "node_modules/partyserver-stock": { + "name": "partyserver", + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/partyserver/-/partyserver-0.5.3.tgz", + "integrity": "sha512-6frMT7gVqSR2oO6fiJN8uHRT+hl3TY7bhVCDyfym2fKpSO08dyJQxIJphzR9Ox1eJ+JH9ce+QcwI6y3dZwU86g==", + "license": "ISC", + "dependencies": { + "nanoid": "^5.1.9" + }, + "peerDependencies": { + "@cloudflare/workers-types": "^4.20260424.1" + } + }, + "node_modules/partyserver-stock/node_modules/nanoid": { + "version": "5.1.9", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-5.1.9.tgz", + "integrity": "sha512-ZUvP7KeBLe3OZ1ypw6dI/TzYJuvHP77IM4Ry73waSQTLn8/g8rpdjfyVAh7t1/+FjBtG4lCP42MEbDxOsRpBMw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": 
"bin/nanoid.js" + }, + "engines": { + "node": "^18 || >=20" + } + }, "node_modules/partysession": { "resolved": "packages/partysession", "link": true diff --git a/packages/partyserver/README.md b/packages/partyserver/README.md index e35f01f2..b017fc6a 100644 --- a/packages/partyserver/README.md +++ b/packages/partyserver/README.md @@ -129,7 +129,7 @@ These methods can be optionally `async`: ## Properties -- `.name` - (readonly) the server's name. Resolves from the underlying Durable Object's `ctx.id.name`, populated whenever the DO is addressed via `idFromName()` / `getByName()` — which is the supported way to address PartyServer DOs. Available inside every entry point including the constructor, `onStart()`, `onAlarm()`, and hibernating websocket handlers. (For the narrow case of legacy DOs bootstrapped via `setName()` — e.g. a `__ps_name` storage record from an older version — that override is recovered automatically. PartyServer no longer writes that record itself for new DOs.) +- `.name` - (readonly) the server's name. Resolves from the underlying Durable Object's `ctx.id.name`, populated whenever the DO is addressed via `idFromName()` / `getByName()` — which is the supported way to address PartyServer DOs. Available inside every entry point including the constructor, `onStart()`, `onAlarm()`, and hibernating websocket handlers. PartyServer also persists this name to a small `__ps_name` fallback record during initialization so that alarm handlers firing on stale on-disk alarm records (scheduled by older workerd versions that didn't persist `name`) can still recover the name. Legacy DOs bootstrapped via `setName()` use the same fallback. DOs addressed via `idFromString()` or `newUniqueId()` without a `setName()` bootstrap are not supported and `.name` will throw. 
@@ -215,7 +215,7 @@ export class ParentServer extends Server { } ``` -Without the explicit `id`, the facet's `ctx.id.name` is the parent's name, `this.name` returns the parent's name, and `setName(facetName)` throws (it would mismatch `ctx.id.name`). With the explicit `id`, the facet has its own native `ctx.id.name`, no `setName()` is needed, no storage write is involved, and cold wakes recover the name automatically (the factory re-runs and `idFromName(facetName)` is deterministic). +Without the explicit `id`, the facet's `ctx.id.name` is the parent's name, `this.name` returns the parent's name, and `setName(facetName)` throws (it would mismatch `ctx.id.name`). With the explicit `id`, the facet has its own native `ctx.id.name`, no `setName()` is needed, and cold wakes recover the name automatically (the factory re-runs and `idFromName(facetName)` is deterministic). PartyServer may also persist that native name as an alarm fallback for older compatibility dates. Plain strings (`id: "child-foo"`) are NOT a substitute for `idFromName(facetName)` — workerd treats a string `id` as `idFromString`-like and the resulting facet has no `ctx.id.name`, so `this.name` will throw. diff --git a/packages/partyserver/src/index.ts b/packages/partyserver/src/index.ts index 35f7a819..933eb3e0 100644 --- a/packages/partyserver/src/index.ts +++ b/packages/partyserver/src/index.ts @@ -548,22 +548,18 @@ export class Server< * Read the legacy `__ps_name` storage record as a fallback source of * `this.name` when `ctx.id.name` is unavailable. Covers: * - * 1. Pre-2026-03-15 alarms, which fire without `ctx.id.name` - * populated on the alarm handler (see the Durable Objects - * ID docs: https://developers.cloudflare.com/durable-objects/api/id/#name). + * 1. 
Alarm handlers firing on alarm records that were scheduled by + * a workerd version that did not yet persist `name` into the + * alarm record (see the Durable Objects ID docs: + * https://developers.cloudflare.com/durable-objects/api/id/#name). + * The runtime contract for current workerd populates `ctx.id.name` + * in alarm handlers — see the "Raw runtime contract" tests — so + * this fallback exists primarily for stale on-disk alarm records + * and for defense-in-depth against future runtime changes. * 2. Legacy framework-level bootstrap patterns that write * `__ps_name` directly (or call `setName()`) before triggering * `__unsafe_ensureInitialized()` — typically DOs addressed via * `idFromString()` / `newUniqueId()` plus a name override. - * - * PartyServer no longer writes this record itself. Everything that - * reads it is reading something written by an older version of - * PartyServer or by a framework that embeds it. - * - * Not relevant to Cloudflare Agents facets — the recommended - * facet pattern passes an explicit `id` in `FacetStartupOptions`, - * so the facet has its own `ctx.id.name` and never hits this - * fallback. See the README for the full pattern. */ async #hydrateNameFromLegacyStorage(): Promise { if (this.#_name) return; @@ -573,6 +569,17 @@ export class Server< } } + async #persistNameFallbackFromCtxId(): Promise { + const ctxName = this.ctx.id.name; + if (ctxName === undefined || this.#_name) return; + + const stored = await this.ctx.storage.get(NAME_STORAGE_KEY); + if (stored !== ctxName) { + await this.ctx.storage.put(NAME_STORAGE_KEY, ctxName); + } + this.#_name = ctxName; + } + /** * @internal — Do not use directly. This is an escape hatch for frameworks * (like Agents) that receive calls via native DO RPC, bypassing the @@ -587,13 +594,15 @@ export class Server< async #ensureInitialized(): Promise { if (this.#status === "started") return; - // Name resolution fallback. 
The happy path (DO addressed via - // idFromName/getByName) short-circuits here because `ctx.id.name` - // is already populated — no storage read. The slow path covers - // pre-2026-03-15 alarms and framework bootstrap patterns (e.g. - // Agents facets) that write `__ps_name` directly before the - // first `onStart()` runs. - if (!this.ctx.id.name && !this.#_name) { + // Persist a fallback record for name-based DOs before user startup + // code can schedule alarms. Current workerd populates `ctx.id.name` + // in alarm handlers, but stale on-disk alarm records scheduled by + // older workerd versions do not, and we want recovery from those + // without requiring users to wipe `.wrangler/state` or to reschedule + // alarms from a fetch handler. See cloudflare/partykit#390. + if (this.ctx.id.name !== undefined) { + await this.#persistNameFallbackFromCtxId(); + } else if (!this.#_name) { await this.#hydrateNameFromLegacyStorage(); } @@ -655,10 +664,9 @@ export class Server< * This is available inside every entry point (including the constructor, * alarms, and hibernating websocket handlers). * - * For the narrow case of alarms that were scheduled before 2026-03-15 - * (where `ctx.id.name` is undefined inside the alarm handler), the name - * is recovered from a legacy storage record written by older versions - * of PartyServer. See `alarm()`. + * For alarm handlers firing on stale on-disk alarm records from + * older workerd versions that didn't persist `name` into the alarm + * record, the name is recovered from a storage fallback record. * * Throws if neither source is available — typically this means the DO * was addressed via `idFromString()` or `newUniqueId()`, which is not @@ -669,7 +677,7 @@ export class Server< if (ctxName !== undefined) return ctxName; if (this.#_name) return this.#_name; throw new Error( - `Attempting to read .name on ${this.#ParentClass.name}, but this.ctx.id.name is not set. 
PartyServer requires DOs to be addressed via idFromName()/getByName(). If this is a legacy alarm scheduled before 2026-03-15, reschedule it from a fetch handler to restore the name.` + `Attempting to read .name on ${this.#ParentClass.name}, but this.ctx.id.name is not set and no ${NAME_STORAGE_KEY} fallback record is available. PartyServer requires DOs to be addressed via idFromName()/getByName(), or explicitly bootstrapped with setName() when using idFromString()/newUniqueId(). If this happens in an alarm handler firing on a stale alarm record, initialize the DO from a fetch/RPC entry point first so PartyServer can persist the fallback name.` ); } @@ -688,7 +696,9 @@ export class Server< * * For DOs addressed via `idFromName()` / `getByName()`, calling * `setName()` is redundant — `this.name` is available automatically - * from `ctx.id.name`. Throws if `name` does not match `ctx.id.name`. + * from `ctx.id.name`. The normal initialization path also persists + * a fallback record so old-compat alarm handlers can recover the name. + * Throws if `name` does not match `ctx.id.name`. * * **Not appropriate for facets.** Cloudflare Agents and any other * framework using `ctx.facets.get(...)` should pass an explicit diff --git a/packages/partyserver/src/tests/index.test.ts b/packages/partyserver/src/tests/index.test.ts index 15aa48ed..57bca01c 100644 --- a/packages/partyserver/src/tests/index.test.ts +++ b/packages/partyserver/src/tests/index.test.ts @@ -578,6 +578,7 @@ describe("Name resolution", () => { ) ); expect(await setupRes.text()).toBe("alarm set"); + await expect(stub.readStoredName()).resolves.toBe("alarm-name-normal"); const ran = await runDurableObjectAlarm(stub); expect(ran).toBe(true); @@ -684,16 +685,18 @@ describe("Name resolution", () => { // name. Consumers reading this.name from inside an implicit-id // facet will see the wrong value. 
expect(result.facet.name).toBe(parentName); - // No storage record is involved — the implicit-id path doesn't - // touch __ps_name. - expect(result.facet.storedName).toBeUndefined(); + // PartyServer persists the native ctx.id.name as an alarm fallback. + // For implicit-id facets that is the parent's name, which is another + // reason this flow is not recommended. + expect(result.facet.storedName).toBe(parentName); }); - it("facet WITH explicit id survives cold wake without any storage hydrate (ctx.id.name is the source of truth)", async () => { + it("facet WITH explicit id survives cold wake with its own persisted fallback", async () => { // Variant of the explicit-id path that exercises cold wake. // The factory passed to ctx.facets.get() runs again on resume, // and idFromName(facetName) is deterministic, so the resumed - // facet gets the same ctx.id.name. No storage record needed. + // facet gets the same ctx.id.name. PartyServer also persists it + // so old-compat alarm handlers have a fallback. const parentName = "facet-parent-" + Math.random().toString(36).slice(2); const facetName = "facet-child-" + Math.random().toString(36).slice(2); @@ -701,13 +704,11 @@ describe("Name resolution", () => { const stub = env.FacetParent.get(id); await stub.spawnWithExplicitId(facetName, "env-namespace"); - // No __ps_name was written, so the only thing that can carry - // the name across eviction is the deterministic id factory. - // This implicitly tests that it works. + // The fallback record is populated from the facet's own explicit id. 
const result = await stub.spawnWithExplicitId(facetName, "env-namespace"); expect(result.facet.name).toBe(facetName); expect(result.facet.ctxIdName).toBe(facetName); - expect(result.facet.storedName).toBeUndefined(); + expect(result.facet.storedName).toBe(facetName); }); it("facet WITH explicit id (FacetStartupOptions.id) gets its own ctx.id.name — no setName needed", async () => { @@ -716,7 +717,7 @@ describe("Name resolution", () => { // `ctx.id` reflects that id rather than inheriting the parent's. // This is the recommended pattern — the facet gets a real // `ctx.id.name === facetName` and partyserver's `name` getter - // returns it without any setName/__ps_name override mechanism. + // returns it without any setName override mechanism. // // The id can be constructed from any DurableObjectNamespace. // Two ergonomic options: @@ -758,7 +759,7 @@ describe("Name resolution", () => { // env-namespace: works. expect(fromEnvNs.name).toBe(`${facetName}-env-namespace`); expect(fromEnvNs.ctxIdName).toBe(`${facetName}-env-namespace`); - expect(fromEnvNs.storedName).toBeUndefined(); + expect(fromEnvNs.storedName).toBe(`${facetName}-env-namespace`); expect(fromEnvNs.onStartName).toBe(`${facetName}-env-namespace`); // ctx-exports-namespace: also works, no env knowledge needed. @@ -767,7 +768,9 @@ describe("Name resolution", () => { expect(fromCtxExportsNs.ctxIdName).toBe( `${facetName}-ctx-exports-namespace` ); - expect(fromCtxExportsNs.storedName).toBeUndefined(); + expect(fromCtxExportsNs.storedName).toBe( + `${facetName}-ctx-exports-namespace` + ); expect(fromCtxExportsNs.onStartName).toBe( `${facetName}-ctx-exports-namespace` ); @@ -887,20 +890,19 @@ describe("setName() as bootstrap API for non-idFromName DOs", () => { expect(data.name).toBe("setname-coldwake-test"); }); - it("does NOT write storage when the DO was addressed via idFromName", async () => { - // For normal idFromName DOs, ctx.id.name carries the name and - // setName() is redundant. 
It must not write `__ps_name` to - // storage in this path — that would re-introduce the per-setName - // storage write that 0.5.0 eliminated. - const id = env.SetNameBootstrapServer.idFromName("setname-no-write"); + it("persists a fallback when the DO was addressed via idFromName", async () => { + // For normal idFromName DOs, ctx.id.name carries the name. PartyServer + // also writes a fallback before onStart so old-compat alarm handlers + // can recover the name after a cold wake. + const id = env.SetNameBootstrapServer.idFromName("setname-fallback-write"); const stub = env.SetNameBootstrapServer.get(id); - // Calling setName with the matching name is a no-op on storage; - // it just runs onStart. - await stub.setName("setname-no-write"); + // Calling setName with the matching name is redundant, but still + // triggers initialization and the fallback persistence path. + await stub.setName("setname-fallback-write"); const stored = await stub.readStoredName(); - expect(stored).toBeUndefined(); + expect(stored).toBe("setname-fallback-write"); }); }); @@ -948,6 +950,64 @@ describe("Framework bootstrap fallback (Agents facets etc.)", () => { }); }); +describe("Raw runtime contract: ctx.id.name in alarms vs. compatibility_date", () => { + // The test runtime's `wrangler.jsonc` declares + // `compatibility_date: "2026-01-28"`, which is BEFORE the + // 2026-03-15 date the Cloudflare DO docs cite for when alarms + // started persisting `name`: + // https://developers.cloudflare.com/durable-objects/api/id/#name + // + // This test probes a raw `DurableObject` (no PartyServer wrapping) + // to pin the actual workerd contract. As of the workerd version + // used by this test pool, `ctx.id.name` IS available in `alarm()` + // for an idFromName-addressed DO under compat_date 2026-01-28 — + // i.e., the local runtime does not appear to gate alarm-name + // propagation by compat date. 
If this test ever fails, the runtime + // contract has changed and PartyServer's fallback becomes + // load-bearing rather than defensive. Either way, the fallback + // protects us from the empirically-observed failure mode reported + // in cloudflare/partykit#390. + it("ctx.id.name is available in fetch() and alarm() for idFromName DOs (compat 2026-01-28)", async () => { + const name = "raw-alarm-" + Math.random().toString(36).slice(2); + const id = env.RawAlarmDO.idFromName(name); + const stub = env.RawAlarmDO.get(id); + + const fetchRes = await stub.fetch(new Request("http://example.com/")); + const fetchData = (await fetchRes.json()) as { + fetchCtxIdName: string | undefined; + alarmCtxIdName: string | undefined | null; + }; + expect(fetchData.fetchCtxIdName).toBe(name); + // alarm() hasn't fired yet; sentinel is null so we can distinguish + // "didn't run" from "ran with undefined". + expect(fetchData.alarmCtxIdName).toBeNull(); + + const ran = await runDurableObjectAlarm(stub); + expect(ran).toBe(true); + + const snap = await stub.snapshot(); + expect(snap.fetchCtxIdName).toBe(name); + expect(snap.alarmCtxIdName).toBe(name); + }); + + it("ctx.id.name is undefined in alarm() for newUniqueId DOs (no name to propagate)", async () => { + // Sanity check: when there is genuinely no name to begin with, + // ctx.id.name remains undefined throughout — confirming that the + // previous test isn't trivially passing because workerd is + // synthesizing a name from somewhere else. 
+ const id = env.RawAlarmDO.newUniqueId(); + const stub = env.RawAlarmDO.get(id); + + await stub.fetch(new Request("http://example.com/")); + const ran = await runDurableObjectAlarm(stub); + expect(ran).toBe(true); + + const snap = await stub.snapshot(); + expect(snap.fetchCtxIdName).toBeUndefined(); + expect(snap.alarmCtxIdName).toBeUndefined(); + }); +}); + describe("Legacy fallbacks", () => { it("reads __ps_name from storage when ctx.id.name is undefined in an alarm", async () => { // Simulates the pre-2026-03-15 alarm migration scenario: an alarm was diff --git a/packages/partyserver/src/tests/worker.ts b/packages/partyserver/src/tests/worker.ts index 8d43d9a8..3ef10b18 100644 --- a/packages/partyserver/src/tests/worker.ts +++ b/packages/partyserver/src/tests/worker.ts @@ -1,3 +1,5 @@ +import { DurableObject } from "cloudflare:workers"; + import { routePartykitRequest, Server } from "../index"; import type { Connection, ConnectionContext, WSMessage } from "../index"; @@ -35,6 +37,7 @@ export type Env = { FacetParent: DurableObjectNamespace; // FacetChild has no binding — it's reached via ctx.facets.get() from // FacetParent's isolate, just like Cloudflare Agents sub-agents. + RawAlarmDO: DurableObjectNamespace; }; export class Stateful extends Server { @@ -231,6 +234,10 @@ export class AlarmNameServer extends Server { } } + async readStoredName(): Promise<string | undefined> { + return this.ctx.storage.get<string>("__ps_name"); + } + async onRequest(request: Request): Promise<Response> { const url = new URL(request.url); if (url.searchParams.get("setAlarm")) { @@ -805,6 +812,43 @@ export class FacetParent extends Server { } } +/** + * Raw `DurableObject` fixture (no PartyServer wrapping) used to probe + * the underlying runtime contract for `ctx.id.name`. Schedules an alarm + * during `fetch()` and records the value of `ctx.id.name` observed in + * both `fetch()` and `alarm()`.
The compat date for the test runtime + * (`packages/partyserver/src/tests/wrangler.jsonc`) is `2026-01-28`, + * which is BEFORE the `2026-03-15` cutoff at which workerd starts + * persisting `name` into alarm records. + */ +export class RawAlarmDO extends DurableObject { + fetchCtxIdName: string | undefined; + alarmCtxIdName: string | undefined | null = null; + + async fetch(_request: Request): Promise<Response> { + this.fetchCtxIdName = this.ctx.id.name; + await this.ctx.storage.setAlarm(Date.now() + 60_000); + return Response.json({ + fetchCtxIdName: this.fetchCtxIdName, + alarmCtxIdName: this.alarmCtxIdName + }); + } + + async alarm(): Promise<void> { + this.alarmCtxIdName = this.ctx.id.name; + } + + async snapshot(): Promise<{ + fetchCtxIdName: string | undefined; + alarmCtxIdName: string | undefined | null; + }> { + return { + fetchCtxIdName: this.fetchCtxIdName, + alarmCtxIdName: this.alarmCtxIdName + }; + } +} + export default { async fetch(request: Request, env: Env, _ctx: ExecutionContext) { const url = new URL(request.url); diff --git a/packages/partyserver/src/tests/wrangler.jsonc b/packages/partyserver/src/tests/wrangler.jsonc index 89b8c285..54aa40ef 100644 --- a/packages/partyserver/src/tests/wrangler.jsonc +++ b/packages/partyserver/src/tests/wrangler.jsonc @@ -106,6 +106,10 @@ { "name": "FacetParent", "class_name": "FacetParent" + }, + { + "name": "RawAlarmDO", + "class_name": "RawAlarmDO" } ] }, @@ -137,7 +141,8 @@ "FacetLikeBootstrapServer", "NameInConstructorServer", "FacetParent", - "FacetChild" + "FacetChild", + "RawAlarmDO" ] } ]