From 631d24344adecfbb507e70fb61a1e9419d3f8f02 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 29 Mar 2026 11:34:33 +0000 Subject: [PATCH 01/26] Add willow-actor design spec for cross-platform actor system Formalizes the ad-hoc channel/task patterns scattered across five crates into a single reusable actor crate targeting both native (tokio) and WASM (wasm-bindgen-futures). Covers typed mailboxes, request-reply, supervision, streams, intervals, and a phased migration path. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- crates/actor/DESIGN.md | 516 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 516 insertions(+) create mode 100644 crates/actor/DESIGN.md diff --git a/crates/actor/DESIGN.md b/crates/actor/DESIGN.md new file mode 100644 index 00000000..45b8d0ab --- /dev/null +++ b/crates/actor/DESIGN.md @@ -0,0 +1,516 @@ +# willow-actor Design Spec + +## Problem + +Willow has five different channel/concurrency patterns across its crates: + +| Layer | Channels | Target | +|-------|----------|--------| +| Bevy bridge | `std::sync::mpsc` | native | +| libp2p node | `tokio::sync::mpsc` (native) / `futures::channel::mpsc` (WASM) | both | +| Client lib | `futures::channel::mpsc` | both | +| Worker actors | `tokio::sync::mpsc` + `oneshot` + `watch` | native only | +| Web UI | `futures::channel::mpsc` + `spawn_local` | WASM only | + +The worker crate already uses an actor pattern (state, network, heartbeat, +sync actors communicating via channels), but it's hand-rolled, tokio-only, +and not reusable. Every other crate reinvents the same pattern: spawn a +task, create channels, loop on `select!`, handle shutdown. + +`willow-actor` formalizes this into a single crate that works on both +native and WASM, eliminating the per-crate boilerplate while preserving +the existing architecture's strengths. + +## Goals + +1. **Dual-target**: native (tokio) + WASM (wasm-bindgen-futures), single API +2. 
**Typed mailboxes**: each actor defines its message type, no `Box` +3. **Request-reply**: first-class `ask()` with typed responses, no manual oneshot wiring +4. **Supervision**: restart policies for crashed actors (native), error propagation (WASM) +5. **Lightweight**: no `Arc>` in the hot path, no dynamic dispatch on send +6. **Incremental adoption**: existing crates can migrate one actor at a time + +## Non-Goals + +- Distributed actors / remote messaging (libp2p handles that) +- Actor persistence / event sourcing (willow-state handles that) +- Replacing Bevy's ECS (the bridge stays, but becomes thinner) + +## Core Types + +### Message Trait + +```rust +/// Marker trait for actor messages. Must be Send on native. +/// On WASM, Send is not required since everything is single-threaded. +pub trait Message: 'static + MaybeSend { + /// The response type for request-reply. Use `()` for fire-and-forget. + type Result: 'static + MaybeSend; +} +``` + +`MaybeSend` is a conditional trait alias: + +```rust +#[cfg(not(target_arch = "wasm32"))] +pub trait MaybeSend: Send {} +#[cfg(not(target_arch = "wasm32"))] +impl MaybeSend for T {} + +#[cfg(target_arch = "wasm32")] +pub trait MaybeSend {} +#[cfg(target_arch = "wasm32")] +impl MaybeSend for T {} +``` + +### Actor Trait + +```rust +/// An actor processes messages sequentially in its own task. +#[async_trait(?Send)] // ?Send for WASM compat +pub trait Actor: 'static + MaybeSend + Sized { + /// Called once when the actor starts, before processing messages. + async fn started(&mut self, ctx: &mut Context) {} + + /// Called when the actor is stopping (mailbox closed or explicit stop). + async fn stopped(&mut self) {} +} +``` + +### Handler Trait + +```rust +/// Implement Handler for each message type an actor accepts. +#[async_trait(?Send)] +pub trait Handler: Actor { + async fn handle(&mut self, msg: M, ctx: &mut Context) -> M::Result; +} +``` + +An actor can implement `Handler` for multiple message types. 
Each handler +is type-checked at compile time. + +### Context + +```rust +/// Provided to handlers — gives access to the actor's own address and system. +pub struct Context { + addr: Addr, + system: SystemHandle, + stop_flag: bool, +} + +impl Context { + /// Get this actor's own address (for self-sends or passing to children). + pub fn address(&self) -> Addr { ... } + + /// Spawn a child actor supervised by this actor. + pub fn spawn(&self, child: C) -> Addr { ... } + + /// Request a graceful stop after the current message finishes. + pub fn stop(&mut self) { ... } + + /// Access the actor system (for spawning unrelated actors). + pub fn system(&self) -> &SystemHandle { ... } +} +``` + +### Addr (Actor Address / Handle) + +```rust +/// Type-safe handle for sending messages to an actor. +/// Cheaply cloneable (wraps an Arc'd channel sender). +pub struct Addr { + tx: MessageSender, // platform-specific channel sender + _phantom: PhantomData, +} + +impl Addr { + /// Fire-and-forget: send a message, don't wait for a response. + /// Returns Err if the actor's mailbox is closed. + pub fn send(&self, msg: M) -> Result<(), SendError> + where + A: Handler, + M: Message, + { ... } + + /// Request-reply: send a message and await the response. + /// Returns a future that resolves to M::Result. + pub fn ask(&self, msg: M) -> impl Future> + where + A: Handler, + M: Message, + { ... } + + /// Check if the actor is still alive. + pub fn is_alive(&self) -> bool { ... } +} + +impl Clone for Addr { ... } +``` + +### AnyAddr (Type-Erased Address) + +For cases where you need to store addresses of different actor types +together (e.g. a supervisor tracking children): + +```rust +/// Type-erased actor address. Can send shutdown signals but not typed messages. +pub struct AnyAddr { ... } + +impl AnyAddr { + pub fn stop(&self) { ... } + pub fn is_alive(&self) -> bool { ... } +} + +impl From> for AnyAddr { ... 
} +``` + +### Recipient (Multi-Actor Message Target) + +For when multiple actor types handle the same message and you want to +abstract over the concrete actor: + +```rust +/// Type-erased handle that can send a specific message type. +/// Useful for pub-sub patterns where the sender doesn't know the actor type. +pub struct Recipient { + tx: Box>, +} + +impl Recipient { + pub fn send(&self, msg: M) -> Result<(), SendError> { ... } + pub fn ask(&self, msg: M) -> impl Future> { ... } +} + +impl From> for Recipient +where + A: Handler, + M: Message, +{ ... } +``` + +## Actor System + +```rust +/// The actor system — owns the runtime and tracks all top-level actors. +pub struct System { + handle: SystemHandle, +} + +/// Cheap cloneable handle into the system. +#[derive(Clone)] +pub struct SystemHandle { ... } + +impl System { + /// Create a new actor system. + pub fn new() -> Self { ... } + + /// Spawn a top-level actor and return its address. + pub fn spawn(&self, actor: A) -> Addr { ... } + + /// Spawn with a specific mailbox capacity (default: 256). + pub fn spawn_with_capacity(&self, actor: A, capacity: usize) -> Addr { ... } + + /// Get a handle that can be passed to other contexts. + pub fn handle(&self) -> SystemHandle { ... } + + /// Shut down all actors gracefully. + pub async fn shutdown(self) { ... } +} +``` + +## Platform Abstraction + +The crate uses a thin `runtime` module to abstract over native vs WASM: + +```rust +// crate::runtime (internal) + +/// Spawn a future as a background task. +pub fn spawn + MaybeSend + 'static>(fut: F) { + #[cfg(not(target_arch = "wasm32"))] + tokio::task::spawn(fut); + + #[cfg(target_arch = "wasm32")] + wasm_bindgen_futures::spawn_local(fut); +} + +/// One-shot channel (platform-specific). +pub fn oneshot() -> (OneshotTx, OneshotRx) { + #[cfg(not(target_arch = "wasm32"))] + { /* tokio::sync::oneshot */ } + + #[cfg(target_arch = "wasm32")] + { /* futures::channel::oneshot */ } +} + +/// Bounded MPSC channel. 
+pub fn channel(cap: usize) -> (Sender, Receiver) { + #[cfg(not(target_arch = "wasm32"))] + { /* tokio::sync::mpsc */ } + + #[cfg(target_arch = "wasm32")] + { /* futures::channel::mpsc */ } +} + +/// Sleep for a duration (native: tokio::time::sleep, WASM: gloo_timers). +pub async fn sleep(duration: Duration) { ... } +``` + +## Mailbox Internals + +Each actor gets a mailbox backed by a bounded MPSC channel. Messages are +type-erased inside the mailbox using a closure-based envelope pattern: + +```rust +// Internal — not part of the public API. + +type BoxEnvelope = Box) -> BoxFuture<'_, ()> + MaybeSend>; + +// When Addr.send(msg) is called for M where A: Handler: +// 1. msg is wrapped in an envelope closure +// 2. The closure calls A::handle(msg, ctx) when executed +// 3. For ask(), a oneshot sender is captured in the closure +// and the response is sent back through it +``` + +This means the channel carries `BoxEnvelope` — one channel per actor, +handling all message types. No dynamic dispatch on the sender side; the +dispatch happens once when the envelope is executed. + +## Supervision + +```rust +/// Restart policy for supervised actors. +pub enum RestartPolicy { + /// Never restart (default). Errors are logged. + Never, + /// Restart immediately on panic/error, up to `max` times. + OnFailure { max: u32 }, + /// Restart with exponential backoff. + Backoff { initial: Duration, max_delay: Duration, max_retries: u32 }, +} + +impl Context { + /// Spawn a supervised child actor. + pub fn spawn_supervised( + &self, + child: C, + policy: RestartPolicy, + ) -> Addr { ... } +} +``` + +On WASM, `RestartPolicy::OnFailure` and `Backoff` still work but panics +are caught via `std::panic::catch_unwind` only if the actor is +`UnwindSafe`. Otherwise, `Never` is the only safe option on WASM. 
+ +## Streams + +Actors can subscribe to external event streams (e.g., network events, +timers) that feed into their mailbox: + +```rust +#[async_trait(?Send)] +pub trait StreamHandler: Actor { + async fn handle_stream_item(&mut self, item: S, ctx: &mut Context); + + /// Called when the stream ends. + async fn stream_finished(&mut self, _ctx: &mut Context) {} +} + +impl Context { + /// Attach a stream to this actor. Items are delivered as messages. + pub fn add_stream(&mut self, stream: St) + where + A: StreamHandler, + S: 'static + MaybeSend, + St: Stream + MaybeSend + 'static, + { ... } +} +``` + +## Intervals + +Built-in support for periodic ticks (replaces the manual +`tokio::select! + sleep` pattern in heartbeat/sync actors): + +```rust +impl Context { + /// Start a periodic interval. Delivers `Tick` messages to the actor. + /// Returns a handle that can cancel the interval. + pub fn run_interval>( + &mut self, + duration: Duration, + msg_factory: impl Fn() -> M + MaybeSend + 'static, + ) -> IntervalHandle + where + A: Handler, + { ... } +} + +pub struct IntervalHandle { ... } +impl IntervalHandle { + pub fn cancel(self) { ... } +} +``` + +## Error Handling + +```rust +#[derive(Debug, thiserror::Error)] +pub enum SendError { + #[error("actor mailbox is closed")] + Closed(M), + #[error("actor mailbox is full")] + Full(M), +} + +#[derive(Debug, thiserror::Error)] +pub enum AskError { + #[error("actor mailbox is closed")] + Closed, + #[error("actor did not respond (dropped the reply channel)")] + NoResponse, +} +``` + +## Migration Path + +### Phase 1: New crate, worker migration + +Create `crates/actor/` with the core types. 
Migrate the worker crate's +four actors to use `willow-actor`: + +**Before** (current `crates/worker/src/actors/state.rs`): +```rust +pub async fn run(mut role: Box, mut rx: mpsc::Receiver) { + while let Some(msg) = rx.recv().await { + match msg { + StateMsg::Event(event) => role.on_event(&event), + StateMsg::Request { req, reply } => { + let response = role.handle_request(req); + let _ = reply.send(response); + } + StateMsg::Shutdown => break, + } + } +} +``` + +**After**: +```rust +pub struct StateActor { + role: Box, +} + +impl Actor for StateActor {} + +impl Handler for StateActor { + async fn handle(&mut self, msg: EventMsg, _ctx: &mut Context) { + self.role.on_event(&msg.0); + } +} + +impl Handler for StateActor { + async fn handle(&mut self, msg: RequestMsg, _ctx: &mut Context) -> WorkerResponse { + self.role.handle_request(msg.0) + } +} +``` + +**Before** (current `crates/worker/src/runtime.rs`): +```rust +let (state_tx, state_rx) = mpsc::channel::(256); +let (network_tx, network_rx) = mpsc::channel::(256); +let (shutdown_tx, shutdown_rx) = watch::channel(false); + +let state_handle = tokio::spawn(state::run(role, state_rx)); +let heartbeat_handle = tokio::spawn(heartbeat::run(..., shutdown_rx.clone())); +// ... manual join + shutdown +``` + +**After**: +```rust +let system = System::new(); +let state_addr = system.spawn(StateActor { role }); +let network_addr = system.spawn(NetworkActor::new(node, events, state_addr.clone())); +let _heartbeat = system.spawn(HeartbeatActor::new(peer_id, state_addr.clone(), network_addr.clone())); +let _sync = system.spawn(SyncActor::new(peer_id, state_addr, network_addr)); + +tokio::signal::ctrl_c().await?; +system.shutdown().await; +``` + +### Phase 2: Client library + +Replace `ClientHandle`'s `futures::channel::mpsc` pair with actor addresses. +The `ClientEventLoop` becomes an actor with `StreamHandler`. 
+ +### Phase 3: Network bridge + +The Bevy bridge becomes a thin adapter: a Bevy system polls a `Receiver` +that an actor feeds. The bridge actor replaces `run_network()`. + +### Phase 4: Web UI + +The Leptos event loop (`spawn_local` + `futures::channel::mpsc`) becomes +a `StreamHandler` on a UI actor. Signal updates happen in the handler. + +## Dependency Graph + +``` +willow-actor (new) +├── futures-core (Stream trait) +├── async-trait +├── thiserror +├── tracing +├── cfg-if +├── [native] tokio (spawn, mpsc, oneshot, sleep) +└── [wasm] wasm-bindgen-futures, futures-channel, gloo-timers +``` + +`willow-actor` has **no dependency on any other willow crate**. It is a +pure infrastructure crate. + +## Crate Structure + +``` +crates/actor/ +├── Cargo.toml +├── DESIGN.md (this file) +└── src/ + ├── lib.rs — public API re-exports + ├── actor.rs — Actor, Handler, StreamHandler traits + ├── addr.rs — Addr, AnyAddr, Recipient + ├── context.rs — Context, interval, stream attachment + ├── envelope.rs — BoxEnvelope, type-erased message dispatch + ├── mailbox.rs — bounded channel wrapper, recv loop + ├── message.rs — Message trait, MaybeSend + ├── runtime.rs — platform abstraction (spawn, channel, sleep) + ├── supervisor.rs — RestartPolicy, supervised spawn + ├── system.rs — System, SystemHandle + └── error.rs — SendError, AskError +``` + +## Open Questions + +1. **Backpressure policy**: When a mailbox is full, should `send()` drop + the message (lossy), block (native only), or return an error? Current + design returns `SendError::Full`. An `async fn send_async()` that + awaits capacity could be added for native. + +2. **Priority messages**: Should shutdown/stop bypass the queue? Current + design: no, messages are FIFO. Shutdown is just another message. The + `Context::stop()` flag is checked between messages. + +3. **Actor state snapshots**: Should there be a way to query an actor's + internal state for debugging/metrics? 
Could add an optional + `Inspect` trait that serializes state, but this risks breaking + encapsulation. + +4. **Bounded vs unbounded mailboxes**: The current network layers use + unbounded channels to avoid dropping gossipsub messages. Should + `System::spawn_unbounded()` be offered? Probably yes, with a lint + warning in docs. From cedd5855af57394c553d91af80dc299e5f078d86 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 29 Mar 2026 11:55:18 +0000 Subject: [PATCH 02/26] Add willow-actor design spec to superpowers/specs Design spec for a cross-platform actor system crate targeting both native (tokio) and WASM (wasm-bindgen-futures). Also documents the docs/superpowers/ directory in CLAUDE.md so future specs land there. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- CLAUDE.md | 4 + .../specs/2026-03-29-actor-system-design.md | 82 ++++++++++--------- 2 files changed, 46 insertions(+), 40 deletions(-) rename crates/actor/DESIGN.md => docs/superpowers/specs/2026-03-29-actor-system-design.md (98%) diff --git a/CLAUDE.md b/CLAUDE.md index a4c5cd16..f86a41d7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -46,6 +46,10 @@ crates/ ├── channels.rs — Channel/role/member management, invites, trust ├── settings.rs — Settings view systems └── files.rs — File picker systems + +docs/superpowers/ +├── specs/ — Design specs for new features and architecture changes +└── plans/ — Implementation plans referencing the specs ``` ## Build & Test diff --git a/crates/actor/DESIGN.md b/docs/superpowers/specs/2026-03-29-actor-system-design.md similarity index 98% rename from crates/actor/DESIGN.md rename to docs/superpowers/specs/2026-03-29-actor-system-design.md index 45b8d0ab..04612f93 100644 --- a/crates/actor/DESIGN.md +++ b/docs/superpowers/specs/2026-03-29-actor-system-design.md @@ -1,6 +1,9 @@ -# willow-actor Design Spec +# Actor System Design Spec -## Problem +**Date**: 2026-03-29 +**Status**: Draft + +## Overview Willow has five different channel/concurrency patterns across 
its crates: @@ -338,8 +341,8 @@ Built-in support for periodic ticks (replaces the manual ```rust impl Context { - /// Start a periodic interval. Delivers `Tick` messages to the actor. - /// Returns a handle that can cancel the interval. + /// Start a periodic interval. Delivers messages to the actor + /// on each tick. Returns a handle that can cancel the interval. pub fn run_interval>( &mut self, duration: Duration, @@ -376,6 +379,41 @@ pub enum AskError { } ``` +## Crate Structure + +``` +crates/actor/ +├── Cargo.toml +└── src/ + ├── lib.rs — public API re-exports + ├── actor.rs — Actor, Handler, StreamHandler traits + ├── addr.rs — Addr, AnyAddr, Recipient + ├── context.rs — Context, interval, stream attachment + ├── envelope.rs — BoxEnvelope, type-erased message dispatch + ├── mailbox.rs — bounded channel wrapper, recv loop + ├── message.rs — Message trait, MaybeSend + ├── runtime.rs — platform abstraction (spawn, channel, sleep) + ├── supervisor.rs — RestartPolicy, supervised spawn + ├── system.rs — System, SystemHandle + └── error.rs — SendError, AskError +``` + +## Dependency Graph + +``` +willow-actor (new) +├── futures-core (Stream trait) +├── async-trait +├── thiserror +├── tracing +├── cfg-if +├── [native] tokio (spawn, mpsc, oneshot, sleep) +└── [wasm] wasm-bindgen-futures, futures-channel, gloo-timers +``` + +`willow-actor` has **no dependency on any other willow crate**. It is a +pure infrastructure crate. + ## Migration Path ### Phase 1: New crate, worker migration @@ -458,42 +496,6 @@ that an actor feeds. The bridge actor replaces `run_network()`. The Leptos event loop (`spawn_local` + `futures::channel::mpsc`) becomes a `StreamHandler` on a UI actor. Signal updates happen in the handler. 
-## Dependency Graph - -``` -willow-actor (new) -├── futures-core (Stream trait) -├── async-trait -├── thiserror -├── tracing -├── cfg-if -├── [native] tokio (spawn, mpsc, oneshot, sleep) -└── [wasm] wasm-bindgen-futures, futures-channel, gloo-timers -``` - -`willow-actor` has **no dependency on any other willow crate**. It is a -pure infrastructure crate. - -## Crate Structure - -``` -crates/actor/ -├── Cargo.toml -├── DESIGN.md (this file) -└── src/ - ├── lib.rs — public API re-exports - ├── actor.rs — Actor, Handler, StreamHandler traits - ├── addr.rs — Addr, AnyAddr, Recipient - ├── context.rs — Context, interval, stream attachment - ├── envelope.rs — BoxEnvelope, type-erased message dispatch - ├── mailbox.rs — bounded channel wrapper, recv loop - ├── message.rs — Message trait, MaybeSend - ├── runtime.rs — platform abstraction (spawn, channel, sleep) - ├── supervisor.rs — RestartPolicy, supervised spawn - ├── system.rs — System, SystemHandle - └── error.rs — SendError, AskError -``` - ## Open Questions 1. **Backpressure policy**: When a mailbox is full, should `send()` drop From 62c919812615900279e6a370f374c8e9d506d358 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 29 Mar 2026 12:14:05 +0000 Subject: [PATCH 03/26] Add prior art survey to actor system design spec Evaluated 8 existing Rust actor crates for WASM compatibility. Only xtra and xtor support wasm32-unknown-unknown. xtra is the closest match but has hard Send bounds, no supervision, and no interval support. Documents the rationale for building willow-actor rather than adopting an existing crate. 
https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- .../specs/2026-03-29-actor-system-design.md | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/docs/superpowers/specs/2026-03-29-actor-system-design.md b/docs/superpowers/specs/2026-03-29-actor-system-design.md index 04612f93..7aefa1ac 100644 --- a/docs/superpowers/specs/2026-03-29-actor-system-design.md +++ b/docs/superpowers/specs/2026-03-29-actor-system-design.md @@ -3,6 +3,79 @@ **Date**: 2026-03-29 **Status**: Draft +## Prior Art + +A survey of existing Rust actor crates was conducted to determine whether +an off-the-shelf solution could be adopted. Summary: + +| Crate | Version | WASM | Send req | Supervision | Status | +|-------|---------|------|----------|-------------|--------| +| **actix** | 0.13.5 | No | Send+Sync | Yes | Maintained, stalled | +| **ractor** | 0.15.12 | No | Send+Sync+'static | Yes (Erlang-style) | Active | +| **kameo** | 0.19.2 | No | Send+Sync | Yes | Active | +| **coerce** | 0.8.11 | No | Send+Sync | Yes | Active | +| **xactor** | 0.7.11 | No | Send | No | Dormant (2022) | +| **stakker** | 0.2.14 | No | — (sync) | No | Low activity | +| **xtor** | 0.9.10 | Yes (feature) | Send | Yes | Dormant (2022) | +| **xtra** | 0.6.0 | Yes (feature) | Send+'static | No | Low activity (2024) | + +### xtra — closest match + +[xtra](https://github.com/Restioson/xtra) (83k downloads, MPL-2.0) is +the closest existing solution. It provides: + +- **Multi-runtime**: tokio, async-std, smol, and `wasm_bindgen` via + feature flags. WASM spawns use `wasm_bindgen_futures::spawn_local`. +- **Typed handlers**: `Handler` trait with `type Return`, async + `handle()` method. Request-reply via `Address::send()` returning a + `SendFuture` that resolves to the handler's return value. +- **Lightweight**: core deps are `catty`, `futures-core`, `event-listener`, + `spin`. No proc macros required (optional `xtra-macros`). 
+- **Actor lifecycle**: `started(&mut self, &Mailbox)` and + `stopped(self) -> Self::Stop`. +- **Address/Mailbox split**: `Address` for sending, `Mailbox` for + the actor's receive loop. + +**Why not adopt xtra directly:** + +1. **Hard `Send` bound on `Actor`**: The `Actor` trait requires + `Send + 'static`. On WASM, all types are trivially `Send` (single + thread), but this forces `Send` constraints to propagate through the + entire Willow type graph. Many Willow types use `Rc>` in + WASM paths (e.g., `ClientHandle.shared`), which are not `Send`. + Switching to `Arc>` everywhere adds overhead on WASM for no + benefit. +2. **No supervision**: No restart policies or supervisor trees. Actors + that panic are simply gone. +3. **No `Recipient` / type-erased message targets**: xtra has + `MessageChannel` but it's less ergonomic than a standalone + `Recipient` type for pub-sub patterns. +4. **No interval support**: No built-in periodic tick mechanism. The + heartbeat/sync actors would still need manual timer loops. +5. **No `MaybeSend` pattern**: The `Send` bound is unconditional. Our + design needs conditional `Send` to avoid unnecessary synchronization + on WASM. +6. **Low activity**: Last release Feb 2024, limited maintenance signal. + +### xtor — explicit WASM feature + +[xtor](https://github.com/nicktqwewe/xtor) supports WASM via a +`wasm_bindgen` feature flag and multiple runtimes. However: +- Last updated May 2022, only 2.8k downloads +- No supervision +- Thin documentation +- Unclear maintenance future + +### Recommendation: build `willow-actor` + +No existing crate satisfies all requirements (dual-target with +conditional Send, supervision, intervals, stream handlers, lightweight). +The design below draws on xtra's `Handler` pattern and envelope-based +mailbox while adding `MaybeSend`, supervision, intervals, and +`Recipient`. 
+ +--- + ## Overview Willow has five different channel/concurrency patterns across its crates: From 5b5a4c23d4499d2dc4f2616958b446b101ecab85 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 29 Mar 2026 15:32:31 +0000 Subject: [PATCH 04/26] Expand prior art with ractor WASM findings and kameo API analysis ractor has first-class WASM support via tokio_with_wasm but uses single-enum message types and requires Send+Sync. kameo has the best API shape (per-message handlers, ask/tell) but no WASM runtime. Design combines xtra/kameo's Handler pattern with ractor's platform abstraction approach, adding MaybeSend and supervision. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- .../specs/2026-03-29-actor-system-design.md | 113 +++++++++++++----- 1 file changed, 86 insertions(+), 27 deletions(-) diff --git a/docs/superpowers/specs/2026-03-29-actor-system-design.md b/docs/superpowers/specs/2026-03-29-actor-system-design.md index 7aefa1ac..f4b31fb7 100644 --- a/docs/superpowers/specs/2026-03-29-actor-system-design.md +++ b/docs/superpowers/specs/2026-03-29-actor-system-design.md @@ -8,27 +8,72 @@ A survey of existing Rust actor crates was conducted to determine whether an off-the-shelf solution could be adopted. 
Summary: -| Crate | Version | WASM | Send req | Supervision | Status | -|-------|---------|------|----------|-------------|--------| -| **actix** | 0.13.5 | No | Send+Sync | Yes | Maintained, stalled | -| **ractor** | 0.15.12 | No | Send+Sync+'static | Yes (Erlang-style) | Active | -| **kameo** | 0.19.2 | No | Send+Sync | Yes | Active | -| **coerce** | 0.8.11 | No | Send+Sync | Yes | Active | -| **xactor** | 0.7.11 | No | Send | No | Dormant (2022) | -| **stakker** | 0.2.14 | No | — (sync) | No | Low activity | -| **xtor** | 0.9.10 | Yes (feature) | Send | Yes | Dormant (2022) | -| **xtra** | 0.6.0 | Yes (feature) | Send+'static | No | Low activity (2024) | - -### xtra — closest match +| Crate | Version | WASM | Send req | Supervision | Handlers | Status | +|-------|---------|------|----------|-------------|----------|--------| +| **ractor** | 0.15.12 | **Yes** (`tokio_with_wasm`) | Send+Sync | Yes (Erlang) | Single `Msg` enum | Active (2026-03) | +| **kameo** | 0.19.2 | No (tokio) | Send | Yes (OneForOne) | Per-message `Message` | Active (2025-11) | +| **actix** | 0.13.5 | No (tokio) | Unpin (no Send) | Basic | Per-message `Handler` | Passive | +| **xtra** | 0.6.0 | **Yes** (`wasm_bindgen`) | Send | No | Per-message `Handler` | Low (2024-02) | +| **coerce** | 0.8.11 | No (tokio full) | Send+Sync | Yes | Per-message `Handler` | Dormant (2023) | +| **xactor** | 0.7.11 | No | Send | No | Per-message `Handler` | Dead (2020) | +| **xtor** | 0.9.10 | **Yes** (`wasm_bindgen`) | Send | Yes | Per-message handler | Dead (2022) | +| **stakker** | 0.2.14 | Provisional | Not Send | No | Macro-based callbacks | Niche | + +### ractor — first-class WASM, Erlang-style + +[ractor](https://github.com/slawlor/ractor) (546k downloads, MIT) is +the most actively maintained option and has **first-class WASM support** +with 84 passing browser tests. 
Key details: + +- **WASM runtime**: Uses `tokio_with_wasm` (a shim that provides + tokio-compatible channels/spawn/timers on `wasm32-unknown-unknown` + backed by the JS event loop). Platform abstraction lives in a + `concurrency` module with three backends: `tokio_primitives`, + `async_std_primitives`, `wasm_browser_primitives`. +- **Erlang-style API**: Each actor declares a single `type Msg` enum. + The `handle()` method pattern-matches on it. State is separated from + the handler (`&self` + `&mut State`). +- **Supervision**: `spawn_linked()` establishes parent-child links. + `SupervisionEvent` notifies parents of child panics/deaths. No + built-in restart policies — left to the `handle_supervisor_evt` impl + (like Erlang's custom supervisor). +- **Request-reply**: `RpcReplyPort` for typed replies. `call()` and + `cast()` for ask/tell patterns. + +**Why not adopt ractor directly:** + +1. **Single-enum message type**: `type Msg: Message` requires one enum + per actor for all message types. This means every actor needs a + hand-written `match` over its message enum in `handle()`, and adding + a new message type requires modifying the enum + the match arm. With + per-message `Handler` traits, new message types are additive (just + implement another trait). For Willow's actors that handle 5-10+ + message types each, the enum approach produces large match blocks. +2. **Separated `&self` + `&mut State`**: The actor handler is immutable; + mutable state lives in a separate `State` type. This is idiomatic + Erlang but awkward in Rust — fields that logically belong together + (e.g., a `WorkerRole` + its config) are split across two types. +3. **Hard `Send + Sync` on `Actor`**: Requires all actor types to be + `Send + Sync`. This is stricter than necessary — actors are + single-owner by design, so `Sync` is never needed. +4. **`tokio_with_wasm` dependency**: Pulls in a full tokio-compatible + shim for WASM. 
Willow already uses `futures::channel::mpsc` and + `wasm_bindgen_futures::spawn_local` directly — adding another layer + of abstraction over tokio's API on WASM is unnecessary indirection. +5. **Heavy dependency tree**: `dashmap`, `bon`, `strum`, `once_cell`, + plus the full `tokio_with_wasm` crate on WASM. Willow's actor system + needs only channels, oneshot, and spawn. + +### xtra — per-message handlers, lightweight [xtra](https://github.com/Restioson/xtra) (83k downloads, MPL-2.0) is -the closest existing solution. It provides: +the closest match to our desired API shape: - **Multi-runtime**: tokio, async-std, smol, and `wasm_bindgen` via feature flags. WASM spawns use `wasm_bindgen_futures::spawn_local`. -- **Typed handlers**: `Handler` trait with `type Return`, async - `handle()` method. Request-reply via `Address::send()` returning a - `SendFuture` that resolves to the handler's return value. +- **Per-message `Handler`**: Each message type gets its own `Handler` + impl with `type Return`. Request-reply via `Address::send()` returning + a `SendFuture` that resolves to the handler's return value. - **Lightweight**: core deps are `catty`, `futures-core`, `event-listener`, `spin`. No proc macros required (optional `xtra-macros`). - **Actor lifecycle**: `started(&mut self, &Mailbox)` and @@ -57,22 +102,36 @@ the closest existing solution. It provides: on WASM. 6. **Low activity**: Last release Feb 2024, limited maintenance signal. -### xtor — explicit WASM feature +### kameo — best API shape, no WASM -[xtor](https://github.com/nicktqwewe/xtor) supports WASM via a -`wasm_bindgen` feature flag and multiple runtimes. 
However: -- Last updated May 2022, only 2.8k downloads -- No supervision -- Thin documentation -- Unclear maintenance future +[kameo](https://github.com/tqwewe/kameo) (190k downloads, MIT) has the +cleanest API design with per-message `Message` trait impls and +ask/tell naming: + +```rust +impl Message for MyActor { + type Reply = MyReply; + async fn handle(&mut self, msg: MyMsg, ctx: &mut Context<..>) -> Self::Reply; +} +``` + +It has OneForOne supervision, stream attachment, and actor linking. But +it depends on tokio directly with no WASM runtime support and no feature +flags for alternative runtimes. Would need forking to add WASM. ### Recommendation: build `willow-actor` No existing crate satisfies all requirements (dual-target with -conditional Send, supervision, intervals, stream handlers, lightweight). -The design below draws on xtra's `Handler` pattern and envelope-based -mailbox while adding `MaybeSend`, supervision, intervals, and -`Recipient`. +conditional Send, supervision, intervals, stream handlers, per-message +handlers). The design below combines: + +- **xtra/kameo's `Handler` pattern** — per-message-type trait impls + with typed returns, not a single enum +- **ractor's `concurrency` module approach** — platform-abstracted + spawn/channel/timer with cfg-switched backends +- **New: `MaybeSend`** — conditional `Send` bounds dropped on WASM +- **New: supervision, intervals, `Recipient`** — features missing + from xtra --- From 6c1dea87810958b86ac33fa1a170282e45205cde Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 29 Mar 2026 16:06:51 +0000 Subject: [PATCH 05/26] Add kameo fork feasibility analysis to actor system spec Audited kameo's tokio coupling: only 6 primitives across 4 files (~150-200 LOC to abstract). Fork is technically feasible but not clearly better than building from scratch (~1000-1500 LOC) given the Send bound mismatch, upstream merge burden, and extra deps. Also renamed "Prior Art" section to "Existing Solutions". 
https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- .../specs/2026-03-29-actor-system-design.md | 60 ++++++++++++++++++- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/specs/2026-03-29-actor-system-design.md b/docs/superpowers/specs/2026-03-29-actor-system-design.md index f4b31fb7..b60018d2 100644 --- a/docs/superpowers/specs/2026-03-29-actor-system-design.md +++ b/docs/superpowers/specs/2026-03-29-actor-system-design.md @@ -3,7 +3,7 @@ **Date**: 2026-03-29 **Status**: Draft -## Prior Art +## Existing Solutions A survey of existing Rust actor crates was conducted to determine whether an off-the-shelf solution could be adopted. Summary: @@ -117,7 +117,63 @@ impl Message for MyActor { It has OneForOne supervision, stream attachment, and actor linking. But it depends on tokio directly with no WASM runtime support and no feature -flags for alternative runtimes. Would need forking to add WASM. +flags for alternative runtimes. + +### kameo fork feasibility + +A source audit of kameo's tokio coupling reveals it is **shallow and +concentrated**. All tokio usage falls into 6 primitives across 4 files: + +| Primitive | Call sites | WASM replacement | +|-----------|-----------|------------------| +| `tokio::spawn` | 5 | `wasm_bindgen_futures::spawn_local` | +| `tokio::sync::mpsc` (bounded+unbounded) | 1 module (~15 method delegations) | `futures::channel::mpsc` | +| `tokio::sync::Mutex` | 1 | `futures::lock::Mutex` | +| `tokio::sync::SetOnce` | 2 | `OnceCell` or custom | +| `tokio::select!` | 1 | `futures::select!` | +| `tokio::runtime::Handle` | 1 (for `spawn_in_thread`) | `#[cfg(not(wasm32))]` gate | +| `task_local!` | 1 | `thread_local!` (WASM is single-threaded) | + +**Total estimated changes**: ~150-200 lines to introduce a `runtime` +abstraction module with `cfg(target_arch = "wasm32")` branches, plus +Cargo.toml feature flag changes. The supervision module has **zero** +production tokio usage. 
The mailbox module is the densest — it wraps +tokio mpsc types — but it's a clean 1:1 delegation layer that maps +directly to `futures::channel::mpsc`. + +**Challenges:** + +1. **`Send` bounds everywhere**: kameo requires `Actor: Send + 'static` + and all futures must be `Send`. On WASM this compiles (everything is + trivially Send on single-threaded targets) but it forces Willow types + that currently use `Rc>` to switch to `Arc>`. This + is a Willow-side change, not a kameo fork issue. +2. **`spawn_in_thread()`**: Uses `tokio::runtime::Handle::current()` and + `std::thread::spawn`. Must be `cfg`-gated out on WASM entirely. +3. **`blocking_send()` / `blocking_recv()`**: These tokio mpsc methods + have no WASM equivalent. Must be gated or removed on WASM. +4. **Minimum Rust 1.88.0**: kameo requires edition 2024 / Rust 1.88+. + Willow would need to match this MSRV. +5. **Upstream maintenance**: kameo is actively developed (v0.19.2, last + commit March 2026). Forking means maintaining divergence or getting + the runtime abstraction upstreamed. + +**Verdict: fork is feasible but not clearly better than writing our own.** + +The fork saves ~800 lines of actor machinery (mailbox, supervision, +actor lifecycle) but introduces: +- Ongoing merge burden with an actively evolving upstream +- The `Send` bound issue remains (kameo won't accept a `MaybeSend` + change upstream — it's a fundamental API decision) +- kameo's `remote` feature (libp2p-based distributed actors) would + conflict with Willow's existing libp2p networking layer +- kameo's dependency on `downcast-rs`, `dyn-clone`, `serde` (with + derive) adds weight Willow doesn't need + +Writing `willow-actor` from scratch is estimated at ~1000-1500 lines +for the core (message, actor, handler, addr, context, mailbox, envelope, +runtime, error modules). This is comparable to the fork effort when +accounting for the abstraction layer + ongoing maintenance cost. 
 ### Recommendation: build `willow-actor`

From cbc61ad1b350a32e5eae5de73f20c10bdb3d70d9 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sun, 29 Mar 2026 16:15:09 +0000
Subject: [PATCH 06/26] Revise actor system spec for iroh compatibility

If building on iroh, tokio is available on both native and WASM, which
simplifies the design significantly:

- Drop MaybeSend, use Send unconditionally (iroh requires it)
- Use tokio channels directly on both targets (no futures::channel)
- Adopt CancellationToken for shutdown (matches iroh's pattern)
- Use RPITIT instead of async_trait (no proc macro needed)
- Runtime module shrinks to just spawn() abstraction

https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z
---
 .../specs/2026-03-29-actor-system-design.md   | 220 +++++++++++-------
 1 file changed, 133 insertions(+), 87 deletions(-)

diff --git a/docs/superpowers/specs/2026-03-29-actor-system-design.md b/docs/superpowers/specs/2026-03-29-actor-system-design.md
index b60018d2..dc3d498c 100644
--- a/docs/superpowers/specs/2026-03-29-actor-system-design.md
+++ b/docs/superpowers/specs/2026-03-29-actor-system-design.md
@@ -97,7 +97,7 @@ the closest match to our desired API shape:
    `Recipient` type for pub-sub patterns.
 4. **No interval support**: No built-in periodic tick mechanism. The
    heartbeat/sync actors would still need manual timer loops.
-5. **No `MaybeSend` pattern**: The `Send` bound is unconditional. Our
+5. **No conditional `Send` pattern**: The `Send` bound is unconditional. Our
    design needs conditional `Send` to avoid unnecessary synchronization
    on WASM.
 6. **Low activity**: Last release Feb 2024, limited maintenance signal.
@@ -163,7 +163,7 @@ directly to `futures::channel::mpsc`.
 The fork saves ~800 lines of actor machinery (mailbox, supervision,
 actor lifecycle) but introduces:
 - Ongoing merge burden with an actively evolving upstream
-- The `Send` bound issue remains (kameo won't accept a `MaybeSend`
+- The `Send` bound mismatch remains (kameo won't accept a conditional-`Send`
   change upstream — it's a fundamental API decision)
 - kameo's `remote` feature (libp2p-based distributed actors) would
   conflict with Willow's existing libp2p networking layer
 - kameo's dependency on `downcast-rs`, `dyn-clone`, `serde` (with
   derive) adds weight Willow doesn't need
@@ -175,19 +175,80 @@ for the core (message, actor, handler, addr, context, mailbox, envelope,
 runtime, error modules). This is comparable to the fork effort when
 accounting for the abstraction layer + ongoing maintenance cost.
 
+### Iroh as a foundation
+
+If Willow migrates from libp2p to [iroh](https://github.com/n0-computer/iroh)
+(n0-computer's QUIC-based P2P connectivity library), this changes the
+runtime story significantly:
+
+**What iroh is:**
+- A peer-to-peer QUIC connectivity library: dial peers by Ed25519 public
+  key, get hole-punching + relay fallback transparently
+- **Not** libp2p — a complete replacement for the transport layer
+- Point-to-point connections (QUIC streams + datagrams), not message-
+  oriented pub/sub. `iroh-gossip` provides broadcast overlay separately.
+- `ProtocolHandler` trait for multiplexing ALPN protocols over a single
+  `Endpoint`
+
+**Iroh's runtime model:**
+- Built on **tokio** (mpsc, oneshot, CancellationToken, task spawning)
+- **First-class WASM support**: `cfg(wasm_browser)` gates throughout,
+  CI validates WASM compilation, `spawn_local` on WASM via an internal
+  `Runtime` struct
+- On WASM: only relay/WebSocket transport (no UDP/direct), all tasks
+  run via `spawn_local`
+- **`Send` bounds are required** at compile time on both native and WASM
+  (iroh's `Runtime::spawn` takes `Future + Send`)
+
+**Impact on willow-actor:**
+
+1. **`MaybeSend` is unnecessary.** Iroh requires `Send` everywhere.
+   Since the actor system runs within iroh's tokio runtime, all futures
+   must be `Send` regardless of target. The `MaybeSend` pattern was
+   designed to allow `Rc<RefCell<T>>` on WASM, but iroh's `Send`
+   requirement already prevents that. **Use `Send` unconditionally** —
+   it compiles fine on WASM (single-threaded, everything is trivially
+   Send) and matches iroh's constraints.
+
+2. **Runtime module simplifies.** Instead of abstracting over
+   tokio vs `futures::channel` vs `wasm_bindgen_futures`, use tokio
+   channels on both targets (iroh already depends on tokio for WASM
+   via its internal runtime shim). The runtime module shrinks to just
+   `spawn()` (tokio::spawn on native, spawn_local on WASM) and
+   `sleep()` — channels are always `tokio::sync::mpsc`.
+
+3. **`CancellationToken` for shutdown.** Iroh uses
+   `tokio_util::sync::CancellationToken` pervasively. The actor system
+   should adopt the same pattern for graceful shutdown instead of custom
+   stop flags, so actor lifecycle integrates cleanly with iroh endpoint
+   shutdown.
+
+4. **`ProtocolHandler` as actor entry point.** Iroh's `Router` dispatches
+   incoming connections by ALPN to `ProtocolHandler::accept()` impls.
+   A network actor can implement `ProtocolHandler`, bridging iroh
+   connections into actor messages.
+
+5. **No built-in gossipsub.** Without libp2p's gossipsub, broadcast
+   patterns change. `iroh-gossip` provides a gossip overlay, or actors
+   can use point-to-point messaging with explicit fan-out. The actor
+   system doesn't need to handle this — it's a networking layer concern.
+
 ### Recommendation: build `willow-actor`
 
-No existing crate satisfies all requirements (dual-target with
-conditional Send, supervision, intervals, stream handlers, per-message
-handlers). The design below combines:
+No existing crate satisfies all requirements (dual-target, supervision,
+intervals, stream handlers, per-message handlers). The design below
+combines:
 
 - **xtra/kameo's `Handler` pattern** — per-message-type trait impls
   with typed returns, not a single enum
-- **ractor's `concurrency` module approach** — platform-abstracted
-  spawn/channel/timer with cfg-switched backends
-- **New: `MaybeSend`** — conditional `Send` bounds dropped on WASM
-- **New: supervision, intervals, `Recipient`** — features missing
-  from xtra
+- **tokio channels directly** — no runtime abstraction needed since iroh
+  already provides tokio on both native and WASM
+- **`Send` unconditionally** — matches iroh's requirement, compiles on
+  WASM (everything is trivially Send on single-threaded targets)
+- **`CancellationToken` for lifecycle** — aligns with iroh's shutdown
+  pattern
+- **Supervision, intervals, `Recipient`** — features missing from
+  xtra
 
 ---
 
@@ -232,49 +293,43 @@ the existing architecture's strengths.
 ### Message Trait
 
 ```rust
-/// Marker trait for actor messages. Must be Send on native.
-/// On WASM, Send is not required since everything is single-threaded.
-pub trait Message: 'static + MaybeSend {
+/// Marker trait for actor messages.
+pub trait Message: Send + 'static {
     /// The response type for request-reply. Use `()` for fire-and-forget.
-    type Result: 'static + MaybeSend;
+    type Result: Send + 'static;
 }
 ```
 
-`MaybeSend` is a conditional trait alias:
-
-```rust
-#[cfg(not(target_arch = "wasm32"))]
-pub trait MaybeSend: Send {}
-#[cfg(not(target_arch = "wasm32"))]
-impl<T: Send> MaybeSend for T {}
-
-#[cfg(target_arch = "wasm32")]
-pub trait MaybeSend {}
-#[cfg(target_arch = "wasm32")]
-impl<T> MaybeSend for T {}
-```
+`Send` is required unconditionally. On WASM (single-threaded), all types
+are trivially `Send`, so this compiles without issue. This matches iroh's
+requirement that all futures and channel payloads are `Send`.
 
 ### Actor Trait
 
 ```rust
 /// An actor processes messages sequentially in its own task.
-#[async_trait(?Send)] // ?Send for WASM compat -pub trait Actor: 'static + MaybeSend + Sized { +pub trait Actor: Send + 'static + Sized { /// Called once when the actor starts, before processing messages. - async fn started(&mut self, ctx: &mut Context) {} + fn started(&mut self, ctx: &mut Context) + -> impl Future + Send { async {} } /// Called when the actor is stopping (mailbox closed or explicit stop). - async fn stopped(&mut self) {} + fn stopped(&mut self) + -> impl Future + Send { async {} } } ``` +Uses RPITIT (return-position impl trait in trait, stabilized in Rust +1.75) instead of `async_trait` — avoids the proc macro dependency and +Box allocation per handler call. + ### Handler Trait ```rust /// Implement Handler for each message type an actor accepts. -#[async_trait(?Send)] pub trait Handler: Actor { - async fn handle(&mut self, msg: M, ctx: &mut Context) -> M::Result; + fn handle(&mut self, msg: M, ctx: &mut Context) + -> impl Future + Send; } ``` @@ -288,7 +343,7 @@ is type-checked at compile time. pub struct Context { addr: Addr, system: SystemHandle, - stop_flag: bool, + cancel: CancellationToken, } impl Context { @@ -301,6 +356,10 @@ impl Context { /// Request a graceful stop after the current message finishes. pub fn stop(&mut self) { ... } + /// Get the cancellation token (child of the system's root token). + /// Integrates with iroh's CancellationToken-based shutdown. + pub fn cancellation_token(&self) -> &CancellationToken { ... } + /// Access the actor system (for spawning unrelated actors). pub fn system(&self) -> &SystemHandle { ... } } @@ -413,42 +472,27 @@ impl System { ## Platform Abstraction -The crate uses a thin `runtime` module to abstract over native vs WASM: +Since iroh already depends on tokio for both native and WASM, the +runtime module is minimal — only task spawning differs by platform: ```rust // crate::runtime (internal) /// Spawn a future as a background task. 
-pub fn spawn + MaybeSend + 'static>(fut: F) { +pub fn spawn + Send + 'static>(fut: F) { #[cfg(not(target_arch = "wasm32"))] - tokio::task::spawn(fut); + { tokio::task::spawn(fut); } #[cfg(target_arch = "wasm32")] wasm_bindgen_futures::spawn_local(fut); } - -/// One-shot channel (platform-specific). -pub fn oneshot() -> (OneshotTx, OneshotRx) { - #[cfg(not(target_arch = "wasm32"))] - { /* tokio::sync::oneshot */ } - - #[cfg(target_arch = "wasm32")] - { /* futures::channel::oneshot */ } -} - -/// Bounded MPSC channel. -pub fn channel(cap: usize) -> (Sender, Receiver) { - #[cfg(not(target_arch = "wasm32"))] - { /* tokio::sync::mpsc */ } - - #[cfg(target_arch = "wasm32")] - { /* futures::channel::mpsc */ } -} - -/// Sleep for a duration (native: tokio::time::sleep, WASM: gloo_timers). -pub async fn sleep(duration: Duration) { ... } ``` +Channels use `tokio::sync::mpsc` and `tokio::sync::oneshot` on both +targets — iroh's WASM shim makes these available. Timers use +`tokio::time::sleep` on native and `gloo_timers` (or iroh's internal +timer abstraction) on WASM. + ## Mailbox Internals Each actor gets a mailbox backed by a bounded MPSC channel. Messages are @@ -457,7 +501,7 @@ type-erased inside the mailbox using a closure-based envelope pattern: ```rust // Internal — not part of the public API. -type BoxEnvelope = Box) -> BoxFuture<'_, ()> + MaybeSend>; +type BoxEnvelope = Box) -> BoxFuture<'_, ()> + Send + 'static>; // When Addr.send(msg) is called for M where A: Handler: // 1. msg is wrapped in an envelope closure @@ -493,9 +537,9 @@ impl Context { } ``` -On WASM, `RestartPolicy::OnFailure` and `Backoff` still work but panics -are caught via `std::panic::catch_unwind` only if the actor is -`UnwindSafe`. Otherwise, `Never` is the only safe option on WASM. +Panics are caught via `std::panic::catch_unwind`. On WASM, this works +only if the actor is `UnwindSafe`; otherwise `Never` is the only safe +option. 
## Streams @@ -504,7 +548,7 @@ timers) that feed into their mailbox: ```rust #[async_trait(?Send)] -pub trait StreamHandler: Actor { +pub trait StreamHandler: Actor { async fn handle_stream_item(&mut self, item: S, ctx: &mut Context); /// Called when the stream ends. @@ -516,8 +560,8 @@ impl Context { pub fn add_stream(&mut self, stream: St) where A: StreamHandler, - S: 'static + MaybeSend, - St: Stream + MaybeSend + 'static, + S: 'static + Send, + St: Stream + Send + 'static, { ... } } ``` @@ -534,7 +578,7 @@ impl Context { pub fn run_interval>( &mut self, duration: Duration, - msg_factory: impl Fn() -> M + MaybeSend + 'static, + msg_factory: impl Fn() -> M + Send + 'static, ) -> IntervalHandle where A: Handler, @@ -574,15 +618,14 @@ crates/actor/ ├── Cargo.toml └── src/ ├── lib.rs — public API re-exports - ├── actor.rs — Actor, Handler, StreamHandler traits + ├── actor.rs — Actor, Handler, StreamHandler, Message traits ├── addr.rs — Addr, AnyAddr, Recipient ├── context.rs — Context, interval, stream attachment ├── envelope.rs — BoxEnvelope, type-erased message dispatch - ├── mailbox.rs — bounded channel wrapper, recv loop - ├── message.rs — Message trait, MaybeSend - ├── runtime.rs — platform abstraction (spawn, channel, sleep) + ├── mailbox.rs — tokio mpsc wrapper, recv loop + ├── runtime.rs — spawn abstraction (tokio::spawn vs spawn_local) ├── supervisor.rs — RestartPolicy, supervised spawn - ├── system.rs — System, SystemHandle + ├── system.rs — System, SystemHandle (CancellationToken) └── error.rs — SendError, AskError ``` @@ -590,17 +633,17 @@ crates/actor/ ``` willow-actor (new) +├── tokio (sync: mpsc, oneshot; time: sleep, interval) +├── tokio-util (CancellationToken) ├── futures-core (Stream trait) -├── async-trait ├── thiserror ├── tracing -├── cfg-if -├── [native] tokio (spawn, mpsc, oneshot, sleep) -└── [wasm] wasm-bindgen-futures, futures-channel, gloo-timers +└── [wasm] wasm-bindgen-futures (spawn_local) ``` -`willow-actor` has **no dependency on any 
other willow crate**. It is a -pure infrastructure crate. +No `async-trait` needed — uses RPITIT (Rust 1.75+). `willow-actor` has +**no dependency on any other willow crate**. It is a pure infrastructure +crate. It shares `tokio` with iroh — no additional runtime overhead. ## Migration Path @@ -687,20 +730,23 @@ a `StreamHandler` on a UI actor. Signal updates happen in the handler. ## Open Questions 1. **Backpressure policy**: When a mailbox is full, should `send()` drop - the message (lossy), block (native only), or return an error? Current - design returns `SendError::Full`. An `async fn send_async()` that - awaits capacity could be added for native. + the message (lossy) or return an error? Current design returns + `SendError::Full`. An `async fn send_async()` that awaits capacity + is straightforward since we're always on tokio. 2. **Priority messages**: Should shutdown/stop bypass the queue? Current design: no, messages are FIFO. Shutdown is just another message. The - `Context::stop()` flag is checked between messages. + `Context::stop()` flag is checked between messages. The + `CancellationToken` provides an independent shutdown signal that + doesn't go through the mailbox. -3. **Actor state snapshots**: Should there be a way to query an actor's - internal state for debugging/metrics? Could add an optional - `Inspect` trait that serializes state, but this risks breaking - encapsulation. - -4. **Bounded vs unbounded mailboxes**: The current network layers use +3. **Bounded vs unbounded mailboxes**: The current network layers use unbounded channels to avoid dropping gossipsub messages. Should `System::spawn_unbounded()` be offered? Probably yes, with a lint warning in docs. + +4. **`Rc>` migration**: Willow's client library currently uses + `Rc>` on WASM paths. With `Send` required + unconditionally, these must become `Arc>`. 
On WASM the + overhead is negligible (no actual locking), but this is a codebase- + wide change that should happen before or alongside actor adoption. From 4a6d5fe172810ae2ecd17420e671d0d4b0d4c215 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 29 Mar 2026 16:22:22 +0000 Subject: [PATCH 07/26] Mark actor system spec as blocked on iroh integration Implementation deferred until iroh is fully integrated. Updated migration path to reference iroh prerequisites and adjusted phase descriptions to reflect iroh's Endpoint/Router/ProtocolHandler. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- .../specs/2026-03-29-actor-system-design.md | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/superpowers/specs/2026-03-29-actor-system-design.md b/docs/superpowers/specs/2026-03-29-actor-system-design.md index dc3d498c..de228721 100644 --- a/docs/superpowers/specs/2026-03-29-actor-system-design.md +++ b/docs/superpowers/specs/2026-03-29-actor-system-design.md @@ -2,6 +2,7 @@ **Date**: 2026-03-29 **Status**: Draft +**Depends on**: iroh integration (implementation blocked until complete) ## Existing Solutions @@ -647,10 +648,18 @@ crate. It shares `tokio` with iroh — no additional runtime overhead. ## Migration Path -### Phase 1: New crate, worker migration +**Prerequisite**: iroh integration must be complete before implementation +begins. The actor system depends on iroh's tokio runtime being available +on both native and WASM targets. Once iroh is integrated, the networking +layer will already use iroh's `Endpoint`, `Router`, and `ProtocolHandler` +— the actor system builds on that foundation. + +### Phase 1: Core crate + worker migration Create `crates/actor/` with the core types. Migrate the worker crate's -four actors to use `willow-actor`: +four hand-rolled actor loops to use `willow-actor`. This is the smallest +useful scope — workers are native-only, so WASM correctness isn't tested +yet but the API is designed for it. 
**Before** (current `crates/worker/src/actors/state.rs`): ```rust @@ -714,18 +723,19 @@ system.shutdown().await; ### Phase 2: Client library -Replace `ClientHandle`'s `futures::channel::mpsc` pair with actor addresses. -The `ClientEventLoop` becomes an actor with `StreamHandler`. +Replace `ClientHandle`'s channel pair with actor addresses. The +`ClientEventLoop` becomes an actor with `StreamHandler` for iroh +network events. `Rc>` becomes `Arc>`. ### Phase 3: Network bridge The Bevy bridge becomes a thin adapter: a Bevy system polls a `Receiver` -that an actor feeds. The bridge actor replaces `run_network()`. +that an actor feeds. The bridge actor wraps the iroh `Endpoint`. ### Phase 4: Web UI -The Leptos event loop (`spawn_local` + `futures::channel::mpsc`) becomes -a `StreamHandler` on a UI actor. Signal updates happen in the handler. +The Leptos event loop becomes a `StreamHandler` on a UI actor. Signal +updates happen in the handler. Validates WASM target correctness. ## Open Questions From 8fb357af913d15407649e9500efce555924a2180 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 16:40:34 +0000 Subject: [PATCH 08/26] Update actor system spec for completed iroh migration - Status: Draft -> Ready for implementation - Overview: reflects current trait-based networking (Network, TopicHandle, TopicEvents) instead of old libp2p patterns - Iroh section: updated from hypothetical to concrete, references actual traits and patterns now in the codebase - Migration path: updated Before examples to match current worker code (generic over N: Network, TopicHandle for broadcast, watch for shutdown) - Open questions: Rc> migration already done by iroh work https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/specs/2026-03-29-actor-system-design.md | 221 +++++++------------ 1 file changed, 82 insertions(+), 139 deletions(-) diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index de228721..9b6bbca6 
100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -1,8 +1,8 @@ # Actor System Design Spec **Date**: 2026-03-29 -**Status**: Draft -**Depends on**: iroh integration (implementation blocked until complete) +**Status**: Ready for implementation +**Depends on**: ~~iroh integration~~ (complete — merged to main) ## Existing Solutions @@ -176,63 +176,38 @@ for the core (message, actor, handler, addr, context, mailbox, envelope, runtime, error modules). This is comparable to the fork effort when accounting for the abstraction layer + ongoing maintenance cost. -### Iroh as a foundation - -If Willow migrates from libp2p to [iroh](https://github.com/n0-computer/iroh) -(n0-computer's QUIC-based P2P connectivity library), this changes the -runtime story significantly: - -**What iroh is:** -- A peer-to-peer QUIC connectivity library: dial peers by Ed25519 public - key, get hole-punching + relay fallback transparently -- **Not** libp2p — a complete replacement for the transport layer -- Point-to-point connections (QUIC streams + datagrams), not message- - oriented pub/sub. `iroh-gossip` provides broadcast overlay separately. -- `ProtocolHandler` trait for multiplexing ALPN protocols over a single - `Endpoint` - -**Iroh's runtime model:** -- Built on **tokio** (mpsc, oneshot, CancellationToken, task spawning) -- **First-class WASM support**: `cfg(wasm_browser)` gates throughout, - CI validates WASM compilation, `spawn_local` on WASM via an internal - `Runtime` struct -- On WASM: only relay/WebSocket transport (no UDP/direct), all tasks - run via `spawn_local` -- **`Send` bounds are required** at compile time on both native and WASM - (iroh's `Runtime::spawn` takes `Future + Send`) - -**Impact on willow-actor:** - -1. **`Send` is unnecessary.** Iroh requires `Send` everywhere. - Since the actor system runs within iroh's tokio runtime, all futures - must be `Send` regardless of target. 
The `Send` pattern was - designed to allow `Rc>` on WASM, but iroh's `Send` - requirement already prevents that. **Use `Send` unconditionally** — - it compiles fine on WASM (single-threaded, everything is trivially - Send) and matches iroh's constraints. - -2. **Runtime module simplifies.** Instead of abstracting over - tokio vs `futures::channel` vs `wasm_bindgen_futures`, use tokio - channels on both targets (iroh already depends on tokio for WASM - via its internal runtime shim). The runtime module shrinks to just - `spawn()` (tokio::spawn on native, spawn_local on WASM) and - `sleep()` — channels are always `tokio::sync::mpsc`. - -3. **`CancellationToken` for shutdown.** Iroh uses - `tokio_util::sync::CancellationToken` pervasively. The actor system - should adopt the same pattern for graceful shutdown instead of custom - stop flags, so actor lifecycle integrates cleanly with iroh endpoint - shutdown. - -4. **`ProtocolHandler` as actor entry point.** Iroh's `Router` dispatches - incoming connections by ALPN to `ProtocolHandler::accept()` impls. - A network actor can implement `ProtocolHandler`, bridging iroh - connections into actor messages. - -5. **No built-in gossipsub.** Without libp2p's gossipsub, broadcast - patterns change. `iroh-gossip` provides a gossip overlay, or actors - can use point-to-point messaging with explicit fan-out. The actor - system doesn't need to handle this — it's a networking layer concern. +### Iroh integration (complete) + +Willow has migrated from libp2p to iroh. The networking layer now uses +trait-based abstractions (`Network`, `TopicHandle`, `TopicEvents`, +`BlobStore`) backed by iroh's QUIC transport and `iroh-gossip` for +broadcast. See `docs/specs/2026-03-29-iroh-migration-design.md`. + +**Key facts for the actor system:** + +1. **Tokio everywhere.** Iroh depends on tokio for both native and WASM. + The client already uses `Arc>` (not `Rc>`). All + futures must be `Send`. Channels are `tokio::sync::mpsc` throughout. + +2. 
**`Network` trait is generic.** Workers and client are generic over + `N: Network`, with `IrohNetwork` for production and `MemNetwork` for + tests. Actor types should also be generic over `Network` where they + interact with gossip, following the same pattern. + +3. **`TopicEvents` is a stream.** The `TopicEvents` trait has + `async fn next() -> Option>` — a natural fit for + `StreamHandler`. The network actor currently wraps this in a manual + `while let` loop; the actor system replaces that. + +4. **`TopicHandle` for broadcast.** The heartbeat and sync actors + currently take `T: TopicHandle` and call `topic.broadcast()`. With + the actor system, they hold the `TopicHandle` as actor state and + call it from message handlers. + +5. **Shutdown via `watch` channel.** Workers currently use + `tokio::sync::watch` for shutdown signaling. The actor system + replaces this with `CancellationToken` (used by iroh internally) + or simply dropping the `Addr` (closing the mailbox). ### Recommendation: build `willow-actor` @@ -255,24 +230,25 @@ combines: ## Overview -Willow has five different channel/concurrency patterns across its crates: +With the iroh migration complete, Willow's networking is now trait-based +(`Network`, `TopicHandle`, `TopicEvents`) and generic — but the +concurrency patterns above the network layer are still hand-rolled: -| Layer | Channels | Target | -|-------|----------|--------| -| Bevy bridge | `std::sync::mpsc` | native | -| libp2p node | `tokio::sync::mpsc` (native) / `futures::channel::mpsc` (WASM) | both | -| Client lib | `futures::channel::mpsc` | both | -| Worker actors | `tokio::sync::mpsc` + `oneshot` + `watch` | native only | -| Web UI | `futures::channel::mpsc` + `spawn_local` | WASM only | +| Layer | Current pattern | Problem | +|-------|----------------|---------| +| Worker actors | `tokio::sync::mpsc` + `oneshot` + `watch`, 4 manual loops | Not reusable, manual shutdown via watch channel | +| Client lib | `Arc>` + `futures::channel::mpsc` 
| Event loop is a monolithic match block | +| Bevy bridge | `std::sync::mpsc` polling | Tightly coupled to network internals | +| Web UI | `futures::channel::mpsc` + `spawn_local` | Duplicates client event loop logic | The worker crate already uses an actor pattern (state, network, heartbeat, -sync actors communicating via channels), but it's hand-rolled, tokio-only, -and not reusable. Every other crate reinvents the same pattern: spawn a +sync actors communicating via channels), but it's hand-rolled and not +reusable. The client and web crates reinvent the same pattern: spawn a task, create channels, loop on `select!`, handle shutdown. -`willow-actor` formalizes this into a single crate that works on both -native and WASM, eliminating the per-crate boilerplate while preserving -the existing architecture's strengths. +`willow-actor` formalizes this into a single crate, building on iroh's +tokio runtime (available on both native and WASM) to eliminate the +per-crate boilerplate. ## Goals @@ -648,89 +624,59 @@ crate. It shares `tokio` with iroh — no additional runtime overhead. ## Migration Path -**Prerequisite**: iroh integration must be complete before implementation -begins. The actor system depends on iroh's tokio runtime being available -on both native and WASM targets. Once iroh is integrated, the networking -layer will already use iroh's `Endpoint`, `Router`, and `ProtocolHandler` -— the actor system builds on that foundation. - ### Phase 1: Core crate + worker migration Create `crates/actor/` with the core types. Migrate the worker crate's four hand-rolled actor loops to use `willow-actor`. This is the smallest -useful scope — workers are native-only, so WASM correctness isn't tested -yet but the API is designed for it. +useful scope and the cleanest test case — the workers already have +well-defined actor boundaries. 
-**Before** (current `crates/worker/src/actors/state.rs`): +**Before** (current `crates/worker/src/runtime.rs`): ```rust -pub async fn run(mut role: Box, mut rx: mpsc::Receiver) { - while let Some(msg) = rx.recv().await { - match msg { - StateMsg::Event(event) => role.on_event(&event), - StateMsg::Request { req, reply } => { - let response = role.handle_request(req); - let _ = reply.send(response); - } - StateMsg::Shutdown => break, - } - } +pub async fn run(role: Box, config: WorkerConfig, network: N) { + let (state_tx, state_rx) = mpsc::channel::(256); + let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false); + let state_handle = tokio::spawn(state::run(role, state_rx)); + let network_handle = tokio::spawn(network::run(workers_events, state_tx.clone(), peer_id)); + let heartbeat_handle = tokio::spawn(heartbeat::run(peer_id, ..., workers_sender.clone(), shutdown_rx.clone())); + let sync_handle = tokio::spawn(sync::run(peer_id, ..., workers_sender, shutdown_rx)); + tokio::signal::ctrl_c().await?; + let _ = shutdown_tx.send(true); + let _ = state_tx.send(StateMsg::Shutdown).await; + let _ = tokio::join!(state_handle, network_handle, heartbeat_handle, sync_handle); } ``` **After**: ```rust -pub struct StateActor { - role: Box, -} - -impl Actor for StateActor {} - -impl Handler for StateActor { - async fn handle(&mut self, msg: EventMsg, _ctx: &mut Context) { - self.role.on_event(&msg.0); - } -} - -impl Handler for StateActor { - async fn handle(&mut self, msg: RequestMsg, _ctx: &mut Context) -> WorkerResponse { - self.role.handle_request(msg.0) - } +pub async fn run(role: Box, config: WorkerConfig, network: N) { + let system = System::new(); + let state_addr = system.spawn(StateActor { role }); + let _network = system.spawn(NetworkActor::new(workers_events, state_addr.clone(), peer_id)); + let _heartbeat = system.spawn(HeartbeatActor::new(peer_id, state_addr.clone(), workers_sender.clone())); + let _sync = system.spawn(SyncActor::new(peer_id, state_addr, 
workers_sender)); + tokio::signal::ctrl_c().await?; + system.shutdown().await; } ``` -**Before** (current `crates/worker/src/runtime.rs`): -```rust -let (state_tx, state_rx) = mpsc::channel::(256); -let (network_tx, network_rx) = mpsc::channel::(256); -let (shutdown_tx, shutdown_rx) = watch::channel(false); - -let state_handle = tokio::spawn(state::run(role, state_rx)); -let heartbeat_handle = tokio::spawn(heartbeat::run(..., shutdown_rx.clone())); -// ... manual join + shutdown -``` - -**After**: -```rust -let system = System::new(); -let state_addr = system.spawn(StateActor { role }); -let network_addr = system.spawn(NetworkActor::new(node, events, state_addr.clone())); -let _heartbeat = system.spawn(HeartbeatActor::new(peer_id, state_addr.clone(), network_addr.clone())); -let _sync = system.spawn(SyncActor::new(peer_id, state_addr, network_addr)); - -tokio::signal::ctrl_c().await?; -system.shutdown().await; -``` +The `NetworkActor` uses `StreamHandler` to receive from +`TopicEvents`. The `HeartbeatActor` uses `ctx.run_interval()` instead +of a manual `tokio::select! + sleep` loop. Shutdown propagates via +`CancellationToken` — no more `watch` channel. ### Phase 2: Client library -Replace `ClientHandle`'s channel pair with actor addresses. The -`ClientEventLoop` becomes an actor with `StreamHandler` for iroh -network events. `Rc>` becomes `Arc>`. +Replace `ClientHandle`'s `futures::channel::mpsc` event dispatching +with actor addresses. The client event loop becomes an actor with +`StreamHandler` for `TopicEvents`. The client already uses +`Arc>` — no `Rc>` migration needed (iroh already +required this). ### Phase 3: Network bridge The Bevy bridge becomes a thin adapter: a Bevy system polls a `Receiver` -that an actor feeds. The bridge actor wraps the iroh `Endpoint`. +that an actor feeds. The bridge actor wraps iroh network interactions. ### Phase 4: Web UI @@ -755,8 +701,5 @@ updates happen in the handler. Validates WASM target correctness. 
`System::spawn_unbounded()` be offered? Probably yes, with a lint warning in docs. -4. **`Rc>` migration**: Willow's client library currently uses - `Rc>` on WASM paths. With `Send` required - unconditionally, these must become `Arc>`. On WASM the - overhead is negligible (no actual locking), but this is a codebase- - wide change that should happen before or alongside actor adoption. +4. **`Rc>` migration**: ~~Already done~~ — the iroh migration + switched the client to `Arc>`. No further changes needed. From 3ec3cc94041c84117ae6a4b86c943aba0250a7b6 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 16:58:49 +0000 Subject: [PATCH 09/26] Apply review feedback to actor system spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove stale libp2p references, update overview table to current state - Goal: no locks — shared state lives in actors, eliminating Arc> - Remove Bevy from scope (non-goal, not part of migration) - Unbounded mailboxes by default, remove SendError::Full - FIFO shutdown is fine, CancellationToken for out-of-band - Replace open questions with resolved decisions - Fix StreamHandler to use RPITIT instead of async_trait - Remove Phase 3 (Bevy bridge), renumber Web UI to Phase 3 - All crates migrate as part of the plan (not incremental) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/specs/2026-03-29-actor-system-design.md | 95 ++++++++------------ 1 file changed, 35 insertions(+), 60 deletions(-) diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 9b6bbca6..ba302bbb 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -84,24 +84,14 @@ the closest match to our desired API shape: **Why not adopt xtra directly:** -1. **Hard `Send` bound on `Actor`**: The `Actor` trait requires - `Send + 'static`. 
On WASM, all types are trivially `Send` (single - thread), but this forces `Send` constraints to propagate through the - entire Willow type graph. Many Willow types use `Rc>` in - WASM paths (e.g., `ClientHandle.shared`), which are not `Send`. - Switching to `Arc>` everywhere adds overhead on WASM for no - benefit. -2. **No supervision**: No restart policies or supervisor trees. Actors +1. **No supervision**: No restart policies or supervisor trees. Actors that panic are simply gone. -3. **No `Recipient` / type-erased message targets**: xtra has +2. **No `Recipient` / type-erased message targets**: xtra has `MessageChannel` but it's less ergonomic than a standalone `Recipient` type for pub-sub patterns. -4. **No interval support**: No built-in periodic tick mechanism. The +3. **No interval support**: No built-in periodic tick mechanism. The heartbeat/sync actors would still need manual timer loops. -5. **No `Send` pattern**: The `Send` bound is unconditional. Our - design needs conditional `Send` to avoid unnecessary synchronization - on WASM. -6. **Low activity**: Last release Feb 2024, limited maintenance signal. +4. **Low activity**: Last release Feb 2024, limited maintenance signal. 
### kameo — best API shape, no WASM @@ -167,7 +157,7 @@ actor lifecycle) but introduces: - The `Send` bound issue remains (kameo won't accept a `Send` change upstream — it's a fundamental API decision) - kameo's `remote` feature (libp2p-based distributed actors) would - conflict with Willow's existing libp2p networking layer + conflict with Willow's iroh networking layer - kameo's dependency on `downcast-rs`, `dyn-clone`, `serde` (with derive) adds weight Willow doesn't need @@ -237,8 +227,7 @@ concurrency patterns above the network layer are still hand-rolled: | Layer | Current pattern | Problem | |-------|----------------|---------| | Worker actors | `tokio::sync::mpsc` + `oneshot` + `watch`, 4 manual loops | Not reusable, manual shutdown via watch channel | -| Client lib | `Arc>` + `futures::channel::mpsc` | Event loop is a monolithic match block | -| Bevy bridge | `std::sync::mpsc` polling | Tightly coupled to network internals | +| Client lib | `Arc>` + `futures::channel::mpsc` | Shared mutable state behind locks, monolithic event loop | | Web UI | `futures::channel::mpsc` + `spawn_local` | Duplicates client event loop logic | The worker crate already uses an actor pattern (state, network, heartbeat, @@ -256,14 +245,14 @@ per-crate boilerplate. 2. **Typed mailboxes**: each actor defines its message type, no `Box` 3. **Request-reply**: first-class `ask()` with typed responses, no manual oneshot wiring 4. **Supervision**: restart policies for crashed actors (native), error propagation (WASM) -5. **Lightweight**: no `Arc>` in the hot path, no dynamic dispatch on send -6. **Incremental adoption**: existing crates can migrate one actor at a time +5. **No locks**: shared state lives inside actors, eliminating `Arc>` / `Arc>` — access is serialized through message passing +6. 
**Lightweight**: no dynamic dispatch on send ## Non-Goals -- Distributed actors / remote messaging (libp2p handles that) +- Distributed actors / remote messaging (iroh gossip handles that) - Actor persistence / event sourcing (willow-state handles that) -- Replacing Bevy's ECS (the bridge stays, but becomes thinner) +- Bevy desktop app (out of scope for this migration) ## Core Types @@ -436,9 +425,6 @@ impl System { /// Spawn a top-level actor and return its address. pub fn spawn(&self, actor: A) -> Addr { ... } - /// Spawn with a specific mailbox capacity (default: 256). - pub fn spawn_with_capacity(&self, actor: A, capacity: usize) -> Addr { ... } - /// Get a handle that can be passed to other contexts. pub fn handle(&self) -> SystemHandle { ... } @@ -472,7 +458,7 @@ timer abstraction) on WASM. ## Mailbox Internals -Each actor gets a mailbox backed by a bounded MPSC channel. Messages are +Each actor gets a mailbox backed by an unbounded MPSC channel. Messages are type-erased inside the mailbox using a closure-based envelope pattern: ```rust @@ -524,12 +510,13 @@ Actors can subscribe to external event streams (e.g., network events, timers) that feed into their mailbox: ```rust -#[async_trait(?Send)] -pub trait StreamHandler: Actor { - async fn handle_stream_item(&mut self, item: S, ctx: &mut Context); +pub trait StreamHandler: Actor { + fn handle_stream_item(&mut self, item: S, ctx: &mut Context) + -> impl Future + Send; /// Called when the stream ends. - async fn stream_finished(&mut self, _ctx: &mut Context) {} + fn stream_finished(&mut self, _ctx: &mut Context) + -> impl Future + Send { async {} } } impl Context { @@ -575,8 +562,6 @@ impl IntervalHandle { pub enum SendError { #[error("actor mailbox is closed")] Closed(M), - #[error("actor mailbox is full")] - Full(M), } #[derive(Debug, thiserror::Error)] @@ -667,39 +652,29 @@ of a manual `tokio::select! + sleep` loop. 
Shutdown propagates via ### Phase 2: Client library -Replace `ClientHandle`'s `futures::channel::mpsc` event dispatching -with actor addresses. The client event loop becomes an actor with -`StreamHandler` for `TopicEvents`. The client already uses -`Arc>` — no `Rc>` migration needed (iroh already -required this). - -### Phase 3: Network bridge +Replace `ClientHandle`'s `Arc>` with actors. +Shared state moves into a state actor — no more locks. The client event +loop becomes an actor with `StreamHandler` for `TopicEvents`. External +callers use `Addr` to send commands and `ask()` to query state. -The Bevy bridge becomes a thin adapter: a Bevy system polls a `Receiver` -that an actor feeds. The bridge actor wraps iroh network interactions. - -### Phase 4: Web UI +### Phase 3: Web UI The Leptos event loop becomes a `StreamHandler` on a UI actor. Signal -updates happen in the handler. Validates WASM target correctness. - -## Open Questions +updates happen in the handler. Validates WASM target correctness and +completes the migration across all active crates. -1. **Backpressure policy**: When a mailbox is full, should `send()` drop - the message (lossy) or return an error? Current design returns - `SendError::Full`. An `async fn send_async()` that awaits capacity - is straightforward since we're always on tokio. +## Decisions -2. **Priority messages**: Should shutdown/stop bypass the queue? Current - design: no, messages are FIFO. Shutdown is just another message. The - `Context::stop()` flag is checked between messages. The - `CancellationToken` provides an independent shutdown signal that - doesn't go through the mailbox. +1. **Mailboxes are unbounded.** `send()` returns `Err` only if the + actor is dead (mailbox closed). Bounded mailboxes can be added later + if backpressure becomes necessary. -3. **Bounded vs unbounded mailboxes**: The current network layers use - unbounded channels to avoid dropping gossipsub messages. 
Should - `System::spawn_unbounded()` be offered? Probably yes, with a lint - warning in docs. +2. **FIFO, no priority messages.** Shutdown is just another message. + `CancellationToken` provides an independent out-of-band shutdown + signal that doesn't go through the mailbox. -4. **`Rc>` migration**: ~~Already done~~ — the iroh migration - switched the client to `Arc>`. No further changes needed. +3. **Shared state lives in actors.** `Arc>` in the + client library is replaced by a state actor. External code queries + state via `ask()`. This eliminates all locks from the hot path — + the actor processes messages sequentially, so no synchronization is + needed inside the actor. From d8df2cbe916969db13af4c7c0af2219ac9802528 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:03:20 +0000 Subject: [PATCH 10/26] Review pass: fix incorrect tokio-on-WASM assumption Tokio is native-only (cfg(not(wasm32))). On WASM the codebase uses futures::channel::mpsc, wasm-bindgen-futures, and gloo-timers. Restored the full runtime abstraction module with cfg-switched backends. Removed CancellationToken (tokio-util, native-only) from Context and shutdown. Also cleaned up stale references in ractor/kameo/xtra sections. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/specs/2026-03-29-actor-system-design.md | 180 +++++++++---------- 1 file changed, 87 insertions(+), 93 deletions(-) diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index ba302bbb..b26c8a32 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -58,9 +58,9 @@ with 84 passing browser tests. Key details: `Send + Sync`. This is stricter than necessary — actors are single-owner by design, so `Sync` is never needed. 4. **`tokio_with_wasm` dependency**: Pulls in a full tokio-compatible - shim for WASM. 
Willow already uses `futures::channel::mpsc` and - `wasm_bindgen_futures::spawn_local` directly — adding another layer - of abstraction over tokio's API on WASM is unnecessary indirection. + shim for WASM. Willow uses iroh (which handles WASM internally) + + `wasm_bindgen_futures::spawn_local` — adding another abstraction + layer over tokio's API on WASM is unnecessary indirection. 5. **Heavy dependency tree**: `dashmap`, `bon`, `strum`, `once_cell`, plus the full `tokio_with_wasm` crate on WASM. Willow's actor system needs only channels, oneshot, and spawn. @@ -113,58 +113,23 @@ flags for alternative runtimes. ### kameo fork feasibility A source audit of kameo's tokio coupling reveals it is **shallow and -concentrated**. All tokio usage falls into 6 primitives across 4 files: - -| Primitive | Call sites | WASM replacement | -|-----------|-----------|------------------| -| `tokio::spawn` | 5 | `wasm_bindgen_futures::spawn_local` | -| `tokio::sync::mpsc` (bounded+unbounded) | 1 module (~15 method delegations) | `futures::channel::mpsc` | -| `tokio::sync::Mutex` | 1 | `futures::lock::Mutex` | -| `tokio::sync::SetOnce` | 2 | `OnceCell` or custom | -| `tokio::select!` | 1 | `futures::select!` | -| `tokio::runtime::Handle` | 1 (for `spawn_in_thread`) | `#[cfg(not(wasm32))]` gate | -| `task_local!` | 1 | `thread_local!` (WASM is single-threaded) | - -**Total estimated changes**: ~150-200 lines to introduce a `runtime` -abstraction module with `cfg(target_arch = "wasm32")` branches, plus -Cargo.toml feature flag changes. The supervision module has **zero** -production tokio usage. The mailbox module is the densest — it wraps -tokio mpsc types — but it's a clean 1:1 delegation layer that maps -directly to `futures::channel::mpsc`. - -**Challenges:** - -1. **`Send` bounds everywhere**: kameo requires `Actor: Send + 'static` - and all futures must be `Send`. 
On WASM this compiles (everything is - trivially Send on single-threaded targets) but it forces Willow types - that currently use `Rc>` to switch to `Arc>`. This - is a Willow-side change, not a kameo fork issue. -2. **`spawn_in_thread()`**: Uses `tokio::runtime::Handle::current()` and - `std::thread::spawn`. Must be `cfg`-gated out on WASM entirely. -3. **`blocking_send()` / `blocking_recv()`**: These tokio mpsc methods - have no WASM equivalent. Must be gated or removed on WASM. -4. **Minimum Rust 1.88.0**: kameo requires edition 2024 / Rust 1.88+. - Willow would need to match this MSRV. -5. **Upstream maintenance**: kameo is actively developed (v0.19.2, last - commit March 2026). Forking means maintaining divergence or getting - the runtime abstraction upstreamed. +concentrated** — 6 primitives across 4 core files. The supervision +module has zero production tokio usage. With iroh now providing tokio +on WASM, the fork effort shrinks further (no need to swap channel +implementations — tokio mpsc works on both targets). Main remaining +work: `cfg`-gate `spawn_in_thread()` and `blocking_send/recv` on WASM, +replace `tokio::spawn` with `spawn_local` on WASM (~150 lines). **Verdict: fork is feasible but not clearly better than writing our own.** -The fork saves ~800 lines of actor machinery (mailbox, supervision, -actor lifecycle) but introduces: -- Ongoing merge burden with an actively evolving upstream -- The `Send` bound issue remains (kameo won't accept a `Send` - change upstream — it's a fundamental API decision) -- kameo's `remote` feature (libp2p-based distributed actors) would - conflict with Willow's iroh networking layer -- kameo's dependency on `downcast-rs`, `dyn-clone`, `serde` (with - derive) adds weight Willow doesn't need - -Writing `willow-actor` from scratch is estimated at ~1000-1500 lines -for the core (message, actor, handler, addr, context, mailbox, envelope, -runtime, error modules). 
This is comparable to the fork effort when -accounting for the abstraction layer + ongoing maintenance cost. +The fork saves ~800 lines of actor machinery but introduces: +- Ongoing merge burden with an actively evolving upstream (v0.19.2) +- kameo's `remote` feature (libp2p-based) conflicts with iroh +- Extra deps: `downcast-rs`, `dyn-clone`, `serde` derive +- MSRV 1.88.0 (edition 2024) + +Writing `willow-actor` from scratch: ~1000-1500 lines for the core. +Comparable effort, no maintenance burden. ### Iroh integration (complete) @@ -175,29 +140,36 @@ broadcast. See `docs/specs/2026-03-29-iroh-migration-design.md`. **Key facts for the actor system:** -1. **Tokio everywhere.** Iroh depends on tokio for both native and WASM. - The client already uses `Arc>` (not `Rc>`). All - futures must be `Send`. Channels are `tokio::sync::mpsc` throughout. +1. **Split runtime.** Tokio is native-only (`cfg(not(wasm32))`). On + WASM, the codebase uses `wasm-bindgen-futures::spawn_local`, + `futures::channel::mpsc`, and `gloo-timers`. The actor system needs + a thin runtime abstraction for spawn, channels, and timers — just + like the original design proposed. + +2. **`Send` is still required.** The `Network` trait and its associated + types require `Send + Sync`. The client uses `Arc>`. All + types in the shared path must be `Send`. On WASM, everything is + trivially `Send` (single-threaded), so this compiles without issue. -2. **`Network` trait is generic.** Workers and client are generic over +3. **`Network` trait is generic.** Workers and client are generic over `N: Network`, with `IrohNetwork` for production and `MemNetwork` for tests. Actor types should also be generic over `Network` where they interact with gossip, following the same pattern. -3. **`TopicEvents` is a stream.** The `TopicEvents` trait has +4. **`TopicEvents` is a stream.** The `TopicEvents` trait has `async fn next() -> Option>` — a natural fit for `StreamHandler`. 
The network actor currently wraps this in a manual `while let` loop; the actor system replaces that. -4. **`TopicHandle` for broadcast.** The heartbeat and sync actors +5. **`TopicHandle` for broadcast.** The heartbeat and sync actors currently take `T: TopicHandle` and call `topic.broadcast()`. With the actor system, they hold the `TopicHandle` as actor state and call it from message handlers. -5. **Shutdown via `watch` channel.** Workers currently use +6. **Shutdown via `watch` channel.** Workers currently use `tokio::sync::watch` for shutdown signaling. The actor system - replaces this with `CancellationToken` (used by iroh internally) - or simply dropping the `Addr` (closing the mailbox). + replaces this — dropping all `Addr` handles closes the mailbox, + or `CancellationToken` provides explicit out-of-band shutdown. ### Recommendation: build `willow-actor` @@ -207,14 +179,12 @@ combines: - **xtra/kameo's `Handler` pattern** — per-message-type trait impls with typed returns, not a single enum -- **tokio channels directly** — no runtime abstraction needed since iroh - already provides tokio on both native and WASM -- **`Send` unconditionally** — matches iroh's requirement, compiles on - WASM (everything is trivially Send on single-threaded targets) -- **`CancellationToken` for lifecycle** — aligns with iroh's shutdown - pattern -- **Supervision, intervals, `Recipient`** — features missing from - xtra +- **Thin runtime abstraction** — `cfg`-switched spawn/channel/timer for + tokio (native) vs futures-channel + gloo-timers (WASM) +- **`Send` unconditionally** — matches the `Network` trait's bounds, + compiles on WASM (everything is trivially Send on single-threaded targets) +- **Supervision, intervals, streams, `Recipient`** — features + missing from xtra --- @@ -246,7 +216,7 @@ per-crate boilerplate. 3. **Request-reply**: first-class `ask()` with typed responses, no manual oneshot wiring 4. 
**Supervision**: restart policies for crashed actors (native), error propagation (WASM)
 5. **No locks**: shared state lives inside actors, eliminating `Arc<Mutex<…>>` / `Arc<RwLock<…>>` — access is serialized through message passing
-6. **Lightweight**: no dynamic dispatch on send
+6. **Lightweight**: `Addr` send path has no dynamic dispatch (type-erased `Recipient` is opt-in)
 
 ## Non-Goals
 
@@ -309,7 +279,6 @@ is type-checked at compile time.
 pub struct Context<A: Actor> {
     addr: Addr<A>,
     system: SystemHandle,
-    cancel: CancellationToken,
 }
 
 impl<A: Actor> Context<A> {
@@ -322,10 +291,6 @@ impl<A: Actor> Context<A> {
     /// Request a graceful stop after the current message finishes.
     pub fn stop(&mut self) { ... }
 
-    /// Get the cancellation token (child of the system's root token).
-    /// Integrates with iroh's CancellationToken-based shutdown.
-    pub fn cancellation_token(&self) -> &CancellationToken { ... }
-
     /// Access the actor system (for spawning unrelated actors).
     pub fn system(&self) -> &SystemHandle { ... }
 }
@@ -337,7 +302,7 @@ impl<A: Actor> Context<A> {
 
 /// Type-safe handle for sending messages to an actor.
 /// Cheaply cloneable (wraps an Arc'd channel sender).
 pub struct Addr<A: Actor> {
-    tx: MessageSender, // platform-specific channel sender
+    tx: MessageSender, // unbounded mpsc sender (type-erased via runtime module)
     _phantom: PhantomData<A>,
 }
 
@@ -435,8 +400,9 @@ impl System {
 
 ## Platform Abstraction
 
-Since iroh already depends on tokio for both native and WASM, the
-runtime module is minimal — only task spawning differs by platform:
+Tokio is native-only. On WASM, the codebase uses `futures` channels and
+`wasm-bindgen-futures`. The actor crate needs a thin `runtime` module
+that abstracts over the three primitives that differ:
 
 ```rust
 // crate::runtime (internal)
 
 pub fn spawn<F: Future<Output = ()> + Send + 'static>(fut: F) {
     #[cfg(not(target_arch = "wasm32"))]
     tokio::spawn(fut);
 
     #[cfg(target_arch = "wasm32")]
     wasm_bindgen_futures::spawn_local(fut);
 }
+
+/// Unbounded MPSC channel.
+pub fn unbounded_channel<T>() -> (Sender<T>, Receiver<T>) {
+    #[cfg(not(target_arch = "wasm32"))]
+    { /* tokio::sync::mpsc::unbounded_channel */ }
+
+    #[cfg(target_arch = "wasm32")]
+    { /* futures::channel::mpsc::unbounded */ }
+}
+
+/// One-shot channel.
+pub fn oneshot<T>() -> (OneshotTx<T>, OneshotRx<T>) {
+    #[cfg(not(target_arch = "wasm32"))]
+    { /* tokio::sync::oneshot */ }
+
+    #[cfg(target_arch = "wasm32")]
+    { /* futures::channel::oneshot */ }
+}
+
+/// Sleep for a duration.
+pub async fn sleep(duration: Duration) {
+    #[cfg(not(target_arch = "wasm32"))]
+    tokio::time::sleep(duration).await;
+
+    #[cfg(target_arch = "wasm32")]
+    gloo_timers::future::sleep(duration).await;
+}
 ```
 
-Channels use `tokio::sync::mpsc` and `tokio::sync::oneshot` on both
-targets — iroh's WASM shim makes these available. Timers use
-`tokio::time::sleep` on native and `gloo_timers` (or iroh's internal
-timer abstraction) on WASM.
+The `Sender`/`Receiver` types are thin wrappers that unify the
+`tokio::sync::mpsc` and `futures::channel::mpsc` APIs behind a common
+interface. Both are unbounded and have nearly identical semantics.
## Mailbox Internals @@ -587,7 +579,7 @@ crates/actor/ ├── mailbox.rs — tokio mpsc wrapper, recv loop ├── runtime.rs — spawn abstraction (tokio::spawn vs spawn_local) ├── supervisor.rs — RestartPolicy, supervised spawn - ├── system.rs — System, SystemHandle (CancellationToken) + ├── system.rs — System, SystemHandle └── error.rs — SendError, AskError ``` @@ -595,17 +587,19 @@ crates/actor/ ``` willow-actor (new) -├── tokio (sync: mpsc, oneshot; time: sleep, interval) -├── tokio-util (CancellationToken) -├── futures-core (Stream trait) +├── futures-core (Stream trait) ├── thiserror ├── tracing -└── [wasm] wasm-bindgen-futures (spawn_local) +├── [native] tokio (sync: mpsc, oneshot; time: sleep) +├── [native] tokio-util (CancellationToken) +└── [wasm] wasm-bindgen-futures (spawn_local) + futures-channel (mpsc, oneshot) + gloo-timers (sleep) ``` No `async-trait` needed — uses RPITIT (Rust 1.75+). `willow-actor` has **no dependency on any other willow crate**. It is a pure infrastructure -crate. It shares `tokio` with iroh — no additional runtime overhead. +crate. ## Migration Path @@ -647,8 +641,9 @@ pub async fn run(role: Box, config: WorkerConfig, ne The `NetworkActor` uses `StreamHandler` to receive from `TopicEvents`. The `HeartbeatActor` uses `ctx.run_interval()` instead -of a manual `tokio::select! + sleep` loop. Shutdown propagates via -`CancellationToken` — no more `watch` channel. +of a manual `tokio::select! + sleep` loop. Shutdown is implicit — +`system.shutdown()` drops all addresses, closing mailboxes. No more +`watch` channel. ### Phase 2: Client library @@ -669,9 +664,8 @@ completes the migration across all active crates. actor is dead (mailbox closed). Bounded mailboxes can be added later if backpressure becomes necessary. -2. **FIFO, no priority messages.** Shutdown is just another message. - `CancellationToken` provides an independent out-of-band shutdown - signal that doesn't go through the mailbox. +2. 
**FIFO, no priority messages.** Shutdown is achieved by dropping all + `Addr` handles (closes the mailbox) or via `Context::stop()`. 3. **Shared state lives in actors.** `Arc>` in the client library is replaced by a state actor. External code queries From ced2a739918fddd2422afd9416ebcb81913755d1 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:05:49 +0000 Subject: [PATCH 11/26] Second review pass: fix remaining stale references - kameo fork section: remove claim that iroh provides tokio on WASM - Iroh section: replace CancellationToken with Context::stop() - Overview: fix claim about iroh's tokio runtime on both targets - Message trait docs: reference Network trait bounds, not iroh directly - Crate structure: fix mailbox.rs and runtime.rs descriptions - Dependency graph: remove tokio-util (CancellationToken was dropped) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/specs/2026-03-29-actor-system-design.md | 26 +++++++++----------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index b26c8a32..4151ae2c 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -114,11 +114,10 @@ flags for alternative runtimes. A source audit of kameo's tokio coupling reveals it is **shallow and concentrated** — 6 primitives across 4 core files. The supervision -module has zero production tokio usage. With iroh now providing tokio -on WASM, the fork effort shrinks further (no need to swap channel -implementations — tokio mpsc works on both targets). Main remaining -work: `cfg`-gate `spawn_in_thread()` and `blocking_send/recv` on WASM, -replace `tokio::spawn` with `spawn_local` on WASM (~150 lines). +module has zero production tokio usage. 
Main work: swap `tokio::sync::mpsc` +for `futures::channel::mpsc` on WASM, `cfg`-gate `spawn_in_thread()` +and `blocking_send/recv`, replace `tokio::spawn` with `spawn_local` +(~200 lines). **Verdict: fork is feasible but not clearly better than writing our own.** @@ -169,7 +168,7 @@ broadcast. See `docs/specs/2026-03-29-iroh-migration-design.md`. 6. **Shutdown via `watch` channel.** Workers currently use `tokio::sync::watch` for shutdown signaling. The actor system replaces this — dropping all `Addr` handles closes the mailbox, - or `CancellationToken` provides explicit out-of-band shutdown. + or `Context::stop()` signals graceful shutdown from within. ### Recommendation: build `willow-actor` @@ -205,9 +204,9 @@ sync actors communicating via channels), but it's hand-rolled and not reusable. The client and web crates reinvent the same pattern: spawn a task, create channels, loop on `select!`, handle shutdown. -`willow-actor` formalizes this into a single crate, building on iroh's -tokio runtime (available on both native and WASM) to eliminate the -per-crate boilerplate. +`willow-actor` formalizes this into a single crate with a thin runtime +abstraction (tokio on native, futures-channel + gloo-timers on WASM) to +eliminate the per-crate boilerplate. ## Goals @@ -237,8 +236,8 @@ pub trait Message: Send + 'static { ``` `Send` is required unconditionally. On WASM (single-threaded), all types -are trivially `Send`, so this compiles without issue. This matches iroh's -requirement that all futures and channel payloads are `Send`. +are trivially `Send`, so this compiles without issue. This matches the +`Network` trait's bounds (`Send + Sync` on associated types). 
### Actor Trait @@ -576,8 +575,8 @@ crates/actor/ ├── addr.rs — Addr, AnyAddr, Recipient ├── context.rs — Context, interval, stream attachment ├── envelope.rs — BoxEnvelope, type-erased message dispatch - ├── mailbox.rs — tokio mpsc wrapper, recv loop - ├── runtime.rs — spawn abstraction (tokio::spawn vs spawn_local) + ├── mailbox.rs — unbounded channel recv loop + ├── runtime.rs — platform abstraction (spawn, channels, timers) ├── supervisor.rs — RestartPolicy, supervised spawn ├── system.rs — System, SystemHandle └── error.rs — SendError, AskError @@ -591,7 +590,6 @@ willow-actor (new) ├── thiserror ├── tracing ├── [native] tokio (sync: mpsc, oneshot; time: sleep) -├── [native] tokio-util (CancellationToken) └── [wasm] wasm-bindgen-futures (spawn_local) futures-channel (mpsc, oneshot) gloo-timers (sleep) From b644119499d8f91d6d182f52a3654f1b79ca4b0b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:07:01 +0000 Subject: [PATCH 12/26] Third review pass: minor cleanups - Remove stale "Depends on" strikethrough line - Clarify Arc> is current state replaced by Phase 2 - Note RecipientSender is an internal trait https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/specs/2026-03-29-actor-system-design.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 4151ae2c..fa009837 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -2,7 +2,6 @@ **Date**: 2026-03-29 **Status**: Ready for implementation -**Depends on**: ~~iroh integration~~ (complete — merged to main) ## Existing Solutions @@ -145,10 +144,10 @@ broadcast. See `docs/specs/2026-03-29-iroh-migration-design.md`. a thin runtime abstraction for spawn, channels, and timers — just like the original design proposed. -2. 
**`Send` is still required.** The `Network` trait and its associated
-   types require `Send + Sync`. The client uses `Arc<Mutex<…>>`. All
-   types in the shared path must be `Send`. On WASM, everything is
-   trivially `Send` (single-threaded), so this compiles without issue.
+2. **`Send` is required.** The `Network` trait and its associated types
+   require `Send + Sync`. The client currently uses `Arc<Mutex<…>>` (to
+   be replaced by actors in Phase 2). On WASM, everything is trivially
+   `Send` (single-threaded), so this compiles without issue.
 
 3. **`Network` trait is generic.** Workers and client are generic over
    `N: Network`, with `IrohNetwork` for production and `MemNetwork` for
@@ -355,7 +354,7 @@ abstract over the concrete actor:
 /// Type-erased handle that can send a specific message type.
 /// Useful for pub-sub patterns where the sender doesn't know the actor type.
 pub struct Recipient<M: Message> {
-    tx: Box<dyn RecipientSender<M>>,
+    tx: Box<dyn RecipientSender<M>>, // internal trait, not public
 }
 
 impl<M: Message> Recipient<M> {

From e09d7567e2025f7d2816768f8f2823206b7c4091 Mon Sep 17 00:00:00 2001
From: Claude 
Date: Mon, 30 Mar 2026 17:18:06 +0000
Subject: [PATCH 13/26] Add actor system implementation plan

8 tasks covering the full migration: runtime abstraction, core traits,
addr/context/system, supervision, worker migration, client migration,
web UI migration, and final validation. Each task has concrete steps
with file lists and acceptance criteria.
https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 230 ++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 docs/plans/2026-03-30-actor-system.md diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md new file mode 100644 index 00000000..778470bc --- /dev/null +++ b/docs/plans/2026-03-30-actor-system.md @@ -0,0 +1,230 @@ +# Actor System Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace hand-rolled channel/task patterns across worker, client, and web crates with a formalized actor system crate (`willow-actor`) that works on both native and WASM targets, eliminating all locks from the shared state path. + +**Architecture:** A pure infrastructure crate (`willow-actor`) provides `Actor`, `Handler`, `StreamHandler`, `Addr`, `System`, and a thin platform abstraction (tokio on native, futures-channel + gloo-timers on WASM). Worker actors, client state, and web UI event loops are migrated to use these primitives. 
+ +**Tech Stack:** Rust, tokio (native), futures-channel (WASM), wasm-bindgen-futures, gloo-timers, RPITIT (Rust 1.75+) + +**Spec:** `docs/specs/2026-03-29-actor-system-design.md` + +--- + +## File Map + +### New Crate + +``` +crates/actor/ +├── Cargo.toml +└── src/ + ├── lib.rs — Public API re-exports + ├── actor.rs — Actor, Handler, StreamHandler, Message traits + ├── addr.rs — Addr, AnyAddr, Recipient + ├── context.rs — Context, interval, stream attachment + ├── envelope.rs — BoxEnvelope, type-erased message dispatch + ├── mailbox.rs — Unbounded channel recv loop + ├── runtime.rs — Platform abstraction (spawn, channels, timers) + ├── supervisor.rs — RestartPolicy, supervised spawn + ├── system.rs — System, SystemHandle + └── error.rs — SendError, AskError +``` + +### Modified Files + +``` +crates/worker/Cargo.toml — Add willow-actor dependency +crates/worker/src/actors/mod.rs — Remove StateMsg enum (replaced by per-message types) +crates/worker/src/actors/state.rs — StateActor with Handler, Handler, etc. 
+crates/worker/src/actors/network.rs — NetworkActor with StreamHandler +crates/worker/src/actors/heartbeat.rs — HeartbeatActor with ctx.run_interval() +crates/worker/src/actors/sync.rs — SyncActor with ctx.run_interval() +crates/worker/src/runtime.rs — Replace manual channel/spawn/join with System + +crates/client/Cargo.toml — Add willow-actor dependency +crates/client/src/lib.rs — Replace Arc> with state actor +crates/client/src/listeners.rs — Replace spawn_topic_listener with StreamHandler actor +crates/client/src/state.rs — State accessor methods become ask() calls + +crates/web/Cargo.toml — Add willow-actor dependency +crates/web/src/app.rs — Spawn actors instead of manual event loop + +Cargo.toml — Add actor to workspace members +``` + +--- + +## Task 1: Runtime Abstraction Module + +**Files:** +- Create: `crates/actor/Cargo.toml` +- Create: `crates/actor/src/lib.rs` +- Create: `crates/actor/src/runtime.rs` +- Create: `crates/actor/src/error.rs` +- Modify: `Cargo.toml` (workspace members) + +The platform abstraction is the foundation everything else builds on. It must compile on both native and `wasm32-unknown-unknown` before any actor types are defined. + +- [ ] **Step 1: Create crate skeleton.** `Cargo.toml` with workspace edition/version, dependencies split by target: `tokio` (native), `wasm-bindgen-futures` + `futures-channel` + `gloo-timers` (WASM). Shared deps: `futures-core`, `thiserror`, `tracing`. Add `actor` to workspace `Cargo.toml` members. + +- [ ] **Step 2: Implement `runtime.rs`.** Four functions: `spawn()` (tokio::spawn vs spawn_local), `unbounded_channel()` (tokio mpsc vs futures mpsc), `oneshot()` (tokio vs futures), `sleep()` (tokio vs gloo-timers). Define `Sender` / `Receiver` / `OneshotTx` / `OneshotRx` wrapper types that unify the two backends behind a common API. The `Receiver` must implement `async fn recv() -> Option`. + +- [ ] **Step 3: Implement `error.rs`.** `SendError` with `Closed(M)` variant. 
`AskError` with `Closed` and `NoResponse` variants. Both derive `Debug`, `thiserror::Error`. + +- [ ] **Step 4: Verify dual-target compilation.** Run `cargo check -p willow-actor` (native) and `cargo check -p willow-actor --target wasm32-unknown-unknown` (WASM). Both must pass with zero warnings. + +--- + +## Task 2: Core Actor Traits and Envelope + +**Files:** +- Create: `crates/actor/src/actor.rs` +- Create: `crates/actor/src/envelope.rs` +- Create: `crates/actor/src/mailbox.rs` + +Defines the trait hierarchy and the type-erased message dispatch mechanism. + +- [ ] **Step 1: Define `Message` trait.** `Send + 'static` with `type Result: Send + 'static`. + +- [ ] **Step 2: Define `Actor` trait.** `Send + 'static + Sized` with `started()` and `stopped()` lifecycle hooks using RPITIT (not async_trait). Both have default no-op impls. + +- [ ] **Step 3: Define `Handler` trait.** `fn handle(&mut self, msg: M, ctx: &mut Context) -> impl Future + Send`. Supertrait is `Actor`. + +- [ ] **Step 4: Define `StreamHandler` trait.** `handle_stream_item()` and `stream_finished()` with RPITIT. Supertrait is `Actor`. + +- [ ] **Step 5: Implement `envelope.rs`.** `BoxEnvelope` type alias: `Box) -> BoxFuture<'_, ()> + Send>`. Two factory functions: `envelope_send(msg) -> BoxEnvelope` (fire-and-forget) and `envelope_ask(msg, reply_tx) -> BoxEnvelope` (captures oneshot sender). Both wrap the handler call in a closure. + +- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(actor: A, rx: Receiver>)`: calls `actor.started()`, loops on `rx.recv()`, executes each envelope, calls `actor.stopped()` on exit. Checks a stop flag between messages. + +- [ ] **Step 7: Write unit tests.** Test a simple counter actor: define `Increment` (fire-and-forget) and `GetCount` (returns u32) messages. Verify send/ask/stop lifecycle. Test on native with `#[tokio::test]`. 
+ +--- + +## Task 3: Addr, Context, and System + +**Files:** +- Create: `crates/actor/src/addr.rs` +- Create: `crates/actor/src/context.rs` +- Create: `crates/actor/src/system.rs` +- Update: `crates/actor/src/lib.rs` + +Wires everything together into the public API. + +- [ ] **Step 1: Implement `Addr`.** Wraps `runtime::Sender>`. Methods: `send()` wraps msg in `envelope_send`, sends on channel. `ask()` creates a oneshot, wraps msg in `envelope_ask`, sends on channel, awaits oneshot receiver. `is_alive()` checks if channel is open. `Clone` impl. + +- [ ] **Step 2: Implement `AnyAddr`.** Type-erased address that can only signal stop (drops a held sender) and check liveness. `From>` impl. + +- [ ] **Step 3: Implement `Context`.** Fields: `addr: Addr`, `system: SystemHandle`, `stop: bool`. Methods: `address()`, `stop()`, `system()`, `spawn()` (delegates to system), `add_stream()` (spawns a task that forwards stream items as envelopes into the actor's mailbox), `run_interval()` (spawns a task that sleeps + sends a message on each tick, returns `IntervalHandle` with cancel). + +- [ ] **Step 4: Implement `System` / `SystemHandle`.** `System::new()` creates the handle. `SystemHandle` is `Clone` and holds a list of `AnyAddr`s (for shutdown). `spawn()` creates a channel, builds `Context`, spawns `run_mailbox` via `runtime::spawn`, returns `Addr`. `shutdown()` drops all tracked addresses and waits for mailboxes to drain. + +- [ ] **Step 5: Implement `Recipient`.** Internal `RecipientSender` trait with `send()` and `ask()`. `Addr` implements it for any `A: Handler`. `Recipient` wraps `Box>`. `From>` impl. + +- [ ] **Step 6: Wire up `lib.rs` re-exports.** Public API: `Actor`, `Handler`, `StreamHandler`, `Message`, `Addr`, `AnyAddr`, `Recipient`, `Context`, `System`, `SystemHandle`, `SendError`, `AskError`, `IntervalHandle`, `RestartPolicy`. + +- [ ] **Step 7: Write integration tests.** Multi-actor test: spawn two actors, actor A sends to actor B, B replies. 
Test `StreamHandler` with a `futures::stream::iter`. Test `run_interval` fires expected number of times. Test shutdown stops all actors. + +- [ ] **Step 8: Verify WASM compilation.** `cargo check -p willow-actor --target wasm32-unknown-unknown`. Add to `just check-wasm`. + +--- + +## Task 4: Supervision + +**Files:** +- Create: `crates/actor/src/supervisor.rs` +- Update: `crates/actor/src/context.rs` + +- [ ] **Step 1: Define `RestartPolicy`.** Enum: `Never`, `OnFailure { max: u32 }`, `Backoff { initial: Duration, max_delay: Duration, max_retries: u32 }`. + +- [ ] **Step 2: Implement `Context::spawn_supervised()`.** Takes `child: C` where `C: Actor + Clone` and `policy: RestartPolicy`. Spawns the actor in a wrapper task that catches panics via `catch_unwind` and restarts according to the policy. Returns `Addr` (stable across restarts — the wrapper re-creates the mailbox and re-attaches the address's sender). + +- [ ] **Step 3: Write tests.** Test: actor that panics after N messages gets restarted up to `max` times. Test: `Never` policy does not restart. Test: `Backoff` delays between restarts. + +--- + +## Task 5: Worker Migration + +**Files:** +- Modify: `crates/worker/Cargo.toml` +- Modify: `crates/worker/src/actors/mod.rs` +- Modify: `crates/worker/src/actors/state.rs` +- Modify: `crates/worker/src/actors/network.rs` +- Modify: `crates/worker/src/actors/heartbeat.rs` +- Modify: `crates/worker/src/actors/sync.rs` +- Modify: `crates/worker/src/runtime.rs` + +Migrate the four hand-rolled worker actors to use `willow-actor`. This is the first real consumer and validates the API. + +- [ ] **Step 1: Define message types in `actors/mod.rs`.** Replace `StateMsg` enum with individual message structs: `EventMsg(Event)`, `RequestMsg { req, reply: ... 
}` → becomes ask-pattern `WorkerRequestMsg(WorkerRequest)` with `type Result = WorkerResponse`, `GetRoleInfoMsg` with `type Result = WorkerRoleInfo`, `GetStateHashesMsg` with `type Result = Vec<(String, StateHash)>`, `ServerDiscoveredMsg { server_id }`. Remove `NetworkOutMsg` (network actor no longer needs a channel — it holds TopicHandle directly). Remove `StateMsg::Shutdown` (handled by mailbox close). + +- [ ] **Step 2: Rewrite `state.rs` as `StateActor`.** Struct holds `Box`. Implement `Actor` (no lifecycle hooks needed). Implement `Handler`, `Handler`, `Handler`, `Handler`, `Handler`. Each handler is 1-3 lines — delegates to `self.role`. Remove the manual `run()` function and its `while let` loop. + +- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr` and `EndpointId`. Implement `StreamHandler>` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, attach the `TopicEvents` stream via `ctx.add_stream()`. Remove the manual `run()` function. + +- [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle`. Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure. Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. + +- [ ] **Step 5: Rewrite `sync.rs` as `SyncActor`.** Same pattern as heartbeat. Define `SyncTick` message. `Handler` queries state hashes and broadcasts sync requests. `started()` calls `ctx.run_interval()`. Remove watch-based shutdown. + +- [ ] **Step 6: Rewrite `runtime.rs`.** Replace manual channel creation, `tokio::spawn`, watch channel, and `tokio::join!` with: create `System`, spawn all four actors, `ctrl_c().await`, `system.shutdown().await`. 
The function stays generic over `N: Network`. + +- [ ] **Step 7: Update existing tests.** Adapt tests in `state.rs`, `network.rs`, `heartbeat.rs` to use the actor system. Tests that sent `StateMsg` directly now use `Addr::send()` / `ask()`. Tests that checked `watch::Receiver` for shutdown now verify the actor stops when the system shuts down. + +- [ ] **Step 8: Run `just test-crate worker`.** All existing worker tests must pass. Run `just clippy` — zero warnings. + +--- + +## Task 6: Client Library Migration + +**Files:** +- Modify: `crates/client/Cargo.toml` +- Modify: `crates/client/src/lib.rs` +- Modify: `crates/client/src/listeners.rs` +- Modify: `crates/client/src/state.rs` +- Modify: `crates/client/src/events.rs` + +Replace `Arc>` and `futures::channel::mpsc` with actors. + +- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock). Define message types for mutations: `ApplyEvent`, `SetConnected`, `UpdateTyping`, `JoinVoice`, `LeaveVoice`, etc. Define query messages: `GetState` (returns `ClientState` clone or specific fields), `GetPeers`, `IsConnected`, etc. + +- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s. + +- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Replace `event_tx: futures_mpsc::UnboundedSender` with `events: Addr` or keep as a channel (events go to UI, which may not be an actor). Client methods that read shared state switch from `shared.read().unwrap()` to `state.ask(GetState).await`. Methods that mutate state switch from `shared.write().unwrap()` to `state.send(mutation)`. + +- [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. 
+ +- [ ] **Step 5: Update client tests.** Tests using `test_client()` helper need updating — shared state access changes from lock-based to ask-based. Verify all `just test-client` tests pass. + +- [ ] **Step 6: Run `just test-client` and `just clippy`.** All 93+ client tests must pass with zero warnings. + +--- + +## Task 7: Web UI Migration + +**Files:** +- Modify: `crates/web/Cargo.toml` +- Modify: `crates/web/src/app.rs` +- Modify: `crates/web/src/event_processing.rs` + +Validates WASM target correctness. + +- [ ] **Step 1: Update `app.rs` initialization.** Create a `System`, spawn the `ClientStateActor` and listener actors. Pass `Addr`s into Leptos context instead of `Arc>`. + +- [ ] **Step 2: Update signal updates.** Components that read state via `Arc>` switch to `ask()` on the state actor address. Leptos signals can be updated from the actor's event stream. + +- [ ] **Step 3: Verify WASM compilation.** `just check-wasm` must pass. + +- [ ] **Step 4: Run `just test-browser`.** All 39+ browser tests must pass. + +--- + +## Task 8: Final Validation + +- [ ] **Step 1: Run `just check`.** Full suite: fmt + clippy + test + WASM. Zero warnings. + +- [ ] **Step 2: Run `just test-scale`.** Verify no performance regression in event throughput or merge benchmarks. + +- [ ] **Step 3: Run `just test-all`.** All 420+ tests pass. + +- [ ] **Step 4: Update `CLAUDE.md`.** Add `crates/actor/` to the repository structure. Update the architecture notes to describe the actor system. 
From 707aac3c6c20b22c167a8f4df3a59e3c0c49016e Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:20:38 +0000 Subject: [PATCH 14/26] Review pass on implementation plan - Task 2: tests can't use Addr (defined in Task 3), changed to mailbox-level tests - Task 3: remove stop flag from Context fields (mailbox owns it), defer RestartPolicy re-export to Task 4 - Task 5: add TopicEventStream adapter note (TopicEvents is not a Stream trait), clarify heartbeat owns TopicHandle for departure - Task 6: decide event_tx stays as a channel (UI boundary) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 778470bc..641a8a60 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -98,7 +98,7 @@ Defines the trait hierarchy and the type-erased message dispatch mechanism. - [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(actor: A, rx: Receiver>)`: calls `actor.started()`, loops on `rx.recv()`, executes each envelope, calls `actor.stopped()` on exit. Checks a stop flag between messages. -- [ ] **Step 7: Write unit tests.** Test a simple counter actor: define `Increment` (fire-and-forget) and `GetCount` (returns u32) messages. Verify send/ask/stop lifecycle. Test on native with `#[tokio::test]`. +- [ ] **Step 7: Write mailbox-level tests.** Test `run_mailbox` directly by creating a channel, sending `BoxEnvelope`s manually, and verifying the actor processes them. Test that the mailbox loop exits when the sender is dropped. Full `Addr`/`System`-level tests come in Task 3. --- @@ -116,13 +116,13 @@ Wires everything together into the public API. - [ ] **Step 2: Implement `AnyAddr`.** Type-erased address that can only signal stop (drops a held sender) and check liveness. `From>` impl. 
-- [ ] **Step 3: Implement `Context`.** Fields: `addr: Addr`, `system: SystemHandle`, `stop: bool`. Methods: `address()`, `stop()`, `system()`, `spawn()` (delegates to system), `add_stream()` (spawns a task that forwards stream items as envelopes into the actor's mailbox), `run_interval()` (spawns a task that sleeps + sends a message on each tick, returns `IntervalHandle` with cancel). +- [ ] **Step 3: Implement `Context`.** Fields: `addr: Addr`, `system: SystemHandle`. The mailbox loop holds the stop flag internally (not in Context). Methods: `address()`, `stop()` (sets a flag on a shared `Arc` checked by the mailbox between messages), `system()`, `spawn()` (delegates to system), `add_stream()` (spawns a task that forwards stream items as envelopes into the actor's mailbox), `run_interval()` (spawns a task that sleeps + sends a message on each tick, returns `IntervalHandle` with cancel). - [ ] **Step 4: Implement `System` / `SystemHandle`.** `System::new()` creates the handle. `SystemHandle` is `Clone` and holds a list of `AnyAddr`s (for shutdown). `spawn()` creates a channel, builds `Context`, spawns `run_mailbox` via `runtime::spawn`, returns `Addr`. `shutdown()` drops all tracked addresses and waits for mailboxes to drain. - [ ] **Step 5: Implement `Recipient`.** Internal `RecipientSender` trait with `send()` and `ask()`. `Addr` implements it for any `A: Handler`. `Recipient` wraps `Box>`. `From>` impl. -- [ ] **Step 6: Wire up `lib.rs` re-exports.** Public API: `Actor`, `Handler`, `StreamHandler`, `Message`, `Addr`, `AnyAddr`, `Recipient`, `Context`, `System`, `SystemHandle`, `SendError`, `AskError`, `IntervalHandle`, `RestartPolicy`. +- [ ] **Step 6: Wire up `lib.rs` re-exports.** Public API: `Actor`, `Handler`, `StreamHandler`, `Message`, `Addr`, `AnyAddr`, `Recipient`, `Context`, `System`, `SystemHandle`, `SendError`, `AskError`, `IntervalHandle`. (`RestartPolicy` added in Task 4.) 
- [ ] **Step 7: Write integration tests.** Multi-actor test: spawn two actors, actor A sends to actor B, B replies. Test `StreamHandler` with a `futures::stream::iter`. Test `run_interval` fires expected number of times. Test shutdown stops all actors. @@ -161,9 +161,9 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - [ ] **Step 2: Rewrite `state.rs` as `StateActor`.** Struct holds `Box`. Implement `Actor` (no lifecycle hooks needed). Implement `Handler`, `Handler`, `Handler`, `Handler`, `Handler`. Each handler is 1-3 lines — delegates to `self.role`. Remove the manual `run()` function and its `while let` loop. -- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr` and `EndpointId`. Implement `StreamHandler>` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, attach the `TopicEvents` stream via `ctx.add_stream()`. Remove the manual `run()` function. +- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr` and `EndpointId`. `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, attach the adapted stream via `ctx.add_stream()`. Remove the manual `run()` function. -- [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle`. Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure. 
Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. +- [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle` (owned, not borrowed — actor owns it for its lifetime). Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure message via `self.topic.broadcast()` — the topic handle is still valid because the actor owns it. Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. - [ ] **Step 5: Rewrite `sync.rs` as `SyncActor`.** Same pattern as heartbeat. Define `SyncTick` message. `Handler` queries state hashes and broadcasts sync requests. `started()` calls `ctx.run_interval()`. Remove watch-based shutdown. @@ -190,7 +190,7 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. - [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s. -- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Replace `event_tx: futures_mpsc::UnboundedSender` with `events: Addr` or keep as a channel (events go to UI, which may not be an actor). Client methods that read shared state switch from `shared.read().unwrap()` to `state.ask(GetState).await`. Methods that mutate state switch from `shared.write().unwrap()` to `state.send(mutation)`. +- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Keep `event_tx: futures_mpsc::UnboundedSender` as a plain channel — `ClientEvent`s flow to the UI layer (Leptos signals / Bevy ECS) which are not actors. The channel is the boundary between actor world and UI framework. 
Client methods that read shared state switch from `shared.read().unwrap()` to `state.ask(GetState).await`. Methods that mutate state switch from `shared.write().unwrap()` to `state.send(mutation)`. - [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. From bef187e0b550a0f13556463c25fc55e26294baf0 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:22:22 +0000 Subject: [PATCH 15/26] Second review pass on implementation plan - mailbox.rs: clarify actor moved in, add stop flag + ctx params - Context: add Arc stop flag field (shared with mailbox) - add_stream: clarify StreamEnvelope variant (separate from Handler) - spawn_supervised: wrapper reuses same channel across restarts - NetworkActor: store TopicEvents as Option, take() in started() - ClientHandle: call out sync-to-async API change cascading to callers https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 641a8a60..feb06bd3 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -96,7 +96,7 @@ Defines the trait hierarchy and the type-erased message dispatch mechanism. - [ ] **Step 5: Implement `envelope.rs`.** `BoxEnvelope` type alias: `Box) -> BoxFuture<'_, ()> + Send>`. Two factory functions: `envelope_send(msg) -> BoxEnvelope` (fire-and-forget) and `envelope_ask(msg, reply_tx) -> BoxEnvelope` (captures oneshot sender). Both wrap the handler call in a closure. -- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(actor: A, rx: Receiver>)`: calls `actor.started()`, loops on `rx.recv()`, executes each envelope, calls `actor.stopped()` on exit. Checks a stop flag between messages. 
+- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(mut actor: A, rx: Receiver<BoxEnvelope<A>>, stop: Arc<AtomicBool>)`: actor is moved in and mutated via `&mut` for its lifetime. Calls `actor.started(&mut ctx)`, loops on `rx.recv()`, executes each envelope passing `&mut actor` and `&mut ctx`, checks stop flag between messages, calls `actor.stopped()` on exit (either channel closed or stop flag set). - [ ] **Step 7: Write mailbox-level tests.** Test `run_mailbox` directly by creating a channel, sending `BoxEnvelope`s manually, and verifying the actor processes them. Test that the mailbox loop exits when the sender is dropped. Full `Addr`/`System`-level tests come in Task 3. @@ -116,7 +116,7 @@ Wires everything together into the public API. - [ ] **Step 2: Implement `AnyAddr`.** Type-erased address that can only signal stop (drops a held sender) and check liveness. `From<Addr<A>>` impl. -- [ ] **Step 3: Implement `Context`.** Fields: `addr: Addr<A>`, `system: SystemHandle`. The mailbox loop holds the stop flag internally (not in Context). Methods: `address()`, `stop()` (sets a flag on a shared `Arc<AtomicBool>` checked by the mailbox between messages), `system()`, `spawn()` (delegates to system), `add_stream()` (spawns a task that forwards stream items as envelopes into the actor's mailbox), `run_interval()` (spawns a task that sleeps + sends a message on each tick, returns `IntervalHandle` with cancel). +- [ ] **Step 3: Implement `Context`.** Fields: `addr: Addr<A>`, `system: SystemHandle`, `stop: Arc<AtomicBool>` (shared with the mailbox loop).
Methods: `address()`, `stop()` (sets the `AtomicBool` — mailbox checks it between messages), `system()`, `spawn()` (delegates to system), `add_stream()` (spawns a task that polls the stream and forwards items as `StreamEnvelope`s — a separate envelope variant that calls `StreamHandler::handle_stream_item` instead of `Handler::handle`; stream end sends a `StreamFinishedEnvelope`), `run_interval()` (spawns a task that sleeps + sends a message on each tick, returns `IntervalHandle` with cancel). - [ ] **Step 4: Implement `System` / `SystemHandle`.** `System::new()` creates the handle. `SystemHandle` is `Clone` and holds a list of `AnyAddr`s (for shutdown). `spawn()` creates a channel, builds `Context`, spawns `run_mailbox` via `runtime::spawn`, returns `Addr`. `shutdown()` drops all tracked addresses and waits for mailboxes to drain. @@ -138,7 +138,7 @@ Wires everything together into the public API. - [ ] **Step 1: Define `RestartPolicy`.** Enum: `Never`, `OnFailure { max: u32 }`, `Backoff { initial: Duration, max_delay: Duration, max_retries: u32 }`. -- [ ] **Step 2: Implement `Context::spawn_supervised()`.** Takes `child: C` where `C: Actor + Clone` and `policy: RestartPolicy`. Spawns the actor in a wrapper task that catches panics via `catch_unwind` and restarts according to the policy. Returns `Addr` (stable across restarts — the wrapper re-creates the mailbox and re-attaches the address's sender). +- [ ] **Step 2: Implement `Context::spawn_supervised()`.** Takes `child: C` where `C: Actor + Clone` and `policy: RestartPolicy`. The wrapper task owns both the channel receiver and the actor clone. On restart, it clones the original actor, creates a fresh stop flag, and re-enters `run_mailbox` — but reuses the same channel. The `Addr` returned to callers points at this stable channel, so it remains valid across restarts. Panics are caught via `std::panic::catch_unwind` (requires `AssertUnwindSafe` wrapper). 
The wrapper respects `RestartPolicy` limits and backoff delays. - [ ] **Step 3: Write tests.** Test: actor that panics after N messages gets restarted up to `max` times. Test: `Never` policy does not restart. Test: `Backoff` delays between restarts. @@ -161,7 +161,7 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - [ ] **Step 2: Rewrite `state.rs` as `StateActor`.** Struct holds `Box<dyn WorkerRole>`. Implement `Actor` (no lifecycle hooks needed). Implement `Handler<EventMsg>`, `Handler<WorkerRequestMsg>`, `Handler<GetRoleInfoMsg>`, `Handler<GetStateHashesMsg>`, `Handler<ServerDiscoveredMsg>`. Each handler is 1-3 lines — delegates to `self.role`. Remove the manual `run()` function and its `while let` loop. -- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr<StateActor>` and `EndpointId`. `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler<TopicEvent>` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, attach the adapted stream via `ctx.add_stream()`. Remove the manual `run()` function. +- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr<StateActor>`, `EndpointId`, and `Option<E>` where `E: TopicEvents` (taken in `started()`). `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler<TopicEvent>` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, call `self.events.take().unwrap()` to extract the topic events, wrap in `TopicEventStream`, and attach via `ctx.add_stream()`. Remove the manual `run()` function.
- [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle` (owned, not borrowed — actor owns it for its lifetime). Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure message via `self.topic.broadcast()` — the topic handle is still valid because the actor owns it. Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. @@ -190,7 +190,7 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. - [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s. -- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Keep `event_tx: futures_mpsc::UnboundedSender` as a plain channel — `ClientEvent`s flow to the UI layer (Leptos signals / Bevy ECS) which are not actors. The channel is the boundary between actor world and UI framework. Client methods that read shared state switch from `shared.read().unwrap()` to `state.ask(GetState).await`. Methods that mutate state switch from `shared.write().unwrap()` to `state.send(mutation)`. +- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Keep `event_tx: futures_mpsc::UnboundedSender` as a plain channel — `ClientEvent`s flow to the UI layer which is not an actor. **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. 
- [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. From 3cea69cc581ba2e2a42d870892cfffc52ff9120b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:24:14 +0000 Subject: [PATCH 16/26] Third review pass on implementation plan - run_mailbox: receives pre-built Context + done oneshot, clarify Task 2 tests use dummy SystemHandle - System::shutdown: uses done oneshots to await actor completion (spawn_local has no JoinHandle on WASM) - HeartbeatActor stopped(): note departure broadcast is best-effort - ClientStateActor: audit call sites to discover messages, don't enumerate upfront - Task 6 file list: add ops.rs, invite.rs, files.rs, worker_cache.rs - Task 7: fix signal update description (Leptos uses reactive signals from ClientEvents, not Arc> directly) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index feb06bd3..3531cd00 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -96,7 +96,7 @@ Defines the trait hierarchy and the type-erased message dispatch mechanism. - [ ] **Step 5: Implement `envelope.rs`.** `BoxEnvelope` type alias: `Box) -> BoxFuture<'_, ()> + Send>`. Two factory functions: `envelope_send(msg) -> BoxEnvelope` (fire-and-forget) and `envelope_ask(msg, reply_tx) -> BoxEnvelope` (captures oneshot sender). Both wrap the handler call in a closure. -- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(mut actor: A, rx: Receiver>, stop: Arc)`: actor is moved in and mutated via `&mut` for its lifetime. 
Calls `actor.started(&mut ctx)`, loops on `rx.recv()`, executes each envelope passing `&mut actor` and `&mut ctx`, checks stop flag between messages, calls `actor.stopped()` on exit (either channel closed or stop flag set). +- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(mut actor: A, ctx: Context<A>, rx: Receiver<BoxEnvelope<A>>, stop: Arc<AtomicBool>, done: OneshotTx<()>)`: actor is moved in and mutated via `&mut` for its lifetime. The `Context` and `done` oneshot are provided by the caller (System in Task 3, test harness here). Calls `actor.started(&mut ctx)`, loops on `rx.recv()`, executes each envelope passing `&mut actor` and `&mut ctx`, checks stop flag between messages, calls `actor.stopped()` on exit, then signals `done`. For Task 2 tests, construct a minimal `Context` with a dummy `SystemHandle`. - [ ] **Step 7: Write mailbox-level tests.** Test `run_mailbox` directly by creating a channel, sending `BoxEnvelope`s manually, and verifying the actor processes them. Test that the mailbox loop exits when the sender is dropped. Full `Addr`/`System`-level tests come in Task 3. @@ -118,7 +118,7 @@ Wires everything together into the public API. - [ ] **Step 3: Implement `Context`.** Fields: `addr: Addr<A>`, `system: SystemHandle`, `stop: Arc<AtomicBool>` (shared with the mailbox loop). Methods: `address()`, `stop()` (sets the `AtomicBool` — mailbox checks it between messages), `system()`, `spawn()` (delegates to system), `add_stream()` (spawns a task that polls the stream and forwards items as `StreamEnvelope`s — a separate envelope variant that calls `StreamHandler::handle_stream_item` instead of `Handler::handle`; stream end sends a `StreamFinishedEnvelope`), `run_interval()` (spawns a task that sleeps + sends a message on each tick, returns `IntervalHandle` with cancel). -- [ ] **Step 4: Implement `System` / `SystemHandle`.** `System::new()` creates the handle. `SystemHandle` is `Clone` and holds a list of `AnyAddr`s (for shutdown).
`spawn()` creates a channel, builds `Context`, spawns `run_mailbox` via `runtime::spawn`, returns `Addr`. `shutdown()` drops all tracked addresses and waits for mailboxes to drain. +- [ ] **Step 4: Implement `System` / `SystemHandle`.** `System::new()` creates the handle. `SystemHandle` is `Clone` and holds a list of `AnyAddr`s (for shutdown) plus a list of `OneshotRx<()>` (one per actor, signaled when `run_mailbox` exits). `spawn()` creates a channel, a done oneshot, builds `Context`, spawns `run_mailbox` via `runtime::spawn`, stores the `OneshotRx`, returns `Addr`. `shutdown()` drops all tracked addresses (closing mailboxes) then awaits all done oneshots to confirm actors have stopped. - [ ] **Step 5: Implement `Recipient`.** Internal `RecipientSender` trait with `send()` and `ask()`. `Addr` implements it for any `A: Handler`. `Recipient` wraps `Box>`. `From>` impl. @@ -163,7 +163,7 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr`, `EndpointId`, and `Option` where `E: TopicEvents` (taken in `started()`). `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, call `self.events.take().unwrap()` to extract the topic events, wrap in `TopicEventStream`, and attach via `ctx.add_stream()`. Remove the manual `run()` function. -- [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle` (owned, not borrowed — actor owns it for its lifetime). Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. 
In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure message via `self.topic.broadcast()` — the topic handle is still valid because the actor owns it. Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. +- [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle` (owned, not borrowed — actor owns it for its lifetime). Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure message via `self.topic.broadcast()` — best-effort (may fail silently if network is already shut down). Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. - [ ] **Step 5: Rewrite `sync.rs` as `SyncActor`.** Same pattern as heartbeat. Define `SyncTick` message. `Handler` queries state hashes and broadcasts sync requests. `started()` calls `ctx.run_interval()`. Remove watch-based shutdown. @@ -183,10 +183,14 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - Modify: `crates/client/src/listeners.rs` - Modify: `crates/client/src/state.rs` - Modify: `crates/client/src/events.rs` +- Modify: `crates/client/src/ops.rs` +- Modify: `crates/client/src/invite.rs` +- Modify: `crates/client/src/files.rs` +- Modify: `crates/client/src/worker_cache.rs` Replace `Arc>` and `futures::channel::mpsc` with actors. -- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock). Define message types for mutations: `ApplyEvent`, `SetConnected`, `UpdateTyping`, `JoinVoice`, `LeaveVoice`, etc. Define query messages: `GetState` (returns `ClientState` clone or specific fields), `GetPeers`, `IsConnected`, etc. 
+- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock). Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. Common patterns: mutations return `()` (fire-and-forget), queries return a cloned field or computed value. - [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s. @@ -211,7 +215,7 @@ Validates WASM target correctness. - [ ] **Step 1: Update `app.rs` initialization.** Create a `System`, spawn the `ClientStateActor` and listener actors. Pass `Addr`s into Leptos context instead of `Arc>`. -- [ ] **Step 2: Update signal updates.** Components that read state via `Arc>` switch to `ask()` on the state actor address. Leptos signals can be updated from the actor's event stream. +- [ ] **Step 2: Update event processing.** Leptos components read state via reactive signals, not `Arc>` directly. The signals are populated from `ClientEvent`s received on the event channel. Since `ClientHandle` now uses actor-based state internally, verify that `ClientEvent` emission still works correctly — the `TopicListenerActor` emits events via the `event_tx` channel, which flows into `event_processing.rs` to update signals. This path should require minimal changes. - [ ] **Step 3: Verify WASM compilation.** `just check-wasm` must pass. 
From 6b73f1dc465bceb437a9397144c65038b0dd3594 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:27:04 +0000 Subject: [PATCH 17/26] Fourth review pass: actor-Leptos bridge design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Task 7 expanded significantly with actor↔Leptos bridge documentation: diagrams showing current vs migrated event flow, explanation of why ClientEvent channel remains the boundary between actor and signal worlds - process_event_batch and refresh_all_signals become async (handle methods now async), every handle.method() call gains .await - Components calling handle methods directly need spawn_local wrappers - Task 6: note topics Arc> also needs migration to actor - Task 7: added components/*.rs and state.rs to file list - Documented rejected alternative (state actor pushing full snapshots into signals directly — too coarse, triggers unnecessary re-renders) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 36 ++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 3531cd00..57f0506f 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -194,7 +194,7 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. - [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s. -- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Keep `event_tx: futures_mpsc::UnboundedSender` as a plain channel — `ClientEvent`s flow to the UI layer which is not an actor. **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). 
All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. +- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Also move `topics: Arc>>` into the state actor (or a dedicated topic actor) — this is another lock to eliminate. Keep `event_tx: futures_mpsc::UnboundedSender` as a plain channel — `ClientEvent`s flow to the UI layer which is not an actor. **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. - [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. @@ -210,16 +210,40 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. - Modify: `crates/web/Cargo.toml` - Modify: `crates/web/src/app.rs` - Modify: `crates/web/src/event_processing.rs` +- Modify: `crates/web/src/state.rs` (if signal source changes) +- Modify: `crates/web/src/components/*.rs` (any that call handle methods directly) -Validates WASM target correctness. +Validates WASM target correctness. This task is larger than it appears because `ClientHandle` methods become async in Task 6, which cascades into the web crate. -- [ ] **Step 1: Update `app.rs` initialization.** Create a `System`, spawn the `ClientStateActor` and listener actors. Pass `Addr`s into Leptos context instead of `Arc>`. +### Actor ↔ Leptos Bridge -- [ ] **Step 2: Update event processing.** Leptos components read state via reactive signals, not `Arc>` directly. The signals are populated from `ClientEvent`s received on the event channel. 
Since `ClientHandle` now uses actor-based state internally, verify that `ClientEvent` emission still works correctly — the `TopicListenerActor` emits events via the `event_tx` channel, which flows into `event_processing.rs` to update signals. This path should require minimal changes. +The current flow is: +``` +TopicEvents → spawn_local loop → ClientEvent channel → process_event_batch() → WriteSignal::set() + ↓ reads handle.peers(), handle.messages(), etc. + (sync reads via Arc>) +``` + +After the migration: +``` +TopicEvents → TopicListenerActor → ClientEvent channel → process_event_batch() → WriteSignal::set() + ↓ reads via handle.ask().await + (async reads via state actor) +``` + +The `ClientEvent` channel remains the boundary between the actor world and Leptos signals. The key change is that `process_event_batch` and `refresh_all_signals` become async because `ClientHandle` state accessors (`peers()`, `messages()`, `channels()`, `server_list()`, `display_name()`, `roles_data()`, etc.) are now async. Since these are called inside a `spawn_local` async block, `.await` works — but every call site must be updated. + +**Alternative considered:** have the state actor push full state snapshots into a signal directly, eliminating the `ClientEvent` → `process_event_batch` → signal path. Rejected: the snapshot approach would re-render everything on every state change. The event-based approach is more efficient (only flags `needs_msg_refresh`, `needs_peer_refresh`, etc.). + +- [ ] **Step 1: Update `app.rs` initialization.** The `System` is created by the client library (Task 6), not the web crate. `app.rs` receives the `ClientHandle` which already owns actor addresses internally. No `System::new()` call needed here. Update the `spawn_local` event loop: the `while let Some(event) = client_event_rx.next().await` loop stays, but `process_event_batch` is now called with `.await`. + +- [ ] **Step 2: Make `process_event_batch` async.** Currently `fn`, becomes `async fn`. 
All `handle.peers()`, `handle.messages()`, `handle.channels()`, etc. calls gain `.await`. The function is called inside a `spawn_local` async block, so this is straightforward. Same for `refresh_all_signals`. + +- [ ] **Step 3: Update component direct handle calls.** Grep for `handle.` in `crates/web/src/components/` — any component that calls `ClientHandle` methods directly (not through signals) must switch to async. Components typically access the handle via `use_context::()`. If a component calls a now-async method in a sync event handler (e.g., button click), wrap it in `spawn_local`. -- [ ] **Step 3: Verify WASM compilation.** `just check-wasm` must pass. +- [ ] **Step 4: Verify WASM compilation.** `just check-wasm` must pass. -- [ ] **Step 4: Run `just test-browser`.** All 39+ browser tests must pass. +- [ ] **Step 5: Run `just test-browser`.** All 39+ browser tests must pass. --- From 324cd18e849c20b07096813c80ad0bdb5796d7f6 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:29:44 +0000 Subject: [PATCH 18/26] Fifth review pass on implementation plan - NetworkActor: E: TopicEvents + 'static bound needed (Actor: 'static) - TopicListenerActor: must hold event_tx channel to emit ClientEvents - System ownership: created in ClientHandle::new(), not web crate - Modified files list: add event_processing.rs and components/*.rs - Note catch_unwind must wrap entire run_mailbox future https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 57f0506f..e7ac7549 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -49,7 +49,9 @@ crates/client/src/listeners.rs — Replace spawn_topic_listener with crates/client/src/state.rs — State accessor methods become ask() calls crates/web/Cargo.toml — Add willow-actor dependency 
-crates/web/src/app.rs — Spawn actors instead of manual event loop +crates/web/src/app.rs — Update event loop for async handle methods +crates/web/src/event_processing.rs — process_event_batch becomes async +crates/web/src/components/*.rs — Wrap sync handle calls in spawn_local Cargo.toml — Add actor to workspace members ``` @@ -161,7 +163,7 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - [ ] **Step 2: Rewrite `state.rs` as `StateActor`.** Struct holds `Box`. Implement `Actor` (no lifecycle hooks needed). Implement `Handler`, `Handler`, `Handler`, `Handler`, `Handler`. Each handler is 1-3 lines — delegates to `self.role`. Remove the manual `run()` function and its `while let` loop. -- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr`, `EndpointId`, and `Option` where `E: TopicEvents` (taken in `started()`). `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, call `self.events.take().unwrap()` to extract the topic events, wrap in `TopicEventStream`, and attach via `ctx.add_stream()`. Remove the manual `run()` function. +- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr`, `EndpointId`, and `Option` where `E: TopicEvents + 'static` (the `'static` bound is required because `Actor: 'static`; taken in `started()`). `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler` — the `handle_stream_item` replaces the `while let` loop. 
Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, call `self.events.take().unwrap()` to extract the topic events, wrap in `TopicEventStream`, and attach via `ctx.add_stream()`. Remove the manual `run()` function. - [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle` (owned, not borrowed — actor owns it for its lifetime). Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure message via `self.topic.broadcast()` — best-effort (may fail silently if network is already shut down). Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. @@ -192,7 +194,7 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. - [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock). Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. Common patterns: mutations return `()` (fire-and-forget), queries return a cloned field or computed value. -- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s. +- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr`, `TopicHandle`, and the `event_tx` channel sender (needed to emit `ClientEvent`s to the UI). In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s on `event_tx`. 
- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc<RwLock<SharedState>>` with `state: Addr<ClientStateActor>`. Also move `topics: Arc<RwLock<…>>` into the state actor (or a dedicated topic actor) — this is another lock to eliminate. Keep `event_tx: futures_mpsc::UnboundedSender<ClientEvent>` as a plain channel — `ClientEvent`s flow to the UI layer which is not an actor. **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. @@ -235,7 +237,7 @@ The `ClientEvent` channel remains the boundary between the actor world and Lepto **Alternative considered:** have the state actor push full state snapshots into a signal directly, eliminating the `ClientEvent` → `process_event_batch` → signal path. Rejected: the snapshot approach would re-render everything on every state change. The event-based approach is more efficient (only flags `needs_msg_refresh`, `needs_peer_refresh`, etc.). -- [ ] **Step 1: Update `app.rs` initialization.** The `System` is created by the client library (Task 6), not the web crate. `app.rs` receives the `ClientHandle` which already owns actor addresses internally. No `System::new()` call needed here. Update the `spawn_local` event loop: the `while let Some(event) = client_event_rx.next().await` loop stays, but `process_event_batch` is now called with `.await`. +- [ ] **Step 1: Update `app.rs` initialization.** The `System` is created inside `ClientHandle::new()` (Task 6) and owned by the client. `app.rs` receives the `ClientHandle` which already owns actor addresses internally. No `System::new()` call needed in the web crate. Update the `spawn_local` event loop: the `while let Some(event) = client_event_rx.next().await` loop stays, but `process_event_batch` is now called with `.await`. - [ ] **Step 2: Make `process_event_batch` async.** Currently `fn`, becomes `async fn`.
All `handle.peers()`, `handle.messages()`, `handle.channels()`, etc. calls gain `.await`. The function is called inside a `spawn_local` async block, so this is straightforward. Same for `refresh_all_signals`. From 590c2ebe3ae5ab9148e3f0fd1a297795a0b074dc Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:37:03 +0000 Subject: [PATCH 19/26] =?UTF-8?q?Add=20reactive=20derived=20state=20design?= =?UTF-8?q?=20for=20actor=E2=86=94Leptos=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec: new "Reactive Derived State" section with selector-based pattern inspired by Yewdux. State actor notifies subscribers after each mutation. DerivedStateActors run selectors, compare with cached values, and only update Leptos signals when the selected slice changes. Eliminates process_event_batch, ClientEvent-based state sync, and manual needs_*_refresh flags. Plan: Task 6 updated (state actor gains subscriber list, StateChanged notifications, ReadState ask pattern). Task 7 rewritten entirely — replaces event loop with derived_signal() helper, deletes event_processing.rs, rewrites state.rs as selector registrations. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 57 ++++++---- docs/specs/2026-03-29-actor-system-design.md | 109 ++++++++++++++++++- 2 files changed, 142 insertions(+), 24 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index e7ac7549..aa9e2b9b 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -190,13 +190,13 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - Modify: `crates/client/src/files.rs` - Modify: `crates/client/src/worker_cache.rs` -Replace `Arc>` and `futures::channel::mpsc` with actors. +Replace `Arc>` and `futures::channel::mpsc` with actors. 
The state actor becomes the single source of truth, with a subscriber notification mechanism for derived state. -- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock). Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. Common patterns: mutations return `()` (fire-and-forget), queries return a cloned field or computed value. +- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec<Recipient<StateChanged>>` subscriber list. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. After every mutation handler completes, the actor sends `StateChanged` to all subscribers. Also implement a `ReadState` message with a selector callback that returns a boxed value (for derived state queries). Also implement `Subscribe(Recipient<StateChanged>)` to register new watchers. -- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr<ClientStateActor>`, `TopicHandle`, and the `event_tx` channel sender (needed to emit `ClientEvent`s to the UI). In `handle_stream_item`, sends mutations to the state actor and emits `ClientEvent`s on `event_tx`. +- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr<ClientStateActor>` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor. No longer emits `ClientEvent`s — the state actor's subscriber notification replaces the event channel for state-derived signals. Ephemeral events (typing indicators, connection status changes) that aren't part of `SharedState` can still use a lightweight channel or a separate notification actor.
-- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Also move `topics: Arc>>` into the state actor (or a dedicated topic actor) — this is another lock to eliminate. Keep `event_tx: futures_mpsc::UnboundedSender` as a plain channel — `ClientEvent`s flow to the UI layer which is not an actor. **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. +- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Also move `topics: Arc>>` into the state actor (or a dedicated topic actor). Remove `event_tx` for state synchronization — derived state actors replace it. Keep a small ephemeral event channel for non-state notifications (typing, connection). **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. - [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. @@ -206,46 +206,61 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. 
--- -## Task 7: Web UI Migration +## Task 7: Web UI Migration — Derived State Signals **Files:** - Modify: `crates/web/Cargo.toml` - Modify: `crates/web/src/app.rs` -- Modify: `crates/web/src/event_processing.rs` -- Modify: `crates/web/src/state.rs` (if signal source changes) -- Modify: `crates/web/src/components/*.rs` (any that call handle methods directly) +- Delete: `crates/web/src/event_processing.rs` +- Rewrite: `crates/web/src/state.rs` +- Modify: `crates/web/src/components/*.rs` (remove direct handle state reads) -Validates WASM target correctness. This task is larger than it appears because `ClientHandle` methods become async in Task 6, which cascades into the web crate. +Replaces the `ClientEvent` → `process_event_batch` → signal update pipeline with selector-based derived state actors. ### Actor ↔ Leptos Bridge -The current flow is: +**Current flow** (event-driven, pull-based): ``` TopicEvents → spawn_local loop → ClientEvent channel → process_event_batch() → WriteSignal::set() ↓ reads handle.peers(), handle.messages(), etc. (sync reads via Arc>) ``` -After the migration: +**New flow** (push-based, selector-driven): ``` -TopicEvents → TopicListenerActor → ClientEvent channel → process_event_batch() → WriteSignal::set() - ↓ reads via handle.ask().await - (async reads via state actor) +Network → TopicListenerActor → mutations → ClientStateActor + ↓ StateChanged notification + DerivedStateActors (one per signal) + ↓ selector(state) → compare → signal.set() if changed + Leptos reactive signals ``` -The `ClientEvent` channel remains the boundary between the actor world and Leptos signals. The key change is that `process_event_batch` and `refresh_all_signals` become async because `ClientHandle` state accessors (`peers()`, `messages()`, `channels()`, `server_list()`, `display_name()`, `roles_data()`, etc.) are now async. Since these are called inside a `spawn_local` async block, `.await` works — but every call site must be updated. 
+The `ClientEvent` channel and `process_event_batch` are eliminated for state-derived signals. Each Leptos signal is backed by a `DerivedStateActor` that watches a specific slice of `SharedState` via a selector function. -**Alternative considered:** have the state actor push full state snapshots into a signal directly, eliminating the `ClientEvent` → `process_event_batch` → signal path. Rejected: the snapshot approach would re-render everything on every state change. The event-based approach is more efficient (only flags `needs_msg_refresh`, `needs_peer_refresh`, etc.). +- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Send + 'static`. Fields: `state_addr: Addr`, `selector: Box T + Send>`, `cached: Option`, `write: WriteSignalSender` (a callback or channel that sets the Leptos signal — must be `Send` on WASM via `SendWrapper`). Implements `Handler`: asks state actor for current derived value via `ReadState`, compares with cached, updates signal if different. Subscribes to state actor in `started()`. -- [ ] **Step 1: Update `app.rs` initialization.** The `System` is created inside `ClientHandle::new()` (Task 6) and owned by the client. `app.rs` receives the `ClientHandle` which already owns actor addresses internally. No `System::new()` call needed in the web crate. Update the `spawn_local` event loop: the `while let Some(event) = client_event_rx.next().await` loop stays, but `process_event_batch` is now called with `.await`. +- [ ] **Step 2: Implement `derived_signal` helper.** A function in the web crate (not in willow-actor — it depends on Leptos): `fn derived_signal(state_addr, system, selector) -> ReadSignal`. Creates a Leptos signal pair, spawns a `DerivedStateActor`, returns the read half. This is the primary API for connecting actor state to Leptos. -- [ ] **Step 2: Make `process_event_batch` async.** Currently `fn`, becomes `async fn`. All `handle.peers()`, `handle.messages()`, `handle.channels()`, etc. 
calls gain `.await`. The function is called inside a `spawn_local` async block, so this is straightforward. Same for `refresh_all_signals`. +- [ ] **Step 3: Rewrite `state.rs`.** Replace the current `create_signals()` function (which creates ~30 independent signals) with calls to `derived_signal()`. Each signal maps to a selector: + - `messages` → `|s| s.state.messages_for(&s.state.current_channel)` + - `channels` → `|s| s.state.channels.clone()` + - `peers` → `|s| s.state.chat.peers.clone()` + - `display_name` → `|s| s.state.display_name.clone()` + - etc. -- [ ] **Step 3: Update component direct handle calls.** Grep for `handle.` in `crates/web/src/components/` — any component that calls `ClientHandle` methods directly (not through signals) must switch to async. Components typically access the handle via `use_context::()`. If a component calls a now-async method in a sync event handler (e.g., button click), wrap it in `spawn_local`. + Signals that don't derive from `SharedState` (e.g., `show_settings`, `show_palette`, `current_tab`) remain as regular Leptos signals — they are pure UI state. -- [ ] **Step 4: Verify WASM compilation.** `just check-wasm` must pass. +- [ ] **Step 4: Update `app.rs`.** Remove the `spawn_local` event loop that drained `ClientEvent`s and called `process_event_batch`. Remove `refresh_all_signals`. The `ClientHandle` connection still happens in a `spawn_local` (network setup is async), but signal updates are now automatic via derived state actors. Handle ephemeral events (typing indicators, connection status) via a small separate channel or dedicated actors. -- [ ] **Step 5: Run `just test-browser`.** All 39+ browser tests must pass. +- [ ] **Step 5: Delete `event_processing.rs`.** The entire module is replaced by derived state actors. The `process_event_batch` function, `needs_*_refresh` flags, and event-to-signal mapping are all gone. 
+ +- [ ] **Step 6: Update components.** Components that called `handle.peers()`, `handle.messages()`, etc. directly now read from their corresponding derived signal instead. Components that perform actions (send message, create channel) still call `handle.send_message()` etc., which sends a mutation message to the state actor. Grep for `handle.` in components and verify each call is either an action (keep) or a state read (replace with signal). + +- [ ] **Step 7: Handle ephemeral events.** Typing indicators, connection status changes, and voice signals are transient — they aren't part of `SharedState` and don't need derived actors. Options: (a) add them to `SharedState` and let selectors handle them, (b) keep a small `futures::channel::mpsc` for ephemeral events with a dedicated `spawn_local` consumer, (c) use dedicated actors with their own signals. Choose (a) if the events map cleanly to state fields; (b) for truly transient notifications. + +- [ ] **Step 8: Verify WASM compilation.** `just check-wasm` must pass. + +- [ ] **Step 9: Run `just test-browser`.** All 39+ browser tests must pass. --- diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index fa009837..bff1e45a 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -651,9 +651,108 @@ callers use `Addr` to send commands and `ask()` to query state. ### Phase 3: Web UI -The Leptos event loop becomes a `StreamHandler` on a UI actor. Signal -updates happen in the handler. Validates WASM target correctness and -completes the migration across all active crates. +Derived state actors bridge the actor system to Leptos signals. See +"Reactive Derived State" section below. + +## Reactive Derived State + +### Problem + +The UI needs to reactively update when shared state changes. 
Naive +approaches have drawbacks: + +- **Pull on every event** (`ask()` in `process_event_batch`): every + `ClientEvent` triggers async queries for all state slices, even if + most didn't change. Stale data possible between events. +- **Push full snapshots**: state actor sends complete state to the UI + on every mutation. Causes full re-renders, wasteful when only one + field changed. +- **`ClientEvent`-based flags** (current approach): `needs_msg_refresh`, + `needs_peer_refresh`, etc. — manually maintained, error-prone, and + tightly couples event types to signal updates. + +### Solution: Selector-Based Derived State + +Inspired by Yewdux's `use_selector` pattern. The state actor is the +single source of truth. Derived state actors subscribe to state changes, +run a selector function, and only update their signal when the selected +value actually changes. + +``` +Network → TopicListenerActor → mutations → ClientStateActor + ↓ notifies after each mutation + StateChanged + ↓ + ┌──────────────────┼──────────────────┐ + ▼ ▼ ▼ + DerivedActor DerivedActor DerivedActor + selector: peers() selector: msgs() selector: channels() + cached: [A, B] cached: [m1, m2] cached: [#gen, #dev] + │ │ │ + (if changed) (if changed) (if changed) + ▼ ▼ ▼ + signal.set([A,B,C]) (no update) signal.set(...) +``` + +### How it works + +1. **`ClientStateActor`** holds `SharedState` and a list of + `Recipient` subscribers. After every mutation handler + completes, it sends `StateChanged` to all subscribers. + +2. **`DerivedStateActor`** is a generic actor parameterized by: + - A selector: `Fn(&SharedState) -> T` (extracts a slice) + - A cached value: `Option` (last known value) + - A signal writer (Leptos `WriteSignal` or a callback) + +3. On receiving `StateChanged`, the derived actor sends a + `ReadState(selector)` ask to the state actor, which runs the + selector against current state and returns `T`. + +4. The derived actor compares the new `T` with its cached value. 
If + different (via `PartialEq`), it updates the signal and caches the + new value. If equal, it does nothing — no re-render. + +### API sketch (web crate, not in willow-actor) + +```rust +/// Create a derived Leptos signal backed by a state actor selector. +/// Returns a ReadSignal that updates only when the selected value changes. +fn derived_signal( + state_addr: &Addr, + system: &SystemHandle, + selector: impl Fn(&SharedState) -> T + Send + 'static, +) -> ReadSignal { + let (read, write) = create_signal(selector(&initial_state)); + system.spawn(DerivedStateActor { + state_addr: state_addr.clone(), + selector, + cached: None, + write, + }); + read +} +``` + +### Benefits + +- **No stale data**: signals always reflect the latest state +- **Minimal re-renders**: only updates when the selected slice changes +- **No manual event mapping**: no `needs_msg_refresh` flags, no + `process_event_batch` function matching events to signal updates +- **Decoupled**: adding a new signal is one `derived_signal()` call, + no changes to event processing +- **Eliminates `ClientEvent` for state sync**: `ClientEvent` becomes + purely for ephemeral notifications (typing indicators, connection + status) that aren't part of `SharedState` + +### Notification cost + +Each mutation triggers N `StateChanged` messages (one per subscriber) +plus N `ReadState` ask round-trips. With ~15-20 derived signals and +in-process message passing (no I/O, same thread on WASM), this is +sub-millisecond total. The PartialEq check prevents signal updates +from propagating further. ## Decisions @@ -669,3 +768,7 @@ completes the migration across all active crates. state via `ask()`. This eliminates all locks from the hot path — the actor processes messages sequentially, so no synchronization is needed inside the actor. + +4. **Derived state for UI signals.** Leptos signals are updated via + selector-based derived state actors, not event batch processing. 
+ Signals only re-render when their selected value actually changes. From 38dc7e802b41159d18e587355455f7e7cfd233a0 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:39:42 +0000 Subject: [PATCH 20/26] Unify ephemeral and persistent state under same actor pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typing indicators, connection status, voice participants are all just state owned by an actor — same selector/notify/signal pattern applies. No special event channels or separate handling needed. ClientEvent enum is eliminated entirely, not just for state sync. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 14 +++++----- docs/specs/2026-03-29-actor-system-design.md | 28 +++++++++++++++++--- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index aa9e2b9b..eea2397a 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -194,9 +194,9 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. The - [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec>` subscriber list. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. After every mutation handler completes, the actor sends `StateChanged` to all subscribers. Also implement a `ReadState` message with a selector callback that returns a boxed value (for derived state queries). Also implement `Subscribe(Recipient)` to register new watchers. -- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor. 
No longer emits `ClientEvent`s — the state actor's subscriber notification replaces the event channel for state-derived signals. Ephemeral events (typing indicators, connection status changes) that aren't part of `SharedState` can still use a lightweight channel or a separate notification actor. +- [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor. All state flows through the actor — typing indicators, connection status, and voice state are fields in `SharedState` mutated via messages, not separate event channels. -- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Also move `topics: Arc>>` into the state actor (or a dedicated topic actor). Remove `event_tx` for state synchronization — derived state actors replace it. Keep a small ephemeral event channel for non-state notifications (typing, connection). **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. +- [ ] **Step 3: Refactor `ClientHandle`.** Replace `shared: Arc>` with `state: Addr`. Also move `topics: Arc>>` into the state actor (or a dedicated topic actor). Remove `event_tx` entirely — derived state actors replace the event channel for all UI updates (including ephemeral state like typing and connection status). **Note:** this makes previously-synchronous state accessors async (`shared.read()` → `state.ask().await`). All callers in `lib.rs`, `ops.rs`, `invite.rs`, `files.rs`, and `worker_cache.rs` must be updated to `.await` state queries. Methods that were `fn` become `async fn`. 
- [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. @@ -250,17 +250,15 @@ The `ClientEvent` channel and `process_event_batch` are eliminated for state-der Signals that don't derive from `SharedState` (e.g., `show_settings`, `show_palette`, `current_tab`) remain as regular Leptos signals — they are pure UI state. -- [ ] **Step 4: Update `app.rs`.** Remove the `spawn_local` event loop that drained `ClientEvent`s and called `process_event_batch`. Remove `refresh_all_signals`. The `ClientHandle` connection still happens in a `spawn_local` (network setup is async), but signal updates are now automatic via derived state actors. Handle ephemeral events (typing indicators, connection status) via a small separate channel or dedicated actors. +- [ ] **Step 4: Update `app.rs`.** Remove the `spawn_local` event loop that drained `ClientEvent`s and called `process_event_batch`. Remove `refresh_all_signals`. The `ClientHandle` connection still happens in a `spawn_local` (network setup is async), but signal updates are now automatic via derived state actors. All state — including typing indicators, connection status, and voice participants — flows through the state actor and derived signals. -- [ ] **Step 5: Delete `event_processing.rs`.** The entire module is replaced by derived state actors. The `process_event_batch` function, `needs_*_refresh` flags, and event-to-signal mapping are all gone. +- [ ] **Step 5: Delete `event_processing.rs`.** The entire module is replaced by derived state actors. The `process_event_batch` function, `needs_*_refresh` flags, `ClientEvent` enum, and event-to-signal mapping are all gone. - [ ] **Step 6: Update components.** Components that called `handle.peers()`, `handle.messages()`, etc. directly now read from their corresponding derived signal instead. 
Components that perform actions (send message, create channel) still call `handle.send_message()` etc., which sends a mutation message to the state actor. Grep for `handle.` in components and verify each call is either an action (keep) or a state read (replace with signal). -- [ ] **Step 7: Handle ephemeral events.** Typing indicators, connection status changes, and voice signals are transient — they aren't part of `SharedState` and don't need derived actors. Options: (a) add them to `SharedState` and let selectors handle them, (b) keep a small `futures::channel::mpsc` for ephemeral events with a dedicated `spawn_local` consumer, (c) use dedicated actors with their own signals. Choose (a) if the events map cleanly to state fields; (b) for truly transient notifications. +- [ ] **Step 7: Verify WASM compilation.** `just check-wasm` must pass. -- [ ] **Step 8: Verify WASM compilation.** `just check-wasm` must pass. - -- [ ] **Step 9: Run `just test-browser`.** All 39+ browser tests must pass. +- [ ] **Step 8: Run `just test-browser`.** All 39+ browser tests must pass. --- diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index bff1e45a..3389b9ad 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -734,6 +734,25 @@ fn derived_signal( } ``` +### Ephemeral state is the same pattern + +Typing indicators, connection status, voice participants — these are +all just state owned by an actor. There's no reason to treat them +differently. Each gets its own actor (or they live as fields in a +single actor) with the same selector/notify pattern: + +- **`ConnectionActor`**: holds connected peers, relay status. Selectors: + `|s| s.peers.len()`, `|s| s.connection_status.clone()` +- **`TypingActor`**: holds typing peer map with expiry. Selectors: + `|s| s.typing_in("general")` +- **`VoiceActor`**: holds voice participants, mute/deafen state. 
+ Selectors: `|s| s.participants.clone()` + +Or these can all be fields in `ClientStateActor` — the derived state +pattern works the same regardless. The key insight is that **all UI +state flows through actors with selector-based notification**. No +separate event channels, no special cases. + ### Benefits - **No stale data**: signals always reflect the latest state @@ -742,9 +761,12 @@ fn derived_signal( `process_event_batch` function matching events to signal updates - **Decoupled**: adding a new signal is one `derived_signal()` call, no changes to event processing -- **Eliminates `ClientEvent` for state sync**: `ClientEvent` becomes - purely for ephemeral notifications (typing indicators, connection - status) that aren't part of `SharedState` +- **Uniform**: persistent state, ephemeral state, and UI state all + use the same actor → selector → signal pattern. No special channels + or event types for different categories of state. +- **Eliminates `ClientEvent` entirely**: all state flows through actors. + The `ClientEvent` enum, the event channel, and `process_event_batch` + are all removed. 
### Notification cost From 8e75c6310c7fed14dd232ab01371dce62365b7af Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:43:48 +0000 Subject: [PATCH 21/26] Sixth review pass: derived state mechanics - File map: fix stale descriptions (event_processing deleted, not async) - ReadState message: explicit type-erasure via Box, note this is the one place Any is needed, derived actor downcasts - ClientEvent deletion: add explicit step in Task 6, grep workspace - derived_signal: use T::default() for initial value, DerivedStateActor seeds the signal in started() via immediate ask - Task 6/7 contract: call out Subscribe/StateChanged/ReadState as the stable API boundary between client and web crates - Renumber Task 6 steps after insertion https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 22 +++++++++++++------- docs/specs/2026-03-29-actor-system-design.md | 9 +++++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index eea2397a..62f04f1b 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -47,11 +47,13 @@ crates/client/Cargo.toml — Add willow-actor dependency crates/client/src/lib.rs — Replace Arc> with state actor crates/client/src/listeners.rs — Replace spawn_topic_listener with StreamHandler actor crates/client/src/state.rs — State accessor methods become ask() calls +crates/client/src/events.rs — Delete ClientEvent enum (replaced by derived state) crates/web/Cargo.toml — Add willow-actor dependency -crates/web/src/app.rs — Update event loop for async handle methods -crates/web/src/event_processing.rs — process_event_batch becomes async -crates/web/src/components/*.rs — Wrap sync handle calls in spawn_local +crates/web/src/app.rs — Remove event loop, wire derived signals +crates/web/src/event_processing.rs — DELETE (replaced by derived state actors) +crates/web/src/state.rs 
— Rewrite: derived_signal() calls replace create_signals() +crates/web/src/components/*.rs — Replace handle state reads with derived signals Cargo.toml — Add actor to workspace members ``` @@ -192,7 +194,11 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi Replace `Arc>` and `futures::channel::mpsc` with actors. The state actor becomes the single source of truth, with a subscriber notification mechanism for derived state. -- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec>` subscriber list. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. After every mutation handler completes, the actor sends `StateChanged` to all subscribers. Also implement a `ReadState` message with a selector callback that returns a boxed value (for derived state queries). Also implement `Subscribe(Recipient)` to register new watchers. +- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec>` subscriber list. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. After every mutation handler completes, the actor sends `StateChanged` to all subscribers. Also implement: + - `Subscribe(Recipient)` — register a new watcher + - `ReadState` — type-erased selector query: carries `Box Box + Send>` and replies with `Box`. The derived actor downcasts the response to its concrete `T`. This is the one place `Any` is required — the alternative (a typed message per selector) would need a separate message type for every derived signal. 
+ + **Contract with Task 7:** The `Subscribe`, `StateChanged`, and `ReadState` messages form the interface that `DerivedStateActor` (defined in the web crate) depends on. Design them as a stable API. - [ ] **Step 2: Define `TopicListenerActor`.** Replaces `spawn_topic_listener`. Implements `StreamHandler` for gossip events. Holds `Addr` and `TopicHandle`. In `handle_stream_item`, sends mutations to the state actor. All state flows through the actor — typing indicators, connection status, and voice state are fields in `SharedState` mutated via messages, not separate event channels. @@ -200,9 +206,11 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. The - [ ] **Step 4: Update `listeners.rs`.** Replace `spawn_topic_listener()` with spawning a `TopicListenerActor` on the system. Remove the manual `topic_listener_loop`. -- [ ] **Step 5: Update client tests.** Tests using `test_client()` helper need updating — shared state access changes from lock-based to ask-based. Verify all `just test-client` tests pass. +- [ ] **Step 5: Delete `ClientEvent` enum.** Remove `crates/client/src/events.rs` (or gut it). The `ClientEvent` enum, the `event_tx` channel, and all event emission are replaced by state actor mutations + subscriber notifications. Grep for `ClientEvent` across the workspace to find and remove all references. + +- [ ] **Step 6: Update client tests.** Tests using `test_client()` helper need updating — shared state access changes from lock-based to ask-based. Tests that asserted on `ClientEvent` emissions must switch to asserting on state actor state via `ask()`. Verify all `just test-client` tests pass. -- [ ] **Step 6: Run `just test-client` and `just clippy`.** All 93+ client tests must pass with zero warnings. +- [ ] **Step 7: Run `just test-client` and `just clippy`.** All 93+ client tests must pass with zero warnings. 
--- @@ -237,7 +245,7 @@ Network → TopicListenerActor → mutations → ClientStateActor The `ClientEvent` channel and `process_event_batch` are eliminated for state-derived signals. Each Leptos signal is backed by a `DerivedStateActor` that watches a specific slice of `SharedState` via a selector function. -- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Send + 'static`. Fields: `state_addr: Addr`, `selector: Box T + Send>`, `cached: Option`, `write: WriteSignalSender` (a callback or channel that sets the Leptos signal — must be `Send` on WASM via `SendWrapper`). Implements `Handler`: asks state actor for current derived value via `ReadState`, compares with cached, updates signal if different. Subscribes to state actor in `started()`. +- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Default + Send + 'static`. Fields: `state_addr: Addr`, `selector: Box T + Send>`, `cached: Option`, `write: WriteSignalSender` (a callback or channel that sets the Leptos signal — must be `Send` on WASM via `SendWrapper`). Implements `Handler`: asks state actor via `ReadState` (passes a closure that calls the selector, returns `Box`), downcasts the `Box` response to `T`, compares with cached, updates signal if different. In `started()`: subscribes to state actor via `Subscribe`, then immediately asks for the current value to seed the signal (so it doesn't show `T::default()` for long). - [ ] **Step 2: Implement `derived_signal` helper.** A function in the web crate (not in willow-actor — it depends on Leptos): `fn derived_signal(state_addr, system, selector) -> ReadSignal`. Creates a Leptos signal pair, spawns a `DerivedStateActor`, returns the read half. This is the primary API for connecting actor state to Leptos. 
diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 3389b9ad..63cb0202 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -718,18 +718,21 @@ Network → TopicListenerActor → mutations → ClientStateActor ```rust /// Create a derived Leptos signal backed by a state actor selector. /// Returns a ReadSignal that updates only when the selected value changes. -fn derived_signal( +fn derived_signal( state_addr: &Addr, system: &SystemHandle, - selector: impl Fn(&SharedState) -> T + Send + 'static, + selector: impl Fn(&SharedState) -> T + Send + Clone + 'static, ) -> ReadSignal { - let (read, write) = create_signal(selector(&initial_state)); + let (read, write) = create_signal(T::default()); system.spawn(DerivedStateActor { state_addr: state_addr.clone(), selector, cached: None, write, }); + // The DerivedStateActor's started() hook immediately asks the state + // actor for the current value, seeding the signal. Until that first + // ask completes, the signal holds T::default(). read } ``` From c24e957f53d6a8cad48d64ed2fa8664dfb9c60a2 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:45:44 +0000 Subject: [PATCH 22/26] Seventh review pass on implementation plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ReadState: FnOnce per message, derived actor holds Fn and wraps fresh each time — clarified the FnOnce vs Fn distinction - DerivedStateActor: explicit SendWrapper> for WASM - Selectors: current_channel must move into SharedState so the messages selector can read it; channel switching becomes a state mutation - Components: action calls (send_message, etc.) 
need spawn_local wrappers in sync event handlers since client methods are now async - Notification batching: noted as optimization opportunity, not needed initially since PartialEq prevents redundant signal updates https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 62f04f1b..d20449c6 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -196,7 +196,9 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. The - [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec>` subscriber list. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. After every mutation handler completes, the actor sends `StateChanged` to all subscribers. Also implement: - `Subscribe(Recipient)` — register a new watcher - - `ReadState` — type-erased selector query: carries `Box Box + Send>` and replies with `Box`. The derived actor downcasts the response to its concrete `T`. This is the one place `Any` is required — the alternative (a typed message per selector) would need a separate message type for every derived signal. + - `ReadState` — type-erased selector query: carries `Box Box + Send>` and replies with `Box`. Note: the closure is `FnOnce` because each `ReadState` message is constructed fresh per notification — the derived actor holds its selector as `Fn` and wraps it in a new `FnOnce` closure for each ask. The derived actor downcasts the `Box` response to its concrete `T`. This is the one place `Any` is required. + + **Notification batching:** Each mutation handler sends `StateChanged` individually. 
If a sync batch applies 10 events, that's 10 notifications × N subscribers. This is correct but chatty. Optimization (deferred): coalesce notifications by processing all pending mailbox messages before notifying, so a burst of mutations triggers only one `StateChanged` round. Not needed initially — the PartialEq check in derived actors prevents redundant signal updates regardless. **Contract with Task 7:** The `Subscribe`, `StateChanged`, and `ReadState` messages form the interface that `DerivedStateActor` (defined in the web crate) depends on. Design them as a stable API. @@ -245,24 +247,24 @@ Network → TopicListenerActor → mutations → ClientStateActor The `ClientEvent` channel and `process_event_batch` are eliminated for state-derived signals. Each Leptos signal is backed by a `DerivedStateActor` that watches a specific slice of `SharedState` via a selector function. -- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Default + Send + 'static`. Fields: `state_addr: Addr`, `selector: Box T + Send>`, `cached: Option`, `write: WriteSignalSender` (a callback or channel that sets the Leptos signal — must be `Send` on WASM via `SendWrapper`). Implements `Handler`: asks state actor via `ReadState` (passes a closure that calls the selector, returns `Box`), downcasts the `Box` response to `T`, compares with cached, updates signal if different. In `started()`: subscribes to state actor via `Subscribe`, then immediately asks for the current value to seed the signal (so it doesn't show `T::default()` for long). +- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Default + Send + 'static`. Fields: `state_addr: Addr`, `selector: Box T + Send>`, `cached: Option`, `write: SendWrapper>` (Leptos's `WriteSignal` is `!Send` — `SendWrapper` makes it `Send`, safe on single-threaded WASM). 
Implements `Handler`: constructs a `ReadState` message by wrapping `&self.selector` in a fresh `FnOnce` closure that returns `Box`, asks state actor, downcasts response to `T`, compares with cached, calls `self.write.set(new_value)` if different. In `started()`: subscribes to state actor via `Subscribe`, then immediately asks for the current value to seed the signal. - [ ] **Step 2: Implement `derived_signal` helper.** A function in the web crate (not in willow-actor — it depends on Leptos): `fn derived_signal(state_addr, system, selector) -> ReadSignal`. Creates a Leptos signal pair, spawns a `DerivedStateActor`, returns the read half. This is the primary API for connecting actor state to Leptos. - [ ] **Step 3: Rewrite `state.rs`.** Replace the current `create_signals()` function (which creates ~30 independent signals) with calls to `derived_signal()`. Each signal maps to a selector: - - `messages` → `|s| s.state.messages_for(&s.state.current_channel)` - `channels` → `|s| s.state.channels.clone()` - `peers` → `|s| s.state.chat.peers.clone()` - `display_name` → `|s| s.state.display_name.clone()` + - `messages` → `|s| s.state.messages_for(&s.state.current_channel)` — this requires `current_channel` to be in `SharedState`, not a standalone Leptos signal. **Move `current_channel` into `SharedState`** so the selector can read it. Channel switching becomes a state actor mutation (`SetCurrentChannel`), and the messages selector naturally picks up the new channel. - etc. - Signals that don't derive from `SharedState` (e.g., `show_settings`, `show_palette`, `current_tab`) remain as regular Leptos signals — they are pure UI state. + Signals that are purely local UI state (e.g., `show_settings`, `show_palette`, `current_tab`) remain as regular Leptos signals — no actor needed. The rule: if other selectors depend on it, it belongs in `SharedState`; if nothing else reads it, it stays as a local signal. 
- [ ] **Step 4: Update `app.rs`.** Remove the `spawn_local` event loop that drained `ClientEvent`s and called `process_event_batch`. Remove `refresh_all_signals`. The `ClientHandle` connection still happens in a `spawn_local` (network setup is async), but signal updates are now automatic via derived state actors. All state — including typing indicators, connection status, and voice participants — flows through the state actor and derived signals. - [ ] **Step 5: Delete `event_processing.rs`.** The entire module is replaced by derived state actors. The `process_event_batch` function, `needs_*_refresh` flags, `ClientEvent` enum, and event-to-signal mapping are all gone. -- [ ] **Step 6: Update components.** Components that called `handle.peers()`, `handle.messages()`, etc. directly now read from their corresponding derived signal instead. Components that perform actions (send message, create channel) still call `handle.send_message()` etc., which sends a mutation message to the state actor. Grep for `handle.` in components and verify each call is either an action (keep) or a state read (replace with signal). +- [ ] **Step 6: Update components.** Components that called `handle.peers()`, `handle.messages()`, etc. directly now read from their corresponding derived signal instead. Components that perform actions (send message, create channel) still call `handle.send_message()` etc. — these are now async (Task 6), so sync event handlers (button clicks, key presses) must wrap action calls in `spawn_local`. Grep for `handle.` in components and classify each call: state read → replace with signal, action → wrap in `spawn_local` if in a sync context. - [ ] **Step 7: Verify WASM compilation.** `just check-wasm` must pass. 
From a055b2a9990935147279659a7af59fa7714dd7e4 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:48:52 +0000 Subject: [PATCH 23/26] Add notification batching via Actor::idle() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mailbox loop drains all immediately-available messages via try_recv() before calling idle(). ClientStateActor sets a dirty flag in mutation handlers, then notifies subscribers in idle() — a burst of N mutations triggers a single StateChanged round instead of N. Spec: added idle() to Actor trait, updated notification cost section. Plan: added idle() to Task 2 trait definition, rewrote mailbox loop as numbered steps showing drain-then-idle flow, updated Task 6 batching description. https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 18 +++++++++++++++--- docs/specs/2026-03-29-actor-system-design.md | 20 +++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index d20449c6..85c879b7 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -92,7 +92,10 @@ Defines the trait hierarchy and the type-erased message dispatch mechanism. - [ ] **Step 1: Define `Message` trait.** `Send + 'static` with `type Result: Send + 'static`. -- [ ] **Step 2: Define `Actor` trait.** `Send + 'static + Sized` with `started()` and `stopped()` lifecycle hooks using RPITIT (not async_trait). Both have default no-op impls. 
+- [ ] **Step 2: Define `Actor` trait.** `Send + 'static + Sized` with lifecycle hooks using RPITIT (not async_trait), all with default no-op impls: + - `started(&mut self, ctx: &mut Context<Self>)` — called once before processing messages + - `stopped(&mut self)` — called when the actor is shutting down + - `idle(&mut self, ctx: &mut Context<Self>)` — called after the mailbox drains all immediately-available messages (i.e., when `try_recv()` returns empty). Used for batched notification patterns. - [ ] **Step 3: Define `Handler<M>` trait.** `fn handle(&mut self, msg: M, ctx: &mut Context<Self>) -> impl Future<Output = M::Result> + Send`. Supertrait is `Actor`. @@ -100,7 +103,16 @@ Defines the trait hierarchy and the type-erased message dispatch mechanism. - [ ] **Step 5: Implement `envelope.rs`.** `BoxEnvelope<A>` type alias: `Box<dyn FnOnce(&mut A, &mut Context<A>) -> BoxFuture<'_, ()> + Send>`. Two factory functions: `envelope_send(msg) -> BoxEnvelope<A>` (fire-and-forget) and `envelope_ask(msg, reply_tx) -> BoxEnvelope<A>` (captures oneshot sender). Both wrap the handler call in a closure. -- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox<A: Actor>(mut actor: A, ctx: Context<A>, rx: Receiver<BoxEnvelope<A>>, stop: Arc<AtomicBool>, done: OneshotTx<()>)`: actor is moved in and mutated via `&mut` for its lifetime. The `Context` and `done` oneshot are provided by the caller (System in Task 3, test harness here). Calls `actor.started(&mut ctx)`, loops on `rx.recv()`, executes each envelope passing `&mut actor` and `&mut ctx`, checks stop flag between messages, calls `actor.stopped()` on exit, then signals `done`. For Task 2 tests, construct a minimal `Context` with a dummy `SystemHandle`. +- [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox<A: Actor>(mut actor: A, ctx: Context<A>, rx: Receiver<BoxEnvelope<A>>, stop: Arc<AtomicBool>, done: OneshotTx<()>)`: actor is moved in and mutated via `&mut` for its lifetime. The `Context` and `done` oneshot are provided by the caller (System in Task 3, test harness here). Loop structure: + 1. Call `actor.started(&mut ctx)` + 2.
`recv().await` — blocks until at least one message arrives + 3. Execute the envelope + 4. `try_recv()` in a loop — drain all immediately-available messages without yielding, executing each envelope + 5. Call `actor.idle(&mut ctx)` — queue is now empty + 6. Check stop flag — if set, exit loop + 7. Go to step 2 + + On exit (channel closed or stop flag), call `actor.stopped()`, then signal `done`. For Task 2 tests, construct a minimal `Context` with a dummy `SystemHandle`. - [ ] **Step 7: Write mailbox-level tests.** Test `run_mailbox` directly by creating a channel, sending `BoxEnvelope`s manually, and verifying the actor processes them. Test that the mailbox loop exits when the sender is dropped. Full `Addr`/`System`-level tests come in Task 3. @@ -198,7 +210,7 @@ Replace `Arc>` and `futures::channel::mpsc` with actors. The - `Subscribe(Recipient)` — register a new watcher - `ReadState` — type-erased selector query: carries `Box Box + Send>` and replies with `Box`. Note: the closure is `FnOnce` because each `ReadState` message is constructed fresh per notification — the derived actor holds its selector as `Fn` and wraps it in a new `FnOnce` closure for each ask. The derived actor downcasts the `Box` response to its concrete `T`. This is the one place `Any` is required. - **Notification batching:** Each mutation handler sends `StateChanged` individually. If a sync batch applies 10 events, that's 10 notifications × N subscribers. This is correct but chatty. Optimization (deferred): coalesce notifications by processing all pending mailbox messages before notifying, so a burst of mutations triggers only one `StateChanged` round. Not needed initially — the PartialEq check in derived actors prevents redundant signal updates regardless. + **Notification batching:** Mutations don't notify subscribers individually. Instead, the state actor uses a `dirty: bool` flag. Each mutation handler sets `dirty = true` but does not send `StateChanged` directly. 
The mailbox loop is modified to drain: after processing one envelope via `recv().await`, it calls `try_recv()` in a loop to process all immediately-available messages without yielding. Only after the queue is drained does it call a new `Actor::idle()` hook. The `ClientStateActor` implements `idle()`: if `dirty`, send `StateChanged` to all subscribers and reset the flag. This means a burst of N mutations from a sync batch processes all N, then sends a single `StateChanged` round. The `idle()` hook is added to the `Actor` trait in Task 2 with a default no-op. **Contract with Task 7:** The `Subscribe`, `StateChanged`, and `ReadState` messages form the interface that `DerivedStateActor` (defined in the web crate) depends on. Design them as a stable API. diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 63cb0202..161ee718 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -250,6 +250,14 @@ pub trait Actor: Send + 'static + Sized { /// Called when the actor is stopping (mailbox closed or explicit stop). fn stopped(&mut self) -> impl Future + Send { async {} } + + /// Called after the mailbox drains all immediately-available messages. + /// The mailbox processes one message via recv().await, then drains + /// remaining messages via try_recv(), then calls idle(). Use this + /// for batched notifications — e.g., set a dirty flag in mutation + /// handlers, then notify subscribers in idle(). + fn idle(&mut self, ctx: &mut Context) + -> impl Future + Send { async {} } } ``` @@ -773,11 +781,13 @@ separate event channels, no special cases. ### Notification cost -Each mutation triggers N `StateChanged` messages (one per subscriber) -plus N `ReadState` ask round-trips. With ~15-20 derived signals and -in-process message passing (no I/O, same thread on WASM), this is -sub-millisecond total. The PartialEq check prevents signal updates -from propagating further. 
+Notifications are batched via the `Actor::idle()` hook. A burst of +mutations (e.g., a sync batch applying 10 events) is processed in one +drain cycle, then triggers a single `StateChanged` round. Each round +sends N messages to subscribers (one per derived signal) plus N +`ReadState` ask round-trips. With ~15-20 derived signals and in-process +message passing (no I/O, same thread on WASM), this is sub-millisecond. +The PartialEq check prevents signal updates from propagating further. ## Decisions From 24728c68c5a89f86347090d1d3a3bc1b1e9c5752 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:55:32 +0000 Subject: [PATCH 24/26] Eighth review pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - runtime Receiver: add try_recv() to API (needed for mailbox drain loop) - DerivedStateActor selector: Box → Arc (must be cloneable into each ReadState FnOnce closure which is 'static) - Spec API sketch: selector takes Send + Sync, stored as Arc, write wrapped in SendWrapper - Integration tests: add idle() batching test (send N messages, verify idle called once not N times) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 6 +++--- docs/specs/2026-03-29-actor-system-design.md | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 85c879b7..456b7aec 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -73,7 +73,7 @@ The platform abstraction is the foundation everything else builds on. It must co - [ ] **Step 1: Create crate skeleton.** `Cargo.toml` with workspace edition/version, dependencies split by target: `tokio` (native), `wasm-bindgen-futures` + `futures-channel` + `gloo-timers` (WASM). Shared deps: `futures-core`, `thiserror`, `tracing`. Add `actor` to workspace `Cargo.toml` members. 
-- [ ] **Step 2: Implement `runtime.rs`.** Four functions: `spawn()` (tokio::spawn vs spawn_local), `unbounded_channel()` (tokio mpsc vs futures mpsc), `oneshot()` (tokio vs futures), `sleep()` (tokio vs gloo-timers). Define `Sender` / `Receiver` / `OneshotTx` / `OneshotRx` wrapper types that unify the two backends behind a common API. The `Receiver` must implement `async fn recv() -> Option<M>`. +- [ ] **Step 2: Implement `runtime.rs`.** Four functions: `spawn()` (tokio::spawn vs spawn_local), `unbounded_channel()` (tokio mpsc vs futures mpsc), `oneshot()` (tokio vs futures), `sleep()` (tokio vs gloo-timers). Define `Sender` / `Receiver` / `OneshotTx` / `OneshotRx` wrapper types that unify the two backends behind a common API. The `Receiver` must implement both `async fn recv() -> Option<M>` (blocks until a message arrives) and `fn try_recv() -> Option<M>` (non-blocking, returns `None` if empty). `try_recv` is required for the mailbox drain loop (Task 2 Step 6). Both backends support this: `tokio::sync::mpsc::UnboundedReceiver` provides `try_recv()` directly, and `futures::channel::mpsc::UnboundedReceiver` exposes the same operation as `try_next()`, which the wrapper adapts behind the `try_recv` name. - [ ] **Step 3: Implement `error.rs`.** `SendError<M>` with `Closed(M)` variant. `AskError` with `Closed` and `NoResponse` variants. Both derive `Debug`, `thiserror::Error`. @@ -140,7 +140,7 @@ Wires everything together into the public API. - [ ] **Step 6: Wire up `lib.rs` re-exports.** Public API: `Actor`, `Handler`, `StreamHandler`, `Message`, `Addr`, `AnyAddr`, `Recipient`, `Context`, `System`, `SystemHandle`, `SendError`, `AskError`, `IntervalHandle`. (`RestartPolicy` added in Task 4.) -- [ ] **Step 7: Write integration tests.** Multi-actor test: spawn two actors, actor A sends to actor B, B replies. Test `StreamHandler` with a `futures::stream::iter`. +- [ ] **Step 7: Write integration tests.** Multi-actor test: spawn two actors, actor A sends to actor B, B replies. Test `StreamHandler` with a `futures::stream::iter`.
Test `run_interval` fires expected number of times. Test shutdown stops all actors. Test `idle()` batching: send N messages at once, verify `idle()` is called once (not N times) by checking a counter in the actor. - [ ] **Step 8: Verify WASM compilation.** `cargo check -p willow-actor --target wasm32-unknown-unknown`. Add to `just check-wasm`. @@ -259,7 +259,7 @@ Network → TopicListenerActor → mutations → ClientStateActor The `ClientEvent` channel and `process_event_batch` are eliminated for state-derived signals. Each Leptos signal is backed by a `DerivedStateActor` that watches a specific slice of `SharedState` via a selector function. -- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Default + Send + 'static`. Fields: `state_addr: Addr<ClientStateActor>`, `selector: Box<dyn Fn(&SharedState) -> T + Send>`, `cached: Option<T>`, `write: SendWrapper<WriteSignal<T>>` (Leptos's `WriteSignal` is `!Send` — `SendWrapper` makes it `Send`, safe on single-threaded WASM). Implements `Handler<StateChanged>`: constructs a `ReadState` message by wrapping `&self.selector` in a fresh `FnOnce` closure that returns `Box<dyn Any + Send>`, asks state actor, downcasts response to `T`, compares with cached, calls `self.write.set(new_value)` if different. In `started()`: subscribes to state actor via `Subscribe`, then immediately asks for the current value to seed the signal. +- [ ] **Step 1: Implement `DerivedStateActor`.** Generic actor parameterized by `T: PartialEq + Clone + Default + Send + 'static`. Fields: `state_addr: Addr<ClientStateActor>`, `selector: Arc<dyn Fn(&SharedState) -> T + Send + Sync>` (must be `Arc` so it can be cloned into each `ReadState` closure — a `Box` borrow would not be `'static`), `cached: Option<T>`, `write: SendWrapper<WriteSignal<T>>` (Leptos's `WriteSignal` is `!Send` — `SendWrapper` makes it `Send`, safe on single-threaded WASM).
Implements `Handler`: clones the `Arc` selector, constructs a `ReadState { Box::new(move |state| Box::new(selector(state))) }`, asks state actor, downcasts `Box` response to `T`, compares with cached, calls `self.write.set(new_value)` if different. In `started()`: subscribes to state actor via `Subscribe`, then immediately asks for the current value to seed the signal. - [ ] **Step 2: Implement `derived_signal` helper.** A function in the web crate (not in willow-actor — it depends on Leptos): `fn derived_signal(state_addr, system, selector) -> ReadSignal`. Creates a Leptos signal pair, spawns a `DerivedStateActor`, returns the read half. This is the primary API for connecting actor state to Leptos. diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 161ee718..7d50a078 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -709,9 +709,10 @@ Network → TopicListenerActor → mutations → ClientStateActor completes, it sends `StateChanged` to all subscribers. 2. **`DerivedStateActor`** is a generic actor parameterized by: - - A selector: `Fn(&SharedState) -> T` (extracts a slice) + - A selector: `Arc T + Send + Sync>` + (Arc because it's cloned into each `ReadState` closure) - A cached value: `Option` (last known value) - - A signal writer (Leptos `WriteSignal` or a callback) + - A signal writer (Leptos `WriteSignal` via `SendWrapper`) 3. 
On receiving `StateChanged`, the derived actor sends a `ReadState(selector)` ask to the state actor, which runs the @@ -729,14 +730,14 @@ Network → TopicListenerActor → mutations → ClientStateActor fn derived_signal<T: PartialEq + Clone + Default + Send + 'static>( state_addr: &Addr<ClientStateActor>, system: &SystemHandle, - selector: impl Fn(&SharedState) -> T + Send + Clone + 'static, + selector: impl Fn(&SharedState) -> T + Send + Sync + 'static, ) -> ReadSignal<T> { let (read, write) = create_signal(T::default()); system.spawn(DerivedStateActor { state_addr: state_addr.clone(), - selector, + selector: Arc::new(selector), cached: None, - write, + write: SendWrapper::new(write), }); // The DerivedStateActor's started() hook immediately asks the state // actor for the current value, seeding the signal. Until that first From 1aeec84cf69f02c80289148ca932a5ba27049660 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 17:58:30 +0000 Subject: [PATCH 25/26] Add implementation convenience notes and Arc cloning guidance Plan: - Message trait: note potential derive macro for boilerplate reduction - envelope.rs: explain monomorphization and type-erasure boundary - Worker StateActor: clarify it doesn't need idle/dirty/subscribers - ClientStateActor: add mutate() helper to avoid repeating dirty=true - TopicEventStream adapter: suggest placing in willow-network to share between worker and client TopicListenerActors - Cloning cost: wrap expensive SharedState fields in Arc, selectors clone the Arc pointer instead of deep-cloning. PartialEq on Arc compares pointers first. Apply selectively based on profiling.
Spec: - New "Cloning cost" section in the derived state design https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/plans/2026-03-30-actor-system.md | 12 +++++++----- docs/specs/2026-03-29-actor-system-design.md | 10 ++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-03-30-actor-system.md b/docs/plans/2026-03-30-actor-system.md index 456b7aec..21c41bb8 100644 --- a/docs/plans/2026-03-30-actor-system.md +++ b/docs/plans/2026-03-30-actor-system.md @@ -90,7 +90,7 @@ The platform abstraction is the foundation everything else builds on. It must co Defines the trait hierarchy and the type-erased message dispatch mechanism. -- [ ] **Step 1: Define `Message` trait.** `Send + 'static` with `type Result: Send + 'static`. +- [ ] **Step 1: Define `Message` trait.** `Send + 'static` with `type Result: Send + 'static`. **Convenience:** consider a `#[derive(Message)]` proc macro or a declarative macro `message!(MyMsg => ResponseType)` to reduce boilerplate. Not required for initial implementation — can be added later if the manual impls feel verbose. - [ ] **Step 2: Define `Actor` trait.** `Send + 'static + Sized` with lifecycle hooks using RPITIT (not async_trait), all with default no-op impls: - `started(&mut self, ctx: &mut Context)` — called once before processing messages @@ -101,7 +101,7 @@ Defines the trait hierarchy and the type-erased message dispatch mechanism. - [ ] **Step 4: Define `StreamHandler` trait.** `handle_stream_item()` and `stream_finished()` with RPITIT. Supertrait is `Actor`. -- [ ] **Step 5: Implement `envelope.rs`.** `BoxEnvelope` type alias: `Box) -> BoxFuture<'_, ()> + Send>`. Two factory functions: `envelope_send(msg) -> BoxEnvelope` (fire-and-forget) and `envelope_ask(msg, reply_tx) -> BoxEnvelope` (captures oneshot sender). Both wrap the handler call in a closure. +- [ ] **Step 5: Implement `envelope.rs`.** `BoxEnvelope` type alias: `Box) -> BoxFuture<'_, ()> + Send>`. 
Two factory functions: `envelope_send(msg) -> BoxEnvelope` (fire-and-forget, drops the handler's return value) and `envelope_ask(msg, reply_tx) -> BoxEnvelope` (captures oneshot sender, sends the return value back). Both wrap the handler call in a closure. **Implementation note:** the generic bounds on these functions are `A: Handler, M: Message` — the monomorphization happens at the call site (`Addr::send`/`Addr::ask`), and the resulting closure is type-erased into the `BoxEnvelope`. This is where per-message-type dispatch is compiled in. - [ ] **Step 6: Implement `mailbox.rs`.** `async fn run_mailbox(mut actor: A, ctx: Context, rx: Receiver>, stop: Arc, done: OneshotTx<()>)`: actor is moved in and mutated via `&mut` for its lifetime. The `Context` and `done` oneshot are provided by the caller (System in Task 3, test harness here). Loop structure: 1. Call `actor.started(&mut ctx)` @@ -175,9 +175,9 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi - [ ] **Step 1: Define message types in `actors/mod.rs`.** Replace `StateMsg` enum with individual message structs: `EventMsg(Event)`, `RequestMsg { req, reply: ... }` → becomes ask-pattern `WorkerRequestMsg(WorkerRequest)` with `type Result = WorkerResponse`, `GetRoleInfoMsg` with `type Result = WorkerRoleInfo`, `GetStateHashesMsg` with `type Result = Vec<(String, StateHash)>`, `ServerDiscoveredMsg { server_id }`. Remove `NetworkOutMsg` (network actor no longer needs a channel — it holds TopicHandle directly). Remove `StateMsg::Shutdown` (handled by mailbox close). -- [ ] **Step 2: Rewrite `state.rs` as `StateActor`.** Struct holds `Box`. Implement `Actor` (no lifecycle hooks needed). Implement `Handler`, `Handler`, `Handler`, `Handler`, `Handler`. Each handler is 1-3 lines — delegates to `self.role`. Remove the manual `run()` function and its `while let` loop. +- [ ] **Step 2: Rewrite `state.rs` as `StateActor`.** Struct holds `Box`. Implement `Actor` (no lifecycle hooks needed). 
Implement `Handler`, `Handler`, `Handler`, `Handler`, `Handler`. Each handler is 1-3 lines — delegates to `self.role`. Remove the manual `run()` function and its `while let` loop. **Implementation note:** the worker `StateActor` does not need `idle()` or subscriber notifications — it's a simple request-reply actor with no UI signals. The `idle()`/dirty/subscriber pattern is only for `ClientStateActor` in Task 6. -- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr`, `EndpointId`, and `Option` where `E: TopicEvents + 'static` (the `'static` bound is required because `Actor: 'static`; taken in `started()`). `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, call `self.events.take().unwrap()` to extract the topic events, wrap in `TopicEventStream`, and attach via `ctx.add_stream()`. Remove the manual `run()` function. +- [ ] **Step 3: Rewrite `network.rs` as `NetworkActor`.** Struct holds `Addr`, `EndpointId`, and `Option` where `E: TopicEvents + 'static` (the `'static` bound is required because `Actor: 'static`; taken in `started()`). `TopicEvents` is not a `Stream` trait — it has an async `next()` method. Write a thin adapter (`TopicEventStream`) that wraps a `TopicEvents` impl into a `futures::Stream` (filtering errors with a warning log). Implement `StreamHandler` — the `handle_stream_item` replaces the `while let` loop. Keep `parse_worker_message()` and `parse_server_message()` as pure functions. In `started()`, call `self.events.take().unwrap()` to extract the topic events, wrap in `TopicEventStream`, and attach via `ctx.add_stream()`. Remove the manual `run()` function. 
**Implementation note:** `TopicEventStream` can implement `Stream` via `poll_fn` or `async_stream` — wrap the `next().await` call in a `Pin>`. This adapter is also needed by `TopicListenerActor` in Task 6, so consider placing it in `willow-network` (behind the existing `test-utils` feature or a new `stream` feature) to avoid duplication. - [ ] **Step 4: Rewrite `heartbeat.rs` as `HeartbeatActor`.** Struct holds `EndpointId`, `Addr`, and the `TopicHandle` (owned, not borrowed — actor owns it for its lifetime). Define `HeartbeatTick` message. Implement `Handler` — queries state actor via `state_addr.ask(GetRoleInfoMsg)`, broadcasts announcement. In `started()`, call `ctx.run_interval(duration, || HeartbeatTick)`. Implement `stopped()` to broadcast departure message via `self.topic.broadcast()` — best-effort (may fail silently if network is already shut down). Remove `shutdown: watch::Receiver` — the actor stops when its address is dropped. @@ -206,7 +206,7 @@ Migrate the four hand-rolled worker actors to use `willow-actor`. This is the fi Replace `Arc>` and `futures::channel::mpsc` with actors. The state actor becomes the single source of truth, with a subscriber notification mechanism for derived state. -- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec>` subscriber list. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. Define message structs for each. After every mutation handler completes, the actor sends `StateChanged` to all subscribers. Also implement: +- [ ] **Step 1: Define `ClientStateActor`.** Holds `SharedState` directly (no Arc, no RwLock) plus a `Vec>` subscriber list and a `dirty: bool` flag. Audit all `shared.write()` and `shared.read()` call sites in the client crate to discover the full message set — expect ~10-15 mutation messages and ~5-10 query messages. 
Define message structs for each. **Convenience:** to avoid repeating `self.dirty = true` in every mutation handler, add a helper method `fn mutate(&mut self, f: impl FnOnce(&mut SharedState))` that applies the closure and sets `dirty = true`. Mutation handlers become one-liners: `self.mutate(|s| s.peers.push(peer))`. Also implement: - `Subscribe(Recipient<StateChanged>)` — register a new watcher - `ReadState` — type-erased selector query: carries `Box<dyn FnOnce(&SharedState) -> Box<dyn Any + Send> + Send>` and replies with `Box<dyn Any + Send>`. Note: the closure is `FnOnce` because each `ReadState` message is constructed fresh per notification — the derived actor holds its selector as `Fn` and wraps it in a new `FnOnce` closure for each ask. The derived actor downcasts the `Box<dyn Any + Send>` response to its concrete `T`. This is the one place `Any` is required. @@ -272,6 +272,8 @@ The `ClientEvent` channel and `process_event_batch` are eliminated for state-der Signals that are purely local UI state (e.g., `show_settings`, `show_palette`, `current_tab`) remain as regular Leptos signals — no actor needed. The rule: if other selectors depend on it, it belongs in `SharedState`; if nothing else reads it, it stays as a local signal. + **Cloning cost:** Selectors return owned `T`, which means cloning state out of the actor on every `ReadState`. For small types (`String`, `bool`, `Vec<_>`) this is trivial. For expensive types (e.g., `Vec<_>` with hundreds of messages), wrap the field in `Arc` inside `SharedState` — then the selector clones the `Arc` (cheap pointer bump) instead of deep-cloning the data. The `PartialEq` check on `Arc<Vec<_>>` compares pointers first (fast path: same Arc = no change), falling back to element comparison only when the Arc was replaced. Apply `Arc` wrapping selectively to fields where profiling shows cloning is a bottleneck — don't pre-optimize every field. + - [ ] **Step 4: Update `app.rs`.** Remove the `spawn_local` event loop that drained `ClientEvent`s and called `process_event_batch`. Remove `refresh_all_signals`.
The `ClientHandle` connection still happens in a `spawn_local` (network setup is async), but signal updates are now automatic via derived state actors. All state — including typing indicators, connection status, and voice participants — flows through the state actor and derived signals. - [ ] **Step 5: Delete `event_processing.rs`.** The entire module is replaced by derived state actors. The `process_event_batch` function, `needs_*_refresh` flags, `ClientEvent` enum, and event-to-signal mapping are all gone. diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 7d50a078..51dc3444 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -790,6 +790,16 @@ sends N messages to subscribers (one per derived signal) plus N message passing (no I/O, same thread on WASM), this is sub-millisecond. The PartialEq check prevents signal updates from propagating further. +### Cloning cost + +Selectors return owned `T`, so each `ReadState` clones data out of the +actor. For expensive fields (e.g., `Vec` with hundreds +of entries), wrap the field in `Arc` inside `SharedState`. The selector +then clones the `Arc` (cheap pointer bump) instead of deep-cloning. +`PartialEq` on `Arc>` compares pointers first — if the `Arc` +wasn't replaced, the check is O(1). Apply `Arc` wrapping selectively +to fields where cloning is measurably expensive. + ## Decisions 1. 
**Mailboxes are unbounded.** `send()` returns `Err` only if the From 749e8b04565563f3a86bbb3b34c8907ee3341a3f Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 30 Mar 2026 18:00:38 +0000 Subject: [PATCH 26/26] Final review pass: fix stale per-mutation notification descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Spec diagram: "notifies after each mutation" → "idle() notifies after drain" - Spec "How it works" step 1: rewritten to describe dirty flag + idle() batching instead of per-mutation notification - Recipient: Box> → Box so it can be stored in Send actors (e.g., ClientStateActor subscribers) https://claude.ai/code/session_01Wp9UVgRg5b8wDrhDuHBY6Z --- docs/specs/2026-03-29-actor-system-design.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/specs/2026-03-29-actor-system-design.md b/docs/specs/2026-03-29-actor-system-design.md index 51dc3444..a1a16caa 100644 --- a/docs/specs/2026-03-29-actor-system-design.md +++ b/docs/specs/2026-03-29-actor-system-design.md @@ -362,7 +362,7 @@ abstract over the concrete actor: /// Type-erased handle that can send a specific message type. /// Useful for pub-sub patterns where the sender doesn't know the actor type. pub struct Recipient { - tx: Box>, // internal trait, not public + tx: Box + Send>, // internal trait, not public } impl Recipient { @@ -688,7 +688,7 @@ value actually changes. ``` Network → TopicListenerActor → mutations → ClientStateActor - ↓ notifies after each mutation + ↓ idle() notifies after drain StateChanged ↓ ┌──────────────────┼──────────────────┐ @@ -704,9 +704,12 @@ Network → TopicListenerActor → mutations → ClientStateActor ### How it works -1. **`ClientStateActor`** holds `SharedState` and a list of - `Recipient` subscribers. After every mutation handler - completes, it sends `StateChanged` to all subscribers. +1. 
**`ClientStateActor`** holds `SharedState`, a list of + `Recipient` subscribers, and a `dirty` flag. Mutation + handlers set `dirty = true`. The `idle()` hook (called after the + mailbox drains all pending messages) checks the flag, sends + `StateChanged` to all subscribers if dirty, and resets. This + batches a burst of mutations into a single notification round. 2. **`DerivedStateActor`** is a generic actor parameterized by: - A selector: `Arc T + Send + Sync>`