From d1c163f20cd0de7b023097d09a4d8d60cc899905 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 1 Apr 2026 03:50:07 +0000 Subject: [PATCH] Refactor scanners from actor to struct for true concurrency Co-authored-by: acebytes <2820910+acebytes@users.noreply.github.com> --- .jules/bolt.md | 3 +++ Sources/Cacheout/Scanner/CacheScanner.swift | 10 ++++++---- Sources/Cacheout/Scanner/NodeModulesScanner.swift | 7 ++++--- 3 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..2705ee8 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2026-04-01 - Fix actor serialization to enable true parallel scanning +**Learning:** In Swift structured concurrency, using an `actor` to manage a `withTaskGroup` where tasks invoke synchronous, blocking I/O (like `FileManager` operations) directly on the actor inadvertently serializes the tasks, preventing parallelism. +**Action:** For stateless components interacting with thread-safe dependencies (like `FileManager.default`), use `struct`s or `nonisolated` methods to allow tasks to execute concurrently across threads and unlock true parallel scanning. diff --git a/Sources/Cacheout/Scanner/CacheScanner.swift b/Sources/Cacheout/Scanner/CacheScanner.swift index 3ce3e9c..702befd 100644 --- a/Sources/Cacheout/Scanner/CacheScanner.swift +++ b/Sources/Cacheout/Scanner/CacheScanner.swift @@ -1,13 +1,15 @@ /// # CacheScanner — Parallel Cache Category Scanner /// -/// An `actor` that scans all registered cache categories concurrently using +/// A `struct` that scans all registered cache categories concurrently using /// Swift's structured concurrency (`TaskGroup`). Each category is scanned in /// its own child task for maximum parallelism. /// /// ## Thread Safety /// -/// Uses the `actor` isolation model to ensure thread-safe access to internal state. 
-/// All public methods are `async` and can be called from any concurrency context. +/// Uses a stateless `struct` instead of an `actor` to prevent unintentional +/// serialization of child tasks when invoking `scanCategory`. Because all +/// methods operate on local variables and thread-safe dependencies (`FileManager.default`), +/// concurrent execution is safe and truly parallel. /// /// ## Disk Size Calculation /// @@ -26,7 +28,7 @@ import Foundation -actor CacheScanner { +struct CacheScanner { private let fileManager = FileManager.default func scanAll(_ categories: [CacheCategory]) async -> [ScanResult] { diff --git a/Sources/Cacheout/Scanner/NodeModulesScanner.swift b/Sources/Cacheout/Scanner/NodeModulesScanner.swift index 3ed4d8c..bec959b 100644 --- a/Sources/Cacheout/Scanner/NodeModulesScanner.swift +++ b/Sources/Cacheout/Scanner/NodeModulesScanner.swift @@ -1,6 +1,6 @@ /// # NodeModulesScanner — Recursive node_modules Finder /// -/// An `actor` that recursively searches common developer project directories +/// A `struct` that recursively searches common developer project directories /// for `node_modules` folders. Designed to find abandoned or stale dependencies /// that consume significant disk space. /// @@ -21,14 +21,15 @@ /// /// ## Performance /// -/// - Parallel scanning of root directories via `TaskGroup` +/// - Parallel scanning of root directories via `TaskGroup` (using a stateless `struct` +/// avoids actor serialization, unlocking true concurrent filesystem traversal) /// - Early termination when `node_modules` found (no deeper recursion) /// - Skip list eliminates most irrelevant directories /// - `maxDepth` cap prevents excessive filesystem traversal import Foundation -actor NodeModulesScanner { +struct NodeModulesScanner { private let fileManager = FileManager.default /// Common directories where developers keep projects