From 9aab644b31ead90d7d9d16ab8ac5870d7f86bab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=99=93=E9=9B=B7?= Date: Sat, 28 Feb 2026 00:39:58 +0800 Subject: [PATCH] =?UTF-8?q?#=20Pull=20Request:=20=E6=99=BA=E8=83=BD=20CPU?= =?UTF-8?q?=20=E8=B4=9F=E8=BD=BD=E9=98=88=E5=80=BC=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E8=AE=A1=E7=AE=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## 问题描述 **当前问题**: - `EVOLVE_LOAD_MAX` 默认值为固定 `2.0` - 对单核机器过于激进(阈值 2.0 远超单核承受能力) - 对多核机器过于保守(4 核机器阈值 2.0 过低,正常负载 2-3 也会触发退避) - 导致 evolver 在正常系统负载下频繁退避,无法正常工作 **实际案例**: - 用户环境:4 核 CPU,系统负载 2.07-3.15(正常范围) - 旧阈值:2.0 - 结果:evolver 持续退避,32 小时未运行 ## 解决方案 **智能默认值**: - 单核机器:`0.9`(经验法则 0.8-1.0) - 多核机器:`核心数 × 0.9`(经验法则 0.8-1.0) - 仍支持 `EVOLVE_LOAD_MAX` 环境变量覆盖 **示例**: | CPU 核心数 | 旧阈值 | 新阈值 | 说明 | |-----------|--------|--------|------| | 1 核 | 2.0 | 0.9 | 降低 55%,更安全 | | 2 核 | 2.0 | 1.8 | 降低 10% | | 4 核 | 2.0 | 3.6 | 提高 80%,更合理 | | 8 核 | 2.0 | 7.2 | 提高 260%,更合理 | | 16 核 | 2.0 | 14.4 | 提高 620%,更合理 | ## 代码改动 ### 1. 新增函数 `getDefaultLoadMax()` ```javascript // Calculate intelligent default load threshold based on CPU cores // Rule of thumb (感谢晓雷的建议): // - Single-core: 0.8-1.0 (use 0.9) // - Multi-core: cores × 0.8-1.0 (use 0.9) // - Production: reserve 20% headroom for burst traffic function getDefaultLoadMax() { const cpuCount = os.cpus().length; if (cpuCount === 1) { // Single-core machine: use conservative threshold return 0.9; } else { // Multi-core machine: cores × 0.9 // Examples: 4 cores → 3.6, 8 cores → 7.2, 16 cores → 14.4 return cpuCount * 0.9; } } ``` ### 2. 修改负载检查逻辑 ```javascript // 旧代码 const LOAD_MAX = parseFloat(process.env.EVOLVE_LOAD_MAX || '2.0'); // 新代码 const LOAD_MAX = parseFloat(process.env.EVOLVE_LOAD_MAX || String(getDefaultLoadMax())); // 改进日志输出 if (sysLoad.load1m > LOAD_MAX) { console.log(`[Evolver] System load ${sysLoad.load1m.toFixed(2)} exceeds max ${LOAD_MAX.toFixed(1)} (auto-calculated for ${os.cpus().length} cores). Backing off ${QUEUE_BACKOFF_MS}ms.`); // ... } ``` ## 测试结果 **环境**:macOS,4 核 CPU ```bash # 测试智能默认值 CPU 核心数: 4 智能默认阈值: 3.6 当前系统负载: 2.91 # 测试环境变量覆盖 EVOLVE_LOAD_MAX=5.0 实际阈值: 5.0 ✅ 环境变量优先级正确 # 测试无环境变量 EVOLVE_LOAD_MAX: (未设置) 实际阈值: 3.6 ✅ 智能默认值生效 ``` ## 经验法则来源 **感谢**:王晓雷(晓雷)的建议 **经验法则**: - 单核机器:阈值建议 0.8~1.0 - 多核机器:阈值建议 核心数 × 0.8~1.0 - 生产环境:预留 20% 余量应对突发流量 **实现选择**:使用 0.9 作为中间值,平衡安全性和性能 ## 向后兼容性 - ✅ 完全向后兼容 - ✅ 环境变量 `EVOLVE_LOAD_MAX` 仍可覆盖 - ✅ 默认行为从"固定值"改为"智能值" - ✅ 无破坏性改动 ## 相关问题 - 解决了多核机器上 evolver 频繁退避的问题 - 解决了单核机器上阈值过高导致系统过载的问题 - 提升了 evolver 在不同硬件环境下的适应性 ## 检查清单 - [x] 代码已测试 - [x] 向后兼容 - [x] 添加了注释说明 - [x] 改进了日志输出(显示核心数) - [x] 遵循项目编码规范 - [x] 提交信息清晰 --- **提交者**:OpenClaw Agent(大龙虾 🦞) **日期**:2026-02-27 --- package.json | 4 +++- src/evolve.js | 23 +++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 2c24c3ce..4e5360b3 100644 --- a/package.json +++ b/package.json @@ -22,5 +22,7 @@ "a2a:ingest": "node scripts/a2a_ingest.js", "a2a:promote": "node scripts/a2a_promote.js" }, - "dependencies": {} + "dependencies": { + "dotenv": "^16.4.7" + } } diff --git a/src/evolve.js b/src/evolve.js index 40bb92e4..06da87c8 100644 --- a/src/evolve.js +++ b/src/evolve.js @@ -608,6 +608,24 @@ function getSystemLoad() { } } +// Calculate intelligent default load threshold based on CPU cores +// Rule of thumb (感谢晓雷的建议): +// - Single-core: 0.8-1.0 (use 0.9) +// - Multi-core: cores × 0.8-1.0 (use 0.9) +// - Production: reserve 20% headroom for burst traffic +function getDefaultLoadMax() { + const cpuCount = os.cpus().length; + + if (cpuCount === 1) { + // Single-core machine: use conservative threshold + return 0.9; + } else { + // Multi-core machine: cores × 0.9 + // Examples: 4 cores → 3.6, 8 cores → 7.2, 16 cores → 14.4 + return cpuCount * 0.9; + } +} + // Check how many agent sessions are actively being processed (modified in the last N minutes). // If the agent is busy with user conversations, evolver should back off. function getRecentActiveSessionCount(windowMs) { @@ -665,14 +683,15 @@ async function run() { // When system load is too high (e.g. too many concurrent processes, heavy I/O), // back off to prevent the evolver from contributing to load spikes. // Echo-MingXuan's Cycle #55 saw load spike from 0.02-0.50 to 1.30 before crash. - const LOAD_MAX = parseFloat(process.env.EVOLVE_LOAD_MAX || '2.0'); + const LOAD_MAX = parseFloat(process.env.EVOLVE_LOAD_MAX || String(getDefaultLoadMax())); const sysLoad = getSystemLoad(); if (sysLoad.load1m > LOAD_MAX) { - console.log(`[Evolver] System load ${sysLoad.load1m.toFixed(2)} exceeds max ${LOAD_MAX}. Backing off ${QUEUE_BACKOFF_MS}ms.`); + console.log(`[Evolver] System load ${sysLoad.load1m.toFixed(2)} exceeds max ${LOAD_MAX.toFixed(1)} (auto-calculated for ${os.cpus().length} cores). Backing off ${QUEUE_BACKOFF_MS}ms.`); writeDormantHypothesis({ backoff_reason: 'system_load_exceeded', system_load: { load1m: sysLoad.load1m, load5m: sysLoad.load5m, load15m: sysLoad.load15m }, load_max: LOAD_MAX, + cpu_cores: os.cpus().length, }); await sleepMs(QUEUE_BACKOFF_MS); return;