Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions src/auth.js
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,41 @@ export function acquireAccountByKey(apiKey, modelKey = null) {
};
}

/**
 * Report whether one specific (pinned) account can take a request right now
 * and, when it cannot, why and how long the caller should wait. Used by
 * strict Cascade reuse mode, where switching accounts would lose server-side
 * conversation context.
 *
 * Checks run in this order and the first failing one wins: account missing,
 * non-active status, account-wide rate limit, per-model rate limit, RPM tier
 * limit of zero or less, RPM window full, model not allowed for the account.
 * An already-expired per-model rate-limit entry is deleted as a side effect.
 *
 * @param {string} apiKey - API key identifying the pinned account.
 * @param {string|null} [modelKey=null] - Model to check; model-specific
 *   checks are skipped when falsy.
 * @returns {{available: boolean, reason: string, retryAfterMs: number}}
 *   `retryAfterMs` is 0 when available, at least 1000 for time-based limits,
 *   and 60000 for conditions with no known expiry.
 */
export function getAccountAvailability(apiKey, modelKey = null) {
  const now = Date.now();
  const DEFAULT_RETRY_MS = 60_000;

  // Small builders so every early exit below has the same result shape.
  const blocked = (reason, retryAfterMs = DEFAULT_RETRY_MS) => ({ available: false, reason, retryAfterMs });
  const msUntil = (deadline) => Math.max(1000, deadline - now);

  const account = accounts.find((candidate) => candidate.apiKey === apiKey);
  if (!account) {
    return blocked('missing');
  }
  if (account.status !== 'active') {
    return blocked(`status:${account.status}`);
  }

  const accountLimitEnds = account.rateLimitedUntil;
  if (accountLimitEnds && accountLimitEnds > now) {
    return blocked('rate_limited', msUntil(accountLimitEnds));
  }

  const perModelLimits = account._modelRateLimits;
  if (modelKey && perModelLimits) {
    const modelLimitEnds = perModelLimits[modelKey];
    if (modelLimitEnds) {
      if (modelLimitEnds > now) {
        return blocked('model_rate_limited', msUntil(modelLimitEnds));
      }
      // Expired entry: drop it so the map does not accumulate stale keys.
      if (modelLimitEnds <= now) {
        delete perModelLimits[modelKey];
      }
    }
  }

  const rpmLimit = rpmLimitFor(account);
  if (rpmLimit <= 0) {
    return blocked('tier_expired');
  }

  const requestsInWindow = pruneRpmHistory(account, now);
  if (requestsInWindow >= rpmLimit) {
    // A slot frees up once the oldest recorded request leaves the window.
    const oldestRequestAt = account._rpmHistory?.[0] || now;
    return blocked('rpm_full', msUntil(oldestRequestAt + RPM_WINDOW_MS));
  }

  if (modelKey && !isModelAllowedForAccount(account, modelKey)) {
    return blocked('model_not_available');
  }

  return { available: true, reason: 'available', retryAfterMs: 0 };
}

/**
* Snapshot of per-account RPM usage, for dashboard display.
*/
Expand Down
21 changes: 17 additions & 4 deletions src/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ function contentToString(content) {
return content == null ? '' : JSON.stringify(content);
}

/**
 * Read a strictly positive integer from an environment variable.
 *
 * The value must consist solely of decimal digits (surrounding whitespace and
 * a leading `+` are tolerated). Anything else falls back to the default
 * instead of being partially parsed: `parseInt()` stops at the first
 * non-digit, so "1e6" would silently become 1 and "200kb" would become 200.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset,
 *   empty, malformed, not a safe integer, or not greater than zero.
 * @returns {number} The parsed integer, or `fallback`.
 */
function positiveIntEnv(name, fallback) {
  const raw = (process.env[name] ?? '').trim();
  if (!/^\+?\d+$/.test(raw)) return fallback;
  const value = Number(raw);
  return Number.isSafeInteger(value) && value > 0 ? value : fallback;
}

/**
 * Choose the history size budget used when flattening prior turns into a
 * Cascade prompt.
 *
 * Model UIDs that contain a standalone `1m` token, or that end in `-1m` /
 * `_1m` (case-insensitive), get the budget from CASCADE_1M_HISTORY_BYTES
 * (default 900000). Every other model uses CASCADE_MAX_HISTORY_BYTES
 * (default 200000).
 *
 * @param {*} modelUid - Model identifier; falsy values are treated as ''.
 * @returns {number} History budget for this model.
 */
function cascadeHistoryBudget(modelUid) {
  const uid = String(modelUid || '');
  // `_` is a word character, so `\b1m\b` alone would miss a trailing "_1m";
  // the second alternative covers that suffix form.
  const usesLargeBudget = /\b1m\b|[-_]1m$/i.test(uid);
  return usesLargeBudget
    ? positiveIntEnv('CASCADE_1M_HISTORY_BYTES', 900_000)
    : positiveIntEnv('CASCADE_MAX_HISTORY_BYTES', 200_000);
}

// ─── WindsurfClient ────────────────────────────────────────

export class WindsurfClient {
Expand Down Expand Up @@ -233,14 +246,14 @@ export class WindsurfClient {
images = extracted.images;
if (!isResume && sysText) text = sysText + '\n\n' + text;
} else {
const MAX_HISTORY_BYTES = 200_000;
const maxHistoryBytes = cascadeHistoryBudget(modelUid);
const lines = [];
let historyBytes = 0;
for (let i = convo.length - 2; i >= 0; i--) {
const m = convo[i];
const tag = m.role === 'user' ? 'human' : 'assistant';
const line = `<${tag}>\n${contentToString(m.content)}\n</${tag}>`;
if (historyBytes + line.length > MAX_HISTORY_BYTES && lines.length > 0) {
if (historyBytes + line.length > maxHistoryBytes && lines.length > 0) {
log.info(`Cascade: trimmed history at turn ${i}/${convo.length} (${Math.round(historyBytes/1024)}KB kept, ${convo.length - 2 - i} turns dropped)`);
break;
}
Expand Down Expand Up @@ -270,14 +283,14 @@ export class WindsurfClient {
// Cascade expired — fall back to fresh with FULL history.
// text was built as resume-only (last message). Rebuild it.
if (isResume && convo.length > 1) {
const MAX_HISTORY_BYTES = 200_000;
const maxHistoryBytes = cascadeHistoryBudget(modelUid);
const lines = [];
let historyBytes = 0;
for (let i = convo.length - 2; i >= 0; i--) {
const m = convo[i];
const tag = m.role === 'user' ? 'human' : 'assistant';
const line = `<${tag}>\n${contentToString(m.content)}\n</${tag}>`;
if (historyBytes + line.length > MAX_HISTORY_BYTES && lines.length > 0) break;
if (historyBytes + line.length > maxHistoryBytes && lines.length > 0) break;
lines.unshift(line);
historyBytes += line.length;
}
Expand Down
55 changes: 37 additions & 18 deletions src/conversation-pool.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,30 @@
* Windsurf backend keep its own per-cascade context cached — we avoid
* resending the full history on each turn and the server responds faster.
*
* The key is a "fingerprint" of the stable caller-visible trajectory up to
* (but not including) the newest user/tool result turn. Assistant turns are
* not part of the hash. A client sending [u1, a1, u2] looks up fp([u1]); a
* hit means we already drove the cascade to exactly that state. We then
* `SendUserCascadeMessage(u2)` on the stored cascade_id and, on success,
* re-store the entry under fp([u1, u2]) for the next turn.
*
* Safety rails:
* - Entries are pinned to a specific (apiKey, lsPort) pair. We must reuse
* the same LS and the same account or the cascade_id is meaningless.
* - A checked-out entry is removed from the pool. Concurrent second request
* with the same fingerprint falls back to a fresh cascade.
* - TTL defaults to 30 min (override with CASCADE_POOL_TTL_MS); LRU eviction
* at 500 entries.
*/

import { createHash } from 'crypto';

/**
 * Read a strictly positive integer from an environment variable.
 *
 * The value must consist solely of decimal digits (surrounding whitespace and
 * a leading `+` are tolerated). Anything else falls back to the default
 * instead of being partially parsed: `parseInt()` stops at the first
 * non-digit, so "1e6" would silently become 1 and "30min" would become 30.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset,
 *   empty, malformed, not a safe integer, or not greater than zero.
 * @returns {number} The parsed integer, or `fallback`.
 */
function positiveIntEnv(name, fallback) {
  const raw = (process.env[name] ?? '').trim();
  if (!/^\+?\d+$/.test(raw)) return fallback;
  const value = Number(raw);
  return Number.isSafeInteger(value) && value > 0 ? value : fallback;
}

// Entry time-to-live in milliseconds: 30 minutes unless CASCADE_POOL_TTL_MS
// overrides it. Declared exactly once; a second `const POOL_TTL_MS` in the
// same module scope is a redeclaration SyntaxError.
const POOL_TTL_MS = positiveIntEnv('CASCADE_POOL_TTL_MS', 30 * 60 * 1000);
// Entry-count cap for the pool.
const POOL_MAX = 500;

// fingerprint -> { cascadeId, sessionId, lsPort, apiKey, createdAt, lastAccess }
Expand Down Expand Up @@ -51,30 +58,42 @@ function canonicalise(messages) {
}

/**
* Fingerprint for "resume this conversation". Hash only stable caller-visible
* turns: normal user messages and tool results (excluding the latest one we
* are about to send). Assistant messages are excluded because clients may
* restructure content arrays, add tool_use blocks, or modify text between
* turns, causing hash mismatches and 0% hit rate (#24). Claude Code's system
* prompt also changes frequently as local project state changes, so it is
* excluded by default; set CASCADE_REUSE_HASH_SYSTEM=1 if strict
* system-prompt isolation matters more than reuse hit rate for a deployment.
*/
/**
 * Build the system-prompt component of a conversation fingerprint.
 *
 * Returns '' unless the CASCADE_REUSE_HASH_SYSTEM environment variable is
 * exactly '1'. When enabled, the content of every `system` message is
 * collected in order (strings as-is, anything else JSON-encoded, with a
 * null/undefined content encoded as an empty string) and joined with NUL.
 *
 * @param {Array<{role: string, content?: *}>} messages - Conversation messages.
 * @returns {string} Joined system content, or '' when disabled.
 */
function systemPrefix(messages) {
  const hashSystem = process.env.CASCADE_REUSE_HASH_SYSTEM === '1';
  if (!hashSystem) {
    return '';
  }
  const parts = messages.reduce((collected, msg) => {
    if (msg.role === 'system') {
      const { content } = msg;
      collected.push(typeof content === 'string' ? content : JSON.stringify(content ?? ''));
    }
    return collected;
  }, []);
  return parts.join('\0');
}

/**
 * Select the turns that take part in conversation fingerprinting.
 *
 * Keeps `user` messages unchanged (same object references) and `tool`
 * messages as shallow copies whose role is rewritten to 'tool_result'.
 * Messages with any other role are dropped. Input order is preserved and
 * the input objects are not mutated.
 *
 * @param {Array<{role: string}>} messages - Conversation messages.
 * @returns {Array<object>} The user and tool-result turns, in order.
 */
function stableTurns(messages) {
  const kept = [];
  messages.forEach((msg) => {
    if (msg.role === 'user') {
      kept.push(msg);
    } else if (msg.role === 'tool') {
      kept.push({ ...msg, role: 'tool_result' });
    }
  });
  return kept;
}

/**
 * Fingerprint of the conversation state BEFORE the newest stable turn, used
 * to look up a cascade that was already driven to that state.
 *
 * Only stable turns (user messages and tool results, per `stableTurns`) are
 * hashed, and the last of them is excluded because it is the turn about to
 * be sent. The previous user-only computation returned first, which left the
 * tool-result-aware path unreachable; it has been removed so tool results
 * take part in the hash.
 *
 * @param {Array<object>} messages - Full message list from the client.
 * @param {string} [modelKey=''] - Model key mixed into the hash.
 * @returns {string|null} Hash string, or null when `messages` is not an
 *   array, has fewer than two messages, or has fewer than two stable turns.
 */
export function fingerprintBefore(messages, modelKey = '') {
  if (!Array.isArray(messages) || messages.length < 2) return null;
  const turns = stableTurns(messages);
  if (turns.length < 2) return null;
  const priorTurns = turns.slice(0, -1);
  return sha256(modelKey + '\0' + systemPrefix(messages) + '\0' + JSON.stringify(canonicalise(priorTurns)));
}

/**
 * Fingerprint of the conversation state AFTER the newest stable turn, used
 * as the key under which a cascade is stored for the next request.
 *
 * All stable turns (user messages and tool results, per `stableTurns`) are
 * hashed. The previous user-only computation returned first, which left the
 * tool-result-aware path unreachable; it has been removed so tool results
 * take part in the hash. A non-array `messages` now yields null, matching
 * `fingerprintBefore`, instead of throwing.
 *
 * @param {Array<object>} messages - Full message list from the client.
 * @param {string} [modelKey=''] - Model key mixed into the hash.
 * @returns {string|null} Hash string, or null when `messages` is not an
 *   array or contains no stable turns.
 */
export function fingerprintAfter(messages, modelKey = '') {
  if (!Array.isArray(messages)) return null;
  const turns = stableTurns(messages);
  if (!turns.length) return null;
  return sha256(modelKey + '\0' + systemPrefix(messages) + '\0' + JSON.stringify(canonicalise(turns)));
}

function prune(now) {
Expand Down
Loading