Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions cloudflare-gastown/src/dos/Town.do.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2464,25 +2464,33 @@ export class TownDO extends DurableObject<Env> {
if (pendingCount === 0) return;

// Check if a triage batch bead is already in progress (meaning a
// triage agent is working). We can't filter by role since triage
// uses polecat role; instead check for an open gt:triage batch bead.
// triage agent is working), or recently failed (cooldown to prevent
// rapid retry loops). Skip dispatch in either case.
const triageBatchLike = patrol.TRIAGE_LABEL_LIKE.replace(
patrol.TRIAGE_REQUEST_LABEL,
patrol.TRIAGE_BATCH_LABEL
);
const cooldownCutoff = new Date(Date.now() - DISPATCH_COOLDOWN_MS).toISOString();
const existingBatch = [
...query(
this.sql,
/* sql */ `
SELECT ${beads.bead_id} FROM ${beads}
WHERE ${beads.type} = 'issue'
AND ${beads.labels} LIKE ?
AND ${beads.status} IN ('open', 'in_progress')
AND ${beads.created_by} = 'patrol'
AND (
${beads.status} IN ('open', 'in_progress')
OR (${beads.status} = 'failed' AND ${beads.updated_at} > ?)
)
LIMIT 1
`,
[patrol.TRIAGE_LABEL_LIKE.replace(patrol.TRIAGE_REQUEST_LABEL, patrol.TRIAGE_BATCH_LABEL)]
[triageBatchLike, cooldownCutoff]
),
];
if (existingBatch.length > 0) {
console.log(
`${TOWN_LOG} maybeDispatchTriageAgent: triage agent already working, skipping (${pendingCount} pending)`
`${TOWN_LOG} maybeDispatchTriageAgent: triage batch bead active or in cooldown, skipping (${pendingCount} pending)`
);
return;
}
Expand Down Expand Up @@ -2555,19 +2563,10 @@ export class TownDO extends DurableObject<Env> {
agents.updateAgentStatus(this.sql, triageAgent.id, 'working');
} else {
agents.unhookBead(this.sql, triageAgent.id);
// Failing the batch bead triggers cooldown: the guard at the top of
// this method skips dispatch while a failed batch bead's updated_at
// is within DISPATCH_COOLDOWN_MS.
beadOps.updateBeadStatus(this.sql, triageBead.bead_id, 'failed', triageAgent.id);
// Apply dispatch cooldown so the next alarm tick doesn't immediately
// retry. Setting last_activity_at = now() makes the agent invisible
// to schedulePendingWork for DISPATCH_COOLDOWN_MS (2 min).
query(
this.sql,
/* sql */ `
UPDATE ${agent_metadata}
SET ${agent_metadata.columns.last_activity_at} = ?
WHERE ${agent_metadata.bead_id} = ?
`,
[now(), triageAgent.id]
);
Comment thread
jrf0110 marked this conversation as resolved.
console.error(`${TOWN_LOG} maybeDispatchTriageAgent: triage agent failed to start`);
}
}
Expand Down
60 changes: 37 additions & 23 deletions cloudflare-gastown/src/dos/town/patrol.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,26 +107,31 @@ export function createTriageRequest(
if (existing.length > 0) return;
}

// Global cap: skip if there are already too many open triage requests.
// Prevents unbounded accumulation during feedback loops.
const openCountRows = [
...query(
sql,
/* sql */ `
SELECT COUNT(*) AS cnt FROM ${beads}
WHERE ${beads.type} = 'issue'
AND ${beads.labels} LIKE ?
AND ${beads.status} = 'open'
`,
[TRIAGE_LABEL_LIKE]
),
];
const openCount = Number(z.object({ cnt: z.number() }).parse(openCountRows[0]).cnt);
if (openCount >= MAX_OPEN_TRIAGE_REQUESTS) {
console.warn(
`${LOG} createTriageRequest: global cap reached (${openCount} open), skipping type=${params.triageType}`
);
return;
// Global cap: skip if there are already too many open *automatic* triage
// requests (patrol-generated). Escalations are exempt from both the gate
// and the count — they are agent/user initiated and silently dropping
// them would leave the escalation bead with no automated follow-up.
if (params.triageType !== 'escalation') {
Comment thread
jrf0110 marked this conversation as resolved.
const openCountRows = [
...query(
sql,
/* sql */ `
SELECT COUNT(*) AS cnt FROM ${beads}
WHERE ${beads.type} = 'issue'
AND ${beads.labels} LIKE ?
Comment thread
jrf0110 marked this conversation as resolved.
AND ${beads.status} = 'open'
AND json_extract(${beads.metadata}, '$.triage_type') != 'escalation'
`,
[TRIAGE_LABEL_LIKE]
),
];
const openCount = Number(z.object({ cnt: z.number() }).parse(openCountRows[0]).cnt);
if (openCount >= MAX_OPEN_TRIAGE_REQUESTS) {
console.warn(
`${LOG} createTriageRequest: global cap reached (${openCount} open), skipping type=${params.triageType}`
);
return;
}
}

const metadata: TriageRequestMetadata = {
Expand Down Expand Up @@ -585,8 +590,12 @@ export function detectCrashLoops(sql: SqlStorage): void {

// Exclude triage agents from crash loop detection — their failures must
// not create new triage requests, which would feed the feedback loop.
// An agent is considered a triage agent if its current hooked bead has
// the gt:triage or gt:triage-request label (both start with "gt:triage").
// Two complementary checks:
// 1. The failed bead itself carries a triage label (covers triage batch
// bead failures, stable after unhook clears current_hook_bead_id).
// 2. The agent is currently hooked to a triage-labeled bead (covers
// resolveTriage actions like CLOSE_BEAD that fail ordinary beads
// while the triage agent is still working its batch).
const TRIAGE_LABEL_ANY = `%"gt:triage%`;

const rows = CrashRow.array().parse([
Expand All @@ -599,6 +608,11 @@ export function detectCrashLoops(sql: SqlStorage): void {
AND be.new_value = 'failed'
AND be.agent_id IS NOT NULL
AND be.created_at > ?
AND NOT EXISTS (
Comment thread
jrf0110 marked this conversation as resolved.
SELECT 1 FROM ${beads} AS failed_bead
WHERE failed_bead.${beads.columns.bead_id} = be.bead_id
AND failed_bead.${beads.columns.labels} LIKE ?
Comment thread
jrf0110 marked this conversation as resolved.
)
AND NOT EXISTS (
SELECT 1 FROM ${agent_metadata}
INNER JOIN ${beads} AS hooked
Expand All @@ -609,7 +623,7 @@ export function detectCrashLoops(sql: SqlStorage): void {
GROUP BY be.agent_id
HAVING fail_count >= ?
`,
[windowCutoff, TRIAGE_LABEL_ANY, CRASH_LOOP_THRESHOLD]
[windowCutoff, TRIAGE_LABEL_ANY, TRIAGE_LABEL_ANY, CRASH_LOOP_THRESHOLD]
),
]);

Expand Down
2 changes: 1 addition & 1 deletion cloudflare-gastown/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export type BeadFilter = {

// -- Agents (now beads + agent_metadata) --

export const AgentRole = z.enum(['polecat', 'refinery', 'mayor', 'triage']);
export const AgentRole = z.enum(['polecat', 'refinery', 'mayor']);
Comment thread
jrf0110 marked this conversation as resolved.
/** Static type inferred (via `z.infer`) from the `AgentRole` schema value of the same name. */
export type AgentRole = z.infer<typeof AgentRole>;

/** Enum schema of the allowed agent status values: 'idle', 'working', 'stalled', 'dead'. */
export const AgentStatus = z.enum(['idle', 'working', 'stalled', 'dead']);
Expand Down
Loading