Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cli/src/app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ const AuthedSurface = ({
// 'queued' → waiting our turn
// 'country_blocked' → terminal region-gate message
// 'banned' → terminal account-banned message
// 'rate_limited' → hit per-model session quota; terminal for this run
//
// 'ended' deliberately falls through to <Chat>: the agent may still be
// finishing work under the server-side grace period, and the chat surface
Expand All @@ -390,7 +391,8 @@ const AuthedSurface = ({
session.status === 'queued' ||
session.status === 'none' ||
session.status === 'country_blocked' ||
session.status === 'banned')
session.status === 'banned' ||
session.status === 'rate_limited')
) {
return <WaitingRoomScreen session={session} error={sessionError} />
}
Expand Down
47 changes: 47 additions & 0 deletions cli/src/components/waiting-room-screen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ const formatElapsed = (ms: number): string => {
return `${minutes}m ${seconds.toString().padStart(2, '0')}s`
}

/**
 * Formats a retry delay for the rate-limited screen so users know when they
 * can try again. The result is a bare duration phrase (the caller supplies
 * the leading "Try again in "):
 *
 *   - non-finite or non-positive input → "any moment now"
 *   - less than one minute             → "under a minute"
 *   - less than one hour               → "45 min"
 *   - otherwise                        → "3h" / "3h 20m"
 *
 * Minutes are rounded UP: the value tells the user when a quota slot opens,
 * so under-reporting (e.g. showing "3h" with 3h 00m 29s left) would invite a
 * retry that is still rejected.
 *
 * @param ms Milliseconds until the user may retry.
 * @returns A short human-readable duration.
 */
const formatRetryAfter = (ms: number): string => {
  if (!Number.isFinite(ms) || ms <= 0) return 'any moment now'
  // Handle the sub-minute bucket before rounding; ceil would otherwise turn
  // every positive sub-minute wait into "1 min".
  if (ms < 60_000) return 'under a minute'
  const minutes = Math.ceil(ms / 60_000)
  if (minutes < 60) return `${minutes} min`
  const hours = Math.floor(minutes / 60)
  const rem = minutes % 60
  return rem === 0 ? `${hours}h` : `${hours}h ${rem}m`
}

export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
session,
error,
Expand Down Expand Up @@ -216,6 +228,18 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
<span>Elapsed </span>
{formatElapsed(elapsedMs)}
</text>
{/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
rendered for rate-limited models so the Minimax queue stays
clutter-free. */}
{session.rateLimit && (
<text style={{ fg: theme.muted, alignSelf: 'flex-start' }}>
<span>Sessions </span>
<span fg={theme.foreground}>
{session.rateLimit.recentCount} / {session.rateLimit.limit}
</span>
<span> used in last {session.rateLimit.windowHours}h</span>
</text>
)}
</box>
</>
)}
Expand Down Expand Up @@ -258,6 +282,29 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
</text>
</>
)}

{/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
last 20h). Terminal for this run — the user can exit and come
back once the oldest session in the window rolls off. */}
{session?.status === 'rate_limited' && (
<>
<text style={{ fg: theme.secondary, marginBottom: 1 }}>
⚠ Session limit reached
</text>
<text style={{ fg: theme.muted, wrapMode: 'word' }}>
You've used{' '}
<span fg={theme.foreground}>
{session.recentCount} of {session.limit}
</span>{' '}
hour-long sessions on {session.model} in the last{' '}
{session.windowHours}h. Try again in{' '}
<span fg={theme.foreground}>
{formatRetryAfter(session.retryAfterMs)}
</span>
. Press Ctrl+C to exit.
</text>
</>
)}
</box>
</box>

Expand Down
14 changes: 14 additions & 0 deletions cli/src/hooks/use-freebuff-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,19 @@ async function callSession(
return body
}
}
// 429 from POST is the per-model session-quota reject (e.g. too many GLM
// sessions in the last 20h). Terminal for the current poll — the CLI shows
// a screen explaining the limit and when the user can try again. The 429
// status (rather than 200) keeps older CLIs in their error path so they
// back off instead of tight-polling an unrecognized 200 body.
if (resp.status === 429 && method === 'POST') {
const body = (await resp.json().catch(() => null)) as
| FreebuffSessionResponse
| null
if (body && body.status === 'rate_limited') {
return body
}
}
if (!resp.ok) {
const text = await resp.text().catch(() => '')
throw new Error(
Expand Down Expand Up @@ -124,6 +137,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
case 'country_blocked':
case 'banned':
case 'model_locked':
case 'rate_limited':
case 'model_unavailable':
return null
}
Expand Down
45 changes: 45 additions & 0 deletions common/src/types/freebuff-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@
*
* The CLI uses these shapes directly; there are no client-only states.
*/

/**
 * Per-model session quota snapshot sent to the CLI.
 *
 * The waiting-room UI uses it to render "N of M sessions used" next to the
 * queue/active state. It is attached only when the joined model has a rate
 * limit applied (today: GLM 5.1 with 5 admits per 20-hour window). The
 * reject path is described by the standalone `rate_limited` status instead.
 */
export interface FreebuffSessionRateLimit {
  /** Number of admissions inside `windowHours` at the time the response
   *  was produced. */
  recentCount: number
  /** Admissions allowed per window. */
  limit: number
  /** Size of the quota window, in hours. */
  windowHours: number
  /** Model the quota applies to. */
  model: string
}

export type FreebuffSessionServerResponse =
| {
/** Waiting room is globally off; free-mode requests flow through
Expand Down Expand Up @@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse =
queueDepthByModel: Record<string, number>
estimatedWaitMs: number
queuedAt: string
/** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
* for unlimited models or when the status was produced outside the
* rate-limit check path (e.g. pure read via GET). */
rateLimit?: FreebuffSessionRateLimit
}
| {
status: 'active'
Expand All @@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse =
admittedAt: string
expiresAt: string
remainingMs: number
/** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
* for unlimited models or when the status was produced outside the
* rate-limit check path (e.g. pure read via GET). */
rateLimit?: FreebuffSessionRateLimit
}
| {
/** Session is over. While `instanceId` is present we're inside the
Expand Down Expand Up @@ -105,3 +129,24 @@ export type FreebuffSessionServerResponse =
* stops polling and shows a banned message. */
status: 'banned'
}
| {
/** User has used up their per-model admission quota in the rolling
* window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
* /session before the user is placed in the queue. `retryAfterMs` is
* the time until the oldest admission inside the window falls off
* and one quota slot opens up — clients should show the user when
* they can try again. Terminal for the CLI's current poll session;
* the user can exit and come back later. */
status: 'rate_limited'
/** The freebuff model the user tried to join. */
model: string
/** Max admissions permitted per window (e.g. 5). */
limit: number
/** Rolling window size in hours (e.g. 20). */
windowHours: number
/** Admission count inside the window at check time — will be ≥ limit. */
recentCount: number
/** Milliseconds from now until the oldest admission in the window
* exits and the user regains one quota slot. */
retryAfterMs: number
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Creates the "free_session_admit" table: one row holds a user id, a model
-- name, and an admission timestamp that defaults to now().
-- NOTE(review): presumably one row is inserted per free-session admission and
-- counted for the per-model rolling-window quota; confirm against the writer.
CREATE TABLE "free_session_admit" (
"id" text PRIMARY KEY NOT NULL,
"user_id" text NOT NULL,
"model" text NOT NULL,
"admitted_at" timestamp with time zone DEFAULT now() NOT NULL
);
--> statement-breakpoint
-- Rows are deleted with their owning user (ON DELETE cascade). The btree
-- index covers (user_id, model, admitted_at) so lookups by user and model can
-- filter or order on admission time.
ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 SQL file is missing a trailing newline

The diff ends with `\ No newline at end of file`. POSIX defines a line as ending in a newline character, so many SQL linters, editors, and diff tools flag a file whose last line is unterminated. Add a newline after the final `;`.

Loading
Loading