Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions apps/cli/src/commands/results/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import path from 'node:path';
import {
DEFAULT_THRESHOLD,
type EvaluationResult,
type GitListedRun,
type ResultsConfig,
type ResultsRepoStatus,
directPushResults,
Expand All @@ -24,11 +25,10 @@ import {
listResultFilesFromRunsDir,
} from '../inspect/utils.js';


// ── In-memory TTL cache for listGitRuns ────────────────────────────
// Avoids repeated expensive git ls-tree + git cat-file --batch operations
// on every API request. Cache key is repoDir, TTL is 60 seconds.
const gitRunsCache = new Map<string, { data: any; expiresAt: number }>();
const gitRunsCache = new Map<string, { data: Promise<GitListedRun[]>; expiresAt: number }>();
const GIT_RUNS_CACHE_TTL_MS = 60_000;

function cachedListGitRuns(repoDir: string) {
Expand All @@ -40,12 +40,14 @@ function cachedListGitRuns(repoDir: string) {
const promise = listGitRuns(repoDir);
gitRunsCache.set(repoDir, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });
// Evict stale entry once the promise settles so a fresh fetch replaces it
promise.catch(() => {}).finally(() => {
const entry = gitRunsCache.get(repoDir);
if (entry && entry.expiresAt <= Date.now()) {
gitRunsCache.delete(repoDir);
}
});
promise
.catch(() => {})
.finally(() => {
const entry = gitRunsCache.get(repoDir);
if (entry && entry.expiresAt <= Date.now()) {
gitRunsCache.delete(repoDir);
}
});
return promise;
}

Expand Down
4 changes: 3 additions & 1 deletion apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1633,7 +1633,9 @@ export const resultsServeCommand = command({
// Clone or pull any project entries that declare a source.
// Non-blocking: fire-and-forget so startup is instant even when some
// project paths are missing or slow (e.g. /tmp paths that timeout).
syncProjects(registry.projects).catch((err) => console.error("Background project sync failed:", err));
syncProjects(registry.projects).catch((err) =>
console.error('Background project sync failed:', err),
);

try {
let results: EvaluationResult[] = [];
Expand Down
4 changes: 2 additions & 2 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,9 @@ describe('loadResults', () => {
// ── resolveDashboardMode ───────────────────────────────────────────────

describe('resolveDashboardMode', () => {
it('defaults to single-project mode when no projects are registered', () => {
it('defaults to project dashboard mode when no projects are registered', () => {
expect(resolveDashboardMode(0, {})).toEqual({
projectDashboard: false,
projectDashboard: true,
});
});

Expand Down
4 changes: 2 additions & 2 deletions apps/cli/test/unit/studio-navigation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import {
} from '../../../studio/src/lib/navigation.ts';

describe('studio navigation helpers', () => {
it('redirects the root entrypoint to the only registered project', () => {
expect(resolveIndexRoute(['demo-project'], undefined, 'analytics')).toEqual({
it('redirects when the preferred project id matches a registered project', () => {
expect(resolveIndexRoute(['demo-project'], undefined, 'demo-project', 'analytics')).toEqual({
kind: 'redirect',
redirectPath: '/projects/demo-project?tab=analytics',
});
Expand Down
12 changes: 2 additions & 10 deletions apps/studio/src/components/EvalDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,7 @@ function FilesTab({
return (
<div className="relative flex h-full min-h-[400px] gap-4">
{/* FileTree panel — desktop: side-by-side, mobile: full-width slide-over */}
<div
className={`${
mobileShowTree ? 'block' : 'hidden'
} md:block w-full md:w-auto`}
>
<div className={`${mobileShowTree ? 'block' : 'hidden'} md:block w-full md:w-auto`}>
<FileTree
files={files}
selectedPath={effectivePath}
Expand All @@ -304,11 +300,7 @@ function FilesTab({
</div>

{/* MonacoViewer panel — desktop: side-by-side, mobile: full-width */}
<div
className={`${
!mobileShowTree ? 'block' : 'hidden'
} md:block flex-1 h-full`}
>
<div className={`${!mobileShowTree ? 'block' : 'hidden'} md:block flex-1 h-full`}>
<MonacoViewer value={displayValue} language={displayLanguage} height="100%" />
</div>

Expand Down
5 changes: 4 additions & 1 deletion apps/studio/src/components/StopRunButton.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ export function StopRunButton({ runId, status, isReadOnly, projectId }: StopRunB
'Stopping…'
) : (
<>
<span aria-hidden="true" className="inline-block h-2.5 w-2.5 rounded-[1px] bg-current" />
<span
aria-hidden="true"
className="inline-block h-2.5 w-2.5 rounded-[1px] bg-current"
/>
Stop
</>
)}
Expand Down
2 changes: 1 addition & 1 deletion apps/studio/src/routes/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ import {
useStudioConfig,
} from '~/lib/api';
import {
type StudioTabId,
initialProjectRedirectStorageKey,
resolveIndexRoute,
resolveInitialProjectRedirect,
type StudioTabId,
} from '~/lib/navigation';
import type { RunMeta } from '~/lib/types';
type TabId = StudioTabId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ description: Use when tests have race conditions, timing dependencies, or pass/f
- [ ] Move heavy reference (100+ lines) to separate files
- [ ] Use cross-references instead of repeating content from other skills
- [ ] Compress examples — one excellent example beats many mediocre ones
- [ ] When SKILL.md exceeds ~500 words for a standard skill, the heaviest section is almost always inlined reference material — extract it

### Coverage Contracts vs. Rule Restatement

When a skill author wants to enforce that the agent doesn't skip rules, the temptation is to inline each rule with its full rationale. Don't.

- **Coverage contract pattern:** Keep one-line checklist items in SKILL.md naming each rule and citing the reference file (e.g., `"Lifecycle choice — apply large-table rule in references/schema-rules.md"`). Add one sentence: "Silence on any item is itself a review gap." Close the silent-skip loophole with: "If a reference file is unavailable, say so explicitly rather than skipping it."
- **Anti-pattern:** Multi-paragraph items that restate rules and rationale already in `references/`. The fix is structural — the prose is in the wrong file, not the wrong shape. Move operational procedures (how to locate files, `find` syntax, what to record) and output-format meta (citation discipline worked examples) into `references/`. Mark that file as always-load.

### Structure

Expand Down Expand Up @@ -115,6 +123,8 @@ Match specificity to the task's fragility:
| Version printing instructions | Fragile, rely on git history |
| Hardcoded local paths | Machine-specific, not portable |
| Description summarizes workflow | The agent follows the description and skips the SKILL.md body |
| SKILL.md inlines rule prose that also lives in `references/` | Two sources of truth — the inline copy drifts from the canonical reference; agent applies the SKILL.md version and ignores the more detailed reference |
| SKILL.md embeds operational procedures or worked-example pairs | Procedures (how to locate files, `find` syntax, what to record) and output-format meta (citation discipline examples) belong in `references/` per progressive disclosure |

## Discipline-Enforcing Skills (Additional Checks)

Expand All @@ -125,3 +135,5 @@ For skills that enforce rules (TDD, verification, coding standards):
- [ ] Red flags list for self-checking
- [ ] "Spirit vs letter" addressed: "Violating the letter IS violating the spirit"
- [ ] Hard gates at critical decision points
- [ ] Discipline patterns (output-format meta, citation examples, verification procedures) live in `references/` — SKILL.md names them in one line and cites the file
- [ ] Discipline reference file is marked as always-load so the agent cannot bypass it (don't inline to guarantee coverage — mark as unmissable instead)
Loading