From 0ff1448b126c567ff605ffe9b5de55159d47bc91 Mon Sep 17 00:00:00 2001
From: Tony Deng <tony@runloop.ai>
Date: Fri, 6 Mar 2026 16:07:52 -0800
Subject: [PATCH 1/5] update with secrets and public benchmark display

---
 README.md                                |  23 ++
 misc/config.yml                          |   3 +-
 package.json                             |   1 -
 src/screens/BenchmarkJobCreateScreen.tsx | 371 +++++++++++++++++++++++
 src/screens/BenchmarkJobDetailScreen.tsx |  27 +-
 src/screens/BenchmarkJobListScreen.tsx   |  82 +++--
 src/screens/BenchmarkListScreen.tsx      |  25 +-
 src/services/benchmarkService.ts         |   4 +-
 8 files changed, 488 insertions(+), 48 deletions(-)

diff --git a/README.md b/README.md
index 86d1bb95..577406c8 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,29 @@ pnpm run build
 
 # Watch mode
 pnpm run dev
+```
+
+### Debugging the TUI
+
+If the TUI crashes (e.g. when pressing Enter on a form field), you can capture logs to inspect the error:
+
+**Option 1 – stderr to file (no debug env)**  
+Useful to see uncaught errors and stack traces that the app writes to stderr:
+
+```bash
+pnpm run build
+pnpm run start:debug
+# Reproduce the crash, then:
+cat debug.log
+```
+
+**Option 2 – run under Node with inspector**  
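+To get a stack trace from an uncaught exception, run with Node’s inspector. With `--inspect-brk` the process pauses before your code starts; attach a debugger, enable “Pause on uncaught exceptions”, then resume and reproduce the crash to inspect the stack: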
+
+```bash
+node --inspect-brk dist/cli.js
+# Attach Chrome/Edge to the URL shown (e.g. chrome://inspect) and resume; reproduce the crash.
+```
 
 ## Contributing
 
diff --git a/misc/config.yml b/misc/config.yml
index f3a417bc..f20e7d65 100644
--- a/misc/config.yml
+++ b/misc/config.yml
@@ -9,8 +9,7 @@ command: rli
 cwd: ~
 
 # Export additional ENV variables
-env:
-  recording: true
+env: {}
 
 # Explicitly set the number of columns
 # or use `auto` to take the current
diff --git a/package.json b/package.json
index 82b3e67d..76abd48e 100644
--- a/package.json
+++ b/package.json
@@ -11,7 +11,6 @@
     "build:mcp": "pnpm run build && node scripts/build-mcp.js",
     "dev": "tsc --watch",
     "start": "node dist/cli.js",
-    "start:debug": "node dist/cli.js 2> debug.log",
     "prepublishOnly": "pnpm run build",
     "version:patch": "pnpm version patch",
     "version:minor": "pnpm version minor",
diff --git a/src/screens/BenchmarkJobCreateScreen.tsx b/src/screens/BenchmarkJobCreateScreen.tsx
index 565e0171..c4138c52 100644
--- a/src/screens/BenchmarkJobCreateScreen.tsx
+++ b/src/screens/BenchmarkJobCreateScreen.tsx
@@ -29,12 +29,20 @@ import {
   type OrchestratorConfig,
 } from "../services/benchmarkJobService.js";
 import type { Benchmark } from "../store/benchmarkStore.js";
+import { getClient } from "../utils/client.js";
+
+/** Secret list item for account secrets picker */
+interface SecretListItem {
+  id: string;
+  name: string;
+}
 
 type FormField =
   | "source_type"
   | "benchmark"
   | "scenarios"
   | "agents"
+  | "secrets"
   | "model_names"
   | "name"
   | "agent_timeout"
@@ -49,6 +57,8 @@ interface FormData {
   scenarioNames: string[];
   agentIds: string[];
   agentNames: string[];
+  /** Env var name -> secret name (account secret) */
+  secretsMapping: Record<string, string>;
   /** Comma-separated model names (one per agent, or one value applied to all) */
   modelNamesInput: string;
   name: string;
@@ -61,6 +71,9 @@ type ScreenState =
   | "picking_benchmark"
   | "picking_scenarios"
   | "picking_agents"
+  | "secrets_config"
+  | "picking_secret"
+  | "entering_env_var"
   | "creating"
   | "success"
   | "error";
@@ -80,6 +93,187 @@ interface BenchmarkJobCreateScreenProps {
   cloneConcurrentTrials?: string;
 }
 
+/**
+ * Secrets config sub-screen: list mappings, Add, Done. Handles its own input so hooks are stable.
+ */
+function SecretsConfigView({
+  mappingEntries,
+  selectedIndex,
+  onSelectIndex,
+  onAdd,
+  onDone,
+  onRemove,
+  onBack,
+}: {
+  mappingEntries: [string, string][];
+  selectedIndex: number;
+  onSelectIndex: (i: number) => void;
+  onAdd: () => void;
+  onDone: () => void;
+  onRemove: (envVar: string) => void;
+  onBack: () => void;
+}) {
+  const totalOptions = mappingEntries.length + 2;
+  const idx = Math.min(selectedIndex, Math.max(0, totalOptions - 1));
+
+  useInput((_input, key) => {
+    if (key.upArrow && idx > 0) {
+      onSelectIndex(idx - 1);
+    } else if (key.downArrow && idx < totalOptions - 1) {
+      onSelectIndex(idx + 1);
+    } else if (key.return) {
+      if (idx === mappingEntries.length) {
+        onAdd();
+      } else if (idx === mappingEntries.length + 1) {
+        onDone();
+      } else {
+        const keyToRemove = mappingEntries[idx][0];
+        onRemove(keyToRemove);
+        onSelectIndex(Math.max(0, idx - 1));
+      }
+    } else if (key.escape) {
+      onBack();
+    }
+  });
+
+  return (
+    <>
+      <Breadcrumb
+        items={[
+          { label: "Home" },
+          { label: "Benchmarks" },
+          { label: "Jobs" },
+          { label: "Create" },
+          { label: "Secrets", active: true },
+        ]}
+      />
+      <Box flexDirection="column" paddingX={1}>
+        <Box marginBottom={1}>
+          <Text color={colors.primary} bold>
+            {figures.pointer} Secrets (env → secret)
+          </Text>
+        </Box>
+        {mappingEntries.map(([envVar, secretName], i) => (
+          <Box key={envVar} marginBottom={0}>
+            <Box width={4}>
+              <Text
+                color={idx === i ? colors.primary : colors.textDim}
+                bold={idx === i}
+              >
+                {idx === i ? figures.pointer : " "}
+              </Text>
+            </Box>
+            <Text color={colors.textDim}>
+              {envVar} → {secretName}
+            </Text>
+            {idx === i && (
+              <Text color={colors.textDim} dimColor>
+                {" "}
+                Enter to remove
+              </Text>
+            )}
+          </Box>
+        ))}
+        <Box marginBottom={0}>
+          <Box width={4}>
+            <Text
+              color={idx === mappingEntries.length ? colors.primary : colors.textDim}
+              bold={idx === mappingEntries.length}
+            >
+              {idx === mappingEntries.length ? figures.pointer : " "}
+            </Text>
+          </Box>
+          <Text color={idx === mappingEntries.length ? colors.primary : colors.text}>
+            + Add secret
+          </Text>
+        </Box>
+        <Box marginBottom={0}>
+          <Box width={4}>
+            <Text
+              color={
+                idx === mappingEntries.length + 1 ? colors.primary : colors.textDim
+              }
+              bold={idx === mappingEntries.length + 1}
+            >
+              {idx === mappingEntries.length + 1 ? figures.pointer : " "}
+            </Text>
+          </Box>
+          <Text
+            color={
+              idx === mappingEntries.length + 1 ? colors.primary : colors.text
+            }
+          >
+            Done
+          </Text>
+        </Box>
+      </Box>
+      <NavigationTips
+        tips={[
+          { key: "Enter", label: "Select" },
+          { key: "Esc", label: "Back to form" },
+        ]}
+      />
+    </>
+  );
+}
+
+/**
+ * Inline view to enter the env var name for a selected secret.
+ * Pre-fills with secret name so Enter uses it as-is; user can edit if needed.
+ */
+function EnvVarInputView({
+  secretName,
+  onSubmit,
+  onCancel,
+}: {
+  secretName: string;
+  onSubmit: (value: string) => void;
+  onCancel: () => void;
+}) {
+  const [value, setValue] = React.useState(secretName);
+  useInput((_input, key) => {
+    if (key.return) {
+      onSubmit(value.trim() || secretName);
+    } else if (key.escape) {
+      onCancel();
+    }
+  });
+  return (
+    <>
+      <Breadcrumb
+        items={[
+          { label: "Home" },
+          { label: "Benchmarks" },
+          { label: "Jobs" },
+          { label: "Create" },
+          { label: "Secret env var", active: true },
+        ]}
+      />
+      <Box flexDirection="column" paddingX={1}>
+        <Box marginBottom={1}>
+          <Text color={colors.textDim} dimColor>
+            Env var name for secret &quot;{secretName}&quot;:
+          </Text>
+        </Box>
+        <Box marginLeft={2}>
+          <TextInput
+            value={value}
+            onChange={setValue}
+            placeholder="e.g. ANTHROPIC_API_KEY (or use secret name as-is)"
+            onSubmit={() => onSubmit(value.trim() || secretName)}
+          />
+        </Box>
+      </Box>
+      <NavigationTips
+        tips={[
+          { key: "Enter", label: "Add (uses secret name if empty)" },
+          { key: "Esc", label: "Cancel" },
+        ]}
+      />
+    </>
+  );
+}
+
 /**
  * Success screen component with input handling
  */
@@ -174,16 +368,26 @@ export function BenchmarkJobCreateScreen({
 
   const [formData, setFormData] = React.useState<FormData>(() => {
     let modelNamesInput = "";
+    let secretsMapping: Record<string, string> = {};
     try {
       if (cloneAgentConfigs) {
         const arr = JSON.parse(cloneAgentConfigs) as Array<{
           modelName?: string | null;
           model_name?: string | null;
+          secrets?: Record<string, string>;
+          secret_names?: Record<string, string>;
         }>;
         modelNamesInput = arr
           .map((a) => a.modelName ?? a.model_name ?? "")
           .filter(Boolean)
           .join(", ");
+        // Merge secrets from all agent configs into one mapping (clone prefill)
+        const allSecrets = arr
+          .map((a) => a.secrets ?? a.secret_names)
+          .filter((s): s is Record<string, string> => !!s && typeof s === "object");
+        if (allSecrets.length > 0) {
+          secretsMapping = Object.assign({}, ...allSecrets);
+        }
       }
     } catch {
       // ignore invalid JSON
@@ -196,6 +400,7 @@ export function BenchmarkJobCreateScreen({
       scenarioNames: [],
       agentIds: cloneAgentIds ? cloneAgentIds.split(",") : [],
       agentNames: cloneAgentNames ? cloneAgentNames.split(",") : [],
+      secretsMapping,
       modelNamesInput,
       name: cloneJobName ? `${cloneJobName} (clone)` : "",
       agentTimeout: cloneAgentTimeout || "",
@@ -205,6 +410,14 @@ export function BenchmarkJobCreateScreen({
 
   const [createdJob, setCreatedJob] = React.useState<BenchmarkJob | null>(null);
   const [error, setError] = React.useState<Error | null>(null);
+  /** When adding a secret: selected secret awaiting env var name */
+  const [pendingSecretForEnv, setPendingSecretForEnv] = React.useState<{
+    id: string;
+    name: string;
+  } | null>(null);
+  /** In secrets_config, index of the highlighted row: mapping rows first, then "Add secret", then "Done" */
+  const [secretsConfigSelectedIndex, setSecretsConfigSelectedIndex] =
+    React.useState(0);
 
   // Handle Ctrl+C to exit
   useExitOnCtrlC();
@@ -288,6 +501,16 @@ export function BenchmarkJobCreateScreen({
       required: true,
       description: "Select one or more agents to run",
     },
+    {
+      key: "secrets",
+      label: "Secrets (env → secret)",
+      type: "picker",
+      required: false,
+      description:
+        cloneFromJobId && Object.keys(formData.secretsMapping).length === 0
+          ? "Optional. The API does not return secrets on job fetch; add any needed env→secret mappings here."
+          : "Optional. Map environment variable names to account secrets.",
+    },
     {
       key: "model_names",
       label: "Model names (comma-separated, optional)",
@@ -464,6 +687,55 @@ export function BenchmarkJobCreateScreen({
     [fetchAgentsPage],
   );
 
+  // Fetch account secrets for picker (client-side pagination)
+  const fetchSecretsPage = React.useCallback(
+    async (params: { limit: number; startingAt?: string; search?: string }) => {
+      const client = getClient();
+      const result = await client.secrets.list({ limit: 5000 });
+      const raw = (result.secrets || []) as Array<{ id: string; name: string }>;
+      let items = raw.map((s) => ({ id: s.id, name: s.name || s.id }));
+      if (params.search) {
+        const q = params.search.toLowerCase();
+        items = items.filter(
+          (s) =>
+            s.name.toLowerCase().includes(q) || s.id.toLowerCase().includes(q),
+        );
+      }
+      const startIdx = params.startingAt
+        ? items.findIndex((s) => s.id === params.startingAt) + 1
+        : 0;
+      const page = items.slice(startIdx, startIdx + params.limit);
+      return {
+        items: page,
+        hasMore: startIdx + params.limit < items.length,
+        totalCount: items.length,
+      };
+    },
+    [],
+  );
+
+  const secretPickerConfig = React.useMemo(
+    () => ({
+      title: "Select Secret",
+      fetchPage: fetchSecretsPage,
+      getItemId: (s: SecretListItem) => s.id,
+      getItemLabel: (s: SecretListItem) => s.name,
+      getItemStatus: () => undefined,
+      mode: "single" as const,
+      minSelection: 1,
+      emptyMessage: "No secrets found",
+      searchPlaceholder: "Search secrets...",
+      breadcrumbItems: [
+        { label: "Home" },
+        { label: "Benchmarks" },
+        { label: "Jobs" },
+        { label: "Create" },
+        { label: "Select Secret", active: true },
+      ],
+    }),
+    [fetchSecretsPage],
+  );
+
   // Handle benchmark selection (single)
   const handleBenchmarkSelect = React.useCallback((items: Benchmark[]) => {
     if (items.length > 0) {
@@ -497,6 +769,37 @@ export function BenchmarkJobCreateScreen({
     setScreenState("form");
   }, []);
 
+  // After picking a secret: set pending and go to env var input
+  const handleSecretSelect = React.useCallback((items: SecretListItem[]) => {
+    if (items.length > 0) {
+      const s = items[0];
+      setPendingSecretForEnv({ id: s.id, name: s.name });
+      setScreenState("entering_env_var");
+    } else {
+      setScreenState("secrets_config");
+    }
+  }, []);
+
+  // After entering env var for pending secret: add mapping and return to secrets_config
+  // If envVarName is empty, use secret name as-is for the mapping (env var name = secret name).
+  const handleEnvVarForSecretSubmit = React.useCallback(
+    (envVarName: string) => {
+      const envVarToUse = envVarName.trim() || pendingSecretForEnv?.name || "";
+      if (envVarToUse && pendingSecretForEnv) {
+        setFormData((prev) => ({
+          ...prev,
+          secretsMapping: {
+            ...prev.secretsMapping,
+            [envVarToUse]: pendingSecretForEnv.name,
+          },
+        }));
+      }
+      setPendingSecretForEnv(null);
+      setScreenState("secrets_config");
+    },
+    [pendingSecretForEnv],
+  );
+
   // Handle create
   const handleCreate = React.useCallback(async () => {
     if (!isFormValid) return;
@@ -545,6 +848,13 @@ export function BenchmarkJobCreateScreen({
         });
       }
 
+      // Form secrets are source of truth: apply to all agents
+      if (Object.keys(formData.secretsMapping).length > 0) {
+        for (const config of agentConfigs) {
+          config.secrets = { ...formData.secretsMapping };
+        }
+      }
+
       // Use cloned orchestrator config if available, otherwise build from form
       let orchestratorConfig: OrchestratorConfig | undefined;
       if (cloneOrchestratorConfig) {
@@ -617,6 +927,12 @@ export function BenchmarkJobCreateScreen({
         currentField === "agents"
       ) {
         setScreenState("picking_agents");
+      } else if (
+        currentFieldDef?.type === "picker" &&
+        currentField === "secrets"
+      ) {
+        setScreenState("secrets_config");
+        setSecretsConfigSelectedIndex(0);
       } else if (
         currentFieldDef?.type === "action" &&
         currentField === "create"
@@ -629,6 +945,54 @@ export function BenchmarkJobCreateScreen({
     }
   });
 
+  // ----- Secrets sub-flow -----
+  const mappingEntries = Object.entries(formData.secretsMapping);
+
+  if (screenState === "secrets_config") {
+    return (
+      <SecretsConfigView
+        mappingEntries={mappingEntries}
+        selectedIndex={secretsConfigSelectedIndex}
+        onSelectIndex={setSecretsConfigSelectedIndex}
+        onAdd={() => setScreenState("picking_secret")}
+        onDone={() => setScreenState("form")}
+        onRemove={(envVar) => {
+          setFormData((prev) => {
+            const next = { ...prev.secretsMapping };
+            delete next[envVar];
+            return { ...prev, secretsMapping: next };
+          });
+          setSecretsConfigSelectedIndex((i) => Math.max(0, i - 1));
+        }}
+        onBack={() => setScreenState("form")}
+      />
+    );
+  }
+
+  if (screenState === "entering_env_var" && pendingSecretForEnv) {
+    return (
+      <EnvVarInputView
+        secretName={pendingSecretForEnv.name}
+        onSubmit={(val) => handleEnvVarForSecretSubmit(val)}
+        onCancel={() => {
+          setPendingSecretForEnv(null);
+          setScreenState("secrets_config");
+        }}
+      />
+    );
+  }
+
+  if (screenState === "picking_secret") {
+    return (
+      <ResourcePicker<SecretListItem>
+        config={secretPickerConfig}
+        onSelect={handleSecretSelect}
+        onCancel={() => setScreenState("secrets_config")}
+        initialSelected={[]}
+      />
+    );
+  }
+
   // Show benchmark picker (single-select)
   if (screenState === "picking_benchmark") {
     return (
@@ -750,6 +1114,13 @@ export function BenchmarkJobCreateScreen({
         if (formData.agentNames.length === 0) return "";
         if (formData.agentNames.length === 1) return formData.agentNames[0];
         return `${formData.agentNames.length} agents selected`;
+      case "secrets": {
+        const keys = Object.keys(formData.secretsMapping);
+        if (keys.length === 0) return "";
+        if (keys.length === 1)
+          return `${keys[0]} → ${formData.secretsMapping[keys[0]]}`;
+        return `${keys.length} mappings`;
+      }
       case "model_names":
         return formData.modelNamesInput;
       case "name":
diff --git a/src/screens/BenchmarkJobDetailScreen.tsx b/src/screens/BenchmarkJobDetailScreen.tsx
index d71ce721..5d0ae478 100644
--- a/src/screens/BenchmarkJobDetailScreen.tsx
+++ b/src/screens/BenchmarkJobDetailScreen.tsx
@@ -633,15 +633,24 @@ export function BenchmarkJobDetailScreen({
 
       // Extract agent configs - both full configs and legacy fields
       if (resource.job_spec?.agent_configs) {
-        const agentConfigs = resource.job_spec.agent_configs.map((a: any) => ({
-          agentId: a.agent_id,
-          name: a.name,
-          modelName: a.model_name,
-          timeoutSeconds: a.timeout_seconds,
-          kwargs: a.kwargs,
-          environmentVariables: a.agent_environment?.environment_variables,
-          secrets: a.agent_environment?.secrets,
-        }));
+        const agentConfigs = resource.job_spec.agent_configs.map((a: any) => {
+          const env = a.agent_environment;
+          const secrets =
+            env?.secrets ??
+            env?.secret_names ??
+            (typeof env?.secret_refs === "object" && env.secret_refs
+              ? env.secret_refs
+              : undefined);
+          return {
+            agentId: a.agent_id,
+            name: a.name,
+            modelName: a.model_name,
+            timeoutSeconds: a.timeout_seconds,
+            kwargs: a.kwargs,
+            environmentVariables: env?.environment_variables,
+            secrets,
+          };
+        });
         cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
 
         // Also extract legacy fields for form initialization
diff --git a/src/screens/BenchmarkJobListScreen.tsx b/src/screens/BenchmarkJobListScreen.tsx
index 83475e0b..8bd0b564 100644
--- a/src/screens/BenchmarkJobListScreen.tsx
+++ b/src/screens/BenchmarkJobListScreen.tsx
@@ -318,16 +318,24 @@ export function BenchmarkJobListScreen() {
           // Extract agent configs - both full configs and legacy fields
           if (selectedJob.job_spec?.agent_configs) {
             const agentConfigs = selectedJob.job_spec.agent_configs.map(
-              (a: any) => ({
-                agentId: a.agent_id,
-                name: a.name,
-                modelName: a.model_name,
-                timeoutSeconds: a.timeout_seconds,
-                kwargs: a.kwargs,
-                environmentVariables:
-                  a.agent_environment?.environment_variables,
-                secrets: a.agent_environment?.secrets,
-              }),
+              (a: any) => {
+                const env = a.agent_environment;
+                const secrets =
+                  env?.secrets ??
+                  env?.secret_names ??
+                  (typeof env?.secret_refs === "object" && env.secret_refs
+                    ? env.secret_refs
+                    : undefined);
+                return {
+                  agentId: a.agent_id,
+                  name: a.name,
+                  modelName: a.model_name,
+                  timeoutSeconds: a.timeout_seconds,
+                  kwargs: a.kwargs,
+                  environmentVariables: env?.environment_variables,
+                  secrets,
+                };
+              },
             );
             cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
 
@@ -396,15 +404,24 @@ export function BenchmarkJobListScreen() {
         // Extract agent configs - both full configs and legacy fields
         if (selectedJob.job_spec?.agent_configs) {
           const agentConfigs = selectedJob.job_spec.agent_configs.map(
-            (a: any) => ({
-              agentId: a.agent_id,
-              name: a.name,
-              modelName: a.model_name,
-              timeoutSeconds: a.timeout_seconds,
-              kwargs: a.kwargs,
-              environmentVariables: a.agent_environment?.environment_variables,
-              secrets: a.agent_environment?.secrets,
-            }),
+            (a: any) => {
+              const env = a.agent_environment;
+              const secrets =
+                env?.secrets ??
+                env?.secret_names ??
+                (typeof env?.secret_refs === "object" && env.secret_refs
+                  ? env.secret_refs
+                  : undefined);
+              return {
+                agentId: a.agent_id,
+                name: a.name,
+                modelName: a.model_name,
+                timeoutSeconds: a.timeout_seconds,
+                kwargs: a.kwargs,
+                environmentVariables: env?.environment_variables,
+                secrets,
+              };
+            },
           );
           cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
 
@@ -504,15 +521,24 @@ export function BenchmarkJobListScreen() {
         // Extract agent configs - both full configs and legacy fields
         if (selectedJob.job_spec?.agent_configs) {
           const agentConfigs = selectedJob.job_spec.agent_configs.map(
-            (a: any) => ({
-              agentId: a.agent_id,
-              name: a.name,
-              modelName: a.model_name,
-              timeoutSeconds: a.timeout_seconds,
-              kwargs: a.kwargs,
-              environmentVariables: a.agent_environment?.environment_variables,
-              secrets: a.agent_environment?.secrets,
-            }),
+            (a: any) => {
+              const env = a.agent_environment;
+              const secrets =
+                env?.secrets ??
+                env?.secret_names ??
+                (typeof env?.secret_refs === "object" && env.secret_refs
+                  ? env.secret_refs
+                  : undefined);
+              return {
+                agentId: a.agent_id,
+                name: a.name,
+                modelName: a.model_name,
+                timeoutSeconds: a.timeout_seconds,
+                kwargs: a.kwargs,
+                environmentVariables: env?.environment_variables,
+                secrets,
+              };
+            },
           );
           cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
 
diff --git a/src/screens/BenchmarkListScreen.tsx b/src/screens/BenchmarkListScreen.tsx
index fe953b30..c37ee700 100644
--- a/src/screens/BenchmarkListScreen.tsx
+++ b/src/screens/BenchmarkListScreen.tsx
@@ -24,7 +24,10 @@ import { useViewportHeight } from "../hooks/useViewportHeight.js";
 import { useExitOnCtrlC } from "../hooks/useExitOnCtrlC.js";
 import { useCursorPagination } from "../hooks/useCursorPagination.js";
 import { useListSearch } from "../hooks/useListSearch.js";
-import { listBenchmarks } from "../services/benchmarkService.js";
+import {
+  listBenchmarks,
+  listPublicBenchmarks,
+} from "../services/benchmarkService.js";
 import type { Benchmark } from "../store/benchmarkStore.js";
 
 export function BenchmarkListScreen() {
@@ -33,6 +36,7 @@ export function BenchmarkListScreen() {
   const [selectedIndex, setSelectedIndex] = React.useState(0);
   const [showPopup, setShowPopup] = React.useState(false);
   const [selectedOperation, setSelectedOperation] = React.useState(0);
+  const [showPublic, setShowPublic] = React.useState(false);
 
   // Search state
   const search = useListSearch({
@@ -61,7 +65,8 @@ export function BenchmarkListScreen() {
   // Fetch function for pagination hook
   const fetchPage = React.useCallback(
     async (params: { limit: number; startingAt?: string }) => {
-      const result = await listBenchmarks({
+      const listFn = showPublic ? listPublicBenchmarks : listBenchmarks;
+      const result = await listFn({
         limit: params.limit,
         startingAfter: params.startingAt,
         search: search.submittedSearchQuery || undefined,
@@ -73,7 +78,7 @@ export function BenchmarkListScreen() {
         totalCount: result.totalCount,
       };
     },
-    [search.submittedSearchQuery],
+    [showPublic, search.submittedSearchQuery],
   );
 
   // Use the shared pagination hook
@@ -94,7 +99,7 @@ export function BenchmarkListScreen() {
     getItemId: (benchmark: Benchmark) => benchmark.id,
     pollInterval: 5000,
     pollingEnabled: !showPopup && !search.searchMode,
-    deps: [PAGE_SIZE, search.submittedSearchQuery],
+    deps: [PAGE_SIZE, search.submittedSearchQuery, showPublic],
   });
 
   // Operations for benchmarks
@@ -271,6 +276,9 @@ export function BenchmarkListScreen() {
       });
     } else if (input === "/") {
       search.enterSearchMode();
+    } else if (input === "t") {
+      setShowPublic((prev) => !prev);
+      setSelectedIndex(0);
     } else if (key.escape) {
       if (search.handleEscape()) {
         return;
@@ -339,11 +347,11 @@ export function BenchmarkListScreen() {
           data={benchmarks}
           keyExtractor={(benchmark: Benchmark) => benchmark.id}
           selectedIndex={selectedIndex}
-          title={`benchmarks[${totalCount}]`}
+          title={`benchmarks[${totalCount}] ${showPublic ? "(public)" : "(private)"}`}
           columns={columns}
           emptyState={
             <Text color={colors.textDim}>
-              {figures.info} No benchmarks found
+              {figures.info} No {showPublic ? "public " : ""}benchmarks found
             </Text>
           }
         />
@@ -359,6 +367,10 @@ export function BenchmarkListScreen() {
             {" "}
             total
           </Text>
+          <Text color={showPublic ? colors.warning : colors.textDim} dimColor={!showPublic}>
+            {" "}
+            • {showPublic ? "Public" : "Private"}
+          </Text>
           {totalPages > 1 && (
             <>
               <Text color={colors.textDim} dimColor>
@@ -421,6 +433,7 @@ export function BenchmarkListScreen() {
           { key: "Enter", label: "Details" },
           { key: "c", label: "Create Job" },
           { key: "a", label: "Actions" },
+          { key: "t", label: showPublic ? "Private" : "Public" },
           { key: "/", label: "Search" },
           { key: "Esc", label: "Back" },
         ]}
diff --git a/src/services/benchmarkService.ts b/src/services/benchmarkService.ts
index e6373464..7abab590 100644
--- a/src/services/benchmarkService.ts
+++ b/src/services/benchmarkService.ts
@@ -214,7 +214,7 @@ export async function listPublicBenchmarks(
 }
 
 /**
- * Create/start a benchmark run with selected benchmarks
+ * Create/start a benchmark run with selected benchmarks (POST /v1/benchmark_runs)
  */
 export async function createBenchmarkRun(
   benchmarkIds: string[],
@@ -241,4 +241,4 @@ export async function createBenchmarkRun(
   // Use type assertion since the API client types may not be fully defined
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   return (client.benchmarkRuns as any).create(createParams);
-}
+  }

From e5b790408877a813465234cc915a4df9ca57cb18 Mon Sep 17 00:00:00 2001
From: Tony Deng <tony@runloop.ai>
Date: Fri, 6 Mar 2026 16:09:26 -0800
Subject: [PATCH 2/5] fmt

---
 src/screens/BenchmarkJobCreateScreen.tsx | 16 ++++++++++++----
 src/screens/BenchmarkListScreen.tsx      |  5 ++++-
 src/services/benchmarkService.ts         |  2 +-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/screens/BenchmarkJobCreateScreen.tsx b/src/screens/BenchmarkJobCreateScreen.tsx
index c4138c52..c1af08be 100644
--- a/src/screens/BenchmarkJobCreateScreen.tsx
+++ b/src/screens/BenchmarkJobCreateScreen.tsx
@@ -177,13 +177,17 @@ function SecretsConfigView({
         <Box marginBottom={0}>
           <Box width={4}>
             <Text
-              color={idx === mappingEntries.length ? colors.primary : colors.textDim}
+              color={
+                idx === mappingEntries.length ? colors.primary : colors.textDim
+              }
               bold={idx === mappingEntries.length}
             >
               {idx === mappingEntries.length ? figures.pointer : " "}
             </Text>
           </Box>
-          <Text color={idx === mappingEntries.length ? colors.primary : colors.text}>
+          <Text
+            color={idx === mappingEntries.length ? colors.primary : colors.text}
+          >
             + Add secret
           </Text>
         </Box>
@@ -191,7 +195,9 @@ function SecretsConfigView({
           <Box width={4}>
             <Text
               color={
-                idx === mappingEntries.length + 1 ? colors.primary : colors.textDim
+                idx === mappingEntries.length + 1
+                  ? colors.primary
+                  : colors.textDim
               }
               bold={idx === mappingEntries.length + 1}
             >
@@ -384,7 +390,9 @@ export function BenchmarkJobCreateScreen({
         // Merge secrets from all agent configs into one mapping (clone prefill)
         const allSecrets = arr
           .map((a) => a.secrets ?? a.secret_names)
-          .filter((s): s is Record<string, string> => !!s && typeof s === "object");
+          .filter(
+            (s): s is Record<string, string> => !!s && typeof s === "object",
+          );
         if (allSecrets.length > 0) {
           secretsMapping = Object.assign({}, ...allSecrets);
         }
diff --git a/src/screens/BenchmarkListScreen.tsx b/src/screens/BenchmarkListScreen.tsx
index c37ee700..3d6fe786 100644
--- a/src/screens/BenchmarkListScreen.tsx
+++ b/src/screens/BenchmarkListScreen.tsx
@@ -367,7 +367,10 @@ export function BenchmarkListScreen() {
             {" "}
             total
           </Text>
-          <Text color={showPublic ? colors.warning : colors.textDim} dimColor={!showPublic}>
+          <Text
+            color={showPublic ? colors.warning : colors.textDim}
+            dimColor={!showPublic}
+          >
             {" "}
             • {showPublic ? "Public" : "Private"}
           </Text>
diff --git a/src/services/benchmarkService.ts b/src/services/benchmarkService.ts
index 7abab590..d1aabb86 100644
--- a/src/services/benchmarkService.ts
+++ b/src/services/benchmarkService.ts
@@ -241,4 +241,4 @@ export async function createBenchmarkRun(
   // Use type assertion since the API client types may not be fully defined
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   return (client.benchmarkRuns as any).create(createParams);
-  }
+}

From a1d953a3fefab6a254d8be7add724bba85e1984c Mon Sep 17 00:00:00 2001
From: Tony Deng <tony@runloop.ai>
Date: Fri, 6 Mar 2026 16:14:51 -0800
Subject: [PATCH 3/5] "Custom" benchmarks, reduce surface area of changes

---
 misc/config.yml                     | 3 ++-
 package.json                        | 1 +
 src/screens/BenchmarkListScreen.tsx | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/misc/config.yml b/misc/config.yml
index f20e7d65..f3a417bc 100644
--- a/misc/config.yml
+++ b/misc/config.yml
@@ -9,7 +9,8 @@ command: rli
 cwd: ~
 
 # Export additional ENV variables
-env: {}
+env:
+  recording: true
 
 # Explicitly set the number of columns
 # or use `auto` to take the current
diff --git a/package.json b/package.json
index 76abd48e..82b3e67d 100644
--- a/package.json
+++ b/package.json
@@ -11,6 +11,7 @@
     "build:mcp": "pnpm run build && node scripts/build-mcp.js",
     "dev": "tsc --watch",
     "start": "node dist/cli.js",
+    "start:debug": "node dist/cli.js 2> debug.log",
     "prepublishOnly": "pnpm run build",
     "version:patch": "pnpm version patch",
     "version:minor": "pnpm version minor",
diff --git a/src/screens/BenchmarkListScreen.tsx b/src/screens/BenchmarkListScreen.tsx
index 3d6fe786..0c6d35c4 100644
--- a/src/screens/BenchmarkListScreen.tsx
+++ b/src/screens/BenchmarkListScreen.tsx
@@ -372,7 +372,7 @@ export function BenchmarkListScreen() {
             dimColor={!showPublic}
           >
             {" "}
-            • {showPublic ? "Public" : "Private"}
+            • {showPublic ? "Public" : "Custom"}
           </Text>
           {totalPages > 1 && (
             <>
@@ -436,7 +436,7 @@ export function BenchmarkListScreen() {
           { key: "Enter", label: "Details" },
           { key: "c", label: "Create Job" },
           { key: "a", label: "Actions" },
-          { key: "t", label: showPublic ? "Private" : "Public" },
+          { key: "t", label: showPublic ? "Custom" : "Public" },
           { key: "/", label: "Search" },
           { key: "Esc", label: "Back" },
         ]}

From 8b46022bc28cbd6078e70a8a9d26e07983b9353b Mon Sep 17 00:00:00 2001
From: Tony Deng <tony@runloop.ai>
Date: Wed, 15 Apr 2026 14:42:25 -0700
Subject: [PATCH 4/5] show benchmarks interactively again

---
 src/components/MainMenu.tsx         | 4 ----
 src/screens/BenchmarkListScreen.tsx | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/components/MainMenu.tsx b/src/components/MainMenu.tsx
index 683b3660..6e456835 100644
--- a/src/components/MainMenu.tsx
+++ b/src/components/MainMenu.tsx
@@ -25,7 +25,6 @@ interface MenuItem {
 }
 
 const allMenuItems: MenuItem[] = [
-  /**
   {
     key: "benchmarks",
     label: "Benchmarks",
@@ -33,7 +32,6 @@ const allMenuItems: MenuItem[] = [
     icon: "▷",
     color: colors.success,
   },
-  */
   {
     key: "devboxes",
     label: "Devboxes",
@@ -207,10 +205,8 @@ export const MainMenu = ({ onSelect }: MainMenuProps) => {
       selectByKey("snapshots");
     } else if (input === "o") {
       selectByKey("objects");
-      /**
     } else if (input === "e") {
       selectByKey("benchmarks");
-    */
     } else if (input === "n") {
       selectByKey("settings");
     } else if (input >= "1" && input <= "9") {
diff --git a/src/screens/BenchmarkListScreen.tsx b/src/screens/BenchmarkListScreen.tsx
index c4144268..bd25cb64 100644
--- a/src/screens/BenchmarkListScreen.tsx
+++ b/src/screens/BenchmarkListScreen.tsx
@@ -36,7 +36,7 @@ export function BenchmarkListScreen() {
   const [selectedIndex, setSelectedIndex] = React.useState(0);
   const [showPopup, setShowPopup] = React.useState(false);
   const [selectedOperation, setSelectedOperation] = React.useState(0);
-  const [showPublic, setShowPublic] = React.useState(false);
+  const [showPublic, setShowPublic] = React.useState(true);
 
   // Search state
   const search = useListSearch({
@@ -347,7 +347,7 @@ export function BenchmarkListScreen() {
           data={benchmarks}
           keyExtractor={(benchmark: Benchmark) => benchmark.id}
           selectedIndex={selectedIndex}
-          title={`benchmarks[${totalCount}] ${showPublic ? "(public)" : "(private)"}`}
+          title={`benchmarks[${totalCount}] ${showPublic ? "(public)" : "(custom)"}`}
           columns={columns}
           emptyState={
             <Text color={colors.textDim}>

From 947a44885da2e0cf31a97018e37315e8995c8e20 Mon Sep 17 00:00:00 2001
From: Rob von Behren <rob@runloop.ai>
Date: Wed, 15 Apr 2026 16:17:48 -0700
Subject: [PATCH 5/5] consolidate boilerplate into helper function

---
 src/screens/BenchmarkJobDetailScreen.tsx |  82 +-------
 src/screens/BenchmarkJobListScreen.tsx   | 237 +----------------------
 src/services/benchmarkJobService.ts      |  78 ++++++++
 3 files changed, 87 insertions(+), 310 deletions(-)

diff --git a/src/screens/BenchmarkJobDetailScreen.tsx b/src/screens/BenchmarkJobDetailScreen.tsx
index cb1ecea1..e9fb2ed6 100644
--- a/src/screens/BenchmarkJobDetailScreen.tsx
+++ b/src/screens/BenchmarkJobDetailScreen.tsx
@@ -16,7 +16,10 @@ import {
   type DetailSection,
   type ResourceOperation,
 } from "../components/ResourceDetailPage.js";
-import { getBenchmarkJob } from "../services/benchmarkJobService.js";
+import {
+  getBenchmarkJob,
+  buildCloneParams,
+} from "../services/benchmarkJobService.js";
 import { getBenchmarkRun } from "../services/benchmarkService.js";
 import { useResourceDetail } from "../hooks/useResourceDetail.js";
 import { SpinnerComponent } from "../components/Spinner.js";
@@ -610,82 +613,7 @@ export function BenchmarkJobDetailScreen({
         });
       }
     } else if (operation === "clone-job") {
-      // Pass job data for cloning
-      const cloneParams: any = {
-        cloneFromJobId: resource.id,
-        cloneJobName: resource.name,
-      };
-
-      // Determine source type and extract IDs
-      if (resource.job_spec) {
-        const spec = resource.job_spec as any;
-
-        // Check if it's a scenarios spec (has scenario_ids array)
-        if (spec.scenario_ids && Array.isArray(spec.scenario_ids)) {
-          cloneParams.cloneSourceType = "scenarios";
-          cloneParams.initialScenarioIds = spec.scenario_ids.join(",");
-        }
-        // Check if it's a benchmark spec (has benchmark_id)
-        else if (spec.benchmark_id) {
-          cloneParams.cloneSourceType = "benchmark";
-          cloneParams.initialBenchmarkIds = spec.benchmark_id;
-        }
-        // Fallback: check job_source
-        else if (resource.job_source) {
-          const source = resource.job_source as any;
-          if (source.scenario_ids && Array.isArray(source.scenario_ids)) {
-            cloneParams.cloneSourceType = "scenarios";
-            cloneParams.initialScenarioIds = source.scenario_ids.join(",");
-          } else if (source.benchmark_id) {
-            cloneParams.cloneSourceType = "benchmark";
-            cloneParams.initialBenchmarkIds = source.benchmark_id;
-          }
-        }
-      }
-
-      // Extract agent configs - both full configs and legacy fields
-      if (resource.job_spec?.agent_configs) {
-        const agentConfigs = resource.job_spec.agent_configs.map((a: any) => {
-          const env = a.agent_environment;
-          const secrets =
-            env?.secrets ??
-            env?.secret_names ??
-            (typeof env?.secret_refs === "object" && env.secret_refs
-              ? env.secret_refs
-              : undefined);
-          return {
-            agentId: a.agent_id,
-            name: a.name,
-            modelName: a.model_name,
-            timeoutSeconds: a.timeout_seconds,
-            kwargs: a.kwargs,
-            environmentVariables: env?.environment_variables,
-            secrets,
-          };
-        });
-        cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
-
-        // Also extract legacy fields for form initialization
-        cloneParams.cloneAgentIds = resource.job_spec.agent_configs
-          .map((a: any) => a.agent_id)
-          .join(",");
-        cloneParams.cloneAgentNames = resource.job_spec.agent_configs
-          .map((a: any) => a.name)
-          .join(",");
-      }
-
-      // Extract orchestrator config
-      if (resource.job_spec?.orchestrator_config) {
-        const orch = resource.job_spec.orchestrator_config;
-        cloneParams.cloneOrchestratorConfig = JSON.stringify({
-          nAttempts: orch.n_attempts,
-          nConcurrentTrials: orch.n_concurrent_trials,
-          quiet: orch.quiet,
-          timeoutMultiplier: orch.timeout_multiplier,
-        });
-      }
-
-      navigate("benchmark-job-create", cloneParams);
+      navigate("benchmark-job-create", buildCloneParams(resource));
     }
   };
 
diff --git a/src/screens/BenchmarkJobListScreen.tsx b/src/screens/BenchmarkJobListScreen.tsx
index b0222eed..82ad69ff 100644
--- a/src/screens/BenchmarkJobListScreen.tsx
+++ b/src/screens/BenchmarkJobListScreen.tsx
@@ -26,6 +26,7 @@ import { useCursorPagination } from "../hooks/useCursorPagination.js";
 import { useListSearch } from "../hooks/useListSearch.js";
 import {
   listBenchmarkJobs,
+  buildCloneParams,
   type BenchmarkJob,
 } from "../services/benchmarkJobService.js";
 
@@ -282,84 +283,7 @@ export function BenchmarkJobListScreen() {
             benchmarkJobId: selectedJob.id,
           });
         } else if (operationKey === "clone_job" && selectedJob) {
-          // Pass job data for cloning
-          const cloneParams: any = {
-            cloneFromJobId: selectedJob.id,
-            cloneJobName: selectedJob.name,
-          };
-
-          // Determine source type and extract IDs
-          if (selectedJob.job_spec) {
-            const spec = selectedJob.job_spec as any;
-
-            // Check if it's a scenarios spec (has scenario_ids array)
-            if (spec.scenario_ids && Array.isArray(spec.scenario_ids)) {
-              cloneParams.cloneSourceType = "scenarios";
-              cloneParams.initialScenarioIds = spec.scenario_ids.join(",");
-            }
-            // Check if it's a benchmark spec (has benchmark_id)
-            else if (spec.benchmark_id) {
-              cloneParams.cloneSourceType = "benchmark";
-              cloneParams.initialBenchmarkIds = spec.benchmark_id;
-            }
-            // Fallback: check job_source
-            else if (selectedJob.job_source) {
-              const source = selectedJob.job_source as any;
-              if (source.scenario_ids && Array.isArray(source.scenario_ids)) {
-                cloneParams.cloneSourceType = "scenarios";
-                cloneParams.initialScenarioIds = source.scenario_ids.join(",");
-              } else if (source.benchmark_id) {
-                cloneParams.cloneSourceType = "benchmark";
-                cloneParams.initialBenchmarkIds = source.benchmark_id;
-              }
-            }
-          }
-
-          // Extract agent configs - both full configs and legacy fields
-          if (selectedJob.job_spec?.agent_configs) {
-            const agentConfigs = selectedJob.job_spec.agent_configs.map(
-              (a: any) => {
-                const env = a.agent_environment;
-                const secrets =
-                  env?.secrets ??
-                  env?.secret_names ??
-                  (typeof env?.secret_refs === "object" && env.secret_refs
-                    ? env.secret_refs
-                    : undefined);
-                return {
-                  agentId: a.agent_id,
-                  name: a.name,
-                  modelName: a.model_name,
-                  timeoutSeconds: a.timeout_seconds,
-                  kwargs: a.kwargs,
-                  environmentVariables: env?.environment_variables,
-                  secrets,
-                };
-              },
-            );
-            cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
-
-            // Also extract legacy fields for form initialization
-            cloneParams.cloneAgentIds = selectedJob.job_spec.agent_configs
-              .map((a: any) => a.agent_id)
-              .join(",");
-            cloneParams.cloneAgentNames = selectedJob.job_spec.agent_configs
-              .map((a: any) => a.name)
-              .join(",");
-          }
-
-          // Extract orchestrator config
-          if (selectedJob.job_spec?.orchestrator_config) {
-            const orch = selectedJob.job_spec.orchestrator_config;
-            cloneParams.cloneOrchestratorConfig = JSON.stringify({
-              nAttempts: orch.n_attempts,
-              nConcurrentTrials: orch.n_concurrent_trials,
-              quiet: orch.quiet,
-              timeoutMultiplier: orch.timeout_multiplier,
-            });
-          }
-
-          navigate("benchmark-job-create", cloneParams);
+          navigate("benchmark-job-create", buildCloneParams(selectedJob));
         }
       } else if (input === "v" && selectedJob) {
         setShowPopup(false);
@@ -368,84 +292,7 @@ export function BenchmarkJobListScreen() {
         });
       } else if (input === "n" && selectedJob) {
         setShowPopup(false);
-        // Clone the selected job
-        const cloneParams: any = {
-          cloneFromJobId: selectedJob.id,
-          cloneJobName: selectedJob.name,
-        };
-
-        // Determine source type and extract IDs
-        if (selectedJob.job_spec) {
-          const spec = selectedJob.job_spec as any;
-
-          // Check if it's a scenarios spec (has scenario_ids array)
-          if (spec.scenario_ids && Array.isArray(spec.scenario_ids)) {
-            cloneParams.cloneSourceType = "scenarios";
-            cloneParams.initialScenarioIds = spec.scenario_ids.join(",");
-          }
-          // Check if it's a benchmark spec (has benchmark_id)
-          else if (spec.benchmark_id) {
-            cloneParams.cloneSourceType = "benchmark";
-            cloneParams.initialBenchmarkIds = spec.benchmark_id;
-          }
-          // Fallback: check job_source
-          else if (selectedJob.job_source) {
-            const source = selectedJob.job_source as any;
-            if (source.scenario_ids && Array.isArray(source.scenario_ids)) {
-              cloneParams.cloneSourceType = "scenarios";
-              cloneParams.initialScenarioIds = source.scenario_ids.join(",");
-            } else if (source.benchmark_id) {
-              cloneParams.cloneSourceType = "benchmark";
-              cloneParams.initialBenchmarkIds = source.benchmark_id;
-            }
-          }
-        }
-
-        // Extract agent configs - both full configs and legacy fields
-        if (selectedJob.job_spec?.agent_configs) {
-          const agentConfigs = selectedJob.job_spec.agent_configs.map(
-            (a: any) => {
-              const env = a.agent_environment;
-              const secrets =
-                env?.secrets ??
-                env?.secret_names ??
-                (typeof env?.secret_refs === "object" && env.secret_refs
-                  ? env.secret_refs
-                  : undefined);
-              return {
-                agentId: a.agent_id,
-                name: a.name,
-                modelName: a.model_name,
-                timeoutSeconds: a.timeout_seconds,
-                kwargs: a.kwargs,
-                environmentVariables: env?.environment_variables,
-                secrets,
-              };
-            },
-          );
-          cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
-
-          // Also extract legacy fields for form initialization
-          cloneParams.cloneAgentIds = selectedJob.job_spec.agent_configs
-            .map((a: any) => a.agent_id)
-            .join(",");
-          cloneParams.cloneAgentNames = selectedJob.job_spec.agent_configs
-            .map((a: any) => a.name)
-            .join(",");
-        }
-
-        // Extract orchestrator config
-        if (selectedJob.job_spec?.orchestrator_config) {
-          const orch = selectedJob.job_spec.orchestrator_config;
-          cloneParams.cloneOrchestratorConfig = JSON.stringify({
-            nAttempts: orch.n_attempts,
-            nConcurrentTrials: orch.n_concurrent_trials,
-            quiet: orch.quiet,
-            timeoutMultiplier: orch.timeout_multiplier,
-          });
-        }
-
-        navigate("benchmark-job-create", cloneParams);
+        navigate("benchmark-job-create", buildCloneParams(selectedJob));
       } else if (key.escape || input === "q") {
         setShowPopup(false);
         setSelectedOperation(0);
@@ -486,83 +333,7 @@ export function BenchmarkJobListScreen() {
     } else if (input === "3") {
       // Quick shortcut to clone the selected job, or create a new job if none selected
       if (selectedJob) {
-        const cloneParams: any = {
-          cloneFromJobId: selectedJob.id,
-          cloneJobName: selectedJob.name,
-        };
-
-        // Determine source type and extract IDs
-        if (selectedJob.job_spec) {
-          const spec = selectedJob.job_spec as any;
-
-          // Check if it's a scenarios spec (has scenario_ids array)
-          if (spec.scenario_ids && Array.isArray(spec.scenario_ids)) {
-            cloneParams.cloneSourceType = "scenarios";
-            cloneParams.initialScenarioIds = spec.scenario_ids.join(",");
-          }
-          // Check if it's a benchmark spec (has benchmark_id)
-          else if (spec.benchmark_id) {
-            cloneParams.cloneSourceType = "benchmark";
-            cloneParams.initialBenchmarkIds = spec.benchmark_id;
-          }
-          // Fallback: check job_source
-          else if (selectedJob.job_source) {
-            const source = selectedJob.job_source as any;
-            if (source.scenario_ids && Array.isArray(source.scenario_ids)) {
-              cloneParams.cloneSourceType = "scenarios";
-              cloneParams.initialScenarioIds = source.scenario_ids.join(",");
-            } else if (source.benchmark_id) {
-              cloneParams.cloneSourceType = "benchmark";
-              cloneParams.initialBenchmarkIds = source.benchmark_id;
-            }
-          }
-        }
-
-        // Extract agent configs - both full configs and legacy fields
-        if (selectedJob.job_spec?.agent_configs) {
-          const agentConfigs = selectedJob.job_spec.agent_configs.map(
-            (a: any) => {
-              const env = a.agent_environment;
-              const secrets =
-                env?.secrets ??
-                env?.secret_names ??
-                (typeof env?.secret_refs === "object" && env.secret_refs
-                  ? env.secret_refs
-                  : undefined);
-              return {
-                agentId: a.agent_id,
-                name: a.name,
-                modelName: a.model_name,
-                timeoutSeconds: a.timeout_seconds,
-                kwargs: a.kwargs,
-                environmentVariables: env?.environment_variables,
-                secrets,
-              };
-            },
-          );
-          cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
-
-          // Also extract legacy fields for form initialization
-          cloneParams.cloneAgentIds = selectedJob.job_spec.agent_configs
-            .map((a: any) => a.agent_id)
-            .join(",");
-          cloneParams.cloneAgentNames = selectedJob.job_spec.agent_configs
-            .map((a: any) => a.name)
-            .join(",");
-        }
-
-        // Extract orchestrator config
-        if (selectedJob.job_spec?.orchestrator_config) {
-          const orch = selectedJob.job_spec.orchestrator_config;
-          cloneParams.cloneOrchestratorConfig = JSON.stringify({
-            nAttempts: orch.n_attempts,
-            nConcurrentTrials: orch.n_concurrent_trials,
-            quiet: orch.quiet,
-            timeoutMultiplier: orch.timeout_multiplier,
-          });
-        }
-
-        navigate("benchmark-job-create", cloneParams);
+        navigate("benchmark-job-create", buildCloneParams(selectedJob));
       } else {
         navigate("benchmark-job-create");
       }
diff --git a/src/services/benchmarkJobService.ts b/src/services/benchmarkJobService.ts
index 9ad150b6..c766cb1f 100644
--- a/src/services/benchmarkJobService.ts
+++ b/src/services/benchmarkJobService.ts
@@ -19,6 +19,84 @@ export type BenchmarkRun = BenchmarkRunView;
 export type ScenarioRun = ScenarioRunView;
 export type { BenchmarkJobCreateParams };
 
+/**
+ * Build the params for navigating to "benchmark-job-create" as a clone of `job`.
+ * Source IDs come from job_spec (scenario_ids, else benchmark_id); job_source is
+ * consulted only when job_spec exists but has neither. Configs are JSON strings.
+ */
+export function buildCloneParams(job: BenchmarkJob): Record<string, string> {
+  const params: Record<string, string> = {
+    cloneFromJobId: job.id,
+    cloneJobName: job.name ?? "",
+  };
+
+  // Determine source type and IDs (note: an empty scenario_ids array still selects "scenarios")
+  if (job.job_spec) {
+    const spec = job.job_spec as any;
+
+    if (spec.scenario_ids && Array.isArray(spec.scenario_ids)) {
+      params.cloneSourceType = "scenarios";
+      params.initialScenarioIds = spec.scenario_ids.join(",");
+    } else if (spec.benchmark_id) {
+      params.cloneSourceType = "benchmark";
+      params.initialBenchmarkIds = spec.benchmark_id;
+    } else if (job.job_source) {
+      const source = job.job_source as any;
+      if (source.scenario_ids && Array.isArray(source.scenario_ids)) {
+        params.cloneSourceType = "scenarios";
+        params.initialScenarioIds = source.scenario_ids.join(",");
+      } else if (source.benchmark_id) {
+        params.cloneSourceType = "benchmark";
+        params.initialBenchmarkIds = source.benchmark_id;
+      }
+    }
+  }
+
+  // Extract agent configs; secrets = first non-nullish of secrets, secret_names, or an object secret_refs
+  if (job.job_spec?.agent_configs) {
+    const agentConfigs = job.job_spec.agent_configs.map((a: any) => {
+      const env = a.agent_environment;
+      const secrets =
+        env?.secrets ??
+        env?.secret_names ??
+        (typeof env?.secret_refs === "object" && env.secret_refs
+          ? env.secret_refs
+          : undefined);
+      return {
+        agentId: a.agent_id,
+        name: a.name,
+        modelName: a.model_name,
+        timeoutSeconds: a.timeout_seconds,
+        kwargs: a.kwargs,
+        environmentVariables: env?.environment_variables,
+        secrets,
+      };
+    });
+    params.cloneAgentConfigs = JSON.stringify(agentConfigs);
+
+    // Also extract legacy comma-joined agent ID and name fields for form initialization
+    params.cloneAgentIds = job.job_spec.agent_configs
+      .map((a: any) => a.agent_id)
+      .join(",");
+    params.cloneAgentNames = job.job_spec.agent_configs
+      .map((a: any) => a.name)
+      .join(",");
+  }
+
+  // Extract orchestrator config, renaming its snake_case fields to camelCase before stringifying
+  if (job.job_spec?.orchestrator_config) {
+    const orch = job.job_spec.orchestrator_config;
+    params.cloneOrchestratorConfig = JSON.stringify({
+      nAttempts: orch.n_attempts,
+      nConcurrentTrials: orch.n_concurrent_trials,
+      quiet: orch.quiet,
+      timeoutMultiplier: orch.timeout_multiplier,
+    });
+  }
+
+  return params;
+}
+
 export interface ListBenchmarkJobsOptions {
   limit?: number;
   startingAfter?: string;