docker · dgageot · Feb 10, 2026 · Feb 10, 2026 · docker-agent · Feb 10, 2026
@@ -309,7 +309,7 @@ func (r *Runner) runSingleEval(ctx context.Context, evalSess *InputSession) (Res
 		return result, fmt.Errorf("building eval image: %w", err)
 	}
 
-	events, err := r.runCagentInContainer(ctx, imageID, getUserMessages(evalSess.Session))
+	events, err := r.runCagentInContainer(ctx, imageID, getUserMessages(evalSess.Session), evals.Setup)
 	if err != nil {
 		return result, fmt.Errorf("running cagent in container: %w", err)
 	}
@@ -346,7 +346,7 @@ func (r *Runner) runSingleEval(ctx context.Context, evalSess *InputSession) (Res
 	return result, nil
 }
 
-func (r *Runner) runCagentInContainer(ctx context.Context, imageID string, questions []string) ([]map[string]any, error) {
+func (r *Runner) runCagentInContainer(ctx context.Context, imageID string, questions []string, setup string) ([]map[string]any, error) {
 	agentDir := r.agentSource.ParentDir()
 	agentFile := filepath.Base(r.agentSource.Name())
 	containerName := fmt.Sprintf("cagent-eval-%d", uuid.New().ID())
@@ -396,7 +396,31 @@ func (r *Runner) runCagentInContainer(ctx context.Context, imageID string, quest
 		}
 	}
 
-	args = append(args, imageID, "/configs/"+agentFile)
+	// When a setup script is provided, mount it into the container and
+	// override the entrypoint to run it before cagent exec.
+	// The default entrypoint is: /run.sh /cagent exec --yolo --json
+	// /run.sh starts dockerd then exec's "$@".
+	if setup != "" {
+		setupFile := filepath.Join(os.TempDir(), fmt.Sprintf("cagent-eval-setup-%d.sh", uuid.New().ID()))
+		if err := os.WriteFile(setupFile, []byte(setup), 0o600); err != nil {
+			return nil, fmt.Errorf("writing setup script: %w", err)
+		}
+		defer os.Remove(setupFile)
+
+		args = append(args,
+			"-v", setupFile+":/setup.sh:ro",
+			"--entrypoint", "/run.sh",
+		)
+	}
+
+	args = append(args, imageID)
+
+	if setup != "" {
+		// Run setup script, then cagent exec with the original arguments.
+		args = append(args, "sh", "-c", "sh /setup.sh && exec /cagent exec --yolo --json \"$@\"", "--", "/configs/"+agentFile)
+	} else {
+		args = append(args, "/configs/"+agentFile)
+	}
 	args = append(args, questions...)
 
 	cmd := exec.CommandContext(ctx, "docker", args...)

@@ -207,6 +207,7 @@ type EvalCriteria struct {
 	Relevance  []string `json:"relevance"`             // Statements that should be true about the response
 	WorkingDir string   `json:"working_dir,omitempty"` // Subdirectory under evals/working_dirs/
 	Size       string   `json:"size,omitempty"`        // Expected response size: S, M, L, XL
+	Setup      string   `json:"setup,omitempty"`       // Optional sh script to run in the container before cagent exec
 }
 
 // Session helper methods