diff --git a/.gitignore b/.gitignore index fc7b86e..d312f63 100644 --- a/.gitignore +++ b/.gitignore @@ -208,4 +208,5 @@ __marimo__/ # Anton .anton/ -.DS_Store \ No newline at end of file +.DS_Store +artifacts/ diff --git a/anton/chat.py b/anton/chat.py index 94e65ad..ec68cfe 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -324,6 +324,22 @@ async def _handle_connect( ) +def _is_publishable_html(html_path: Path, output_dir: Path) -> bool: + """Check if an HTML file is publishable. + + Returns False if: + - HTML is in a subdirectory that contains .py files (fullstack app) + + Returns True if: + - HTML is in the root of output/ + - HTML is in a subdirectory without any .py files + """ + if html_path.parent == output_dir: + return True + + parent_dir = html_path.parent + has_py_files = any(parent_dir.glob("*.py")) + return not has_py_files def _extract_html_title(path, re_module) -> str: @@ -459,12 +475,19 @@ async def _handle_publish( if not target.is_absolute(): target = Path(settings.workspace_path) / file_arg else: - # Recursively list HTML files under any artifact, sorted by mtime. - html_files = sorted( - artifacts_root.rglob("*.html"), key=lambda f: f.stat().st_mtime, reverse=True - ) if artifacts_root.is_dir() else [] + # Recursively list publishable HTML files under any artifact, sorted by mtime. 
+ if artifacts_root.is_dir(): + all_html = list(artifacts_root.rglob("*.html")) + html_files = sorted( + [f for f in all_html if _is_publishable_html(f, artifacts_root)], + key=lambda f: f.stat().st_mtime, + reverse=True, + ) + else: + html_files = [] + if not html_files: - console.print(f" [anton.warning]No HTML files found under {artifacts_root}/[/]") + console.print(f" [anton.warning]No publishable HTML files found under {artifacts_root}/[/]") console.print() return @@ -478,9 +501,10 @@ async def _handle_publish( console.print(" [anton.cyan]Available reports:[/]") console.print() for i, f in enumerate(page, offset + 1): + rel_path = f.relative_to(artifacts_root).as_posix() title = _extract_html_title(f, re) label = title or f.name - console.print(f" [bold]{i}[/] {label} [anton.muted]{f.name}[/]") + console.print(f" [bold]{i}[/] {label} [anton.muted]{rel_path}[/]") if has_more: console.print(f"\n [anton.muted]m Show more ({len(html_files) - offset - PAGE_SIZE} remaining)[/]") @@ -512,6 +536,14 @@ async def _handle_publish( console.print() return + # Check if file is publishable + if not _is_publishable_html(target, artifacts_root): + console.print(" [anton.error]Cannot publish this HTML file:[/]") + console.print(" It is in a directory with Python files (fullstack application).") + console.print(" Only standalone HTML reports can be published.") + console.print() + return + # 3. 
Check if this file was previously published published_json = publish_index_dir / ".published.json" published_map = {} @@ -522,7 +554,7 @@ async def _handle_publish( pass report_id = None - file_key = target.name + file_key = target.relative_to(artifacts_root).as_posix() prev = published_map.get(file_key) if prev and prev.get("report_id"): @@ -1135,7 +1167,6 @@ async def _chat_loop( # Build runtime context so the LLM knows what it's running on runtime_context = build_runtime_context(settings) - artifacts_path = f"{settings.artifacts_dir.rstrip('/')}/" from anton.chat_session import get_runtime_factory session = ChatSession(ChatSessionConfig( @@ -1146,21 +1177,13 @@ async def _chat_loop( episodic=episodic, system_prompt_context=SystemPromptContext( runtime_context=runtime_context, - # See `chat_session.create_session` for the full version - # of this prompt fragment — both call sites use the same - # artifact-flow guidance. - output_context=( - f"User-facing artifacts live under `{artifacts_path}`. " - "Before producing one, call `create_artifact(name, description, type)`; " - "the tool returns the absolute folder path you should write into. " - "To modify an existing artifact, use `list_artifacts` then `open_artifact(slug)`." 
- ), ), workspace=workspace, console=console, history_store=history_store, session_id=current_session_id, proactive_dashboards=settings.proactive_dashboards, + output_dir=settings.artifacts_dir, tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL], )) diff --git a/anton/chat_session.py b/anton/chat_session.py index 7dde5d3..14d8785 100644 --- a/anton/chat_session.py +++ b/anton/chat_session.py @@ -101,7 +101,6 @@ def rebuild_session( refresh_knowledge(settings, cortex) runtime_context = build_runtime_context(settings) - artifacts_path = f"{settings.artifacts_dir.rstrip('/')}/" return ChatSession(ChatSessionConfig( llm_client=state["llm_client"], runtime_factory=get_runtime_factory(settings), @@ -110,20 +109,11 @@ def rebuild_session( episodic=episodic, system_prompt_context=SystemPromptContext( runtime_context=runtime_context, - # Tell the agent where artifacts live + how to claim a folder. - # `create_artifact` returns the actual path to write into; - # `` here is just so the LLM has the - # workspace anchor in mind when picking names. - output_context=( - f"User-facing artifacts live under `{artifacts_path}`. " - "Before producing one, call `create_artifact(name, description, type)`; " - "the tool returns the absolute folder path you should write into. " - "To modify an existing artifact, use `list_artifacts` then `open_artifact(slug)`." - ), ), workspace=workspace, console=console, history_store=history_store, session_id=session_id, proactive_dashboards=settings.proactive_dashboards, + output_dir=settings.artifacts_dir, )) diff --git a/anton/core/artifacts/models.py b/anton/core/artifacts/models.py index 96bd314..5940dbe 100644 --- a/anton/core/artifacts/models.py +++ b/anton/core/artifacts/models.py @@ -108,6 +108,7 @@ class Artifact(BaseModel): # most cases — they generally know the filename they're going # to write). 
primary: str | None = None + port: int | None = None # ── Server-managed contents ───────────────────────────────── files: list[FileEntry] = Field(default_factory=list) diff --git a/anton/core/artifacts/store.py b/anton/core/artifacts/store.py index 636a691..4553622 100644 --- a/anton/core/artifacts/store.py +++ b/anton/core/artifacts/store.py @@ -58,6 +58,9 @@ def _new_id() -> str: return uuid.uuid4().hex[:8] +_UNSET = object() + + def _sanitize_slug(value: str) -> str: """Map any name to a folder-safe slug. @@ -172,20 +175,29 @@ def create( self._save(artifact) return artifact - def set_primary(self, slug: str, primary: str | None) -> Artifact | None: - """Update the primary-file pointer on an existing artifact. + def update( + self, + slug: str, + *, + primary: str | None = _UNSET, # type: ignore[assignment] + port: int | None = _UNSET, # type: ignore[assignment] + ) -> Artifact | None: + """Update mutable agent-supplied fields on an existing artifact. - Used when the agent created with no `primary` and decided - later, or when the primary file got renamed. Pass `None` to - clear (the renderer reverts to the heuristic). Returns the - updated artifact, or None when the slug is missing. + Only fields explicitly passed are modified; omitted fields are + left unchanged. Pass `primary=None` or `primary=""` to clear + the entry-point pointer. Pass `port=None` to clear the port. + Returns the updated artifact, or None when the slug is missing. 
""" artifact = self._load_silent(slug) if artifact is None: return None - artifact.primary = ( - primary.strip() if isinstance(primary, str) and primary.strip() else None - ) + if primary is not _UNSET: + artifact.primary = ( + primary.strip() if isinstance(primary, str) and primary.strip() else None + ) + if port is not _UNSET: + artifact.port = int(port) if port is not None else None artifact.updatedAt = _utc_now() self._save(artifact) return artifact diff --git a/anton/core/backends/base.py b/anton/core/backends/base.py index c45a114..77148f6 100644 --- a/anton/core/backends/base.py +++ b/anton/core/backends/base.py @@ -92,6 +92,16 @@ async def cleanup(self) -> None: Unlike close(), cleanup() removes persistent storage too. """ + def venv_python(self) -> str | None: + """Path to the runtime's Python interpreter, if locally accessible. + + Used by tools (e.g. launch_backend) that need to spawn auxiliary + processes sharing the scratchpad's installed packages. Returns + None for runtimes whose interpreter isn't reachable from the + host process (e.g. remote / Lightsail backends). + """ + return None + async def execute( self, code: str, diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py index d3fdccf..411d26b 100644 --- a/anton/core/backends/local.py +++ b/anton/core/backends/local.py @@ -161,6 +161,31 @@ def _create_venv(self) -> None: bin_dir = os.path.join(self._venv_dir, "bin") self._venv_python = os.path.join(bin_dir, "python") + def venv_python(self) -> str | None: + """Public accessor for the scratchpad's Python interpreter path. + + Returns None when the venv has not been provisioned yet (i.e. + no exec has run). Auxiliary tools that want to share installed + packages call this to discover the interpreter. 
+ """ + if self._venv_python and os.path.isfile(self._venv_python): + return self._venv_python + return None + + def ensure_venv(self) -> str | None: + """Provision the venv on disk (recycle if present, create if not) and + return its python interpreter path. + + Public counterpart to the internal `_ensure_venv` used by `start()` + and `install_packages`. Exposed for callers that need only the venv + — not the full runtime sidecar — to spawn auxiliary processes + (e.g. cowork's artifact backend relaunch). Cheap when the venv + already exists; falls back to a fresh `uv venv` / `python -m venv` + otherwise. + """ + self._ensure_venv() + return self.venv_python() + def _verify_venv_python(self) -> bool: if self._venv_python is None: return False diff --git a/anton/core/backends/manager.py b/anton/core/backends/manager.py index f1d7d7f..3c68487 100644 --- a/anton/core/backends/manager.py +++ b/anton/core/backends/manager.py @@ -84,3 +84,14 @@ async def close_all(self) -> None: for pad in self._pads.values(): await pad.close() self._pads.clear() + + async def venv_python(self, name: str = "main") -> str | None: + """Return the Python interpreter path of the named scratchpad. + + Provisions the scratchpad on demand so callers don't have to + synchronize with whatever cell the LLM happens to be running. + Returns None when the runtime can't expose a local interpreter + (e.g. remote backends). + """ + pad = await self.get_or_create(name) + return pad.venv_python() diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py index d7340fe..5899133 100644 --- a/anton/core/llm/prompt_builder.py +++ b/anton/core/llm/prompt_builder.py @@ -5,6 +5,7 @@ from .prompts import ( BASE_VISUALIZATIONS_PROMPT, + BACKEND_GENERATION_PROMPT, CHAT_SYSTEM_PROMPT, VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT, VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT, @@ -19,18 +20,15 @@ class SystemPromptContext: """Bundled prompt-injection points for the system prompt. 
- Four levels with increasing importance (later = stronger influence): + Three levels with increasing importance (later = stronger influence): 1. ``prefix`` — prepended before the base prompt 2. ``runtime_context`` — interpolated into the RUNTIME IDENTITY section - 3. ``output_context`` — free-text instructions on where to - store generated resources (visualizations, HTML files, data exports) - 4. ``suffix`` — appended after all other sections + 3. ``suffix`` — appended after all other sections """ runtime_context: str = "" prefix: str = "" suffix: str = "" - output_context: str = "" class ChatSystemPromptBuilder: @@ -110,7 +108,7 @@ def _build_visualizations_section( self, *, proactive_dashboards: bool, - output_context: str, + output_dir: str, ) -> str: visualizations_output_format_prompt = ( VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT @@ -118,7 +116,7 @@ def _build_visualizations_section( else VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT ) output_format = visualizations_output_format_prompt.format( - output_context=output_context, + output_dir=output_dir, ) return BASE_VISUALIZATIONS_PROMPT.format(output_format=output_format) @@ -128,6 +126,7 @@ def build( current_datetime: str, system_prompt_context: SystemPromptContext, proactive_dashboards: bool, + output_dir: str, tool_defs: list["ToolDef"] | None = None, memory_context: str = "", project_context: str = "", @@ -137,7 +136,7 @@ def build( ) -> str: visualizations_section = self._build_visualizations_section( proactive_dashboards=proactive_dashboards, - output_context=system_prompt_context.output_context, + output_dir=output_dir, ) prompt = "" @@ -152,6 +151,8 @@ def build( current_datetime=current_datetime, ) + prompt += "\n\n" + BACKEND_GENERATION_PROMPT.format(output_dir=output_dir) + tool_prompts = self._build_tool_prompts_section(tool_defs) if tool_prompts: prompt += tool_prompts diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py index ae3bf9e..745141a 100644 --- a/anton/core/llm/prompts.py 
+++ b/anton/core/llm/prompts.py @@ -320,7 +320,7 @@ Output format: - Unless the user explicitly asks for a different format, always output visualizations \ as polished, single-file HTML pages — never raw PNGs or bare image files. -{output_context} +Save output to `{output_dir}` (create it if needed). Visual design: - Make it look good by default. Use a dark theme (#0d1117 background, #e6edf3 text), \ @@ -405,13 +405,121 @@ - For large datasets, summarize the top N and offer to show more. - When the user EXPLICITLY asks for a chart, dashboard, plot, or HTML visualization, \ THEN build it as a self-contained HTML file with inlined CSS, JS, and data. \ -{output_context} +Save output to `{output_dir}` (create it if needed). Use Apache ECharts (CDN), dark theme (#0d1117), and follow standard dashboard best practices. \ If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \ Never split CSS or chart logic into separate files — only large data payloads.\ """ +BACKEND_GENERATION_PROMPT = """\ +BACKEND & FULLSTACK APPLICATION GENERATION: + +When the user asks to build a backend service, web application with a backend, or stateless \ +API-driven system, follow this workflow: + +1. REGISTER THE ARTIFACT: Call the `create_artifact` tool BEFORE creating any files. \ +This creates the folder, `metadata.json`, and `README.md` automatically and returns the \ +absolute folder path. Use that path for ALL subsequent file writes. + - `name`: short human-readable app name (e.g. "Weather Dashboard") + - `description`: one sentence describing what the app does + - `type`: always `"fullstack-stateful-app"` — every app built here requires a backend process + - `primary`: set to `"index.html"` when you know that will be the frontend entry-point + For EDITING an existing app: call `list_artifacts` first to find it, then \ +`open_artifact(slug)` to get the folder path — do NOT call `create_artifact` again. + +2. 
TECHNICAL SPECIFICATION (as a system analyst): Create a brief technical specification for \ +the application. The specification MUST include: + - Brief description of what the application does (keep it concise) + - Core features and requirements + - REST API specification in markdown format with: + * Endpoints and HTTP methods + * Request/response schemas (JSON examples) + * Error handling + - Framework choice: PREFER Python's built-in http.server or http module if possible. \ + If that's insufficient, use Bottle (simplest, minimal surface area). \ + Only use FastAPI/Flask if the requirements demand it. + - Key dependencies and libraries needed + +3. FETCH & VALIDATE SAMPLE DATA: Using the scratchpad tool: + - Fetch representative sample data from the user's data source (API, database, file) + - Get enough data to understand: structure, data types, volume, and shape + - Answer these questions: + * Is the fetched data sufficient for building the application per the spec? + * Can this data type be used to implement the API as designed? + * Do we need different/more data, or should the spec be revised? + - If the answer to any question is "no" — go back to step 2 and revise the technical \ + specification based on what you learned about the actual data + +4. IMPLEMENT BACKEND: In a scratchpad **named exactly the artifact slug** \ +(use the `slug` returned by `create_artifact` / `open_artifact` as the scratchpad \ +name), implement the backend code. `launch_backend` runs the backend in this same \ +scratchpad's venv, so any packages you install or imports you test here will be \ +present at launch. + - Write the complete backend application (http.server, Bottle, Flask, FastAPI, etc.) 
+ - Save it to `<artifact_folder>/backend.py`, where `<artifact_folder>` is the folder \ +path returned by `create_artifact` in step 1 + - If the backend uses any non-stdlib libraries (Bottle, Flask, FastAPI, requests, \ +pandas, etc.), save a `requirements.txt` in the same directory listing them — \ +one package spec per line (`pkg` or `pkg==1.2`). `launch_backend` reads this file \ +and installs everything into the slug-named scratchpad before spawning the process. \ +Note: only simple lines are supported — `-r`, `-e`, `--index-url` are ignored, as \ +are blank lines and `#` comments. If the backend uses ONLY the Python standard \ +library (http.server, json, sqlite3, etc.), do NOT create requirements.txt. + - The backend MUST accept `--port` via argparse and bind to that port. \ +NEVER hardcode the port — `launch_backend` picks a free one and passes it in. + - The backend serves the frontend at `/` (single-origin, no CORS for stateless backends) + - Do NOT start the server inside the scratchpad — use `launch_backend` in step 6. + +5. BUILD FRONTEND (if needed): In a separate scratchpad: + - Build a single-file HTML dashboard or web interface + - Include all CSS and JS inlined (no external file references) + - Follow the VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT guidelines + - Save to `<artifact_folder>/index.html` (or frontend.html) + - API calls MUST use RELATIVE paths only (e.g. `fetch('/api/items')`, NOT \ +`fetch('http://localhost:PORT/api/items')` and NOT any hardcoded base URL). \ +The frontend is served by the same backend at `/`, so relative paths resolve to the \ +correct origin automatically — this keeps the app portable across ports and hosts. + +6. LAUNCH THE BACKEND: Call the `launch_backend` tool with the artifact's slug: + - `launch_backend(slug=<slug>)` — the tool picks a free port, spawns \ +`python backend.py --port <port>` as a standalone process with `<artifact_folder>` as cwd, \ +waits for readiness, writes the port into `metadata.json`, and returns \ +`{{slug, port, pid, url, log_path}}` as JSON. 
+ - Uses the scratchpad named `<slug>` — created automatically on first call. If \ +`<artifact_folder>/requirements.txt` exists, its packages are installed into that \ +scratchpad's venv before spawn (install output is appended to `backend.log` with a \ +banner). An install failure aborts the launch and is returned as an error string — \ +fix `requirements.txt` and retry. + - Backend stdout/stderr stream to `<artifact_folder>/backend.log` — read it if \ +the launch fails or the API misbehaves. + - Do NOT call `update_artifact(port=...)` manually — `launch_backend` does it. + - The launched process outlives the scratchpad cell and is reaped automatically \ +when the Anton session ends. + - Calling `launch_backend` again for the same slug terminates the previous \ +process and starts a fresh one — use this for hot reloads after code changes. + +7. PREVIEW THE APPLICATION: Direct the user to the `url` returned by `launch_backend` \ +(e.g. http://127.0.0.1:54321): + - CRITICAL: Open that URL, NOT the HTML file from disk (file://...). \ +The backend serves the frontend at `/`, so opening the URL loads the page and \ +its `fetch()` calls land on the same origin. + - If the user opens the HTML file directly from disk, `fetch()` calls fail due \ +to browser CORS/file:// restrictions. 
+ +DEPLOYMENT NOTES: +- Backend must be stateless (no mutable global state that matters across requests) +- All data persistence should go through the user's connected data sources (databases, APIs) +- The backend process shuts down when the Anton CLI session ends (per MVP constraints) +- For production, the user must deploy the backend.py file to their own infrastructure + +PUBLISH OR SHARE: +- Publishing is disabled for this MVP (per constraints), but preview is fully supported +- After building, offer to preview the frontend by directing the user to the \ +URL returned by `launch_backend` +- The backend must be running for the frontend to work +""" + CONSOLIDATION_PROMPT = """\ You are a memory consolidation system for an AI coding assistant. diff --git a/anton/core/session.py b/anton/core/session.py index d012718..6436959 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -28,12 +28,13 @@ from anton.core.tools.registry import ToolRegistry from anton.core.tools.tool_defs import ( CREATE_ARTIFACT_TOOL, + LAUNCH_BACKEND_TOOL, LIST_ARTIFACTS_TOOL, MEMORIZE_TOOL, OPEN_ARTIFACT_TOOL, RECALL_TOOL, SCRATCHPAD_TOOL, - SET_ARTIFACT_PRIMARY_TOOL, + UPDATE_ARTIFACT_METADATA_TOOL, ToolDef, ) from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result @@ -83,6 +84,7 @@ class ChatSessionConfig: session_id: str | None = None proactive_dashboards: bool = False tools: list[ToolDef] = field(default_factory=list) + output_dir: str = ".anton/output" class ChatSession: @@ -101,6 +103,7 @@ def __init__(self, config: ChatSessionConfig) -> None: self._cortex = config.cortex self._episodic = config.episodic self._system_prompt_context = config.system_prompt_context + self._output_dir = config.output_dir self._proactive_dashboards = config.proactive_dashboards self._extra_tools = config.tools self._workspace = config.workspace @@ -160,6 +163,11 @@ def __init__(self, config: ChatSessionConfig) -> None: # at the start of each turn. 
Prevents double-summarization when # the post-recovery response still reports high pressure. self._compacted_this_turn = False + # Backends launched via the launch_backend tool. Keyed by + # artifact slug; each entry holds the asyncio.subprocess.Process + # plus its port. Reaped in close() so backend processes don't + # outlive the chat session. + self._tracked_backends: dict[str, dict] = {} @property def history(self) -> list[dict]: @@ -453,6 +461,7 @@ async def _build_system_prompt(self, user_message: str = "") -> str: current_datetime=_current_datetime, system_prompt_context=self._system_prompt_context, proactive_dashboards=self._proactive_dashboards, + output_dir=self._output_dir, tool_defs=self.tool_registry.get_tool_defs(), memory_context=memory_section, project_context=md_context, @@ -561,12 +570,35 @@ def _build_core_tools(self) -> None: self.tool_registry.register_tool(CREATE_ARTIFACT_TOOL) self.tool_registry.register_tool(LIST_ARTIFACTS_TOOL) self.tool_registry.register_tool(OPEN_ARTIFACT_TOOL) - self.tool_registry.register_tool(SET_ARTIFACT_PRIMARY_TOOL) + self.tool_registry.register_tool(UPDATE_ARTIFACT_METADATA_TOOL) + self.tool_registry.register_tool(LAUNCH_BACKEND_TOOL) async def close(self) -> None: """Clean up scratchpads and other resources.""" + await self._reap_tracked_backends() await self._scratchpads.close_all() + async def _reap_tracked_backends(self) -> None: + """Terminate every backend launched via launch_backend. + + SIGTERM first, then SIGKILL after a short grace period. Errors + are swallowed — close() must not raise on shutdown. 
+ """ + for slug, info in list(self._tracked_backends.items()): + proc = info.get("proc") + if proc is None or proc.returncode is not None: + continue + try: + proc.terminate() + try: + await asyncio.wait_for(proc.wait(), timeout=3) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + except (ProcessLookupError, OSError): + pass + self._tracked_backends.clear() + async def _summarize_history(self) -> None: """Compress old conversation turns into a summary using the coding model. diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py index 3bd0d79..cb07891 100644 --- a/anton/core/tools/tool_defs.py +++ b/anton/core/tools/tool_defs.py @@ -1,11 +1,12 @@ from anton.core.tools.tool_handlers import ( handle_create_artifact, + handle_launch_backend, handle_list_artifacts, handle_memorize, handle_open_artifact, handle_recall, handle_scratchpad, - handle_set_artifact_primary, + handle_update_artifact_metadata, ) from dataclasses import dataclass @@ -164,7 +165,7 @@ class ToolDef: "`\"index.html\"` for a fullstack app, `\"report.pdf\"` for a " "document. The renderer uses it to decide what to open by default. " "Skip when you don't know yet — the renderer falls back to a " - "heuristic, and you can set it later via `set_artifact_primary`.\n\n" + "heuristic, and you can set it later via `update_artifact`.\n\n" "To MODIFY an existing artifact instead of creating a new one, call " "`list_artifacts` first to find it, then `open_artifact(slug)` to get " "the path." @@ -203,15 +204,15 @@ class ToolDef: ) -SET_ARTIFACT_PRIMARY_TOOL = ToolDef( - name="set_artifact_primary", +UPDATE_ARTIFACT_METADATA_TOOL = ToolDef( + name="update_artifact", description=( - "Update the primary-file pointer on an existing artifact. Call this " - "when you created the artifact without a `primary` and now know what " - "it should be, or when the entry-point file's name changed. 
Pass an " - "empty string or omit `primary` to clear (the renderer reverts to " - "its heuristic — `index.html` → newest `.html` → newest non-" - "housekeeping file)." + "Update mutable fields on an existing artifact. Pass only the fields you want to change.\n\n" + "- `primary`: relative path of the entry-point file (e.g. \"index.html\"). " + "Pass empty string to clear (renderer reverts to heuristic: " + "`index.html` → newest `.html` → newest non-housekeeping file).\n" + "- `port`: port the backend process is listening on (fullstack-stateful-app only). " + "Set this after the server confirms it is up." ), input_schema={ "type": "object", @@ -222,12 +223,16 @@ class ToolDef: }, "primary": { "type": "string", - "description": "Relative path of the new entry-point file. Empty string to clear.", + "description": "Relative path of the entry-point file. Empty string to clear.", + }, + "port": { + "type": "integer", + "description": "Port number the backend process is listening on.", }, }, "required": ["slug"], }, - handler=handle_set_artifact_primary, + handler=handle_update_artifact_metadata, ) @@ -272,6 +277,60 @@ class ToolDef: ) +LAUNCH_BACKEND_TOOL = ToolDef( + name="launch_backend", + description=( + "Start an artifact's backend script as a standalone subprocess. " + "Picks a free TCP port, runs the script with `--port ` " + "(plus any `extra_args`), waits until the server is reachable, " + "records the port in the artifact's `metadata.json`, and returns " + "`{slug, port, pid, url, log_path}` as JSON.\n\n" + "Runs in a scratchpad named exactly `` (created on first call). " + "If `/requirements.txt` exists, its package lines are " + "installed into that scratchpad's venv before spawn — install output " + "appended to `backend.log`, install failures abort the launch and are " + "returned as an error string. 
Only simple lines are supported " + "(`pkg` / `pkg==1.2`); blank lines, `#` comments, and `-`-prefixed " + "flags (`-r`, `-e`, `--index-url`) are ignored.\n\n" + "Idempotent: a second call with the same slug terminates the " + "previously-launched backend before starting a new one.\n\n" + "Requirements on the backend script:\n" + "- MUST accept `--port` via argparse (or equivalent) and bind to it.\n" + "- MUST be reachable at `health_path` (default `/`) within " + "`health_timeout` seconds.\n" + "- stdout/stderr stream to `/backend.log`." + ), + input_schema={ + "type": "object", + "properties": { + "slug": { + "type": "string", + "description": "Folder slug of the artifact whose backend to launch.", + }, + "path": { + "type": "string", + "description": "Backend script path relative to the artifact folder. Default: \"backend.py\".", + }, + "extra_args": { + "type": "array", + "items": {"type": "string"}, + "description": "Additional CLI arguments appended after `--port `.", + }, + "health_path": { + "type": "string", + "description": "URL path for the readiness probe. Default: \"/\". Any HTTP response (including 4xx) counts as ready.", + }, + "health_timeout": { + "type": "number", + "description": "Seconds to wait for readiness before failing. Default: 10.", + }, + }, + "required": ["slug"], + }, + handler=handle_launch_backend, +) + + RECALL_TOOL = ToolDef( name="recall", description=( diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py index 8ba6946..64e1e43 100644 --- a/anton/core/tools/tool_handlers.py +++ b/anton/core/tools/tool_handlers.py @@ -117,13 +117,12 @@ async def handle_create_artifact(session: "ChatSession", tc_input: dict) -> str: }, indent=2) -async def handle_set_artifact_primary(session: "ChatSession", tc_input: dict) -> str: - """Update or clear the primary-file pointer on an existing artifact. 
+async def handle_update_artifact_metadata(session: "ChatSession", tc_input: dict) -> str: + """Update mutable metadata fields on an existing artifact. - The agent calls this when it created an artifact without a - primary and now knows what it should be, or when the primary - file's name changed. Pass `primary: null` to clear and revert - the renderer to its heuristic. + Only fields present in the input are modified. Supports: + - `primary`: entry-point file path (empty string to clear) + - `port`: backend port number (fullstack-stateful-app only) """ import json @@ -134,17 +133,84 @@ async def handle_set_artifact_primary(session: "ChatSession", tc_input: dict) -> slug = (tc_input.get("slug") or "").strip() if not slug: return "Error: `slug` is required." - raw = tc_input.get("primary") - primary = raw if isinstance(raw, str) else None - artifact = store.set_primary(slug, primary) + + kwargs: dict = {} + if "primary" in tc_input: + kwargs["primary"] = tc_input["primary"] + if "port" in tc_input: + kwargs["port"] = tc_input["port"] + + artifact = store.update(slug, **kwargs) if artifact is None: return f"Error: no artifact found for slug `{slug}`." return json.dumps({ "slug": artifact.slug, "primary": artifact.primary, + "port": artifact.port, }, indent=2) +async def handle_launch_backend(session: "ChatSession", tc_input: dict) -> str: + """Launch the artifact's backend script as a standalone subprocess. + + Thin wrapper over `launch_artifact_backend`: validates tool-call shape, + resolves the artifact folder via the session's ArtifactStore, hands + the session's scratchpad pool + tracked-backends dict to the helper, + then persists the discovered port into metadata.json. + + The actual subprocess lifecycle (free-port discovery, dependency + install, health probe, idempotent reap) lives in + `anton.core.artifacts.backend_launcher.launch_artifact_backend` so + other entry points (e.g. cowork's auto-relaunch) can reuse it. 
+ """ + import json + + from anton.core.artifacts.backend_launcher import launch_artifact_backend + + store = _artifact_store(session) + if store is None: + return "Artifact store unavailable (no workspace bound to this session)." + + slug = (tc_input.get("slug") or "").strip() + if not slug: + return "Error: `slug` is required." + artifact = store.open(slug) + if artifact is None: + return f"Error: no artifact found for slug `{slug}`." + + rel_path = (tc_input.get("path") or "backend.py").strip() + extra_args = tc_input.get("extra_args") or [] + health_path = tc_input.get("health_path") or "/" + try: + health_timeout = float(tc_input.get("health_timeout", 10)) + except (TypeError, ValueError): + return "Error: `health_timeout` must be a number." + + tracked = getattr(session, "_tracked_backends", None) + if tracked is None: + tracked = {} + session._tracked_backends = tracked + + result = await launch_artifact_backend( + slug=slug, + artifact_folder=store.folder_for(slug), + scratchpad_pool=session._scratchpads, + tracked_backends=tracked, + path=rel_path, + extra_args=extra_args, + health_path=health_path, + health_timeout=health_timeout, + ) + if isinstance(result, str): + return result + + store.update(slug, port=result["port"]) + return json.dumps( + {k: v for k, v in result.items() if k != "proc"}, + indent=2, + ) + + async def handle_list_artifacts(session: "ChatSession", tc_input: dict) -> str: """List every artifact in the workspace, newest first. 
def test_update_primary_clears_with_none(store: ArtifactStore):
    """Both ``None`` and a blank string reset ``primary`` to ``None``."""
    # One fresh artifact per "clear" spelling; the blank string is treated
    # the same way as an explicit None.
    for name, blank in (("X", None), ("Y", " ")):
        seeded = store.create(
            name=name,
            description="x",
            type="html-app",
            primary="dashboard.html",
        )
        result = store.update(seeded.slug, primary=blank)
        assert result.primary is None


def test_update_port(store: ArtifactStore):
    """A port update is returned and survives a reload from the store."""
    slug = store.create(
        name="App",
        description="x",
        type="fullstack-stateful-app",
    ).slug
    changed = store.update(slug, port=8080)
    assert changed is not None
    assert changed.port == 8080
    # Re-open the artifact to confirm the value was persisted.
    assert store.open(slug).port == 8080


def test_update_primary_and_port_together(store: ArtifactStore):
    """A single update call may set ``primary`` and ``port`` at once."""
    created = store.create(
        name="App",
        description="x",
        type="fullstack-stateful-app",
    )
    changed = store.update(created.slug, primary="index.html", port=5000)
    assert changed.primary == "index.html"
    assert changed.port == 5000


def test_update_omitted_field_unchanged(store: ArtifactStore):
    """Fields that are not passed to ``update`` keep their current value."""
    created = store.create(
        name="App",
        description="x",
        type="fullstack-stateful-app",
        primary="index.html",
    )
    # Only the port is supplied, so primary has to stay as created.
    changed = store.update(created.slug, port=3000)
    assert changed.primary == "index.html"
    assert changed.port == 3000


def test_update_returns_none_for_missing_slug(store: ArtifactStore):
    """Updating a slug that does not exist yields ``None``."""
    outcome = store.update("does-not-exist", primary="main.html")
    assert outcome is None
system_prompt_context=SystemPromptContext(runtime_context="test"), proactive_dashboards=False, + output_dir="", skill_store=store_with_one_skill, ) assert "## Procedural memory" in prompt @@ -80,6 +81,7 @@ def test_section_omitted_when_no_store(self): current_datetime="2026-04-10", system_prompt_context=SystemPromptContext(runtime_context="test"), proactive_dashboards=False, + output_dir="", skill_store=None, ) assert "Procedural memory" not in prompt diff --git a/tests/test_skills_e2e.py b/tests/test_skills_e2e.py index 3c673ac..f4bfb4a 100644 --- a/tests/test_skills_e2e.py +++ b/tests/test_skills_e2e.py @@ -125,6 +125,7 @@ async def test_full_skills_loop(console, store_root): current_datetime="2026-04-10T13:00:00+00:00", system_prompt_context=SystemPromptContext(runtime_context="test"), proactive_dashboards=False, + output_dir="", skill_store=fresh_store, ) assert "## Procedural memory" in prompt