PredicateSystems · rcholic · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/sentience/__init__.py b/sentience/__init__.py
@@ -14,9 +14,6 @@
 from .cloud_tracing import CloudTraceSink, SentienceLogger
 from .conversational_agent import ConversationalAgent
 from .expect import expect
-
-# Formatting (v0.12.0+)
-from .formatting import format_snapshot_for_llm
 from .generator import ScriptGenerator, generate
 from .inspector import Inspector, inspect
 from .llm_provider import (
@@ -62,13 +59,17 @@
 from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
 
 # Utilities (v0.12.0+)
+# Import from utils package (re-exports from submodules for backward compatibility)
 from .utils import (
     canonical_snapshot_loose,
     canonical_snapshot_strict,
     compute_snapshot_digests,
     save_storage_state,
     sha256_digest,
 )
+
+# Formatting (v0.12.0+)
+from .utils.formatting import format_snapshot_for_llm
 from .wait import wait_for
 
 __version__ = "0.91.1"

diff --git a/sentience/action_executor.py b/sentience/action_executor.py
@@ -0,0 +1,191 @@
+"""
+Action Executor for Sentience Agent.
+
+Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
+This separates action execution concerns from LLM interaction.
+"""
+
+import re
+from typing import Any
+
+from .actions import click, click_async, press, press_async, type_text, type_text_async
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .models import Snapshot
+
+
+class ActionExecutor:
+    """
+    Executes actions and handles parsing of action command strings.
+
+    This class encapsulates all action execution logic, making it easier to:
+    - Test action execution independently
+    - Add new action types in one place
+    - Handle action parsing errors consistently
+
+    Supported commands (case-insensitive): CLICK(id), TYPE(id, "text"),
+    PRESS("key") and FINISH().
+
+    The sync and async entry points share one parser (``_parse``) and one
+    result builder (``_report``), so both accept exactly the same grammar.
+    """
+
+    # Action grammar, compiled once. Patterns are applied with match(), so text
+    # after the closing parenthesis (e.g. an LLM's explanation) is ignored; the
+    # leading \s* tolerates whitespace or newlines before the command.
+    _CLICK_RE = re.compile(r"\s*CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
+    # The closing quote must be the same character as the opening one (\2) and
+    # be followed by ")". The typed text may therefore contain the other quote
+    # character, e.g. TYPE(3, "don't"), and (via DOTALL) line breaks.
+    _TYPE_RE = re.compile(
+        r'\s*TYPE\s*\(\s*(\d+)\s*,\s*(["\'])(.*?)\2\s*\)',
+        re.IGNORECASE | re.DOTALL,
+    )
+    _PRESS_RE = re.compile(r'\s*PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
+    _FINISH_RE = re.compile(r"\s*FINISH\s*\(\s*\)", re.IGNORECASE)
+
+    def __init__(self, browser: SentienceBrowser | AsyncSentienceBrowser):
+        """
+        Initialize action executor.
+
+        Args:
+            browser: SentienceBrowser or AsyncSentienceBrowser instance
+        """
+        self.browser = browser
+        # Decided once; selects which of execute()/execute_async() may be used.
+        self._is_async = isinstance(browser, AsyncSentienceBrowser)
+
+    @classmethod
+    def _parse(cls, action_str: str) -> tuple[str, dict[str, Any]]:
+        """
+        Turn an action string into ``(action, fields)`` without executing it.
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)")
+
+        Returns:
+            The action name ("click", "type", "press" or "finish") and the
+            parsed arguments, keyed by their result-dictionary names and
+            ordered as the SDK call expects them.
+
+        Raises:
+            ValueError: If action format is unknown
+        """
+        if match := cls._CLICK_RE.match(action_str):
+            return "click", {"element_id": int(match.group(1))}
+        if match := cls._TYPE_RE.match(action_str):
+            # group(2) is the quote character; group(3) is the text itself.
+            return "type", {"element_id": int(match.group(1)), "text": match.group(3)}
+        if match := cls._PRESS_RE.match(action_str):
+            return "press", {"key": match.group(1)}
+        if cls._FINISH_RE.match(action_str):
+            return "finish", {}
+        raise ValueError(
+            f"Unknown action format: {action_str}\n"
+            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
+        )
+
+    @staticmethod
+    def _report(action: str, fields: dict[str, Any], result: Any) -> dict[str, Any]:
+        """
+        Build the result dictionary for a parsed action.
+
+        Args:
+            action: Action name returned by ``_parse``
+            fields: Parsed arguments returned by ``_parse``
+            result: Object returned by the SDK call (ignored for "finish")
+
+        Returns:
+            Execution result dictionary (see execute() for the keys)
+        """
+        if action == "finish":
+            # FINISH makes no SDK call, so there is no result to read from.
+            return {
+                "success": True,
+                "action": "finish",
+                "message": "Task marked as complete",
+            }
+        report = {
+            "success": result.success,
+            "action": action,
+            **fields,
+            "outcome": result.outcome,
+        }
+        if action == "click":
+            # url_changed is reported for click actions only.
+            report["url_changed"] = result.url_changed
+        return report
+
+    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (synchronous).
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Execution result dictionary with keys:
+            - success: bool
+            - action: str (e.g., "click", "type", "press", "finish")
+            - element_id: Optional[int] (for click/type actions)
+            - text: Optional[str] (for type actions)
+            - key: Optional[str] (for press actions)
+            - outcome: Optional[str] (action outcome)
+            - url_changed: Optional[bool] (for click actions)
+            - message: Optional[str] (for finish action)
+            Keys marked Optional appear only for the actions noted; no "error" key is set here.
+
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on async browser (use execute_async instead)
+        """
+        if self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
+            )
+
+        # Parsing is shared with execute_async(); only the SDK functions differ.
+        action, fields = self._parse(action_str)
+        result = None
+        if action != "finish":
+            sdk_calls = {
+                "click": click,
+                "type": type_text,
+                "press": press,
+            }
+            # fields keeps SDK argument order (element_id before text).
+            result = sdk_calls[action](self.browser, *fields.values())  # type: ignore
+        return self._report(action, fields, result)
+
+    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (asynchronous).
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Execution result dictionary (same format as execute())
+
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on sync browser (use execute() instead)
+        """
+        if not self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
+            )
+
+        # Parsing is shared with execute(); only the SDK functions differ.
+        action, fields = self._parse(action_str)
+        result = None
+        if action != "finish":
+            sdk_calls = {
+                "click": click_async,
+                "type": type_text_async,
+                "press": press_async,
+            }
+            # fields keeps SDK argument order (element_id before text).
+            result = await sdk_calls[action](self.browser, *fields.values())  # type: ignore
+        return self._report(action, fields, result)