From a8bb166fee7b0284b319b31acb3a9f1a6757a6cd Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 20:33:03 -0800 Subject: [PATCH 1/3] P7: runtime safety net testing --- .../test_agent_runtime_regression_safety.py | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 tests/unit/test_agent_runtime_regression_safety.py diff --git a/tests/unit/test_agent_runtime_regression_safety.py b/tests/unit/test_agent_runtime_regression_safety.py new file mode 100644 index 0000000..f807c1e --- /dev/null +++ b/tests/unit/test_agent_runtime_regression_safety.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from sentience.agent_runtime import AgentRuntime +from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues +from sentience.verification import AssertContext, AssertOutcome, is_checked, is_disabled, is_enabled, value_contains + + +class MockBackend: + async def screenshot_png(self) -> bytes: + return b"" + + +class MockTracer: + def __init__(self) -> None: + self.events: list[dict] = [] + + def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None: + self.events.append({"type": event_type, "data": data, "step_id": step_id}) + + +def make_element( + element_id: int, + *, + role: str, + text: str | None, + disabled: bool | None = None, + checked: bool | None = None, + value: str | None = None, + input_type: str | None = None, +) -> Element: + return Element( + id=element_id, + role=role, + text=text, + importance=10, + bbox=BBox(x=0, y=0, width=100, height=40), + visual_cues=VisualCues(is_primary=False, is_clickable=True, background_color_name=None), + in_viewport=True, + is_occluded=False, + disabled=disabled, + checked=checked, + value=value, + input_type=input_type, + ) + + +def make_snapshot(elements: list[Element], url: str) -> Snapshot: + return Snapshot( + status="success", + url=url, + elements=elements, + viewport=Viewport(width=1280, height=720), + ) + + +def test_v1_state_assertions_enabled_disabled_checked_value() -> None: + runtime = AgentRuntime(backend=MockBackend(), tracer=MockTracer()) + runtime.begin_step(goal="Test") + + elements = [ + make_element(1, role="button", text="Submit", disabled=False), + make_element(2, role="checkbox", text=None, checked=True), + make_element(3, role="textbox", text=None, value="hello", input_type="text"), + make_element(4, role="button", text="Disabled", disabled=True), + ] + runtime.last_snapshot = make_snapshot(elements, url="https://example.com") + + assert runtime.assert_(is_enabled("text~'Submit'"), label="enabled") is True + assert runtime.assert_(is_disabled("text~'Disabled'"), label="disabled") is True + assert runtime.assert_(is_checked("role=checkbox"), label="checked") is True + assert runtime.assert_(value_contains("role=textbox", "hello"), label="value") is True + + +@pytest.mark.asyncio +async def test_eventually_retry_loop_succeeds() -> None: + tracer = MockTracer() + runtime = AgentRuntime(backend=MockBackend(), tracer=tracer) + runtime.begin_step(goal="Test") + + snaps = [ + make_snapshot([], url="https://example.com"), + make_snapshot([], url="https://example.com"), + make_snapshot([], url="https://example.com/done"), + ] + + async def fake_snapshot(**_kwargs): + runtime.last_snapshot = snaps.pop(0) + return runtime.last_snapshot + + runtime.snapshot = AsyncMock(side_effect=fake_snapshot) # type: ignore[method-assign] + + def pred(ctx: AssertContext) -> AssertOutcome: + ok = (ctx.url or "").endswith("/done") + return AssertOutcome(passed=ok, reason="" if ok else "not done", details={}) + + ok = await runtime.check(pred, label="eventually_done").eventually(timeout_s=2.0, poll_s=0.0) + assert ok is True + + +@pytest.mark.asyncio +async def test_min_confidence_snapshot_exhausted() -> None: + tracer = MockTracer() + runtime = AgentRuntime(backend=MockBackend(), tracer=tracer) + runtime.begin_step(goal="Test") + + low_diag = MagicMock() + low_diag.confidence = 0.1 + low_diag.model_dump = lambda: {"confidence": 0.1} + + snaps = [ + MagicMock(url="https://example.com", elements=[], diagnostics=low_diag), + MagicMock(url="https://example.com", elements=[], diagnostics=low_diag), + ] + + async def fake_snapshot(**_kwargs): + runtime.last_snapshot = snaps.pop(0) + return runtime.last_snapshot + + runtime.snapshot = AsyncMock(side_effect=fake_snapshot) # type: ignore[method-assign] + + def pred(_ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=True, reason="would pass", details={}) + + ok = await runtime.check(pred, label="min_confidence_gate").eventually( + timeout_s=2.0, + poll_s=0.0, + min_confidence=0.7, + max_snapshot_attempts=2, + ) + assert ok is False + details = runtime._assertions_this_step[0]["details"] + assert details["reason_code"] == "snapshot_exhausted" + + +@pytest.mark.asyncio +async def test_golden_flow_same_snapshots_actions_no_captcha() -> None: + tracer = MockTracer() + runtime = AgentRuntime(backend=MockBackend(), tracer=tracer) + runtime.begin_step(goal="Test") + + class FakeActionExecutor: + def __init__(self) -> None: + self.actions: list[str] = [] + + def execute(self, action: str) -> dict: + self.actions.append(action) + return {"success": True} + + executor = FakeActionExecutor() + executor.execute("CLICK(1)") + executor.execute('TYPE(2, "hello")') + assert executor.actions == ["CLICK(1)", 'TYPE(2, "hello")'] + + snaps = [ + make_snapshot([], url="https://example.com"), + make_snapshot([], url="https://example.com/after"), + make_snapshot([], url="https://example.com/done"), + ] + + async def fake_snapshot(**_kwargs): + runtime.last_snapshot = snaps.pop(0) + return runtime.last_snapshot + + runtime.snapshot = AsyncMock(side_effect=fake_snapshot) # type: ignore[method-assign] + + def pred(ctx: AssertContext) -> AssertOutcome: + ok = (ctx.url or "").endswith("/done") + return AssertOutcome(passed=ok, reason="" if ok else "not done", details={}) + + ok = await runtime.check(pred, label="golden_flow").eventually(timeout_s=2.0, poll_s=0.0) + assert ok is True From 2a8b01627586977d3372a2462292af47c41ae530 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 20:48:53 -0800 Subject: [PATCH 2/3] implement policy support for captcha handler with examples --- README.md | 27 +++- examples/agent_runtime_captcha_strategies.py | 53 ++++++++ sentience/__init__.py | 2 + sentience/agent_runtime.py | 124 ++++++++++++++++++ sentience/captcha.py | 53 ++++++++ sentience/captcha_strategies.py | 67 ++++++++++ .../test_agent_runtime_regression_safety.py | 9 +- 7 files changed, 333 insertions(+), 2 deletions(-) create mode 100644 examples/agent_runtime_captcha_strategies.py create mode 100644 sentience/captcha.py create mode 100644 sentience/captcha_strategies.py diff --git a/README.md b/README.md index 66e1a67..f0eab6d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify brow ```python import asyncio -from sentience import AsyncSentienceBrowser, AgentRuntime +from sentience import AsyncSentienceBrowser, AgentRuntime, CaptchaOptions, HumanHandoffSolver from sentience.verification import ( url_contains, exists, @@ -80,6 +80,11 @@ async def main(): ).eventually(timeout_s=10.0, poll_s=0.25, min_confidence=0.7, max_snapshot_attempts=3) print("eventually() result:", ok) + # CAPTCHA handling (detection + handoff + verify) + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=HumanHandoffSolver()) + ) + # Check task completion if runtime.assert_done(exists("text~'Example'"), label="task_complete"): print("✅ Task completed!") @@ -89,6 +94,26 @@ async def main(): asyncio.run(main()) ``` +#### CAPTCHA strategies (Batteries Included) + +```python +from sentience import CaptchaOptions, ExternalSolver, HumanHandoffSolver, VisionSolver + +# Human-in-loop +runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=HumanHandoffSolver())) + +# Vision verification only +runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=VisionSolver())) + +# External system/webhook +runtime.set_captcha_options( + CaptchaOptions( + policy="callback", + handler=ExternalSolver(lambda ctx: notify_webhook(ctx)), + ) +) +``` + ### Failure Artifact Buffer (Phase 1) Capture a short ring buffer of screenshots and persist them when a required assertion fails. diff --git a/examples/agent_runtime_captcha_strategies.py b/examples/agent_runtime_captcha_strategies.py new file mode 100644 index 0000000..932a0a5 --- /dev/null +++ b/examples/agent_runtime_captcha_strategies.py @@ -0,0 +1,53 @@ +import asyncio +import os + +from sentience import ( + AgentRuntime, + AsyncSentienceBrowser, + CaptchaOptions, + ExternalSolver, + HumanHandoffSolver, + VisionSolver, +) +from sentience.tracing import JsonlTraceSink, Tracer + + +async def notify_webhook(ctx) -> None: + # Example hook: send context to your system (no solver logic in Sentience). + # Replace with your own client / queue / webhook call. + print(f"[captcha] external resolver notified: url={ctx.url} run_id={ctx.run_id}") + + +async def main() -> None: + tracer = Tracer(run_id="captcha-demo", sink=JsonlTraceSink("trace.jsonl")) + + async with AsyncSentienceBrowser() as browser: + page = await browser.new_page() + runtime = await AgentRuntime.from_sentience_browser( + browser=browser, + page=page, + tracer=tracer, + ) + + # Option 1: Human-in-loop + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=HumanHandoffSolver()) + ) + + # Option 2: Vision-only verification (no actions) + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=VisionSolver()) + ) + + # Option 3: External resolver orchestration + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=ExternalSolver(lambda ctx: notify_webhook(ctx))) + ) + + await page.goto(os.environ.get("CAPTCHA_TEST_URL", "https://example.com")) + runtime.begin_step("Captcha-aware snapshot") + await runtime.snapshot() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/sentience/__init__.py b/sentience/__init__.py index 4f8c65d..22ddc06 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -39,6 +39,8 @@ # Agent Layer (Phase 1 & 2) from .base_agent import BaseAgent from .browser import SentienceBrowser +from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution +from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver # Tracing (v0.12.0+) from .cloud_tracing import CloudTraceSink, SentienceLogger diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 668ed58..ee064d1 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -70,6 +70,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any +from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions from .models import Snapshot, SnapshotOptions from .verification import AssertContext, AssertOutcome, Predicate @@ -153,6 +154,10 @@ def __init__( self._task_done: bool = False self._task_done_label: str | None = None + # CAPTCHA handling (optional, disabled by default) + self._captcha_options: CaptchaOptions | None = None + self._captcha_retry_count: int = 0 + @classmethod async def from_sentience_browser( cls, @@ -248,13 +253,132 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: from .backends.snapshot import snapshot as backend_snapshot # Merge default options with call-specific kwargs + skip_captcha_handling = bool(kwargs.pop("_skip_captcha_handling", False)) options_dict = self._snapshot_options.model_dump(exclude_none=True) options_dict.update(kwargs) options = SnapshotOptions(**options_dict) self.last_snapshot = await backend_snapshot(self.backend, options=options) + if not skip_captcha_handling: + await self._handle_captcha_if_needed(self.last_snapshot, source="gateway") return self.last_snapshot + def set_captcha_options(self, options: CaptchaOptions) -> None: + """ + Configure CAPTCHA handling (disabled by default unless set). + """ + self._captcha_options = options + self._captcha_retry_count = 0 + + def _is_captcha_detected(self, snapshot: Snapshot) -> bool: + if not self._captcha_options: + return False + captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None + if not captcha or not getattr(captcha, "detected", False): + return False + confidence = getattr(captcha, "confidence", 0.0) + return confidence >= self._captcha_options.min_confidence + + def _build_captcha_context(self, snapshot: Snapshot, source: str) -> CaptchaContext: + captcha = getattr(snapshot.diagnostics, "captcha", None) + return CaptchaContext( + run_id=self.tracer.run_id, + step_index=self.step_index, + url=snapshot.url, + source=source, # type: ignore[arg-type] + captcha=captcha, + ) + + def _emit_captcha_event(self, reason_code: str, details: dict[str, Any] | None = None) -> None: + payload = { + "kind": "captcha", + "passed": False, + "label": reason_code, + "details": {"reason_code": reason_code, **(details or {})}, + } + self.tracer.emit("verification", data=payload, step_id=self.step_id) + + async def _handle_captcha_if_needed(self, snapshot: Snapshot, source: str) -> None: + if not self._captcha_options: + return + if not self._is_captcha_detected(snapshot): + return + + captcha = getattr(snapshot.diagnostics, "captcha", None) + self._emit_captcha_event( + "captcha_detected", + {"captcha": getattr(captcha, "model_dump", lambda: captcha)()}, + ) + + resolution: CaptchaResolution + if self._captcha_options.policy == "callback": + if not self._captcha_options.handler: + self._emit_captcha_event("captcha_handler_error") + raise CaptchaHandlingError( + "captcha_handler_error", + 'Captcha handler is required for policy="callback".', + ) + try: + resolution = await self._captcha_options.handler( + self._build_captcha_context(snapshot, source) + ) + except Exception as exc: # pragma: no cover - defensive + self._emit_captcha_event("captcha_handler_error", {"error": str(exc)}) + raise CaptchaHandlingError( + "captcha_handler_error", "Captcha handler failed." + ) from exc + else: + resolution = CaptchaResolution(action="abort") + + await self._apply_captcha_resolution(resolution, snapshot, source) + + async def _apply_captcha_resolution( + self, + resolution: CaptchaResolution, + snapshot: Snapshot, + source: str, + ) -> None: + if resolution.action == "abort": + self._emit_captcha_event("captcha_policy_abort", {"message": resolution.message}) + raise CaptchaHandlingError( + "captcha_policy_abort", + resolution.message or "Captcha detected. Aborting per policy.", + ) + + if resolution.action == "retry_new_session": + self._captcha_retry_count += 1 + self._emit_captcha_event("captcha_retry_new_session") + if self._captcha_retry_count > self._captcha_options.max_retries_new_session: + self._emit_captcha_event("captcha_retry_exhausted") + raise CaptchaHandlingError( + "captcha_retry_exhausted", + "Captcha retry_new_session exhausted.", + ) + if not self._captcha_options.reset_session: + raise CaptchaHandlingError( + "captcha_retry_new_session", + "reset_session callback is required for retry_new_session.", + ) + await self._captcha_options.reset_session() + return + + if resolution.action == "wait_until_cleared": + timeout_ms = resolution.timeout_ms or self._captcha_options.timeout_ms + poll_ms = resolution.poll_ms or self._captcha_options.poll_ms + await self._wait_until_cleared(timeout_ms=timeout_ms, poll_ms=poll_ms, source=source) + self._emit_captcha_event("captcha_resumed") + + async def _wait_until_cleared(self, *, timeout_ms: int, poll_ms: int, source: str) -> None: + deadline = time.time() + timeout_ms / 1000.0 + while time.time() <= deadline: + await asyncio.sleep(poll_ms / 1000.0) + snap = await self.snapshot(_skip_captcha_handling=True) + if not self._is_captcha_detected(snap): + self._emit_captcha_event("captcha_cleared", {"source": source}) + return + self._emit_captcha_event("captcha_wait_timeout", {"timeout_ms": timeout_ms}) + raise CaptchaHandlingError("captcha_wait_timeout", "Captcha wait_until_cleared timed out.") + async def enable_failure_artifacts( self, options: FailureArtifactsOptions | None = None, diff --git a/sentience/captcha.py b/sentience/captcha.py new file mode 100644 index 0000000..6bdc68c --- /dev/null +++ b/sentience/captcha.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Awaitable, Callable, Literal, Optional + +from .models import CaptchaDiagnostics + +CaptchaPolicy = Literal["abort", "callback"] +CaptchaAction = Literal["abort", "retry_new_session", "wait_until_cleared"] +CaptchaSource = Literal["extension", "gateway", "runtime"] + + +@dataclass +class CaptchaContext: + run_id: str + step_index: int + url: str + source: CaptchaSource + captcha: CaptchaDiagnostics + screenshot_path: Optional[str] = None + frames_dir: Optional[str] = None + snapshot_path: Optional[str] = None + live_session_url: Optional[str] = None + meta: Optional[dict[str, str]] = None + + +@dataclass +class CaptchaResolution: + action: CaptchaAction + message: Optional[str] = None + handled_by: Optional[Literal["human", "customer_system", "unknown"]] = None + timeout_ms: Optional[int] = None + poll_ms: Optional[int] = None + + +CaptchaHandler = Callable[[CaptchaContext], CaptchaResolution | Awaitable[CaptchaResolution]] + + +@dataclass +class CaptchaOptions: + policy: CaptchaPolicy = "abort" + min_confidence: float = 0.7 + timeout_ms: int = 120_000 + poll_ms: int = 1_000 + max_retries_new_session: int = 1 + handler: Optional[CaptchaHandler] = None + reset_session: Optional[Callable[[], Awaitable[None]]] = None + + +class CaptchaHandlingError(RuntimeError): + def __init__(self, reason_code: str, message: str) -> None: + super().__init__(message) + self.reason_code = reason_code diff --git a/sentience/captcha_strategies.py b/sentience/captcha_strategies.py new file mode 100644 index 0000000..f0a9cb1 --- /dev/null +++ b/sentience/captcha_strategies.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import inspect +from typing import Callable + +from .captcha import CaptchaContext, CaptchaHandler, CaptchaResolution + + +def HumanHandoffSolver( + *, + message: str | None = None, + handled_by: str | None = "human", + timeout_ms: int | None = None, + poll_ms: int | None = None, +) -> CaptchaHandler: + async def _handler(_ctx: CaptchaContext) -> CaptchaResolution: + return CaptchaResolution( + action="wait_until_cleared", + message=message or "Solve CAPTCHA in the live session, then resume.", + handled_by=handled_by, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + return _handler + + +def VisionSolver( + *, + message: str | None = None, + handled_by: str | None = "customer_system", + timeout_ms: int | None = None, + poll_ms: int | None = None, +) -> CaptchaHandler: + async def _handler(_ctx: CaptchaContext) -> CaptchaResolution: + return CaptchaResolution( + action="wait_until_cleared", + message=message or "Waiting for CAPTCHA to clear (vision verification).", + handled_by=handled_by, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + return _handler + + +def ExternalSolver( + resolver: Callable[[CaptchaContext], None | bool | dict], + *, + message: str | None = None, + handled_by: str | None = "customer_system", + timeout_ms: int | None = None, + poll_ms: int | None = None, +) -> CaptchaHandler: + async def _handler(ctx: CaptchaContext) -> CaptchaResolution: + result = resolver(ctx) + if inspect.isawaitable(result): + await result + return CaptchaResolution( + action="wait_until_cleared", + message=message or "External solver invoked; waiting for clearance.", + handled_by=handled_by, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + return _handler diff --git a/tests/unit/test_agent_runtime_regression_safety.py b/tests/unit/test_agent_runtime_regression_safety.py index f807c1e..c82a3fe 100644 --- a/tests/unit/test_agent_runtime_regression_safety.py +++ b/tests/unit/test_agent_runtime_regression_safety.py @@ -6,7 +6,14 @@ from sentience.agent_runtime import AgentRuntime from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues -from sentience.verification import AssertContext, AssertOutcome, is_checked, is_disabled, is_enabled, value_contains +from sentience.verification import ( + AssertContext, + AssertOutcome, + is_checked, + is_disabled, + is_enabled, + value_contains, +) class MockBackend: From 9576f5ecd061e711ea1e15d190829616f47181d9 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 21:16:24 -0800 Subject: [PATCH 3/3] updated trace_v1 schema --- sentience/schemas/trace_v1.json | 42 ++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json index 37c28cb..25c3d20 100644 --- a/sentience/schemas/trace_v1.json +++ b/sentience/schemas/trace_v1.json @@ -73,6 +73,46 @@ "url": {"type": ["string", "null"]}, "element_count": {"type": "integer"}, "timestamp": {"type": ["string", "null"]}, + "diagnostics": { + "type": ["object", "null"], + "properties": { + "confidence": {"type": ["number", "null"]}, + "reasons": {"type": "array", "items": {"type": "string"}}, + "metrics": { + "type": ["object", "null"], + "properties": { + "ready_state": {"type": ["string", "null"]}, + "quiet_ms": {"type": ["number", "null"]}, + "node_count": {"type": ["integer", "null"]}, + "interactive_count": {"type": ["integer", "null"]}, + "raw_elements_count": {"type": ["integer", "null"]} + }, + "additionalProperties": true + }, + "captcha": { + "type": ["object", "null"], + "properties": { + "detected": {"type": "boolean"}, + "provider_hint": { + "type": ["string", "null"], + "enum": ["recaptcha", "hcaptcha", "turnstile", "arkose", "awswaf", "unknown", null] + }, + "confidence": {"type": "number"}, + "evidence": { + "type": "object", + "properties": { + "text_hits": {"type": "array", "items": {"type": "string"}}, + "selector_hits": {"type": "array", "items": {"type": "string"}}, + "iframe_src_hits": {"type": "array", "items": {"type": "string"}}, + "url_hits": {"type": "array", "items": {"type": "string"}} + } + } + }, + "required": ["detected", "confidence", "evidence"] + } + }, + "additionalProperties": true + }, "elements": { "type": "array", "items": { @@ -289,7 +329,7 @@ "passed": {"type": "boolean"}, "kind": { "type": "string", - "enum": ["assert", "task_done"], + "enum": ["assert", "task_done", "captcha"], "description": "Type of verification event" }, "label": {"type": "string", "description": "Human-readable label for the assertion"},