Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 65 additions & 22 deletions sentience/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
from .protocols import AsyncBrowserProtocol, BrowserProtocol
from .snapshot import snapshot, snapshot_async
from .snapshot_diff import SnapshotDiff
from .trace_event_builder import TraceEventBuilder

if TYPE_CHECKING:
Expand Down Expand Up @@ -135,6 +136,9 @@ def __init__(
# Step counter for tracing
self._step_count = 0

# Previous snapshot for diff detection
self._previous_snapshot: Snapshot | None = None

def _compute_hash(self, text: str) -> str:
"""Compute SHA256 hash of text."""
return hashlib.sha256(text.encode("utf-8")).hexdigest()
Expand Down Expand Up @@ -235,13 +239,31 @@ def act( # noqa: C901
if snap.status != "success":
raise RuntimeError(f"Snapshot failed: {snap.error}")

# Compute diff_status by comparing with previous snapshot
elements_with_diff = SnapshotDiff.compute_diff_status(snap, self._previous_snapshot)

# Create snapshot with diff_status populated
snap_with_diff = Snapshot(
status=snap.status,
timestamp=snap.timestamp,
url=snap.url,
viewport=snap.viewport,
elements=elements_with_diff,
screenshot=snap.screenshot,
screenshot_format=snap.screenshot_format,
error=snap.error,
)

# Update previous snapshot for next comparison
self._previous_snapshot = snap

# Apply element filtering based on goal
filtered_elements = self.filter_elements(snap, goal)
filtered_elements = self.filter_elements(snap_with_diff, goal)

# Emit snapshot trace event if tracer is enabled
if self.tracer:
# Build snapshot event data
snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
# Build snapshot event data (use snap_with_diff to include diff_status)
snapshot_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)

# Always include screenshot in trace event for studio viewer compatibility
# CloudTraceSink will extract and upload screenshots separately, then remove
Expand Down Expand Up @@ -271,16 +293,16 @@ def act( # noqa: C901
step_id=step_id,
)

# Create filtered snapshot
# Create filtered snapshot (use snap_with_diff to preserve metadata)
filtered_snap = Snapshot(
status=snap.status,
timestamp=snap.timestamp,
url=snap.url,
viewport=snap.viewport,
status=snap_with_diff.status,
timestamp=snap_with_diff.timestamp,
url=snap_with_diff.url,
viewport=snap_with_diff.viewport,
elements=filtered_elements,
screenshot=snap.screenshot,
screenshot_format=snap.screenshot_format,
error=snap.error,
screenshot=snap_with_diff.screenshot,
screenshot_format=snap_with_diff.screenshot_format,
error=snap_with_diff.error,
)

# 2. GROUND: Format elements for LLM context
Expand Down Expand Up @@ -673,6 +695,9 @@ def __init__(
# Step counter for tracing
self._step_count = 0

# Previous snapshot for diff detection
self._previous_snapshot: Snapshot | None = None

def _compute_hash(self, text: str) -> str:
"""Compute SHA256 hash of text."""
return hashlib.sha256(text.encode("utf-8")).hexdigest()
Expand Down Expand Up @@ -773,13 +798,31 @@ async def act( # noqa: C901
if snap.status != "success":
raise RuntimeError(f"Snapshot failed: {snap.error}")

# Compute diff_status by comparing with previous snapshot
elements_with_diff = SnapshotDiff.compute_diff_status(snap, self._previous_snapshot)

# Create snapshot with diff_status populated
snap_with_diff = Snapshot(
status=snap.status,
timestamp=snap.timestamp,
url=snap.url,
viewport=snap.viewport,
elements=elements_with_diff,
screenshot=snap.screenshot,
screenshot_format=snap.screenshot_format,
error=snap.error,
)

# Update previous snapshot for next comparison
self._previous_snapshot = snap

# Apply element filtering based on goal
filtered_elements = self.filter_elements(snap, goal)
filtered_elements = self.filter_elements(snap_with_diff, goal)

# Emit snapshot trace event if tracer is enabled
if self.tracer:
# Build snapshot event data
snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
# Build snapshot event data (use snap_with_diff to include diff_status)
snapshot_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)

# Always include screenshot in trace event for studio viewer compatibility
# CloudTraceSink will extract and upload screenshots separately, then remove
Expand Down Expand Up @@ -809,16 +852,16 @@ async def act( # noqa: C901
step_id=step_id,
)

# Create filtered snapshot
# Create filtered snapshot (use snap_with_diff to preserve metadata)
filtered_snap = Snapshot(
status=snap.status,
timestamp=snap.timestamp,
url=snap.url,
viewport=snap.viewport,
status=snap_with_diff.status,
timestamp=snap_with_diff.timestamp,
url=snap_with_diff.url,
viewport=snap_with_diff.viewport,
elements=filtered_elements,
screenshot=snap.screenshot,
screenshot_format=snap.screenshot_format,
error=snap.error,
screenshot=snap_with_diff.screenshot,
screenshot_format=snap_with_diff.screenshot_format,
error=snap_with_diff.error,
)

# 2. GROUND: Format elements for LLM context
Expand Down
3 changes: 3 additions & 0 deletions sentience/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ class Element(BaseModel):
ml_probability: float | None = None # Confidence score from ONNX model (0.0 - 1.0)
ml_score: float | None = None # Raw logit score (optional, for debugging)

# Diff status for frontend Diff Overlay feature
diff_status: Literal["ADDED", "REMOVED", "MODIFIED", "MOVED"] | None = None


class Snapshot(BaseModel):
"""Snapshot response from extension"""
Expand Down
141 changes: 141 additions & 0 deletions sentience/snapshot_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
Snapshot comparison utilities for diff_status detection.

Implements change detection logic for the Diff Overlay feature.
"""

from typing import Literal

from .models import Element, Snapshot


class SnapshotDiff:
    """
    Utility for comparing snapshots and computing diff_status for elements.

    Elements are matched between snapshots by their ``id``. Implements the
    logic described in DIFF_STATUS_GAP_ANALYSIS.md:
    - ADDED: Element exists in current but not in previous
    - REMOVED: Element existed in previous but not in current
    - MODIFIED: Element exists in both and its content changed (a content
      change takes precedence over a simultaneous position change)
    - MOVED: Element exists in both and only its bounding box changed
    - None: Element exists in both and is unchanged
    """

    @staticmethod
    def _has_bbox_changed(el1: Element, el2: Element, threshold: float = 5.0) -> bool:
        """
        Check if element's bounding box has changed significantly.

        Args:
            el1: First element
            el2: Second element
            threshold: Change threshold in pixels (default: 5.0). A delta that
                is exactly equal to the threshold does not count as a change.

        Returns:
            True if x, y, width or height differs by more than ``threshold``
        """
        box1, box2 = el1.bbox, el2.bbox
        return (
            abs(box1.x - box2.x) > threshold
            or abs(box1.y - box2.y) > threshold
            or abs(box1.width - box2.width) > threshold
            or abs(box1.height - box2.height) > threshold
        )

    @staticmethod
    def _has_content_changed(el1: Element, el2: Element) -> bool:
        """
        Check if element's content has changed.

        Args:
            el1: First element
            el2: Second element

        Returns:
            True if text, role, or the ``is_primary`` / ``is_clickable``
            visual cues differ between the two elements
        """
        return (
            el1.text != el2.text
            or el1.role != el2.role
            or el1.visual_cues.is_primary != el2.visual_cues.is_primary
            or el1.visual_cues.is_clickable != el2.visual_cues.is_clickable
        )

    @staticmethod
    def compute_diff_status(
        current: Snapshot,
        previous: Snapshot | None,
        *,
        bbox_threshold: float = 5.0,
    ) -> list[Element]:
        """
        Compare current snapshot with previous and set diff_status on elements.

        The input snapshots and their elements are never mutated; every
        returned element is a copy with ``diff_status`` overridden.

        Args:
            current: Current snapshot
            previous: Previous snapshot (None if this is the first snapshot)
            bbox_threshold: Pixel threshold passed to the bounding-box
                comparison when deciding whether an element MOVED
                (default: 5.0)

        Returns:
            Copies of all current elements, in their original order, followed
            by copies of the elements that only exist in ``previous`` marked
            REMOVED (in the order they appeared in ``previous``). REMOVED
            entries are not part of the current snapshot, so callers that act
            on the returned elements should skip them.
        """
        # If no previous snapshot, all current elements are ADDED
        if previous is None:
            return [el.model_copy(update={"diff_status": "ADDED"}) for el in current.elements]

        # Lookup structures keyed by element ID
        current_ids = {el.id for el in current.elements}
        previous_by_id = {el.id: el for el in previous.elements}

        result: list[Element] = []

        # Process current elements
        for el in current.elements:
            prev_el = previous_by_id.get(el.id)

            status: Literal["ADDED", "MODIFIED", "MOVED"] | None
            if prev_el is None:
                # Element is new
                status = "ADDED"
            elif SnapshotDiff._has_content_changed(el, prev_el):
                # Content changed (with or without a position change)
                status = "MODIFIED"
            elif SnapshotDiff._has_bbox_changed(el, prev_el, bbox_threshold):
                # Only position/size changed
                status = "MOVED"
            else:
                # No change - frontend expects diff_status to be unset
                status = None

            # model_copy avoids a dump + re-validation round trip per element
            result.append(el.model_copy(update={"diff_status": status}))

        # Process removed elements (existed in previous but not in current).
        # Iterating the dict (insertion-ordered) instead of a set difference
        # keeps the output order deterministic.
        result.extend(
            prev_el.model_copy(update={"diff_status": "REMOVED"})
            for prev_id, prev_el in previous_by_id.items()
            if prev_id not in current_ids
        )

        return result
29 changes: 27 additions & 2 deletions sentience/trace_event_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,34 @@ def build_snapshot_event(
Returns:
Dictionary with snapshot event data
"""
# Normalize importance values to importance_score (0-1 range) per snapshot
# Min-max normalization: (value - min) / (max - min)
importance_values = [el.importance for el in snapshot.elements]

if importance_values:
min_importance = min(importance_values)
max_importance = max(importance_values)
importance_range = max_importance - min_importance
else:
min_importance = 0
max_importance = 0
importance_range = 0

# Include ALL elements with full data for DOM tree display
# Use snap.elements (all elements) not filtered_elements
elements_data = [el.model_dump() for el in snapshot.elements]
# Add importance_score field normalized to [0, 1]
elements_data = []
for el in snapshot.elements:
el_dict = el.model_dump()

# Compute normalized importance_score
if importance_range > 0:
importance_score = (el.importance - min_importance) / importance_range
else:
# If all elements have same importance, set to 0.5
importance_score = 0.5

el_dict["importance_score"] = importance_score
elements_data.append(el_dict)

return {
"url": snapshot.url,
Expand Down
1 change: 0 additions & 1 deletion sentience/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
Provides abstract interface and JSONL implementation for emitting trace events.
"""

import json
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
Expand Down
Loading