From 1b037575d75831efa704d1c83e6a44ecc44131ab Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 8 Mar 2026 17:38:34 -0400 Subject: [PATCH 1/4] fix: browser capture end-to-end pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs prevented browser events from being captured and parsed: 1. background.js only relayed DOM_EVENT messages but the content script sends USER_EVENT — events were silently dropped. 2. background.js handleSetMode only read message.payload?.mode but the recorder sends flat {mode: "record"} — mode was never set to "record" so the content script never attached record listeners. 3. The BrowserEventType enum used "browser.click" prefix format but the content script sends raw DOM event names ("click", "keydown", etc.). This was an artificial convention introduced during the port from legacy OpenAdapt that was never tested end-to-end. Legacy used raw names throughout. Changes: - background.js: add USER_EVENT relay, fix SET_MODE format handling - browser_events.py: change enum values to raw DOM names matching the content script and legacy OpenAdapt, add BrowserMouseMoveEvent - capture.py: add _parse_element_ref() and rewrite _convert_browser_event() to handle actual content-script message format including the recorder's {"message": } wrapper, add browser_events() and browser_event_count to CaptureSession - cli.py: add --browser-events flag to record, show browser event breakdown in info command - tests: add 15 e2e tests covering both DB roundtrip and raw content-script format parsing Verified with live recording: 84/84 events captured and parsed from Chrome extension on Hacker News. Co-Authored-By: Claude Opus 4.6 --- chrome_extension/background.js | 3 +- openadapt_capture/browser_events.py | 41 +- openadapt_capture/capture.py | 218 ++++++++++ openadapt_capture/cli.py | 17 + tests/test_browser_capture_e2e.py | 594 ++++++++++++++++++++++++++++ 5 files changed, 862 insertions(+), 11 deletions(-) create mode 100644 tests/test_browser_capture_e2e.py diff --git a/chrome_extension/background.js b/chrome_extension/background.js index 404426e..e4a816a 100644 --- a/chrome_extension/background.js +++ b/chrome_extension/background.js @@ -137,7 +137,7 @@ function handleServerMessage(data) { * Handle SET_MODE message from server */ function handleSetMode(message) { - const newMode = message.payload?.mode || 'idle'; + const newMode = message.mode || message.payload?.mode || 'idle'; currentMode = newMode; console.log('[OpenAdapt] Mode set to:', currentMode); @@ -231,6 +231,7 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { const tabId = sender.tab?.id; switch (message.type) { + case 'USER_EVENT': case 'DOM_EVENT': // Add tab ID and relay to server message.tabId = tabId; diff --git a/openadapt_capture/browser_events.py b/openadapt_capture/browser_events.py index 39b3a09..c03630f 100644 --- a/openadapt_capture/browser_events.py +++ b/openadapt_capture/browser_events.py @@ -18,22 +18,27 @@ class BrowserEventType(str, Enum): - """Browser event type identifiers.""" + """Browser event type identifiers. + + Values match the raw DOM event names sent by the Chrome extension + content script (e.g. "click", "keydown"), consistent with legacy OpenAdapt. + """ # User interaction events - CLICK = "browser.click" - KEYDOWN = "browser.keydown" - KEYUP = "browser.keyup" - SCROLL = "browser.scroll" - INPUT = "browser.input" - FOCUS = "browser.focus" - BLUR = "browser.blur" + CLICK = "click" + KEYDOWN = "keydown" + KEYUP = "keyup" + SCROLL = "scroll" + INPUT = "input" + MOUSEMOVE = "mousemove" + FOCUS = "focus" + BLUR = "blur" # Navigation events - NAVIGATE = "browser.navigate" + NAVIGATE = "navigate" # Unknown/generic - UNKNOWN = "browser.unknown" + UNKNOWN = "unknown" class NavigationType(str, Enum): @@ -242,6 +247,21 @@ class BrowserNavigationEvent(BaseBrowserEvent): # ============================================================================= +class BrowserMouseMoveEvent(BaseBrowserEvent): + """Mouse move event in browser.""" + + type: Literal[BrowserEventType.MOUSEMOVE] = BrowserEventType.MOUSEMOVE + + # Coordinates + client_x: float = Field(description="Viewport X") + client_y: float = Field(description="Viewport Y") + screen_x: float = Field(default=0, description="Screen X") + screen_y: float = Field(default=0, description="Screen Y") + + # Target element + element: SemanticElementRef | None = Field(default=None) + + class BrowserFocusEvent(BaseBrowserEvent): """Element focus/blur event in browser.""" @@ -292,5 +312,6 @@ class DOMSnapshot(BaseModel): | BrowserScrollEvent | BrowserInputEvent | BrowserNavigationEvent + | BrowserMouseMoveEvent | BrowserFocusEvent ) diff --git a/openadapt_capture/capture.py b/openadapt_capture/capture.py index 36a175b..893d73d 100644 --- a/openadapt_capture/capture.py +++ b/openadapt_capture/capture.py @@ -9,6 +9,16 @@ from pathlib import Path from typing import TYPE_CHECKING, Iterator +from openadapt_capture.browser_events import ( + BrowserClickEvent, + BrowserEventType, + BrowserFocusEvent, + BrowserInputEvent, + BrowserKeyEvent, + BrowserMouseMoveEvent, + BrowserNavigationEvent, + BrowserScrollEvent, +) from openadapt_capture.events import ( ActionEvent as PydanticActionEvent, ) @@ -27,6 +37,8 @@ if TYPE_CHECKING: from PIL import Image + from openadapt_capture.browser_events import BrowserEvent + def _convert_action_event(db_event) -> PydanticActionEvent | None: """Convert a SQLAlchemy ActionEvent to a Pydantic event. @@ -99,6 +111,191 @@ def _convert_action_event(db_event) -> PydanticActionEvent | None: return None +def _parse_element_ref(raw: dict | None): + """Parse a raw element dict into a SemanticElementRef. + + Handles both the canonical format (from browser_bridge.py tests) and the + raw content-script format which uses different field names. + """ + from openadapt_capture.browser_events import ( + BoundingBox, + ElementState, + SemanticElementRef, + ) + + if not raw or not isinstance(raw, dict): + return None + + bbox_raw = raw.get("bbox", {}) + bbox = BoundingBox( + x=bbox_raw.get("x", 0), + y=bbox_raw.get("y", 0), + width=bbox_raw.get("width", 0), + height=bbox_raw.get("height", 0), + ) + + state_raw = raw.get("state", {}) + state = ElementState( + enabled=state_raw.get("enabled", True), + focused=state_raw.get("focused", False), + visible=state_raw.get("visible", True), + checked=state_raw.get("checked"), + selected=state_raw.get("selected"), + expanded=state_raw.get("expanded"), + value=state_raw.get("value"), + ) if isinstance(state_raw, dict) else ElementState() + + return SemanticElementRef( + role=raw.get("role") or "", + name=raw.get("name") or "", + bbox=bbox, + xpath=raw.get("xpath") or raw.get("dataId") or "", + css_selector=raw.get("cssSelector") or raw.get("css_selector") or "", + state=state, + tag_name=raw.get("tagName") or raw.get("tag_name") or "", + id=raw.get("id"), + class_list=raw.get("classList") or raw.get("class_list") or [], + ) + + +def _convert_browser_event(db_event) -> "BrowserEvent | None": + """Convert a SQLAlchemy BrowserEvent to a typed Pydantic browser event. + + The DB stores browser events as JSON in the `message` field. The recorder + wraps each raw WebSocket message as ``{"message": }``. + + Handles both the canonical ``browser.*`` event-type prefix format (used in + tests and browser_bridge.py) and the raw content-script format which sends + plain event names like ``"click"``, ``"keydown"``, ``"scroll"``. + + Args: + db_event: SQLAlchemy BrowserEvent instance. + + Returns: + Typed browser event or None if parsing fails. + """ + msg = db_event.message + if not isinstance(msg, dict): + return None + + # Unwrap the recorder's {"message": } wrapper + inner = msg.get("message", msg) + if not isinstance(inner, dict): + return None + + # Support both flat (content-script) and payload-wrapped (browser_bridge) formats + payload = inner.get("payload", inner) + + raw_type = payload.get("eventType", inner.get("eventType", "")) + try: + event_type = BrowserEventType(raw_type) + except ValueError: + return None + + timestamp = db_event.timestamp or 0 + url = payload.get("url", inner.get("url", "")) + tab_id = inner.get("tabId", payload.get("tab_id", 0)) + + try: + if event_type == BrowserEventType.CLICK: + elem = _parse_element_ref(payload.get("element")) + if elem is None: + return None + return BrowserClickEvent( + timestamp=timestamp, + url=url, + tab_id=tab_id, + client_x=payload.get("clientX", 0), + client_y=payload.get("clientY", 0), + page_x=payload.get("pageX", payload.get("clientX", 0)), + page_y=payload.get("pageY", payload.get("clientY", 0)), + button=payload.get("button", 0), + click_count=payload.get("clickCount", 1), + element=elem, + ) + elif event_type in (BrowserEventType.KEYDOWN, BrowserEventType.KEYUP): + element = _parse_element_ref(payload.get("element")) + return BrowserKeyEvent( + timestamp=timestamp, + type=event_type, + url=url, + tab_id=tab_id, + key=payload.get("key", ""), + code=payload.get("code", ""), + key_code=payload.get("keyCode", 0), + shift_key=payload.get("shiftKey", False), + ctrl_key=payload.get("ctrlKey", False), + alt_key=payload.get("altKey", False), + meta_key=payload.get("metaKey", False), + element=element, + ) + elif event_type == BrowserEventType.SCROLL: + return BrowserScrollEvent( + timestamp=timestamp, + url=url, + tab_id=tab_id, + scroll_x=payload.get("scrollX", 0), + scroll_y=payload.get("scrollY", 0), + delta_x=payload.get("deltaX", payload.get("scrollDeltaX", 0)), + delta_y=payload.get("deltaY", payload.get("scrollDeltaY", 0)), + ) + elif event_type == BrowserEventType.INPUT: + elem = _parse_element_ref(payload.get("element")) + if elem is None: + return None + return BrowserInputEvent( + timestamp=timestamp, + url=url, + tab_id=tab_id, + input_type=payload.get("inputType", ""), + data=payload.get("data"), + value=payload.get("value", ""), + element=elem, + ) + elif event_type == BrowserEventType.NAVIGATE: + from openadapt_capture.browser_events import NavigationType + + nav_type = payload.get("navigationType", "link") + valid = [e.value for e in NavigationType] + return BrowserNavigationEvent( + timestamp=timestamp, + url=url, + tab_id=tab_id, + previous_url=payload.get("previousUrl", ""), + navigation_type=( + NavigationType(nav_type) + if nav_type in valid + else NavigationType.LINK + ), + ) + elif event_type == BrowserEventType.MOUSEMOVE: + element = _parse_element_ref(payload.get("element")) + return BrowserMouseMoveEvent( + timestamp=timestamp, + url=url, + tab_id=tab_id, + client_x=payload.get("clientX", 0), + client_y=payload.get("clientY", 0), + screen_x=payload.get("screenX", 0), + screen_y=payload.get("screenY", 0), + element=element, + ) + elif event_type in (BrowserEventType.FOCUS, BrowserEventType.BLUR): + elem = _parse_element_ref(payload.get("element")) + if elem is None: + return None + return BrowserFocusEvent( + timestamp=timestamp, + type=event_type, + url=url, + tab_id=tab_id, + element=elem, + ) + except Exception: + pass + return None + + @dataclass class Action: """A processed action event with associated screenshot. @@ -385,6 +582,27 @@ def actions(self, include_moves: bool = False) -> Iterator[Action]: continue yield Action(event=event, _capture=self) + def browser_events(self) -> list["BrowserEvent"]: + """Get all browser events as typed Pydantic models. + + Parses the JSON message field from each stored BrowserEvent into + the appropriate typed event (BrowserClickEvent, BrowserKeyEvent, etc.). + + Returns: + List of typed browser events, ordered by timestamp. + """ + events: list[BrowserEvent] = [] + for db_event in self._recording.browser_events: + parsed = _convert_browser_event(db_event) + if parsed is not None: + events.append(parsed) + return events + + @property + def browser_event_count(self) -> int: + """Number of browser events in this capture.""" + return len(self._recording.browser_events) + def get_frame_at(self, timestamp: float, tolerance: float = 0.5) -> "Image" | None: """Get the screen frame closest to a timestamp. diff --git a/openadapt_capture/cli.py b/openadapt_capture/cli.py index a96d203..cd7e391 100644 --- a/openadapt_capture/cli.py +++ b/openadapt_capture/cli.py @@ -17,6 +17,7 @@ def record( video: bool = True, audio: bool = False, images: bool = False, + browser_events: bool = False, send_profile: bool = False, ) -> None: """Record GUI interactions. @@ -27,6 +28,9 @@ def record( video: Capture video (default: True). audio: Capture audio (default: False). images: Save screenshots as PNGs (default: False). + browser_events: Capture browser DOM events via Chrome extension (default: False). + Requires the openadapt-capture Chrome extension to be installed and + connects via WebSocket on localhost:8765. send_profile: Send profiling data via wormhole after recording (default: False). """ import time @@ -36,6 +40,9 @@ def record( output_dir = str(Path(output_dir).resolve()) print(f"Recording to: {output_dir}") + if browser_events: + print("Browser event capture enabled (WebSocket on localhost:8765)") + print("Make sure the openadapt-capture Chrome extension is installed.") print("Press Ctrl+C or type stop sequence to stop recording...") print() @@ -45,6 +52,7 @@ def record( capture_video=video, capture_audio=audio, capture_images=images, + capture_browser_events=browser_events, send_profile=send_profile, ) as recorder: recorder.wait_for_ready() @@ -129,6 +137,7 @@ def info(capture_dir: str) -> None: # Count events actions = list(capture.actions()) print(f"Actions: {len(actions)}") + print(f"Browser events: {capture.browser_event_count}") # Event type breakdown from collections import Counter @@ -138,6 +147,14 @@ def info(capture_dir: str) -> None: for event_type, count in types.most_common(): print(f" {event_type}: {count}") + # Browser event breakdown + if capture.browser_event_count > 0: + browser_events = capture.browser_events() + btypes = Counter(type(e).__name__ for e in browser_events) + print("Browser event types:") + for btype, count in btypes.most_common(): + print(f" {btype}: {count}") + def transcribe( capture_dir: str, diff --git a/tests/test_browser_capture_e2e.py b/tests/test_browser_capture_e2e.py new file mode 100644 index 0000000..31d2bc4 --- /dev/null +++ b/tests/test_browser_capture_e2e.py @@ -0,0 +1,594 @@ +"""End-to-end tests for browser event capture pipeline. + +Tests the full flow: store browser events in DB → load via CaptureSession → +iterate as typed Pydantic models. +""" + +import json +import time + +import pytest + +from openadapt_capture.browser_events import ( + BrowserClickEvent, + BrowserEventType, + BrowserInputEvent, + BrowserKeyEvent, + BrowserMouseMoveEvent, + BrowserNavigationEvent, + BrowserScrollEvent, +) +from openadapt_capture.capture import CaptureSession +from openadapt_capture.db import create_db +from openadapt_capture.db.crud import insert_browser_event, insert_recording +from openadapt_capture.db.models import Recording + + +def _make_element_payload( + role="button", + name="Submit", + tag="button", + xpath="/html/body/form/button", +): + """Create a minimal semantic element ref payload.""" + return { + "role": role, + "name": name, + "tagName": tag, + "xpath": xpath, + "cssSelector": f"{tag}", + "bbox": {"x": 100, "y": 200, "width": 80, "height": 30}, + "state": {"enabled": True, "focused": False, "visible": True}, + "id": None, + "classList": [], + } + + +def _setup_capture_db(tmp_path): + """Create a capture DB with a recording and return (session, recording, db_path).""" + db_path = str(tmp_path / "recording.db") + engine, Session = create_db(db_path) + session = Session() + + recording = insert_recording(session, { + "timestamp": time.time(), + "monitor_width": 1920, + "monitor_height": 1080, + "double_click_interval_seconds": 0.5, + "double_click_distance_pixels": 5, + "platform": "darwin", + "task_description": "Test browser capture", + }) + return session, recording, db_path + + +class TestBrowserEventsAPI: + """Test CaptureSession.browser_events() with synthetic DB data.""" + + def test_empty_browser_events(self, tmp_path): + """Capture with no browser events returns empty list.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + capture = CaptureSession.load(str(tmp_path)) + assert capture.browser_events() == [] + assert capture.browser_event_count == 0 + capture.close() + session.close() + + def test_click_event_roundtrip(self, tmp_path): + """Click event stored in DB is parsed back as BrowserClickEvent.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "click", + "url": "https://app.appfolio.com/tenants", + "clientX": 150, + "clientY": 220, + "pageX": 150, + "pageY": 220, + "button": 0, + "clickCount": 1, + "element": _make_element_payload(), + }, + } + insert_browser_event(session, recording, ts, {"message": msg}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserClickEvent) + assert events[0].client_x == 150 + assert events[0].client_y == 220 + assert events[0].element.role == "button" + assert events[0].element.name == "Submit" + assert events[0].element.xpath == "/html/body/form/button" + assert events[0].url == "https://app.appfolio.com/tenants" + capture.close() + + def test_key_event_roundtrip(self, tmp_path): + """Key event with modifiers is parsed correctly.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "keydown", + "url": "https://app.appfolio.com/search", + "key": "a", + "code": "KeyA", + "keyCode": 65, + "shiftKey": False, + "ctrlKey": True, + "altKey": False, + "metaKey": False, + "element": _make_element_payload( + role="textbox", + name="Search", + tag="input", + xpath="/html/body/form/input", + ), + }, + } + insert_browser_event(session, recording, ts, {"message": msg}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserKeyEvent) + assert events[0].key == "a" + assert events[0].ctrl_key is True + assert events[0].element.role == "textbox" + capture.close() + + def test_input_event_roundtrip(self, tmp_path): + """Input event captures field value.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "input", + "url": "https://app.appfolio.com/form", + "inputType": "insertText", + "data": "John Doe", + "value": "John Doe", + "element": _make_element_payload( + role="textbox", + name="Tenant Name", + tag="input", + xpath="/html/body/form/input[name='tenant']", + ), + }, + } + insert_browser_event(session, recording, ts, {"message": msg}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserInputEvent) + assert events[0].value == "John Doe" + assert events[0].element.name == "Tenant Name" + capture.close() + + def test_scroll_event_roundtrip(self, tmp_path): + """Scroll event captures position and delta.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "scroll", + "url": "https://app.appfolio.com/list", + "scrollX": 0, + "scrollY": 500, + "deltaX": 0, + "deltaY": 100, + }, + } + insert_browser_event(session, recording, ts, {"message": msg}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserScrollEvent) + assert events[0].scroll_y == 500 + assert events[0].delta_y == 100 + capture.close() + + def test_navigation_event_roundtrip(self, tmp_path): + """Navigation event captures URL transition.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "navigate", + "url": "https://app.appfolio.com/tenants/123", + "previousUrl": "https://app.appfolio.com/tenants", + "navigationType": "link", + }, + } + insert_browser_event(session, recording, ts, {"message": msg}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserNavigationEvent) + assert events[0].previous_url == "https://app.appfolio.com/tenants" + assert events[0].url == "https://app.appfolio.com/tenants/123" + capture.close() + + def test_mixed_events_ordering(self, tmp_path): + """Multiple event types maintain timestamp ordering.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + base_ts = time.time() + events_data = [ + (base_ts, "navigate", { + "url": "https://example.com", + "previousUrl": "", + "navigationType": "typed", + }), + (base_ts + 1, "click", { + "url": "https://example.com", + "clientX": 100, "clientY": 200, + "pageX": 100, "pageY": 200, + "button": 0, "clickCount": 1, + "element": _make_element_payload(), + }), + (base_ts + 2, "input", { + "url": "https://example.com", + "inputType": "insertText", + "data": "test", + "value": "test", + "element": _make_element_payload( + role="textbox", name="Field", tag="input", + xpath="/html/body/input", + ), + }), + (base_ts + 3, "scroll", { + "url": "https://example.com", + "scrollX": 0, "scrollY": 300, + "deltaX": 0, "deltaY": 300, + }), + ] + + for ts, event_type, payload in events_data: + payload["eventType"] = event_type + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": payload, + } + insert_browser_event(session, recording, ts, {"message": msg}) + + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 4 + assert isinstance(events[0], BrowserNavigationEvent) + assert isinstance(events[1], BrowserClickEvent) + assert isinstance(events[2], BrowserInputEvent) + assert isinstance(events[3], BrowserScrollEvent) + + # Verify ordering + for i in range(len(events) - 1): + assert events[i].timestamp <= events[i + 1].timestamp + + assert capture.browser_event_count == 4 + capture.close() + + def test_malformed_event_skipped(self, tmp_path): + """Events with unparseable messages are skipped gracefully.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + # Valid event + insert_browser_event(session, recording, ts, { + "message": { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "scroll", + "url": "https://example.com", + "scrollX": 0, "scrollY": 0, + "deltaX": 0, "deltaY": 50, + }, + } + }) + # Malformed event (no eventType) + insert_browser_event(session, recording, ts + 1, { + "message": {"garbage": True} + }) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + # Only the valid event should parse + assert len(events) == 1 + assert isinstance(events[0], BrowserScrollEvent) + # But browser_event_count counts raw DB rows + assert capture.browser_event_count == 2 + capture.close() + + def test_element_state_preserved(self, tmp_path): + """Element state (checked, value, etc.) survives roundtrip.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + msg = { + "type": "DOM_EVENT", + "timestamp": ts * 1000, + "tabId": 1, + "payload": { + "eventType": "click", + "url": "https://example.com/form", + "clientX": 50, "clientY": 50, + "pageX": 50, "pageY": 50, + "button": 0, "clickCount": 1, + "element": { + "role": "checkbox", + "name": "Government Assistance", + "tagName": "input", + "xpath": "/html/body/form/input[@type='checkbox']", + "cssSelector": "input[type='checkbox']", + "bbox": {"x": 40, "y": 40, "width": 20, "height": 20}, + "state": { + "enabled": True, + "focused": True, + "visible": True, + "checked": True, + "value": "on", + }, + "id": "gov-assist", + "classList": ["form-check"], + }, + }, + } + insert_browser_event(session, recording, ts, {"message": msg}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + click = events[0] + assert click.element.state.checked is True + assert click.element.state.value == "on" + assert click.element.id == "gov-assist" + assert click.element.class_list == ["form-check"] + capture.close() + + +class TestRawContentScriptFormat: + """Test parsing of raw content-script events (no 'browser.' prefix).""" + + def test_raw_click_event(self, tmp_path): + """Click from content script with flat format is parsed correctly.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + raw_event = { + "type": "USER_EVENT", + "eventType": "click", + "targetId": "elem-109", + "timestamp": ts, + "devicePixelRatio": 2.0, + "element": { + "role": "link", + "name": "55\u00a0comments", + "dataId": "elem-109", + "bbox": {"x": 323, "y": 131, "width": 63, "height": 11}, + "tagName": "a", + "id": None, + "classList": None, + }, + "clientX": 346, + "clientY": 141, + "screenX": 606, + "screenY": 372, + "button": 0, + "url": "https://news.ycombinator.com/", + "tabId": 1, + } + insert_browser_event(session, recording, ts, {"message": raw_event}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserClickEvent) + assert events[0].client_x == 346 + assert events[0].client_y == 141 + assert events[0].element.role == "link" + assert events[0].element.tag_name == "a" + assert events[0].url == "https://news.ycombinator.com/" + capture.close() + + def test_raw_keydown_event(self, tmp_path): + """Keydown from content script is parsed correctly.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + raw_event = { + "type": "USER_EVENT", + "eventType": "keydown", + "timestamp": ts, + "element": { + "role": "textbox", + "name": "", + "dataId": "elem-0", + "bbox": {"x": 59, "y": 95, "width": 657, "height": 129}, + "tagName": "textarea", + "id": None, + "classList": None, + }, + "key": "t", + "code": "KeyT", + "shiftKey": False, + "ctrlKey": False, + "altKey": False, + "metaKey": False, + "url": "https://example.com/", + "tabId": 1, + } + insert_browser_event(session, recording, ts, {"message": raw_event}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserKeyEvent) + assert events[0].key == "t" + assert events[0].code == "KeyT" + assert events[0].element.role == "textbox" + assert events[0].element.tag_name == "textarea" + capture.close() + + def test_raw_scroll_event(self, tmp_path): + """Scroll from content script (scrollDeltaX/Y) is parsed correctly.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + raw_event = { + "type": "USER_EVENT", + "eventType": "scroll", + "timestamp": ts, + "scrollDeltaX": 0, + "scrollDeltaY": -1.14, + "clientX": 538, + "clientY": 300, + "url": "https://example.com/", + "tabId": 1, + } + insert_browser_event(session, recording, ts, {"message": raw_event}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserScrollEvent) + assert events[0].delta_y == -1.14 + assert events[0].delta_x == 0 + capture.close() + + def test_raw_mousemove_event(self, tmp_path): + """Mousemove from content script is parsed as BrowserMouseMoveEvent.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + ts = time.time() + raw_event = { + "type": "USER_EVENT", + "eventType": "mousemove", + "timestamp": ts, + "element": { + "role": None, + "name": "Some text", + "dataId": "elem-0", + "bbox": {"x": 69, "y": 183, "width": 652, "height": 19}, + "tagName": "td", + "id": None, + "classList": ["title"], + }, + "clientX": 577, + "clientY": 185, + "screenX": 1010, + "screenY": 449, + "url": "https://example.com/", + "tabId": 1, + } + insert_browser_event(session, recording, ts, {"message": raw_event}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 1 + assert isinstance(events[0], BrowserMouseMoveEvent) + assert events[0].client_x == 577 + assert events[0].client_y == 185 + assert events[0].screen_x == 1010 + assert events[0].element.tag_name == "td" + assert events[0].element.class_list == ["title"] + capture.close() + + def test_raw_mixed_events(self, tmp_path): + """Mixed raw content-script events are all parsed.""" + session, recording, db_path = _setup_capture_db(tmp_path) + + base_ts = time.time() + raw_events = [ + {"type": "USER_EVENT", "eventType": "mousemove", "timestamp": base_ts, + "clientX": 100, "clientY": 200, "screenX": 200, "screenY": 300, + "url": "https://example.com/", "tabId": 1, + "element": {"role": None, "name": "", "dataId": "elem-0", + "bbox": {"x": 0, "y": 0, "width": 100, "height": 100}, + "tagName": "div", "id": None, "classList": None}}, + {"type": "USER_EVENT", "eventType": "click", "timestamp": base_ts + 1, + "clientX": 100, "clientY": 200, "screenX": 200, "screenY": 300, + "button": 0, "url": "https://example.com/", "tabId": 1, + "element": {"role": "button", "name": "Submit", "dataId": "elem-1", + "bbox": {"x": 90, "y": 190, "width": 20, "height": 20}, + "tagName": "button", "id": "submit-btn", "classList": ["btn"]}}, + {"type": "USER_EVENT", "eventType": "keydown", "timestamp": base_ts + 2, + "key": "Enter", "code": "Enter", "shiftKey": False, "ctrlKey": False, + "altKey": False, "metaKey": False, "url": "https://example.com/", "tabId": 1, + "element": {"role": "textbox", "name": "Search", "dataId": "elem-2", + "bbox": {"x": 50, "y": 50, "width": 200, "height": 30}, + "tagName": "input", "id": None, "classList": None}}, + {"type": "USER_EVENT", "eventType": "scroll", "timestamp": base_ts + 3, + "scrollDeltaX": 0, "scrollDeltaY": 100, "clientX": 400, "clientY": 300, + "url": "https://example.com/", "tabId": 1}, + ] + for i, evt in enumerate(raw_events): + insert_browser_event(session, recording, base_ts + i, {"message": evt}) + session.close() + + capture = CaptureSession.load(str(tmp_path)) + events = capture.browser_events() + assert len(events) == 4 + assert isinstance(events[0], BrowserMouseMoveEvent) + assert isinstance(events[1], BrowserClickEvent) + assert isinstance(events[2], BrowserKeyEvent) + assert isinstance(events[3], BrowserScrollEvent) + assert events[1].element.id == "submit-btn" + assert events[1].element.class_list == ["btn"] + capture.close() + + +class TestCLIBrowserFlag: + """Test that the CLI record function accepts browser_events flag.""" + + def test_record_accepts_browser_events_param(self): + """Verify record() function signature includes browser_events.""" + import inspect + + from openadapt_capture.cli import record + + sig = inspect.signature(record) + assert "browser_events" in sig.parameters + # Default should be False + assert sig.parameters["browser_events"].default is False From 2473af199b9a85a6eaf2e3b0be0303b43e1a9544 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 8 Mar 2026 17:40:28 -0400 Subject: [PATCH 2/4] fix: clean up stale docstring and unused import Co-Authored-By: Claude Opus 4.6 --- openadapt_capture/capture.py | 4 +--- tests/test_browser_capture_e2e.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/openadapt_capture/capture.py b/openadapt_capture/capture.py index 893d73d..c35da78 100644 --- a/openadapt_capture/capture.py +++ b/openadapt_capture/capture.py @@ -164,9 +164,7 @@ def _convert_browser_event(db_event) -> "BrowserEvent | None": The DB stores browser events as JSON in the `message` field. The recorder wraps each raw WebSocket message as ``{"message": }``. - Handles both the canonical ``browser.*`` event-type prefix format (used in - tests and browser_bridge.py) and the raw content-script format which sends - plain event names like ``"click"``, ``"keydown"``, ``"scroll"``. + Handles both flat (content-script) and payload-wrapped message formats. Args: db_event: SQLAlchemy BrowserEvent instance. diff --git a/tests/test_browser_capture_e2e.py b/tests/test_browser_capture_e2e.py index 31d2bc4..020d39b 100644 --- a/tests/test_browser_capture_e2e.py +++ b/tests/test_browser_capture_e2e.py @@ -4,7 +4,6 @@ iterate as typed Pydantic models. """ -import json import time import pytest From d978480094275422d97b9661e933773fef382559 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 8 Mar 2026 17:59:36 -0400 Subject: [PATCH 3/4] fix: address review feedback - Replace bare except with debug logging in _convert_browser_event - Move lazy imports to module level (BoundingBox, ElementState, etc.) - Remove unused imports (pytest, Recording) from test file - Update test class names to reflect structure tested, not removed format - Fix stale docstring in _parse_element_ref Co-Authored-By: Claude Opus 4.6 --- openadapt_capture/capture.py | 23 ++++++++++------------- tests/test_browser_capture_e2e.py | 11 ++++------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/openadapt_capture/capture.py b/openadapt_capture/capture.py index c35da78..73b6576 100644 --- a/openadapt_capture/capture.py +++ b/openadapt_capture/capture.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Iterator from openadapt_capture.browser_events import ( + BoundingBox, BrowserClickEvent, BrowserEventType, BrowserFocusEvent, @@ -18,6 +19,9 @@ BrowserMouseMoveEvent, BrowserNavigationEvent, BrowserScrollEvent, + ElementState, + NavigationType, + SemanticElementRef, ) from openadapt_capture.events import ( ActionEvent as PydanticActionEvent, @@ -111,18 +115,12 @@ def _convert_action_event(db_event) -> PydanticActionEvent | None: return None -def _parse_element_ref(raw: dict | None): +def _parse_element_ref(raw: dict | None) -> SemanticElementRef | None: """Parse a raw element dict into a SemanticElementRef. - Handles both the canonical format (from browser_bridge.py tests) and the - raw content-script format which uses different field names. + Handles field name variations between the content-script format + (e.g. ``dataId``, ``tagName``, ``classList``) and snake_case alternatives. """ - from openadapt_capture.browser_events import ( - BoundingBox, - ElementState, - SemanticElementRef, - ) - if not raw or not isinstance(raw, dict): return None @@ -251,8 +249,6 @@ def _convert_browser_event(db_event) -> "BrowserEvent | None": element=elem, ) elif event_type == BrowserEventType.NAVIGATE: - from openadapt_capture.browser_events import NavigationType - nav_type = payload.get("navigationType", "link") valid = [e.value for e in NavigationType] return BrowserNavigationEvent( @@ -289,8 +285,9 @@ def _convert_browser_event(db_event) -> "BrowserEvent | None": tab_id=tab_id, element=elem, ) - except Exception: - pass + except Exception as e: + import logging + logging.getLogger(__name__).debug("Failed to parse browser event: %s", e) return None diff --git a/tests/test_browser_capture_e2e.py b/tests/test_browser_capture_e2e.py index 020d39b..5d22aaa 100644 --- a/tests/test_browser_capture_e2e.py +++ b/tests/test_browser_capture_e2e.py @@ -6,8 +6,6 @@ import time -import pytest - from openadapt_capture.browser_events import ( BrowserClickEvent, BrowserEventType, @@ -20,7 +18,6 @@ from openadapt_capture.capture import CaptureSession from openadapt_capture.db import create_db from openadapt_capture.db.crud import insert_browser_event, insert_recording -from openadapt_capture.db.models import Recording def _make_element_payload( @@ -61,8 +58,8 @@ def _setup_capture_db(tmp_path): return session, recording, db_path -class TestBrowserEventsAPI: - """Test CaptureSession.browser_events() with synthetic DB data.""" +class TestBrowserEventsPayloadWrapped: + """Test browser events stored with payload-wrapped message format.""" def test_empty_browser_events(self, tmp_path): """Capture with no browser events returns empty list.""" @@ -383,8 +380,8 @@ def test_element_state_preserved(self, tmp_path): capture.close() -class TestRawContentScriptFormat: - """Test parsing of raw content-script events (no 'browser.' prefix).""" +class TestContentScriptFlatFormat: + """Test parsing of flat content-script events (real Chrome extension format).""" def test_raw_click_event(self, tmp_path): """Click from content script with flat format is parsed correctly.""" From b1d82dd3ba6f6d43ff4729699a164ea268ab3116 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 8 Mar 2026 19:39:29 -0400 Subject: [PATCH 4/4] fix: remove unused BrowserEventType import from tests Co-Authored-By: Claude Opus 4.6 --- tests/test_browser_capture_e2e.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_browser_capture_e2e.py b/tests/test_browser_capture_e2e.py index 5d22aaa..413ba57 100644 --- a/tests/test_browser_capture_e2e.py +++ b/tests/test_browser_capture_e2e.py @@ -8,7 +8,6 @@ from openadapt_capture.browser_events import ( BrowserClickEvent, - BrowserEventType, BrowserInputEvent, BrowserKeyEvent, BrowserMouseMoveEvent,