class BrowserEventType(str, Enum):
    """Browser event type identifiers.

    Values match the raw DOM event names sent by the Chrome extension
    content script (e.g. ``"click"``, ``"keydown"``).

    Earlier versions of this enum used ``"browser."``-prefixed values
    (``"browser.click"``). Lookups by value still accept that legacy
    spelling (see ``_missing_``), so previously serialized events keep
    resolving to the right member.
    """

    # User interaction events
    CLICK = "click"
    KEYDOWN = "keydown"
    KEYUP = "keyup"
    SCROLL = "scroll"
    INPUT = "input"
    MOUSEMOVE = "mousemove"
    FOCUS = "focus"
    BLUR = "blur"

    # Navigation events
    NAVIGATE = "navigate"

    # Unknown/generic
    UNKNOWN = "unknown"

    @classmethod
    def _missing_(cls, value):
        """Resolve legacy ``"browser.<name>"`` values to the current member.

        Called by ``Enum`` only after the normal by-value lookup has failed.

        Args:
            value: The value that did not match any member.

        Returns:
            The matching member, or ``None`` so that ``Enum`` raises its
            usual ``ValueError`` for genuinely unknown values.
        """
        legacy_prefix = "browser."
        if isinstance(value, str) and value.startswith(legacy_prefix):
            bare = value[len(legacy_prefix):]
            for member in cls:
                if member.value == bare:
                    return member
        return None
def _parse_element_ref(raw: dict | None) -> SemanticElementRef | None:
    """Build a ``SemanticElementRef`` from a raw element dict.

    Accepts both the content-script key spelling (``dataId``, ``tagName``,
    ``classList``, ``cssSelector``) and the snake_case alternatives
    (``tag_name``, ``class_list``, ``css_selector``).

    Args:
        raw: The ``element`` value taken from a browser event message.

    Returns:
        The parsed element reference, or ``None`` when ``raw`` is empty or
        is not a dict.
    """
    if not raw or not isinstance(raw, dict):
        return None

    # "bbox" may be missing, JSON null, or not an object at all. Treat every
    # such case as "no geometry" instead of raising on ``None.get``.
    bbox_raw = raw.get("bbox")
    if not isinstance(bbox_raw, dict):
        bbox_raw = {}
    bbox = BoundingBox(
        # ``or 0`` also covers coordinates that are present but null.
        x=bbox_raw.get("x") or 0,
        y=bbox_raw.get("y") or 0,
        width=bbox_raw.get("width") or 0,
        height=bbox_raw.get("height") or 0,
    )

    state_raw = raw.get("state")
    if isinstance(state_raw, dict):
        state = ElementState(
            enabled=state_raw.get("enabled", True),
            focused=state_raw.get("focused", False),
            visible=state_raw.get("visible", True),
            checked=state_raw.get("checked"),
            selected=state_raw.get("selected"),
            expanded=state_raw.get("expanded"),
            value=state_raw.get("value"),
        )
    else:
        state = ElementState()

    return SemanticElementRef(
        role=raw.get("role") or "",
        name=raw.get("name") or "",
        bbox=bbox,
        # The content script sends a synthetic ``dataId`` instead of an xpath.
        xpath=raw.get("xpath") or raw.get("dataId") or "",
        css_selector=raw.get("cssSelector") or raw.get("css_selector") or "",
        state=state,
        tag_name=raw.get("tagName") or raw.get("tag_name") or "",
        id=raw.get("id"),
        class_list=raw.get("classList") or raw.get("class_list") or [],
    )


def _convert_browser_event(db_event) -> "BrowserEvent | None":
    """Convert a stored browser event row to a typed Pydantic browser event.

    The row's ``message`` attribute holds JSON. The recorder wraps each raw
    WebSocket message as ``{"message": <raw message>}``; an unwrapped dict is
    accepted as well. Inside that, two layouts are supported:

    * flat (content script): event fields sit directly on the message;
    * payload-wrapped: event fields sit under a ``"payload"`` key.

    Args:
        db_event: Object exposing ``message`` and ``timestamp`` attributes.

    Returns:
        The typed browser event, or ``None`` when the message is not a dict,
        the event type is unrecognised or has no dedicated model, a required
        element is missing, or model construction fails.
    """
    msg = db_event.message
    if not isinstance(msg, dict):
        return None

    # Unwrap the recorder's {"message": ...} envelope when present.
    inner = msg.get("message", msg)
    if not isinstance(inner, dict):
        return None

    payload = inner.get("payload", inner)
    if not isinstance(payload, dict):
        # A null / non-object "payload" must not crash the whole listing;
        # fall back to reading the flat layout.
        payload = inner

    raw_type = payload.get("eventType", inner.get("eventType", ""))
    try:
        event_type = BrowserEventType(raw_type)
    except ValueError:
        return None

    timestamp = db_event.timestamp or 0
    url = payload.get("url", inner.get("url", ""))
    tab_id = inner.get("tabId", payload.get("tab_id", 0))

    try:
        if event_type == BrowserEventType.CLICK:
            elem = _parse_element_ref(payload.get("element"))
            if elem is None:
                return None
            return BrowserClickEvent(
                timestamp=timestamp,
                url=url,
                tab_id=tab_id,
                client_x=payload.get("clientX", 0),
                client_y=payload.get("clientY", 0),
                # Page coordinates default to the viewport ones when absent.
                page_x=payload.get("pageX", payload.get("clientX", 0)),
                page_y=payload.get("pageY", payload.get("clientY", 0)),
                button=payload.get("button", 0),
                click_count=payload.get("clickCount", 1),
                element=elem,
            )

        if event_type in (BrowserEventType.KEYDOWN, BrowserEventType.KEYUP):
            return BrowserKeyEvent(
                timestamp=timestamp,
                type=event_type,
                url=url,
                tab_id=tab_id,
                key=payload.get("key", ""),
                code=payload.get("code", ""),
                key_code=payload.get("keyCode", 0),
                shift_key=payload.get("shiftKey", False),
                ctrl_key=payload.get("ctrlKey", False),
                alt_key=payload.get("altKey", False),
                meta_key=payload.get("metaKey", False),
                element=_parse_element_ref(payload.get("element")),
            )

        if event_type == BrowserEventType.SCROLL:
            return BrowserScrollEvent(
                timestamp=timestamp,
                url=url,
                tab_id=tab_id,
                scroll_x=payload.get("scrollX", 0),
                scroll_y=payload.get("scrollY", 0),
                # The content script names the deltas scrollDeltaX/Y.
                delta_x=payload.get("deltaX", payload.get("scrollDeltaX", 0)),
                delta_y=payload.get("deltaY", payload.get("scrollDeltaY", 0)),
            )

        if event_type == BrowserEventType.INPUT:
            elem = _parse_element_ref(payload.get("element"))
            if elem is None:
                return None
            return BrowserInputEvent(
                timestamp=timestamp,
                url=url,
                tab_id=tab_id,
                input_type=payload.get("inputType", ""),
                data=payload.get("data"),
                value=payload.get("value", ""),
                element=elem,
            )

        if event_type == BrowserEventType.NAVIGATE:
            nav_type = payload.get("navigationType", "link")
            known_nav_types = [e.value for e in NavigationType]
            return BrowserNavigationEvent(
                timestamp=timestamp,
                url=url,
                tab_id=tab_id,
                previous_url=payload.get("previousUrl", ""),
                # Unrecognised navigation types are recorded as LINK.
                navigation_type=(
                    NavigationType(nav_type)
                    if nav_type in known_nav_types
                    else NavigationType.LINK
                ),
            )

        if event_type == BrowserEventType.MOUSEMOVE:
            return BrowserMouseMoveEvent(
                timestamp=timestamp,
                url=url,
                tab_id=tab_id,
                client_x=payload.get("clientX", 0),
                client_y=payload.get("clientY", 0),
                screen_x=payload.get("screenX", 0),
                screen_y=payload.get("screenY", 0),
                element=_parse_element_ref(payload.get("element")),
            )

        if event_type in (BrowserEventType.FOCUS, BrowserEventType.BLUR):
            elem = _parse_element_ref(payload.get("element"))
            if elem is None:
                return None
            return BrowserFocusEvent(
                timestamp=timestamp,
                type=event_type,
                url=url,
                tab_id=tab_id,
                element=elem,
            )
    except Exception as e:
        # Best-effort parsing: one bad row must not abort the whole listing,
        # so the failure is logged and the row is skipped.
        import logging

        logging.getLogger(__name__).debug("Failed to parse browser event: %s", e)
        return None

    # Recognised type without a dedicated model (e.g. UNKNOWN).
    return None
def browser_events(self) -> list["BrowserEvent"]:
    """Get all browser events as typed Pydantic models.

    Each stored row is passed through ``_convert_browser_event``; rows that
    cannot be converted (it returns ``None``) are left out.

    Returns:
        List of typed browser events, ordered by timestamp.
    """
    converted = (
        _convert_browser_event(db_event)
        for db_event in self._recording.browser_events
    )
    # Sort explicitly so the documented ordering holds regardless of how the
    # rows come back. ``sorted`` is stable, so events that share a timestamp
    # keep their stored order.
    return sorted(
        (event for event in converted if event is not None),
        key=lambda event: event.timestamp,
    )


@property
def browser_event_count(self) -> int:
    """Number of stored browser event rows in this capture.

    Counts raw rows, so it can be larger than ``len(self.browser_events())``
    when some rows cannot be converted.
    """
    return len(self._recording.browser_events)
""" import time @@ -36,6 +40,9 @@ def record( output_dir = str(Path(output_dir).resolve()) print(f"Recording to: {output_dir}") + if browser_events: + print("Browser event capture enabled (WebSocket on localhost:8765)") + print("Make sure the openadapt-capture Chrome extension is installed.") print("Press Ctrl+C or type stop sequence to stop recording...") print() @@ -45,6 +52,7 @@ def record( capture_video=video, capture_audio=audio, capture_images=images, + capture_browser_events=browser_events, send_profile=send_profile, ) as recorder: recorder.wait_for_ready() @@ -129,6 +137,7 @@ def info(capture_dir: str) -> None: # Count events actions = list(capture.actions()) print(f"Actions: {len(actions)}") + print(f"Browser events: {capture.browser_event_count}") # Event type breakdown from collections import Counter @@ -138,6 +147,14 @@ def info(capture_dir: str) -> None: for event_type, count in types.most_common(): print(f" {event_type}: {count}") + # Browser event breakdown + if capture.browser_event_count > 0: + browser_events = capture.browser_events() + btypes = Counter(type(e).__name__ for e in browser_events) + print("Browser event types:") + for btype, count in btypes.most_common(): + print(f" {btype}: {count}") + def transcribe( capture_dir: str, diff --git a/tests/test_browser_capture_e2e.py b/tests/test_browser_capture_e2e.py new file mode 100644 index 0000000..413ba57 --- /dev/null +++ b/tests/test_browser_capture_e2e.py @@ -0,0 +1,589 @@ +"""End-to-end tests for browser event capture pipeline. + +Tests the full flow: store browser events in DB → load via CaptureSession → +iterate as typed Pydantic models. 
def _make_element_payload(
    role="button",
    name="Submit",
    tag="button",
    xpath="/html/body/form/button",
):
    """Create a minimal semantic element ref payload."""
    return {
        "role": role,
        "name": name,
        "tagName": tag,
        "xpath": xpath,
        "cssSelector": tag,
        "bbox": {"x": 100, "y": 200, "width": 80, "height": 30},
        "state": {"enabled": True, "focused": False, "visible": True},
        "id": None,
        "classList": [],
    }


def _setup_capture_db(tmp_path):
    """Create a capture DB with a recording and return (session, recording, db_path)."""
    db_path = str(tmp_path / "recording.db")
    _engine, session_factory = create_db(db_path)
    session = session_factory()

    recording = insert_recording(session, {
        "timestamp": time.time(),
        "monitor_width": 1920,
        "monitor_height": 1080,
        "double_click_interval_seconds": 0.5,
        "double_click_distance_pixels": 5,
        "platform": "darwin",
        "task_description": "Test browser capture",
    })
    return session, recording, db_path


def _dom_event(ts, payload):
    """Wrap ``payload`` in the payload-wrapped DOM_EVENT message layout."""
    return {
        "type": "DOM_EVENT",
        "timestamp": ts * 1000,
        "tabId": 1,
        "payload": payload,
    }


def _roundtrip(tmp_path, stamped_messages):
    """Store messages, reload the capture, and return what it parsed.

    Args:
        tmp_path: Directory that holds the capture database.
        stamped_messages: Iterable of ``(timestamp, raw_message)`` pairs; each
            message is stored under the recorder's ``{"message": ...}`` wrapper.

    Returns:
        ``(events, raw_count)``: the typed events and the raw row count.

    The DB session and the capture are closed in ``finally`` blocks so a
    failing step does not leave database handles open.
    """
    session, recording, _ = _setup_capture_db(tmp_path)
    try:
        for ts, message in stamped_messages:
            insert_browser_event(session, recording, ts, {"message": message})
    finally:
        session.close()

    capture = CaptureSession.load(str(tmp_path))
    try:
        return capture.browser_events(), capture.browser_event_count
    finally:
        capture.close()


class TestBrowserEventsPayloadWrapped:
    """Test browser events stored with payload-wrapped message format."""

    def test_empty_browser_events(self, tmp_path):
        """Capture with no browser events returns empty list."""
        events, raw_count = _roundtrip(tmp_path, [])
        assert events == []
        assert raw_count == 0

    def test_click_event_roundtrip(self, tmp_path):
        """Click event stored in DB is parsed back as BrowserClickEvent."""
        ts = time.time()
        msg = _dom_event(ts, {
            "eventType": "click",
            "url": "https://app.appfolio.com/tenants",
            "clientX": 150,
            "clientY": 220,
            "pageX": 150,
            "pageY": 220,
            "button": 0,
            "clickCount": 1,
            "element": _make_element_payload(),
        })

        events, _ = _roundtrip(tmp_path, [(ts, msg)])

        assert len(events) == 1
        click = events[0]
        assert isinstance(click, BrowserClickEvent)
        assert click.client_x == 150
        assert click.client_y == 220
        assert click.element.role == "button"
        assert click.element.name == "Submit"
        assert click.element.xpath == "/html/body/form/button"
        assert click.url == "https://app.appfolio.com/tenants"

    def test_key_event_roundtrip(self, tmp_path):
        """Key event with modifiers is parsed correctly."""
        ts = time.time()
        msg = _dom_event(ts, {
            "eventType": "keydown",
            "url": "https://app.appfolio.com/search",
            "key": "a",
            "code": "KeyA",
            "keyCode": 65,
            "shiftKey": False,
            "ctrlKey": True,
            "altKey": False,
            "metaKey": False,
            "element": _make_element_payload(
                role="textbox",
                name="Search",
                tag="input",
                xpath="/html/body/form/input",
            ),
        })

        events, _ = _roundtrip(tmp_path, [(ts, msg)])

        assert len(events) == 1
        key_event = events[0]
        assert isinstance(key_event, BrowserKeyEvent)
        assert key_event.key == "a"
        assert key_event.ctrl_key is True
        assert key_event.element.role == "textbox"

    def test_input_event_roundtrip(self, tmp_path):
        """Input event captures field value."""
        ts = time.time()
        msg = _dom_event(ts, {
            "eventType": "input",
            "url": "https://app.appfolio.com/form",
            "inputType": "insertText",
            "data": "John Doe",
            "value": "John Doe",
            "element": _make_element_payload(
                role="textbox",
                name="Tenant Name",
                tag="input",
                xpath="/html/body/form/input[name='tenant']",
            ),
        })

        events, _ = _roundtrip(tmp_path, [(ts, msg)])

        assert len(events) == 1
        assert isinstance(events[0], BrowserInputEvent)
        assert events[0].value == "John Doe"
        assert events[0].element.name == "Tenant Name"

    def test_scroll_event_roundtrip(self, tmp_path):
        """Scroll event captures position and delta."""
        ts = time.time()
        msg = _dom_event(ts, {
            "eventType": "scroll",
            "url": "https://app.appfolio.com/list",
            "scrollX": 0,
            "scrollY": 500,
            "deltaX": 0,
            "deltaY": 100,
        })

        events, _ = _roundtrip(tmp_path, [(ts, msg)])

        assert len(events) == 1
        assert isinstance(events[0], BrowserScrollEvent)
        assert events[0].scroll_y == 500
        assert events[0].delta_y == 100

    def test_navigation_event_roundtrip(self, tmp_path):
        """Navigation event captures URL transition."""
        ts = time.time()
        msg = _dom_event(ts, {
            "eventType": "navigate",
            "url": "https://app.appfolio.com/tenants/123",
            "previousUrl": "https://app.appfolio.com/tenants",
            "navigationType": "link",
        })

        events, _ = _roundtrip(tmp_path, [(ts, msg)])

        assert len(events) == 1
        assert isinstance(events[0], BrowserNavigationEvent)
        assert events[0].previous_url == "https://app.appfolio.com/tenants"
        assert events[0].url == "https://app.appfolio.com/tenants/123"

    def test_mixed_events_ordering(self, tmp_path):
        """Multiple event types maintain timestamp ordering."""
        base_ts = time.time()
        events_data = [
            (base_ts, "navigate", {
                "url": "https://example.com",
                "previousUrl": "",
                "navigationType": "typed",
            }),
            (base_ts + 1, "click", {
                "url": "https://example.com",
                "clientX": 100, "clientY": 200,
                "pageX": 100, "pageY": 200,
                "button": 0, "clickCount": 1,
                "element": _make_element_payload(),
            }),
            (base_ts + 2, "input", {
                "url": "https://example.com",
                "inputType": "insertText",
                "data": "test",
                "value": "test",
                "element": _make_element_payload(
                    role="textbox", name="Field", tag="input",
                    xpath="/html/body/input",
                ),
            }),
            (base_ts + 3, "scroll", {
                "url": "https://example.com",
                "scrollX": 0, "scrollY": 300,
                "deltaX": 0, "deltaY": 300,
            }),
        ]
        stamped = [
            (ts, _dom_event(ts, {**payload, "eventType": event_type}))
            for ts, event_type, payload in events_data
        ]

        events, raw_count = _roundtrip(tmp_path, stamped)

        assert len(events) == 4
        assert isinstance(events[0], BrowserNavigationEvent)
        assert isinstance(events[1], BrowserClickEvent)
        assert isinstance(events[2], BrowserInputEvent)
        assert isinstance(events[3], BrowserScrollEvent)

        # Verify ordering
        for earlier, later in zip(events, events[1:]):
            assert earlier.timestamp <= later.timestamp

        assert raw_count == 4

    def test_malformed_event_skipped(self, tmp_path):
        """Events with unparseable messages are skipped gracefully."""
        ts = time.time()
        valid = _dom_event(ts, {
            "eventType": "scroll",
            "url": "https://example.com",
            "scrollX": 0, "scrollY": 0,
            "deltaX": 0, "deltaY": 50,
        })
        # Malformed event (no eventType)
        malformed = {"garbage": True}

        events, raw_count = _roundtrip(
            tmp_path, [(ts, valid), (ts + 1, malformed)]
        )

        # Only the valid event should parse
        assert len(events) == 1
        assert isinstance(events[0], BrowserScrollEvent)
        # But browser_event_count counts raw DB rows
        assert raw_count == 2

    def test_element_state_preserved(self, tmp_path):
        """Element state (checked, value, etc.) survives roundtrip."""
        ts = time.time()
        msg = _dom_event(ts, {
            "eventType": "click",
            "url": "https://example.com/form",
            "clientX": 50, "clientY": 50,
            "pageX": 50, "pageY": 50,
            "button": 0, "clickCount": 1,
            "element": {
                "role": "checkbox",
                "name": "Government Assistance",
                "tagName": "input",
                "xpath": "/html/body/form/input[@type='checkbox']",
                "cssSelector": "input[type='checkbox']",
                "bbox": {"x": 40, "y": 40, "width": 20, "height": 20},
                "state": {
                    "enabled": True,
                    "focused": True,
                    "visible": True,
                    "checked": True,
                    "value": "on",
                },
                "id": "gov-assist",
                "classList": ["form-check"],
            },
        })

        events, _ = _roundtrip(tmp_path, [(ts, msg)])

        assert len(events) == 1
        click = events[0]
        assert click.element.state.checked is True
        assert click.element.state.value == "on"
        assert click.element.id == "gov-assist"
        assert click.element.class_list == ["form-check"]


class TestContentScriptFlatFormat:
    """Test parsing of flat content-script events (real Chrome extension format)."""

    def test_raw_click_event(self, tmp_path):
        """Click from content script with flat format is parsed correctly."""
        ts = time.time()
        raw_event = {
            "type": "USER_EVENT",
            "eventType": "click",
            "targetId": "elem-109",
            "timestamp": ts,
            "devicePixelRatio": 2.0,
            "element": {
                "role": "link",
                "name": "55\u00a0comments",
                "dataId": "elem-109",
                "bbox": {"x": 323, "y": 131, "width": 63, "height": 11},
                "tagName": "a",
                "id": None,
                "classList": None,
            },
            "clientX": 346,
            "clientY": 141,
            "screenX": 606,
            "screenY": 372,
            "button": 0,
            "url": "https://news.ycombinator.com/",
            "tabId": 1,
        }

        events, _ = _roundtrip(tmp_path, [(ts, raw_event)])

        assert len(events) == 1
        click = events[0]
        assert isinstance(click, BrowserClickEvent)
        assert click.client_x == 346
        assert click.client_y == 141
        assert click.element.role == "link"
        assert click.element.tag_name == "a"
        assert click.url == "https://news.ycombinator.com/"

    def test_raw_keydown_event(self, tmp_path):
        """Keydown from content script is parsed correctly."""
        ts = time.time()
        raw_event = {
            "type": "USER_EVENT",
            "eventType": "keydown",
            "timestamp": ts,
            "element": {
                "role": "textbox",
                "name": "",
                "dataId": "elem-0",
                "bbox": {"x": 59, "y": 95, "width": 657, "height": 129},
                "tagName": "textarea",
                "id": None,
                "classList": None,
            },
            "key": "t",
            "code": "KeyT",
            "shiftKey": False,
            "ctrlKey": False,
            "altKey": False,
            "metaKey": False,
            "url": "https://example.com/",
            "tabId": 1,
        }

        events, _ = _roundtrip(tmp_path, [(ts, raw_event)])

        assert len(events) == 1
        key_event = events[0]
        assert isinstance(key_event, BrowserKeyEvent)
        assert key_event.key == "t"
        assert key_event.code == "KeyT"
        assert key_event.element.role == "textbox"
        assert key_event.element.tag_name == "textarea"

    def test_raw_scroll_event(self, tmp_path):
        """Scroll from content script (scrollDeltaX/Y) is parsed correctly."""
        ts = time.time()
        raw_event = {
            "type": "USER_EVENT",
            "eventType": "scroll",
            "timestamp": ts,
            "scrollDeltaX": 0,
            "scrollDeltaY": -1.14,
            "clientX": 538,
            "clientY": 300,
            "url": "https://example.com/",
            "tabId": 1,
        }

        events, _ = _roundtrip(tmp_path, [(ts, raw_event)])

        assert len(events) == 1
        assert isinstance(events[0], BrowserScrollEvent)
        assert events[0].delta_y == -1.14
        assert events[0].delta_x == 0

    def test_raw_mousemove_event(self, tmp_path):
        """Mousemove from content script is parsed as BrowserMouseMoveEvent."""
        ts = time.time()
        raw_event = {
            "type": "USER_EVENT",
            "eventType": "mousemove",
            "timestamp": ts,
            "element": {
                "role": None,
                "name": "Some text",
                "dataId": "elem-0",
                "bbox": {"x": 69, "y": 183, "width": 652, "height": 19},
                "tagName": "td",
                "id": None,
                "classList": ["title"],
            },
            "clientX": 577,
            "clientY": 185,
            "screenX": 1010,
            "screenY": 449,
            "url": "https://example.com/",
            "tabId": 1,
        }

        events, _ = _roundtrip(tmp_path, [(ts, raw_event)])

        assert len(events) == 1
        move = events[0]
        assert isinstance(move, BrowserMouseMoveEvent)
        assert move.client_x == 577
        assert move.client_y == 185
        assert move.screen_x == 1010
        assert move.element.tag_name == "td"
        assert move.element.class_list == ["title"]

    def test_raw_mixed_events(self, tmp_path):
        """Mixed raw content-script events are all parsed."""
        base_ts = time.time()
        raw_events = [
            {"type": "USER_EVENT", "eventType": "mousemove", "timestamp": base_ts,
             "clientX": 100, "clientY": 200, "screenX": 200, "screenY": 300,
             "url": "https://example.com/", "tabId": 1,
             "element": {"role": None, "name": "", "dataId": "elem-0",
                         "bbox": {"x": 0, "y": 0, "width": 100, "height": 100},
                         "tagName": "div", "id": None, "classList": None}},
            {"type": "USER_EVENT", "eventType": "click", "timestamp": base_ts + 1,
             "clientX": 100, "clientY": 200, "screenX": 200, "screenY": 300,
             "button": 0, "url": "https://example.com/", "tabId": 1,
             "element": {"role": "button", "name": "Submit", "dataId": "elem-1",
                         "bbox": {"x": 90, "y": 190, "width": 20, "height": 20},
                         "tagName": "button", "id": "submit-btn", "classList": ["btn"]}},
            {"type": "USER_EVENT", "eventType": "keydown", "timestamp": base_ts + 2,
             "key": "Enter", "code": "Enter", "shiftKey": False, "ctrlKey": False,
             "altKey": False, "metaKey": False, "url": "https://example.com/", "tabId": 1,
             "element": {"role": "textbox", "name": "Search", "dataId": "elem-2",
                         "bbox": {"x": 50, "y": 50, "width": 200, "height": 30},
                         "tagName": "input", "id": None, "classList": None}},
            {"type": "USER_EVENT", "eventType": "scroll", "timestamp": base_ts + 3,
             "scrollDeltaX": 0, "scrollDeltaY": 100, "clientX": 400, "clientY": 300,
             "url": "https://example.com/", "tabId": 1},
        ]
        stamped = [(base_ts + i, evt) for i, evt in enumerate(raw_events)]

        events, _ = _roundtrip(tmp_path, stamped)

        assert len(events) == 4
        assert isinstance(events[0], BrowserMouseMoveEvent)
        assert isinstance(events[1], BrowserClickEvent)
        assert isinstance(events[2], BrowserKeyEvent)
        assert isinstance(events[3], BrowserScrollEvent)
        assert events[1].element.id == "submit-btn"
        assert events[1].element.class_list == ["btn"]


class TestCLIBrowserFlag:
    """Test that the CLI record function accepts browser_events flag."""

    def test_record_accepts_browser_events_param(self):
        """Verify record() function signature includes browser_events."""
        import inspect

        from openadapt_capture.cli import record

        params = inspect.signature(record).parameters
        assert "browser_events" in params
        # Default should be False
        assert params["browser_events"].default is False