From 115272db8a20351e6cb878a9db6a2b19a7fa9bd1 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 11 Jan 2026 20:25:52 -0800 Subject: [PATCH] port python backends to ts --- src/asserts/expect.ts | 597 +++++++++++++++++++++ src/asserts/index.ts | 61 +++ src/asserts/query.ts | 369 +++++++++++++ src/backends/actions.ts | 327 +++++++++++ src/backends/browser-use-adapter.ts | 248 +++++++++ src/backends/cdp-backend.ts | 341 ++++++++++++ src/backends/index.ts | 115 ++++ src/backends/protocol.ts | 175 ++++++ src/backends/sentience-context.ts | 476 ++++++++++++++++ src/backends/snapshot.ts | 403 ++++++++++++++ src/index.ts | 3 + tests/backends/actions.test.ts | 254 +++++++++ tests/backends/browser-use-adapter.test.ts | 239 +++++++++ tests/backends/cdp-backend.test.ts | 380 +++++++++++++ tests/backends/protocol.test.ts | 127 +++++ tests/backends/sentience-context.test.ts | 559 +++++++++++++++++++ tests/backends/snapshot.test.ts | 249 +++++++++ 17 files changed, 4923 insertions(+) create mode 100644 src/asserts/expect.ts create mode 100644 src/asserts/index.ts create mode 100644 src/asserts/query.ts create mode 100644 src/backends/actions.ts create mode 100644 src/backends/browser-use-adapter.ts create mode 100644 src/backends/cdp-backend.ts create mode 100644 src/backends/index.ts create mode 100644 src/backends/protocol.ts create mode 100644 src/backends/sentience-context.ts create mode 100644 src/backends/snapshot.ts create mode 100644 tests/backends/actions.test.ts create mode 100644 tests/backends/browser-use-adapter.test.ts create mode 100644 tests/backends/cdp-backend.test.ts create mode 100644 tests/backends/protocol.test.ts create mode 100644 tests/backends/sentience-context.test.ts create mode 100644 tests/backends/snapshot.test.ts diff --git a/src/asserts/expect.ts b/src/asserts/expect.ts new file mode 100644 index 00000000..e4bc0ffd --- /dev/null +++ b/src/asserts/expect.ts @@ -0,0 +1,597 @@ +/** + * Expectation builder for assertion DSL. 
+ * + * This module provides the expect() builder that creates fluent assertions + * which compile to existing Predicate objects. + * + * Key classes: + * - ExpectBuilder: Fluent builder for element-based assertions + * - EventuallyConfig: Configuration for .eventually() retry logic + * + * The expect() function is the main entry point. It returns a builder that + * can be chained with matchers: + * expect(E({ role: "button" })).toExist() + * expect(E({ textContains: "Error" })).notToExist() + * expect.textPresent("Welcome") + * + * All builders compile to Predicate functions compatible with AgentRuntime.assert(). + */ + +import { Predicate, AssertOutcome, AssertContext } from '../verification'; +import { ElementQuery, MultiQuery, MultiTextPredicate } from './query'; + +// Default values for .eventually() +const DEFAULT_TIMEOUT = 10000; // milliseconds +const DEFAULT_POLL = 200; // milliseconds +const DEFAULT_MAX_RETRIES = 3; + +/** + * Configuration for .eventually() retry logic. + */ +export interface EventuallyConfig { + /** Max time to wait (milliseconds, default 10000) */ + timeout?: number; + /** Interval between retries (milliseconds, default 200) */ + poll?: number; + /** Max number of retry attempts (default 3) */ + maxRetries?: number; +} + +/** + * Convert query to a serializable object for debugging. 
/**
 * Convert a query into a plain, JSON-serializable object for debug output.
 *
 * Only filters that were actually set are included. A filter counts as "set"
 * when it is not `undefined`/`null`, so an explicit empty string such as
 * `text: ''` is reported rather than silently dropped.
 *
 * @param query - Query passed to expect()
 * @returns Plain object describing the query
 */
function queryToDict(
  query: ElementQuery | MultiQuery | MultiTextPredicate
): Record<string, unknown> {
  if (query instanceof ElementQuery) {
    const result: Record<string, unknown> = {};
    const put = (key: string, value: unknown): void => {
      if (value !== undefined && value !== null) {
        result[key] = value;
      }
    };
    put('role', query.role);
    put('name', query.name);
    put('text', query.text);
    put('textContains', query.textContains);
    put('hrefContains', query.hrefContains);
    put('inViewport', query.inViewport);
    put('occluded', query.occluded);
    put('group', query.group);
    put('inDominantGroup', query.inDominantGroup);
    put('groupIndex', query._groupIndex);
    if (query._fromDominantList) {
      result.fromDominantList = true;
    }
    return result;
  }

  if (query instanceof MultiQuery) {
    return { type: 'multi', limit: query.limit };
  }

  if (
    typeof query === 'object' &&
    query !== null &&
    'checkType' in query &&
    'text' in query &&
    'multiQuery' in query
  ) {
    return { type: 'multi_text', text: query.text, checkType: query.checkType };
  }

  return { type: String(typeof query) };
}

/** Any query shape accepted by expect(). */
type ExpectQuery = ElementQuery | MultiQuery | MultiTextPredicate;

/** Elements returned by ElementQuery.findAll(). */
type MatchedElements = ReturnType<ElementQuery['findAll']>;

/**
 * Shared front half of every element matcher: verifies that a snapshot is
 * available and that the query is an ElementQuery, then hands the matches
 * (sorted as findAll() returns them) to `judge`.
 *
 * @param ctx - Assertion context supplied by the runtime
 * @param query - Query the builder was created with
 * @param matcherName - Matcher name used in the "requires ElementQuery" reason
 * @param extraDetails - Extra keys merged into `details` when no snapshot exists
 * @param judge - Produces the outcome from the matched elements
 */
function runElementMatcher(
  ctx: AssertContext,
  query: ExpectQuery,
  matcherName: string,
  extraDetails: Record<string, unknown>,
  judge: (matches: MatchedElements, queryDict: Record<string, unknown>) => AssertOutcome
): AssertOutcome {
  const snap = ctx.snapshot;
  if (!snap) {
    return {
      passed: false,
      reason: 'no snapshot available',
      details: { query: queryToDict(query), ...extraDetails },
    };
  }

  if (!(query instanceof ElementQuery)) {
    return {
      passed: false,
      reason: `${matcherName}() requires ElementQuery`,
      details: {},
    };
  }

  return judge(query.findAll(snap), queryToDict(query));
}

/** Outcome used by matchers that need at least one element but found none. */
function noMatchOutcome(
  queryDict: Record<string, unknown>,
  extraDetails: Record<string, unknown> = {}
): AssertOutcome {
  return {
    passed: false,
    reason: `no elements matched query: ${JSON.stringify(queryDict)}`,
    details: { query: queryDict, matched: 0, ...extraDetails },
  };
}

/**
 * Fluent builder for element-based assertions.
 *
 * Created by expect(E(...)) or expect(inDominantList().nth(k)).
 * Every matcher returns a Predicate that can be passed to runtime.assert().
 *
 * NOTE: the constructor accepts MultiQuery and MultiTextPredicate, but every
 * matcher below only evaluates ElementQuery; other query kinds produce a
 * failed outcome with reason "<matcher>() requires ElementQuery".
 *
 * @example
 *   expect(E({ role: "button" })).toExist()
 *   expect(E({ textContains: "Error" })).notToExist()
 *   expect(E({ role: "link" })).toBeVisible()
 */
export class ExpectBuilder {
  private _query: ExpectQuery;

  constructor(query: ExpectQuery) {
    this._query = query;
  }

  /**
   * Assert that at least one element matches the query.
   *
   * @returns Predicate function for use with runtime.assert()
   *
   * @example
   *   await runtime.assert(
   *     expect(E({ role: "button", textContains: "Save" })).toExist(),
   *     "save_button_exists"
   *   );
   */
  toExist(): Predicate {
    const query = this._query;

    return (ctx: AssertContext): AssertOutcome =>
      runElementMatcher(ctx, query, 'toExist', {}, (matches, queryDict) => {
        const ok = matches.length > 0;
        return {
          passed: ok,
          reason: ok ? '' : `no elements matched query: ${JSON.stringify(queryDict)}`,
          details: { query: queryDict, matched: matches.length },
        };
      });
  }

  /**
   * Assert that NO elements match the query.
   *
   * Useful for asserting absence of error messages, loading indicators, etc.
   *
   * @returns Predicate function for use with runtime.assert()
   *
   * @example
   *   await runtime.assert(
   *     expect(E({ textContains: "Error" })).notToExist(),
   *     "no_error_message"
   *   );
   */
  notToExist(): Predicate {
    const query = this._query;

    return (ctx: AssertContext): AssertOutcome =>
      runElementMatcher(ctx, query, 'notToExist', {}, (matches, queryDict) => {
        const ok = matches.length === 0;
        return {
          passed: ok,
          reason: ok
            ? ''
            : `found ${matches.length} elements matching: ${JSON.stringify(queryDict)}`,
          details: { query: queryDict, matched: matches.length },
        };
      });
  }

  /**
   * Assert that an element exists AND is visible
   * (in_viewport is true and is_occluded is false).
   *
   * Only the first match (in findAll() order) is inspected.
   *
   * @returns Predicate function for use with runtime.assert()
   *
   * @example
   *   await runtime.assert(
   *     expect(E({ textContains: "Checkout" })).toBeVisible(),
   *     "checkout_button_visible"
   *   );
   */
  toBeVisible(): Predicate {
    const query = this._query;

    return (ctx: AssertContext): AssertOutcome =>
      runElementMatcher(ctx, query, 'toBeVisible', {}, (matches, queryDict) => {
        if (matches.length === 0) {
          return noMatchOutcome(queryDict);
        }

        const el = matches[0];
        // Boolean() keeps `passed` a real boolean even if in_viewport is missing.
        const isVisible = Boolean(el.in_viewport) && !el.is_occluded;
        return {
          passed: isVisible,
          reason: isVisible
            ? ''
            : `element found but not visible (in_viewport=${el.in_viewport}, is_occluded=${el.is_occluded})`,
          details: {
            query: queryDict,
            elementId: el.id,
            inViewport: el.in_viewport,
            isOccluded: el.is_occluded,
          },
        };
      });
  }

  /**
   * Assert that the element's text contains the specified substring.
   *
   * Only the first match (in findAll() order) is inspected. Reported text is
   * truncated (100 chars in the reason, 200 chars in details).
   *
   * @param text - Substring to search for (case-insensitive)
   * @returns Predicate function for use with runtime.assert()
   *
   * @example
   *   await runtime.assert(
   *     expect(inDominantList().nth(0)).toHaveTextContains("Show HN"),
   *     "first_item_is_show_hn"
   *   );
   */
  toHaveTextContains(text: string): Predicate {
    const query = this._query;
    const expected = { expectedText: text };

    return (ctx: AssertContext): AssertOutcome =>
      runElementMatcher(ctx, query, 'toHaveTextContains', expected, (matches, queryDict) => {
        if (matches.length === 0) {
          return noMatchOutcome(queryDict, expected);
        }

        const el = matches[0];
        const elText = el.text || '';
        const ok = elText.toLowerCase().includes(text.toLowerCase());
        return {
          passed: ok,
          reason: ok ? '' : `element text '${elText.substring(0, 100)}' does not contain '${text}'`,
          details: {
            query: queryDict,
            elementId: el.id,
            elementText: elText.substring(0, 200),
            expectedText: text,
          },
        };
      });
  }
}

/**
 * Find the first snapshot element whose text contains `text`
 * (case-insensitive); elements without text are treated as empty strings.
 */
function findElementContainingText(
  snapshot: NonNullable<AssertContext['snapshot']>,
  text: string
) {
  const needle = text.toLowerCase();
  return snapshot.elements.find(el => (el.text || '').toLowerCase().includes(needle));
}

/**
 * Factory for creating ExpectBuilder instances and global assertions.
 *
 * This is the main entry point for the assertion DSL.
 *
 * @example
 *   import { expect, E } from './asserts';
 *
 *   // Element-based assertions
 *   expect(E({ role: "button" })).toExist()
 *   expect(E({ textContains: "Error" })).notToExist()
 *
 *   // Global text assertions
 *   expect.textPresent("Welcome back")
 *   expect.noText("Error")
 */
class ExpectFactory {
  /**
   * Create an expectation builder for the given query.
   *
   * @param query - ElementQuery, MultiQuery, or MultiTextPredicate
   * @returns ExpectBuilder for chaining matchers
   *
   * @example
   *   expect(E({ role: "button" })).toExist()
   *   expect(inDominantList().nth(0)).toHaveTextContains("Show HN")
   */
  call(query: ExpectQuery): ExpectBuilder {
    return new ExpectBuilder(query);
  }

  /**
   * Global assertion: check if text is present anywhere on the page.
   *
   * Searches across all element text fields.
   *
   * @param text - Text to search for (case-insensitive substring)
   * @returns Predicate function for use with runtime.assert()
   *
   * @example
   *   await runtime.assert(
   *     expect.textPresent("Welcome back"),
   *     "user_logged_in"
   *   );
   */
  textPresent(text: string): Predicate {
    return (ctx: AssertContext): AssertOutcome => {
      const snap = ctx.snapshot;
      if (!snap) {
        return {
          passed: false,
          reason: 'no snapshot available',
          details: { searchText: text },
        };
      }

      const hit = findElementContainingText(snap, text);
      if (hit) {
        return {
          passed: true,
          reason: '',
          details: { searchText: text, foundInElement: hit.id },
        };
      }

      return {
        passed: false,
        reason: `text '${text}' not found on page`,
        details: { searchText: text, elementsSearched: snap.elements.length },
      };
    };
  }

  /**
   * Global assertion: check that text is NOT present anywhere on the page.
   *
   * Searches across all element text fields.
   *
   * @param text - Text that should not be present (case-insensitive substring)
   * @returns Predicate function for use with runtime.assert()
   *
   * @example
   *   await runtime.assert(
   *     expect.noText("Error"),
   *     "no_error_message"
   *   );
   */
  noText(text: string): Predicate {
    return (ctx: AssertContext): AssertOutcome => {
      const snap = ctx.snapshot;
      if (!snap) {
        return {
          passed: false,
          reason: 'no snapshot available',
          details: { searchText: text },
        };
      }

      const hit = findElementContainingText(snap, text);
      if (hit) {
        return {
          passed: false,
          reason: `text '${text}' found in element id=${hit.id}`,
          details: {
            searchText: text,
            foundInElement: hit.id,
            elementText: (hit.text || '').substring(0, 200),
          },
        };
      }

      return {
        passed: true,
        reason: '',
        details: { searchText: text, elementsSearched: snap.elements.length },
      };
    };
  }
}

// Create the singleton factory
const factoryInstance = new ExpectFactory();

/**
 * Main entry point for the assertion DSL.
 *
 * Use as a function to create element-based assertions:
 *   expect(E({ role: "button" })).toExist()
 *
 * Use static methods for global assertions:
 *   expect.textPresent("Welcome")
 *   expect.noText("Error")
 */
export const expect = Object.assign(
  (query: ExpectQuery) => factoryInstance.call(query),
  {
    textPresent: (text: string) => factoryInstance.textPresent(text),
    noText: (text: string) => factoryInstance.noText(text),
  }
);

/**
 * Wrapper that adds retry logic to a predicate.
 *
 * Created by withEventually(). Provides an async evaluate() method
 * that retries the predicate with fresh snapshots.
 *
 * Note: TypeScript uses milliseconds for timeout/poll.
 */
/**
 * Wrapper that adds retry logic to a predicate.
 *
 * Created by withEventually(). Provides an async evaluate() method that
 * re-runs the predicate against fresh snapshots until it passes, the
 * timeout elapses, or the retry budget is used up.
 *
 * Note: timeout and poll are expressed in milliseconds.
 */
export class EventuallyWrapper {
  private _predicate: Predicate;
  private _config: Required<EventuallyConfig>;

  constructor(predicate: Predicate, config: EventuallyConfig = {}) {
    this._predicate = predicate;
    this._config = {
      timeout: config.timeout ?? DEFAULT_TIMEOUT,
      poll: config.poll ?? DEFAULT_POLL,
      maxRetries: config.maxRetries ?? DEFAULT_MAX_RETRIES,
    };
  }

  /**
   * Evaluate predicate with retry logic.
   *
   * The first attempt uses `ctx` as given; every later attempt first calls
   * `snapshotFn` and evaluates against the fresh snapshot. A snapshot failure
   * counts as a failed attempt. The timeout takes precedence over maxRetries.
   *
   * Outcomes produced by the predicate are never mutated: the returned
   * outcome is a copy carrying the prefixed reason (on failure) or the
   * `attempts` count in `details` (on success).
   *
   * @param ctx - Initial assertion context
   * @param snapshotFn - Async function to take fresh snapshots
   * @returns Promise resolving to AssertOutcome
   */
  async evaluate(
    ctx: AssertContext,
    snapshotFn: () => Promise<AssertContext['snapshot']>
  ): Promise<AssertOutcome> {
    const { timeout, poll, maxRetries } = this._config;
    const startTime = Date.now();
    let lastOutcome: AssertOutcome | null = null;
    let attempts = 0;

    while (true) {
      // Check timeout (higher precedence than maxRetries)
      const elapsed = Date.now() - startTime;
      if (elapsed >= timeout) {
        return this.failure(`timeout after ${elapsed}ms`, lastOutcome, attempts);
      }

      // Check max retries
      if (attempts >= maxRetries) {
        return this.failure(`max retries (${maxRetries}) exceeded`, lastOutcome, attempts);
      }

      let outcome: AssertOutcome | null = null;

      // Take fresh snapshot if not first attempt
      if (attempts > 0) {
        try {
          const freshSnapshot = await snapshotFn();
          // Spread keeps any additional context fields intact across retries.
          ctx = {
            ...ctx,
            snapshot: freshSnapshot,
            url: freshSnapshot?.url ?? ctx.url,
          };
        } catch (e) {
          outcome = {
            passed: false,
            reason: `failed to take snapshot: ${e}`,
            details: { attempts, error: String(e) },
          };
        }
      }

      // Evaluate predicate (skipped when the snapshot could not be taken)
      if (outcome === null) {
        outcome = this._predicate(ctx);
        if (outcome.passed) {
          return { ...outcome, details: { ...outcome.details, attempts: attempts + 1 } };
        }
      }

      lastOutcome = outcome;
      attempts++;

      // Wait before the next retry, but only if there will be one.
      if (attempts < maxRetries) {
        const spent = Date.now() - startTime;
        if (spent + poll >= timeout) {
          // No point waiting, we'd time out anyway
          return this.failure(`timeout after ${spent}ms`, lastOutcome, attempts);
        }
        await this.sleep(poll);
      }
    }
  }

  /**
   * Build the final failed outcome: the last observed outcome with `prefix`
   * prepended to its reason, or a bare failure when nothing was evaluated.
   */
  private failure(prefix: string, last: AssertOutcome | null, attempts: number): AssertOutcome {
    if (last) {
      return { ...last, reason: `${prefix}: ${last.reason}` };
    }
    return { passed: false, reason: prefix, details: { attempts } };
  }

  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /** Get the configured timeout in milliseconds */
  get timeout(): number {
    return this._config.timeout;
  }

  /** Get the configured poll interval in milliseconds */
  get poll(): number {
    return this._config.poll;
  }

  /** Get the configured max retries */
  get maxRetries(): number {
    return this._config.maxRetries;
  }
}

/**
 * Wrap a predicate with retry logic.
 *
 * This is the TypeScript API for .eventually(). Returns a wrapper
 * that provides an async evaluate() method for use with the runtime.
 */
+ * + * @param predicate - Predicate to wrap + * @param config - Retry configuration (timeout/poll in milliseconds) + * @returns EventuallyWrapper with async evaluate() method + * + * @example + * const wrapper = withEventually( + * expect(E({ role: "button" })).toExist(), + * { timeout: 5000, maxRetries: 10 } + * ); + * const result = await wrapper.evaluate(ctx, runtime.snapshot); + */ +export function withEventually(predicate: Predicate, config?: EventuallyConfig): EventuallyWrapper { + return new EventuallyWrapper(predicate, config); +} diff --git a/src/asserts/index.ts b/src/asserts/index.ts new file mode 100644 index 00000000..de9228ef --- /dev/null +++ b/src/asserts/index.ts @@ -0,0 +1,61 @@ +/** + * Assertion DSL for Sentience SDK. + * + * This module provides a Playwright/Cypress-like assertion API for verifying + * browser state in agent verification loops. + * + * Main exports: + * - E: Element query builder (filters elements by role, text, href, etc.) + * - expect: Expectation builder (creates predicates from queries) + * - inDominantList: Query over dominant group elements (ordinal access) + * + * @example + * ```typescript + * import { E, expect, inDominantList } from '@anthropic/sentience-ts/asserts'; + * + * // Basic presence assertions + * await runtime.assert( + * expect(E({ role: "button", textContains: "Save" })).toExist(), + * "save_button_visible" + * ); + * + * // Visibility assertions + * await runtime.assert( + * expect(E({ textContains: "Checkout" })).toBeVisible(), + * "checkout_visible" + * ); + * + * // Global text assertions + * await runtime.assert( + * expect.textPresent("Welcome back"), + * "user_logged_in" + * ); + * await runtime.assert( + * expect.noText("Error"), + * "no_error_message" + * ); + * + * // Ordinal assertions on dominant group + * await runtime.assert( + * expect(inDominantList().nth(0)).toHaveTextContains("Show HN"), + * "first_item_is_show_hn" + * ); + * + * // Task completion + * await runtime.assertDone( + * 
expect.textPresent("Order confirmed"), + * "checkout_complete" + * ); + * ``` + * + * The DSL compiles to existing Predicate functions, so it works seamlessly + * with AgentRuntime.assert() and assertDone(). + */ + +// Query builders +export { E, ElementQuery, ListQuery, MultiQuery, inDominantList } from './query'; +export type { EOptions, MultiTextPredicate } from './query'; + +// Expectation builders +export { expect, ExpectBuilder, EventuallyWrapper, withEventually } from './expect'; +export type { EventuallyConfig } from './expect'; diff --git a/src/asserts/query.ts b/src/asserts/query.ts new file mode 100644 index 00000000..a6b6f6e4 --- /dev/null +++ b/src/asserts/query.ts @@ -0,0 +1,369 @@ +/** + * Element query builders for assertion DSL. + * + * This module provides the E() query builder and dominant-group list operations + * for creating element queries that compile to existing Predicates. + * + * Key classes: + * - ElementQuery: Pure data object for filtering elements (E()) + * - ListQuery: Query over dominant-group elements (inDominantList()) + * - MultiQuery: Represents multiple elements from ListQuery.top(n) + * + * All queries work with existing Snapshot fields only: + * id, role, text, bbox, doc_y, group_key, group_index, + * dominant_group_key, in_viewport, is_occluded, href + */ + +import { Element, Snapshot } from '../types'; + +/** + * Options for creating an ElementQuery via E(). 
/**
 * Options for creating an ElementQuery via E().
 */
export interface EOptions {
  /** ARIA role to match (e.g., "button", "textbox", "link") */
  role?: string;
  /** Text to match exactly (alias for text, best-effort; ignored when text is set) */
  name?: string;
  /** Exact text match */
  text?: string;
  /** Substring match against text (case-insensitive) */
  textContains?: string;
  /** Substring match against href (case-insensitive) */
  hrefContains?: string;
  /** Filter by viewport visibility */
  inViewport?: boolean;
  /** Filter by occlusion state */
  occluded?: boolean;
  /** Exact match against group_key */
  group?: string;
  /** True = must be in dominant group, false = must not be */
  inDominantGroup?: boolean;
}

/**
 * True when `element` belongs to the snapshot's dominant group.
 *
 * Requires the snapshot to actually have a dominant group: without this
 * guard an element with no group_key would compare equal to a missing
 * dominant_group_key (undefined === undefined) and be treated as a member.
 */
function isInDominantGroup(element: Element, snapshot?: Snapshot | null): boolean {
  if (!snapshot) {
    return false;
  }
  const dominantKey = snapshot.dominant_group_key;
  if (dominantKey === undefined || dominantKey === null) {
    return false;
  }
  return element.group_key === dominantKey;
}

/**
 * Pure query object for filtering elements.
 *
 * This is the data representation of an E() call. It does not execute
 * anything - it just stores the filter criteria.
 *
 * @example
 *   E({ role: "button", textContains: "Save" })
 *   E({ role: "link", hrefContains: "/cart" })
 *   E({ inViewport: true, occluded: false })
 */
export class ElementQuery {
  role?: string;
  name?: string;
  text?: string;
  textContains?: string;
  hrefContains?: string;
  inViewport?: boolean;
  occluded?: boolean;
  group?: string;
  inDominantGroup?: boolean;

  // Internal: for ordinal selection from ListQuery
  _groupIndex?: number;
  _fromDominantList: boolean = false;

  constructor(options: EOptions = {}) {
    this.role = options.role;
    this.name = options.name;
    this.text = options.text;
    this.textContains = options.textContains;
    this.hrefContains = options.hrefContains;
    this.inViewport = options.inViewport;
    this.occluded = options.occluded;
    this.group = options.group;
    this.inDominantGroup = options.inDominantGroup;
  }

  /**
   * Check if element matches this query criteria.
   *
   * Every filter that is set must hold; unset filters are ignored. Missing
   * element text/href is treated as an empty string.
   *
   * @param element - Element to check
   * @param snapshot - Snapshot (needed for dominant_group_key comparison)
   * @returns True if element matches all criteria
   */
  matches(element: Element, snapshot?: Snapshot | null): boolean {
    // Role filter
    if (this.role !== undefined && element.role !== this.role) {
      return false;
    }

    // Text exact match (name is alias for text)
    const exactText = this.text ?? this.name;
    if (exactText !== undefined && (element.text || '') !== exactText) {
      return false;
    }

    // Text contains (substring, case-insensitive)
    if (
      this.textContains !== undefined &&
      !(element.text || '').toLowerCase().includes(this.textContains.toLowerCase())
    ) {
      return false;
    }

    // Href contains (substring, case-insensitive)
    if (
      this.hrefContains !== undefined &&
      !(element.href || '').toLowerCase().includes(this.hrefContains.toLowerCase())
    ) {
      return false;
    }

    // In viewport filter
    if (this.inViewport !== undefined && element.in_viewport !== this.inViewport) {
      return false;
    }

    // Occluded filter
    if (this.occluded !== undefined && element.is_occluded !== this.occluded) {
      return false;
    }

    // Group key exact match
    if (this.group !== undefined && element.group_key !== this.group) {
      return false;
    }

    // Dominant group membership must equal the requested value.
    // Without a snapshot nothing is "in" the dominant group.
    if (
      this.inDominantGroup !== undefined &&
      isInDominantGroup(element, snapshot) !== this.inDominantGroup
    ) {
      return false;
    }

    // Group index filter (from ListQuery.nth())
    if (this._groupIndex !== undefined && element.group_index !== this._groupIndex) {
      return false;
    }

    // Dominant list filter (from inDominantList())
    if (this._fromDominantList && !isInDominantGroup(element, snapshot)) {
      return false;
    }

    return true;
  }

  /**
   * Find all elements matching this query in the snapshot.
   *
   * @param snapshot - Snapshot to search
   * @returns New array of matching elements, sorted by doc_y (falling back to
   *          bbox.y) from top to bottom
   */
  findAll(snapshot: Snapshot): Element[] {
    const top = (el: Element): number => el.doc_y ?? el.bbox.y;
    // filter() returns a fresh array, so the in-place sort never touches snapshot.elements.
    return snapshot.elements.filter(el => this.matches(el, snapshot)).sort((a, b) => top(a) - top(b));
  }

  /**
   * Find first matching element.
   *
   * @param snapshot - Snapshot to search
   * @returns First matching element (topmost) or null
   */
  findFirst(snapshot: Snapshot): Element | null {
    const matches = this.findAll(snapshot);
    return matches.length > 0 ? matches[0] : null;
  }
}

/**
 * Interface for the E function with static convenience methods.
 */
interface EFunction {
  (options?: EOptions): ElementQuery;
  /** Query for submit-like buttons */
  submit: () => ElementQuery;
  /** Query for search input boxes */
  searchBox: () => ElementQuery;
  /** Query for links with optional text filter */
  link: (options?: { textContains?: string }) => ElementQuery;
}

/**
 * Create an element query.
 *
 * This is the main entry point for building element queries.
 * It returns a pure data object that can be used with expect().
 *
 * @param options - Query filter options
 * @returns ElementQuery object
 *
 * @example
 *   E({ role: "button", textContains: "Save" })
 *   E({ role: "link", hrefContains: "/checkout" })
 *   E({ inViewport: true, occluded: false })
 */
export const E: EFunction = Object.assign(
  function (options: EOptions = {}): ElementQuery {
    return new ElementQuery(options);
  },
  {
    /**
     * Query for submit buttons.
     * Matches role "button" whose text contains "submit" (case-insensitive).
     * Buttons labelled e.g. "Save" or "Continue" are NOT matched; build an
     * explicit E({ role: "button", textContains: ... }) query for those.
     */
    submit: function (): ElementQuery {
      return new ElementQuery({ role: 'button', textContains: 'submit' });
    },

    /**
     * Query for search input boxes.
     * Matches role "textbox" whose text is exactly "search"
     * (name is an exact, case-sensitive text match).
     */
    searchBox: function (): ElementQuery {
      return new ElementQuery({ role: 'textbox', name: 'search' });
    },

    /**
     * Query for links with optional text filter.
     *
     * @param options - Optional text filter
     */
    link: function (options?: { textContains?: string }): ElementQuery {
      return new ElementQuery({ role: 'link', textContains: options?.textContains });
    },
  }
);

/**
 * Internal predicate for MultiQuery text checks.
 * Used by expect() to evaluate multi-element text assertions.
 */
export interface MultiTextPredicate {
  multiQuery: MultiQuery;
  text: string;
  checkType: 'any_contains';
}

/**
 * Represents multiple elements from a dominant list query.
 *
 * Created by ListQuery.top(n) to represent the first n elements
 * in a dominant group.
 *
 * @example
 *   inDominantList().top(5)  // First 5 items in dominant group
 */
export class MultiQuery {
  limit: number;
  _parentListQuery?: ListQuery;

  constructor(limit: number, parentListQuery?: ListQuery) {
    this.limit = limit;
    this._parentListQuery = parentListQuery;
  }

  /**
   * Create a predicate that checks if any element's text contains the substring.
   *
   * @param text - Substring to search for
   * @returns Predicate that can be used with expect()
   */
  anyTextContains(text: string): MultiTextPredicate {
    return {
      multiQuery: this,
      text,
      checkType: 'any_contains',
    };
  }
}

/**
 * Query over elements in the dominant group.
 *
 * Provides ordinal access to dominant-group elements via .nth(k)
 * and range access via .top(n).
 *
 * Created by inDominantList().
 */
+ * + * @example + * inDominantList().nth(0) // First item in dominant group + * inDominantList().top(5) // First 5 items + */ +export class ListQuery { + /** + * Select element at specific index in the dominant group. + * + * @param index - 0-based index in the dominant group + * @returns ElementQuery targeting the element at that position + * + * @example + * inDominantList().nth(0) // First item + * inDominantList().nth(2) // Third item + */ + nth(index: number): ElementQuery { + const query = new ElementQuery(); + query._groupIndex = index; + query._fromDominantList = true; + return query; + } + + /** + * Select the first n elements in the dominant group. + * + * @param n - Number of elements to select + * @returns MultiQuery representing the first n elements + * + * @example + * inDominantList().top(5) // First 5 items + */ + top(n: number): MultiQuery { + return new MultiQuery(n, this); + } +} + +/** + * Create a query over elements in the dominant group. + * + * The dominant group is the most common group_key in the snapshot, + * typically representing the main content list (search results, + * news feed items, product listings, etc.). + * + * @returns ListQuery for chaining .nth(k) or .top(n) + * + * @example + * inDominantList().nth(0) // First item in dominant group + * inDominantList().top(5) // First 5 items + * + * // With expect(): + * expect(inDominantList().nth(0)).toHaveTextContains("Show HN") + */ +export function inDominantList(): ListQuery { + return new ListQuery(); +} diff --git a/src/backends/actions.ts b/src/backends/actions.ts new file mode 100644 index 00000000..05a2fde0 --- /dev/null +++ b/src/backends/actions.ts @@ -0,0 +1,327 @@ +/** + * Backend-agnostic actions for browser-use integration. + * + * These actions work with any BrowserBackend implementation, + * enabling Sentience grounding with browser-use or other frameworks. 
+ * + * Usage with browser-use: + * import { BrowserUseAdapter } from './backends/browser-use-adapter'; + * import { click, typeText, scroll } from './backends/actions'; + * + * const adapter = new BrowserUseAdapter(session); + * const backend = await adapter.createBackend(); + * + * // Take snapshot and click element + * const snap = await snapshot(backend); + * const element = find(snap, 'role=button[name="Submit"]'); + * await click(backend, element.bbox); + */ + +import type { ActionResult, BBox } from '../types'; +import type { BrowserBackend, MouseButton } from './protocol'; + +/** + * Target type for coordinate resolution. + * Can be a BBox (clicks center), {x, y} object, or [x, y] tuple. + */ +export type ClickTarget = + | BBox + | { x: number; y: number; width?: number; height?: number } + | [number, number]; + +/** + * Scroll behavior for scrollToElement. + */ +export type ScrollBehavior = 'smooth' | 'instant' | 'auto'; + +/** + * Vertical alignment for scrollToElement. + */ +export type ScrollBlock = 'start' | 'center' | 'end' | 'nearest'; + +/** + * Resolve target to (x, y) coordinates. 
/**
 * Resolve target to (x, y) coordinates.
 *
 * - BBox: Returns center point
 * - {x, y, width?, height?}: Returns center if both width and height keys
 *   are present, else x/y directly
 * - [x, y]: Returns the tuple as-is
 *
 * Missing or falsy numeric fields are treated as 0.
 */
function resolveCoordinates(target: ClickTarget): [number, number] {
  if (Array.isArray(target)) {
    return target;
  }

  if ('width' in target && 'height' in target) {
    // BBox or object with dimensions - compute center
    const centerX = (target.x || 0) + (target.width || 0) / 2;
    const centerY = (target.y || 0) + (target.height || 0) / 2;
    return [centerX, centerY];
  }

  // Simple {x, y} object
  return [target.x || 0, target.y || 0];
}

/**
 * Milliseconds elapsed since `startTime` (a Date.now() timestamp), as a
 * non-negative integer.
 *
 * Date.now() follows the wall clock, which can be adjusted backwards while an
 * action is running; the result is clamped so a duration is never negative.
 */
function measureDuration(startTime: number): number {
  return Math.max(0, Math.floor(Date.now() - startTime));
}

/**
 * Helper to create successful ActionResult
 */
function successResult(durationMs: number): ActionResult {
  return {
    success: true,
    duration_ms: durationMs,
    outcome: 'dom_updated',
  };
}

/**
 * Helper to create error ActionResult
 */
function errorResult(durationMs: number, code: string, reason: string): ActionResult {
  return {
    success: false,
    duration_ms: durationMs,
    outcome: 'error',
    error: { code, reason },
  };
}

/**
 * Click at coordinates using the backend.
 */
/**
 * Click at coordinates using the backend.
 *
 * Never throws: backend failures are reported as an error ActionResult with
 * code 'click_failed'.
 *
 * @param backend - BrowserBackend implementation
 * @param target - Click target - BBox (clicks center), object with x/y, or [x, y] tuple
 * @param button - Mouse button to click
 * @param clickCount - Number of clicks (1=single, 2=double)
 * @param moveFirst - Whether to move mouse to position before clicking
 * @returns ActionResult with success status
 *
 * @example
 * // Click at coordinates
 * await click(backend, [100, 200]);
 *
 * // Click element bbox center
 * await click(backend, element.bbox);
 *
 * // Double-click
 * await click(backend, element.bbox, 'left', 2);
 */
export async function click(
  backend: BrowserBackend,
  target: ClickTarget,
  button: MouseButton = 'left',
  clickCount: number = 1,
  moveFirst: boolean = true
): Promise<ActionResult> {
  const startTime = Date.now();

  const [x, y] = resolveCoordinates(target);

  try {
    // Optional mouse move so hover-dependent UI reacts before the press
    if (moveFirst) {
      await backend.mouseMove(x, y);
      await sleep(20); // Brief pause for hover
    }

    // Perform click
    await backend.mouseClick(x, y, button, clickCount);

    return successResult(measureDuration(startTime));
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    return errorResult(measureDuration(startTime), 'click_failed', reason);
  }
}

/**
 * Type text, optionally clicking a target first.
 */
/**
 * Type text, optionally clicking a target first.
 *
 * Never throws: backend failures are reported as an error ActionResult with
 * code 'type_failed'.
 *
 * @param backend - BrowserBackend implementation
 * @param text - Text to type
 * @param target - Optional click target before typing (BBox, object, or tuple)
 * @param clearFirst - If true, select all and delete before typing
 * @returns ActionResult with success status
 *
 * @example
 * // Type into focused element
 * await typeText(backend, 'Hello World');
 *
 * // Click input then type
 * await typeText(backend, 'search query', searchBox.bbox);
 *
 * // Clear and type
 * await typeText(backend, 'new value', input.bbox, true);
 */
export async function typeText(
  backend: BrowserBackend,
  text: string,
  target?: ClickTarget,
  clearFirst: boolean = false
): Promise<ActionResult> {
  const startTime = Date.now();

  try {
    // Click target if provided
    if (target !== undefined) {
      const [x, y] = resolveCoordinates(target);
      await backend.mouseClick(x, y);
      await sleep(50); // Wait for focus
    }

    // Clear existing content if requested
    if (clearFirst) {
      // Select all, then actually delete the selection. Selecting alone only
      // clears the field when the typed text replaces it, so clearing with an
      // empty `text` used to leave the old content in place.
      await backend.eval("document.execCommand('selectAll')");
      await backend.eval("document.execCommand('delete')");
      await sleep(20);
    }

    // Type the text
    await backend.typeText(text);

    return successResult(measureDuration(startTime));
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    return errorResult(measureDuration(startTime), 'type_failed', reason);
  }
}

/**
 * Scroll the page or element.
 */
/**
 * Scroll the page or element.
 *
 * Never throws: backend failures are reported as an error ActionResult with
 * code 'scroll_failed'.
 *
 * @param backend - BrowserBackend implementation
 * @param deltaY - Scroll amount (positive=down, negative=up)
 * @param target - Optional position for scroll (defaults to viewport center)
 * @returns ActionResult with success status
 *
 * @example
 * // Scroll down 300px
 * await scroll(backend, 300);
 *
 * // Scroll up 500px
 * await scroll(backend, -500);
 *
 * // Scroll at specific position
 * await scroll(backend, 200, [500, 300]);
 */
export async function scroll(
  backend: BrowserBackend,
  deltaY: number = 300,
  target?: ClickTarget
): Promise<ActionResult> {
  const startTime = Date.now();

  try {
    let x: number | undefined;
    let y: number | undefined;

    if (target !== undefined) {
      [x, y] = resolveCoordinates(target);
    }

    // Undefined coordinates let the backend pick its default position
    await backend.wheel(deltaY, x, y);

    // Wait for scroll to settle
    await sleep(100);

    return successResult(measureDuration(startTime));
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    return errorResult(measureDuration(startTime), 'scroll_failed', reason);
  }
}

/** Values accepted for scrollToElement's `behavior` argument. */
const SCROLL_BEHAVIOR_VALUES: readonly string[] = ['smooth', 'instant', 'auto'];

/** Values accepted for scrollToElement's `block` argument. */
const SCROLL_BLOCK_VALUES: readonly string[] = ['start', 'center', 'end', 'nearest'];

/**
 * Scroll element into view using JavaScript scrollIntoView.
 *
 * The arguments are spliced into a script evaluated in the page, so they are
 * validated first; values outside the declared types (possible from untyped
 * callers) yield a 'scroll_failed' result instead of being evaluated.
 *
 * @param backend - BrowserBackend implementation
 * @param elementId - Element ID from snapshot (requires sentience_registry)
 * @param behavior - Scroll behavior
 * @param block - Vertical alignment
 * @returns ActionResult with success status
 */
export async function scrollToElement(
  backend: BrowserBackend,
  elementId: number,
  behavior: ScrollBehavior = 'instant',
  block: ScrollBlock = 'center'
): Promise<ActionResult> {
  const startTime = Date.now();

  if (
    typeof elementId !== 'number' ||
    !Number.isFinite(elementId) ||
    !SCROLL_BEHAVIOR_VALUES.includes(behavior) ||
    !SCROLL_BLOCK_VALUES.includes(block)
  ) {
    return errorResult(
      measureDuration(startTime),
      'scroll_failed',
      'Invalid scrollToElement arguments'
    );
  }

  try {
    const scrolled = await backend.eval(`
      (() => {
        const el = window.sentience_registry && window.sentience_registry[${elementId}];
        if (el && el.scrollIntoView) {
          el.scrollIntoView({
            behavior: '${behavior}',
            block: '${block}',
            inline: 'nearest'
          });
          return true;
        }
        return false;
      })()
    `);

    // Wait for scroll animation (smooth scrolling takes longer to finish)
    const waitTime = behavior === 'smooth' ? 300 : 50;
    await sleep(waitTime);

    const durationMs = measureDuration(startTime);

    if (scrolled) {
      return successResult(durationMs);
    } else {
      return errorResult(durationMs, 'scroll_failed', 'Element not found in registry');
    }
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    return errorResult(measureDuration(startTime), 'scroll_failed', reason);
  }
}

/**
 * Wait for page to reach stable state.
 *
 * Never throws: a failure whose message contains 'Timed out' is reported with
 * code 'timeout', any other failure with code 'wait_failed'.
 *
 * @param backend - BrowserBackend implementation
 * @param state - Target document.readyState
 * @param timeoutMs - Maximum wait time
 * @returns ActionResult with success status
 */
export async function waitForStable(
  backend: BrowserBackend,
  state: 'interactive' | 'complete' = 'complete',
  timeoutMs: number = 10000
): Promise<ActionResult> {
  const startTime = Date.now();

  try {
    await backend.waitReadyState(state, timeoutMs);

    return successResult(measureDuration(startTime));
  } catch (e) {
    const durationMs = measureDuration(startTime);
    const reason = e instanceof Error ? e.message : String(e);

    // Check if it's a timeout error
    if (reason.includes('Timed out')) {
      return errorResult(durationMs, 'timeout', reason);
    }

    return errorResult(durationMs, 'wait_failed', reason);
  }
}

/**
 * Helper sleep function.
 *
 * @param ms - Delay in milliseconds
 * @returns Promise that resolves after the delay
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}

/*
 * Patch metadata for the next file in this dump, preserved verbatim:
 *
 * diff --git a/src/backends/browser-use-adapter.ts b/src/backends/browser-use-adapter.ts
 * new file mode 100644
 * index 00000000..63137382
 * --- /dev/null
 * +++ b/src/backends/browser-use-adapter.ts
 * @@ -0,0 +1,248 @@
 */

/**
 * Browser-use adapter for Sentience SDK.
 *
 * This module provides BrowserUseAdapter which wraps browser-use's BrowserSession
 * and provides a CDPBackend for Sentience operations.
 */
/**
 * Usage:
 *   import { BrowserUseAdapter, BrowserUseCDPTransport } from './backends/browser-use-adapter';
 *
 *   // Create adapter with browser-use session
 *   const adapter = new BrowserUseAdapter(session);
 *   const backend = await adapter.createBackend();
 *
 *   // Use backend for Sentience operations
 *   const viewport = await backend.refreshPageInfo();
 *   await backend.mouseClick(100, 200);
 */

import { CDPBackend, CDPTransport } from './cdp-backend';

/**
 * CDP transport implementation for browser-use.
 *
 * Wraps browser-use's CDP client to provide the CDPTransport interface.
 * Uses cdp-use library pattern: cdpClient.send.Domain.method(params={}, sessionId=)
 */
export class BrowserUseCDPTransport implements CDPTransport {
  private client: unknown;
  private sessionId: string;

  /**
   * Initialize transport with browser-use CDP client.
   *
   * @param cdpClient - browser-use's CDP client (from cdpSession.cdpClient)
   * @param sessionId - CDP session ID (from cdpSession.sessionId)
   */
  constructor(cdpClient: unknown, sessionId: string) {
    this.client = cdpClient;
    this.sessionId = sessionId;
  }

  /**
   * Send CDP command using browser-use's cdp-use client.
   *
   * Translates method name like "Runtime.evaluate" to
   * cdpClient.send.Runtime.evaluate({ params, session_id }).
   *
   * @param method - CDP method name, e.g., "Runtime.evaluate"
   * @param params - Method parameters
   * @returns CDP response object (`{}` when the client returns null/undefined)
   * @throws Error if the method name is malformed, or the client, domain or
   *   method cannot be found
   */
  async send(
    method: string,
    params?: Record<string, unknown>
  ): Promise<Record<string, unknown>> {
    // Split on the FIRST dot only: "Runtime.evaluate" -> ("Runtime", "evaluate").
    // String.prototype.split(sep, 2) would silently drop anything after a
    // second dot, turning a malformed "A.b.c" into a valid-looking "A.b".
    const dot = method.indexOf('.');
    if (dot <= 0 || dot === method.length - 1) {
      throw new Error(`Invalid CDP method format: ${method}`);
    }
    const domainName = method.slice(0, dot);
    const methodName = method.slice(dot + 1);

    // Get the domain object from cdpClient.send (client may be null/undefined)
    const clientAny = this.client as Record<string, unknown> | null | undefined;
    const send = clientAny?.send as Record<string, unknown> | undefined;
    if (!send) {
      throw new Error('CDP client does not have a send property');
    }

    const domain = send[domainName] as Record<string, unknown> | undefined;
    if (!domain) {
      throw new Error(`Unknown CDP domain: ${domainName}`);
    }

    // Get the method from the domain
    const methodFunc = domain[methodName];
    if (typeof methodFunc !== 'function') {
      throw new Error(`Unknown CDP method: ${method}`);
    }

    // Invoke with the domain as receiver so implementations relying on `this`
    // keep working once the function has been looked up dynamically.
    const result: unknown = await methodFunc.call(domain, {
      params: params ?? {},
      session_id: this.sessionId,
    });

    // cdp-use returns the result directly or null
    return (result as Record<string, unknown> | null | undefined) ?? {};
  }
}

/**
 * Adapter to use Sentience with browser-use's BrowserSession.
 *
 * This adapter:
 * 1. Wraps browser-use's CDP client with BrowserUseCDPTransport
 * 2. Creates CDPBackend for Sentience operations
 * 3. Provides access to the underlying page for extension calls
 */
/**
 * Adapter to use Sentience with browser-use's BrowserSession.
 *
 * This adapter:
 * 1. Wraps browser-use's CDP client with BrowserUseCDPTransport
 * 2. Creates CDPBackend for Sentience operations
 * 3. Provides access to the underlying page for extension calls
 *
 * Example:
 *   import { BrowserSession, BrowserProfile } from 'browser-use';
 *   import { getExtensionDir } from 'sentience';
 *   import { BrowserUseAdapter } from './backends/browser-use-adapter';
 *
 *   // Setup browser-use with Sentience extension
 *   const profile = new BrowserProfile({ args: [`--load-extension=${getExtensionDir()}`] });
 *   const session = new BrowserSession({ browserProfile: profile });
 *   await session.start();
 *
 *   // Create adapter and backend
 *   const adapter = new BrowserUseAdapter(session);
 *   const backend = await adapter.createBackend();
 *
 *   // Navigate (using browser-use)
 *   const page = await session.getCurrentPage();
 *   await page.goto('https://example.com');
 *
 *   // Use backend for precise clicking
 *   await backend.mouseClick(100, 200);
 */
export class BrowserUseAdapter {
  private session: unknown;
  private backend: CDPBackend | null = null;
  private transport: BrowserUseCDPTransport | null = null;

  /**
   * Initialize adapter with browser-use BrowserSession.
   *
   * @param session - browser-use BrowserSession instance
   */
  constructor(session: unknown) {
    this.session = session;
  }

  /**
   * View the wrapped session as a property bag.
   *
   * A null or primitive session yields an empty object so the `in` checks
   * below report "not found" instead of throwing a TypeError.
   */
  private sessionRecord(): Record<string, unknown> {
    const session = this.session;
    const isObjectLike =
      session !== null && (typeof session === 'object' || typeof session === 'function');
    return isObjectLike ? (session as Record<string, unknown>) : {};
  }

  /**
   * Get the current Playwright page from browser-use.
   *
   * This is needed for Sentience snapshot() which calls window.sentience.snapshot().
   *
   * @returns Playwright Page object
   * @throws Error if the session only exposes the async getCurrentPage(), or
   *   exposes no page at all
   */
  get page(): unknown {
    const sessionAny = this.sessionRecord();

    // browser-use stores page in session
    // Access pattern may vary by browser-use version
    if ('page' in sessionAny) {
      return sessionAny.page;
    }
    if ('_page' in sessionAny) {
      return sessionAny._page;
    }
    if ('getCurrentPage' in sessionAny) {
      // This is async, but we need sync access for property
      // Caller should use getPageAsync() instead
      throw new Error('Use await adapter.getPageAsync() to get the page');
    }
    throw new Error('Could not find page in browser-use session');
  }

  /**
   * Get the current Playwright page (async).
   *
   * Prefers the session's getCurrentPage() when it exists and falls back to
   * the synchronous `page` lookup otherwise.
   *
   * @returns Playwright Page object
   */
  async getPageAsync(): Promise<unknown> {
    const getCurrentPage = this.sessionRecord().getCurrentPage;

    if (typeof getCurrentPage === 'function') {
      // Call with the session as receiver, as a normal method call would
      return await getCurrentPage.call(this.session);
    }
    return this.page;
  }

  /**
   * API key for Sentience API (for snapshot compatibility).
   *
   * Returns null since browser-use users pass apiKey via SnapshotOptions.
   */
  get apiKey(): string | null {
    return null;
  }

  /**
   * API URL for Sentience API (for snapshot compatibility).
   *
   * Returns null to use default.
   */
  get apiUrl(): string | null {
    return null;
  }

  /**
   * Create CDP backend for Sentience operations.
   *
   * This method:
   * 1. Gets or creates a CDP session from browser-use
   * 2. Creates BrowserUseCDPTransport to wrap the CDP client
   * 3. Creates CDPBackend with the transport
   *
   * The backend is created once and cached; later calls return the same instance.
   *
   * @returns CDPBackend instance ready for use
   * @throws Error if CDP session cannot be created
   */
  async createBackend(): Promise<CDPBackend> {
    if (this.backend !== null) {
      return this.backend;
    }

    // Get CDP session from browser-use
    // browser-use uses: cdpSession = await session.getOrCreateCdpSession()
    const getOrCreateCdpSession = this.sessionRecord().getOrCreateCdpSession;
    if (typeof getOrCreateCdpSession !== 'function') {
      throw new Error(
        'browser-use session does not have getOrCreateCdpSession method. ' +
          "Make sure you're using a compatible version of browser-use."
      );
    }

    const cdpSession: unknown = await getOrCreateCdpSession.call(this.session);
    if (typeof cdpSession !== 'object' || cdpSession === null) {
      throw new Error('browser-use getOrCreateCdpSession() did not return a CDP session');
    }

    // Extract CDP client and session ID
    const { cdpClient, sessionId } = cdpSession as Record<string, unknown>;
    if (cdpClient === null || cdpClient === undefined) {
      throw new Error('browser-use CDP session does not expose a cdpClient');
    }

    // Create transport and backend
    this.transport = new BrowserUseCDPTransport(cdpClient, sessionId as string);
    this.backend = new CDPBackend(this.transport);

    return this.backend;
  }

  /**
   * Get the CDP transport (creates backend if needed).
   *
   * @returns BrowserUseCDPTransport instance
   * @throws Error if the backend could not be created
   */
  async getTransport(): Promise<BrowserUseCDPTransport> {
    if (this.transport === null) {
      await this.createBackend();
    }

    const transport = this.transport;
    if (transport === null) {
      throw new Error('CDP transport was not initialised by createBackend()');
    }
    return transport;
  }
}

/*
 * Patch metadata for the next file in this dump, preserved verbatim:
 *
 * diff --git a/src/backends/cdp-backend.ts b/src/backends/cdp-backend.ts
 * new file mode 100644
 * index 00000000..b21a0942
 * --- /dev/null
 * +++ b/src/backends/cdp-backend.ts
 * @@ -0,0 +1,341 @@
 */

/**
 * CDP Backend implementation for browser-use integration.
 *
 * This module provides CDPBackend, which implements BrowserBackend protocol
 * using Chrome DevTools Protocol (CDP) commands.
 */
/**
 * Usage with browser-use:
 *   import { CDPBackend, CDPTransport } from './backends/cdp-backend';
 *
 *   // Create transport from browser-use CDP client
 *   const transport: CDPTransport = {
 *     send: async (method, params) => {
 *       // Call browser-use's CDP client
 *       return await cdpClient.send[domain][method](params, sessionId);
 *     }
 *   };
 *
 *   const backend = new CDPBackend(transport);
 *
 *   // Now use backend for Sentience operations
 *   const viewport = await backend.refreshPageInfo();
 *   await backend.mouseClick(100, 200);
 */

import type {
  BrowserBackend,
  LayoutMetrics,
  MouseButton,
  ReadyState,
  ViewportInfo,
} from './protocol';

/**
 * Protocol for CDP transport layer.
 *
 * This abstracts the actual CDP communication, allowing different
 * implementations (browser-use, Playwright CDP, raw WebSocket).
 */
export interface CDPTransport {
  /**
   * Send a CDP command and return the result.
   *
   * @param method - CDP method name, e.g., "Runtime.evaluate"
   * @param params - Method parameters
   * @returns CDP response object
   */
  send(method: string, params?: Record<string, unknown>): Promise<Record<string, unknown>>;
}

/**
 * Render a value as JavaScript source text for splicing into an expression.
 *
 * Strings and objects go through JSON.stringify (an object would otherwise
 * become "[object Object]"); numbers keep NaN/Infinity; undefined stays
 * `undefined`.
 */
function toJsLiteral(value: unknown): string {
  switch (typeof value) {
    case 'string':
    case 'object':
      // JSON.stringify can return undefined (e.g. toJSON() returning undefined)
      return JSON.stringify(value) ?? 'undefined';
    case 'bigint':
      return `${value}n`;
    default:
      return String(value);
  }
}

/**
 * Turn a Runtime.evaluate / Runtime.callFunctionOn response into its value.
 *
 * @param response - Raw CDP response
 * @param failurePrefix - Message prefix used when the page code threw
 * @returns The by-value result, or null for `undefined` / missing results
 * @throws Error if the response carries exceptionDetails
 */
function unwrapRemoteResult(response: Record<string, unknown>, failurePrefix: string): unknown {
  if ('exceptionDetails' in response) {
    const details = (response.exceptionDetails ?? {}) as Record<string, unknown>;
    const exception = details.exception as Record<string, unknown> | undefined;
    // `text` is often just "Uncaught"; the exception description carries the
    // actual error name and message, so prefer it when present.
    const text =
      (exception?.description as string) || (details.text as string) || 'Unknown error';
    throw new Error(`${failurePrefix}: ${text}`);
  }

  if ('result' in response) {
    const remote = response.result as Record<string, unknown> | null | undefined;
    if (!remote || remote.type === 'undefined') {
      return null;
    }
    return remote.value;
  }

  return null;
}

/**
 * CDP-based implementation of BrowserBackend.
 *
 * This backend uses CDP commands to interact with the browser,
 * making it compatible with browser-use's CDP client.
 */
export class CDPBackend implements BrowserBackend {
  private transport: CDPTransport;
  private cachedViewport: ViewportInfo | null = null;
  private executionContextId: number | null = null;

  /**
   * @param transport - Transport used to send every CDP command
   */
  constructor(transport: CDPTransport) {
    this.transport = transport;
  }

  /**
   * Resolve (and cache) an execution context id.
   *
   * NOTE(review): nothing in this class calls this method, and it reads
   * `executionContextId` from the top level of a Runtime.evaluate response,
   * falling back to 1 when absent - confirm this is still needed.
   */
  private async getExecutionContext(): Promise<number> {
    if (this.executionContextId !== null) {
      return this.executionContextId;
    }

    // Enable Runtime domain if not already enabled
    try {
      await this.transport.send('Runtime.enable');
    } catch {
      // May already be enabled
    }

    // Get the main frame's execution context
    const result = await this.transport.send('Runtime.evaluate', {
      expression: '1',
      returnByValue: true,
    });

    // Extract context ID from the result
    if ('executionContextId' in result) {
      this.executionContextId = result.executionContextId as number;
    } else {
      // Fallback: use context ID 1 (main frame)
      this.executionContextId = 1;
    }

    return this.executionContextId;
  }

  /**
   * Read viewport size, scroll offsets and content size from the page and
   * cache them (the cache supplies wheel()'s default position).
   *
   * @returns ViewportInfo with the current viewport state; numeric fields
   *   fall back to 0 when the page returns nothing usable
   */
  async refreshPageInfo(): Promise<ViewportInfo> {
    const raw = await this.eval(`(() => ({
      width: window.innerWidth,
      height: window.innerHeight,
      scrollX: window.scrollX,
      scrollY: window.scrollY,
      contentWidth: document.documentElement.scrollWidth,
      contentHeight: document.documentElement.scrollHeight
    }))()`);
    // eval() yields null for an undefined/missing result; don't dereference it
    const result = (raw ?? {}) as Record<string, unknown>;

    this.cachedViewport = {
      width: (result.width as number) || 0,
      height: (result.height as number) || 0,
      scrollX: (result.scrollX as number) || 0,
      scrollY: (result.scrollY as number) || 0,
      contentWidth: result.contentWidth as number | undefined,
      contentHeight: result.contentHeight as number | undefined,
    };
    return this.cachedViewport;
  }

  /**
   * Evaluate a JavaScript expression in the page (Runtime.evaluate, by value,
   * awaiting promises).
   *
   * @param expression - JavaScript expression to evaluate
   * @returns Result value, or null when the expression yields undefined
   * @throws Error prefixed "JavaScript evaluation failed" if the page code throws
   */
  async eval(expression: string): Promise<unknown> {
    const result = await this.transport.send('Runtime.evaluate', {
      expression,
      returnByValue: true,
      awaitPromise: true,
    });

    return unwrapRemoteResult(result, 'JavaScript evaluation failed');
  }

  /**
   * Call a JavaScript function with arguments.
   *
   * Uses Runtime.callFunctionOn with globalThis as the target. If no object
   * id for globalThis is available, falls back to evaluating
   * `(fn)(arg1, arg2, ...)` with the arguments rendered as source text.
   *
   * @param functionDeclaration - Function source, e.g. "(x, y) => x + y"
   * @param args - Arguments to pass to the function
   * @returns Result value, or null when the function yields undefined
   * @throws Error prefixed "JavaScript call failed" (or "JavaScript evaluation
   *   failed" on the fallback path) if the page code throws
   */
  async call(functionDeclaration: string, args?: unknown[]): Promise<unknown> {
    // We need an object ID to call function on
    // Use globalThis (window) as the target
    const globalResult = await this.transport.send('Runtime.evaluate', {
      expression: 'globalThis',
      returnByValue: false,
    });

    const resultObj = globalResult.result as Record<string, unknown> | undefined;
    const objectId = resultObj?.objectId as string | undefined;

    if (!objectId) {
      // Fallback: evaluate the function directly
      const argList = (args ?? []).map(toJsLiteral).join(', ');
      return await this.eval(`(${functionDeclaration})(${argList})`);
    }

    const result = await this.transport.send('Runtime.callFunctionOn', {
      functionDeclaration,
      objectId,
      arguments: (args ?? []).map(value => ({ value })),
      returnByValue: true,
      awaitPromise: true,
    });

    return unwrapRemoteResult(result, 'JavaScript call failed');
  }

  /**
   * Get page layout metrics via Page.getLayoutMetrics.
   *
   * Prefers the CSS-pixel fields (cssLayoutViewport, cssContentSize,
   * cssVisualViewport) and falls back to the older layoutViewport /
   * contentSize / visualViewport fields when those are absent.
   *
   * NOTE(review): deviceScaleFactor is taken from visualViewport.scale -
   * confirm that is the intended source.
   *
   * @returns LayoutMetrics with viewport and content size info (0 / 1.0 defaults)
   */
  async getLayoutMetrics(): Promise<LayoutMetrics> {
    const result = await this.transport.send('Page.getLayoutMetrics');

    const section = (cssKey: string, legacyKey: string): Record<string, unknown> =>
      ((result[cssKey] ?? result[legacyKey]) as Record<string, unknown> | undefined) ?? {};

    const layoutViewport = section('cssLayoutViewport', 'layoutViewport');
    const contentSize = section('cssContentSize', 'contentSize');
    const visualViewport = section('cssVisualViewport', 'visualViewport');

    return {
      viewportX: (visualViewport.pageX as number) || 0,
      viewportY: (visualViewport.pageY as number) || 0,
      viewportWidth:
        (visualViewport.clientWidth as number) || (layoutViewport.clientWidth as number) || 0,
      viewportHeight:
        (visualViewport.clientHeight as number) || (layoutViewport.clientHeight as number) || 0,
      contentWidth: (contentSize.width as number) || 0,
      contentHeight: (contentSize.height as number) || 0,
      deviceScaleFactor: (visualViewport.scale as number) || 1.0,
    };
  }

  /**
   * Capture a viewport screenshot via Page.captureScreenshot.
   *
   * @returns PNG image as a base64 string ('' when no data is returned)
   */
  async screenshotPng(): Promise<string> {
    const result = await this.transport.send('Page.captureScreenshot', {
      format: 'png',
      captureBeyondViewport: false,
    });

    return (result.data as string) || '';
  }

  /**
   * Move the mouse to viewport coordinates (mouseMoved event).
   */
  async mouseMove(x: number, y: number): Promise<void> {
    await this.transport.send('Input.dispatchMouseEvent', {
      type: 'mouseMoved',
      x,
      y,
    });
  }

  /**
   * Click at viewport coordinates: mousePressed, a 50ms pause, mouseReleased.
   *
   * @param x - X coordinate in viewport
   * @param y - Y coordinate in viewport
   * @param button - Mouse button to click (default: 'left')
   * @param clickCount - Number of clicks (1 for single, 2 for double)
   */
  async mouseClick(
    x: number,
    y: number,
    button: MouseButton = 'left',
    clickCount: number = 1
  ): Promise<void> {
    // Mouse down
    await this.transport.send('Input.dispatchMouseEvent', {
      type: 'mousePressed',
      x,
      y,
      button,
      clickCount,
    });

    // Small delay between press and release
    await this.sleep(50);

    // Mouse up
    await this.transport.send('Input.dispatchMouseEvent', {
      type: 'mouseReleased',
      x,
      y,
      button,
      clickCount,
    });
  }

  /**
   * Scroll using a mouseWheel event.
   *
   * @param deltaY - Scroll amount (positive = down, negative = up)
   * @param x - X coordinate for scroll (default: cached viewport center)
   * @param y - Y coordinate for scroll (default: cached viewport center)
   */
  async wheel(deltaY: number, x?: number, y?: number): Promise<void> {
    // Get viewport center if coordinates not provided
    if (x === undefined || y === undefined) {
      if (this.cachedViewport === null) {
        await this.refreshPageInfo();
      }
      x = x ?? (this.cachedViewport?.width ?? 0) / 2;
      y = y ?? (this.cachedViewport?.height ?? 0) / 2;
    }

    await this.transport.send('Input.dispatchMouseEvent', {
      type: 'mouseWheel',
      x,
      y,
      deltaX: 0,
      deltaY,
    });
  }

  /**
   * Type text one character (code point) at a time, sending keyDown, char and
   * keyUp events with a 10ms pause after each character.
   *
   * NOTE(review): both the keyDown and the char event carry `text`; verify
   * against a real browser that this does not insert each character twice.
   *
   * @param text - Text to type
   */
  async typeText(text: string): Promise<void> {
    for (const char of text) {
      // Key down
      await this.transport.send('Input.dispatchKeyEvent', {
        type: 'keyDown',
        text: char,
      });

      // Char event (for text input)
      await this.transport.send('Input.dispatchKeyEvent', {
        type: 'char',
        text: char,
      });

      // Key up
      await this.transport.send('Input.dispatchKeyEvent', {
        type: 'keyUp',
        text: char,
      });

      // Small delay between characters
      await this.sleep(10);
    }
  }

  /**
   * Poll document.readyState every 100ms until it reaches the target state.
   *
   * 'interactive' is satisfied by 'interactive' or 'complete'; 'complete'
   * only by 'complete'.
   *
   * @param state - Target state
   * @param timeoutMs - Maximum time to wait in milliseconds
   * @throws Error ("Timed out waiting for ...") if the state is not reached in time
   */
  async waitReadyState(
    state: ReadyState = 'interactive',
    timeoutMs: number = 15000
  ): Promise<void> {
    const startTime = Date.now();

    // Map state to acceptable states
    const acceptableStates: Set<string> =
      state === 'complete' ? new Set(['complete']) : new Set(['interactive', 'complete']);

    while (true) {
      const elapsed = Date.now() - startTime;
      if (elapsed >= timeoutMs) {
        throw new Error(
          `Timed out waiting for document.readyState='${state}' after ${timeoutMs}ms`
        );
      }

      const currentState = (await this.eval('document.readyState')) as string;
      if (acceptableStates.has(currentState)) {
        return;
      }

      // Poll every 100ms
      await this.sleep(100);
    }
  }

  /**
   * Get the current page URL.
   *
   * @returns window.location.href ('' when unavailable)
   */
  async getUrl(): Promise<string> {
    const result = await this.eval('window.location.href');
    return (result as string) || '';
  }

  /** Resolve after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise<void>(resolve => setTimeout(resolve, ms));
  }
}

/*
 * Patch metadata for the next file in this dump, preserved verbatim:
 *
 * diff --git a/src/backends/index.ts b/src/backends/index.ts
 * new file mode 100644
 * index 00000000..4b4f51df
 * --- /dev/null
 * +++ b/src/backends/index.ts
 * @@ -0,0 +1,115 @@
 */

/**
 * Browser backend abstractions for Sentience SDK.
 *
 * This module provides backend protocols and implementations that allow
 * Sentience actions (click, type, scroll) to work with different browser
 * automation frameworks.
 *
 * Supported Backends
 * ------------------
 *
 * **CDPBackend**
 *   Low-level CDP (Chrome DevTools Protocol) backend. Use this when you have
 *   direct access to a CDP client and session.
 *
 * **BrowserUseAdapter**
 *   High-level adapter for browser-use framework. Automatically creates a
 *   CDPBackend from a BrowserSession.
 */
+ * + * Quick Start with browser-use + * ---------------------------- + * + * import { BrowserSession, BrowserProfile } from 'browser-use'; + * import { getExtensionDir } from 'sentience'; + * import { BrowserUseAdapter, snapshot, click, typeText } from 'sentience/backends'; + * + * // Setup browser-use with Sentience extension + * const profile = new BrowserProfile({ args: [`--load-extension=${getExtensionDir()}`] }); + * const session = new BrowserSession({ browserProfile: profile }); + * await session.start(); + * + * // Create adapter and backend + * const adapter = new BrowserUseAdapter(session); + * const backend = await adapter.createBackend(); + * + * // Take snapshot and interact with elements + * const snap = await snapshot(backend); + * const searchBox = find(snap, 'role=textbox[name*="Search"]'); + * await click(backend, searchBox.bbox); + * await typeText(backend, 'Sentience AI'); + * + * Snapshot Caching + * ---------------- + * + * Use CachedSnapshot to reduce redundant snapshot calls in action loops: + * + * import { CachedSnapshot } from 'sentience/backends'; + * + * const cache = new CachedSnapshot(backend, 2000); + * + * const snap1 = await cache.get(); // Takes fresh snapshot + * const snap2 = await cache.get(); // Returns cached if < 2s old + * + * await click(backend, element.bbox); + * cache.invalidate(); // Force refresh on next get() + * + * Error Handling + * -------------- + * + * The module provides specific exceptions for common failure modes: + * + * - `ExtensionNotLoadedError`: Extension not loaded in browser launch args + * - `SnapshotError`: window.sentience.snapshot() failed + * + * import { ExtensionNotLoadedError, snapshot } from 'sentience/backends'; + * + * try { + * const snap = await snapshot(backend); + * } catch (e) { + * if (e instanceof ExtensionNotLoadedError) { + * console.log(`Fix suggestion: ${e.message}`); + * } + * } + */ + +// Protocol and types +export { BrowserBackend, ViewportInfo, LayoutMetrics, MouseButton, 
ReadyState } from './protocol'; + +// CDP Backend +export { CDPTransport, CDPBackend } from './cdp-backend'; + +// browser-use adapter +export { BrowserUseAdapter, BrowserUseCDPTransport } from './browser-use-adapter'; + +// Backend-agnostic functions +export { + snapshot, + CachedSnapshot, + SnapshotOptions, + ScreenshotOptions, + SnapshotFilter, + ExtensionNotLoadedError, + SnapshotError, + ExtensionDiagnostics, +} from './snapshot'; + +// Actions +export { + click, + typeText, + scroll, + scrollToElement, + waitForStable, + ClickTarget, + ScrollBehavior, + ScrollBlock, +} from './actions'; + +// SentienceContext (Token-Slasher Context Middleware) +export { + SentienceContext, + SentienceContextState, + SentienceContextOptions, + TopElementSelector, + BuildOptions, +} from './sentience-context'; diff --git a/src/backends/protocol.ts b/src/backends/protocol.ts new file mode 100644 index 00000000..fb794db0 --- /dev/null +++ b/src/backends/protocol.ts @@ -0,0 +1,175 @@ +/** + * v0 BrowserBackend Protocol - Minimal interface for browser-use integration. + * + * This protocol defines the minimal interface required to: + * - Take Sentience snapshots (DOM/geometry via extension) + * - Compute viewport-coord clicks + * - Scroll + re-snapshot + click + * - Stabilize after action + * + * No navigation API required (browser-use already handles navigation). + * + * Design principle: Keep it so small that nothing can break. + */ + +/** + * Viewport and scroll position information. + */ +export interface ViewportInfo { + width: number; + height: number; + scrollX: number; + scrollY: number; + contentWidth?: number; + contentHeight?: number; +} + +/** + * Page layout metrics from CDP Page.getLayoutMetrics. 
+ */ +export interface LayoutMetrics { + // Viewport dimensions + viewportX: number; + viewportY: number; + viewportWidth: number; + viewportHeight: number; + + // Content dimensions (scrollable area) + contentWidth: number; + contentHeight: number; + + // Device scale factor + deviceScaleFactor: number; +} + +/** + * Mouse button type for click operations. + */ +export type MouseButton = 'left' | 'right' | 'middle'; + +/** + * Document ready state for wait operations. + */ +export type ReadyState = 'interactive' | 'complete'; + +/** + * Minimal backend protocol for v0 proof-of-concept. + * + * This is enough to: + * - Take Sentience snapshots (DOM/geometry via extension) + * - Execute JavaScript for element interaction + * - Perform mouse operations (move, click, scroll) + * - Wait for page stability + * + * Implementers: + * - CDPBackend: For browser-use integration via CDP + * - PlaywrightBackend: Wrapper around existing SentienceBrowser (future) + */ +export interface BrowserBackend { + /** + * Cache viewport + scroll offsets + url; cheap & safe to call often. + * + * @returns ViewportInfo with current viewport state + */ + refreshPageInfo(): Promise; + + /** + * Evaluate JavaScript expression in page context. + * + * Uses CDP Runtime.evaluate with returnByValue=True. + * + * @param expression - JavaScript expression to evaluate + * @returns Result value (JSON-serializable) + */ + eval(expression: string): Promise; + + /** + * Call a JavaScript function with arguments. + * + * Uses CDP Runtime.callFunctionOn for safe argument passing. + * Safer than eval() for passing complex arguments. + * + * @param functionDeclaration - JavaScript function body, e.g., "(x, y) => x + y" + * @param args - Arguments to pass to the function + * @returns Result value (JSON-serializable) + */ + call(functionDeclaration: string, args?: unknown[]): Promise; + + /** + * Get page layout metrics. + * + * Uses CDP Page.getLayoutMetrics to get viewport and content dimensions. 
+ * + * @returns LayoutMetrics with viewport and content size info + */ + getLayoutMetrics(): Promise; + + /** + * Capture viewport screenshot as PNG bytes. + * + * Uses CDP Page.captureScreenshot. + * + * @returns PNG image as base64 string + */ + screenshotPng(): Promise; + + /** + * Move mouse to viewport coordinates. + * + * Uses CDP Input.dispatchMouseEvent with type="mouseMoved". + * + * @param x - X coordinate in viewport + * @param y - Y coordinate in viewport + */ + mouseMove(x: number, y: number): Promise; + + /** + * Click at viewport coordinates. + * + * Uses CDP Input.dispatchMouseEvent with mousePressed + mouseReleased. + * + * @param x - X coordinate in viewport + * @param y - Y coordinate in viewport + * @param button - Mouse button to click (default: 'left') + * @param clickCount - Number of clicks (1 for single, 2 for double) + */ + mouseClick(x: number, y: number, button?: MouseButton, clickCount?: number): Promise; + + /** + * Scroll using mouse wheel. + * + * Uses CDP Input.dispatchMouseEvent with type="mouseWheel". + * + * @param deltaY - Scroll amount (positive = down, negative = up) + * @param x - X coordinate for scroll (default: viewport center) + * @param y - Y coordinate for scroll (default: viewport center) + */ + wheel(deltaY: number, x?: number, y?: number): Promise; + + /** + * Type text using keyboard input. + * + * Uses CDP Input.dispatchKeyEvent for each character. + * + * @param text - Text to type + */ + typeText(text: string): Promise; + + /** + * Wait for document.readyState to reach target state. + * + * Uses polling instead of CDP events (no leak from unregistered listeners). + * + * @param state - Target state ("interactive" or "complete") + * @param timeoutMs - Maximum time to wait in milliseconds + * @throws TimeoutError if state not reached within timeout + */ + waitReadyState(state?: ReadyState, timeoutMs?: number): Promise; + + /** + * Get current page URL. 
+ * + * @returns Current page URL (window.location.href) + */ + getUrl(): Promise; +} diff --git a/src/backends/sentience-context.ts b/src/backends/sentience-context.ts new file mode 100644 index 00000000..084044d6 --- /dev/null +++ b/src/backends/sentience-context.ts @@ -0,0 +1,476 @@ +/** + * SentienceContext: Token-Slasher Context Middleware for browser-use. + * + * This module provides a compact, ranked DOM context block for browser-use agents, + * reducing tokens and improving reliability by using Sentience snapshots. + * + * Example usage: + * import { SentienceContext } from 'sentience/backends'; + * + * const ctx = new SentienceContext({ showOverlay: true }); + * const state = await ctx.build(browserSession, { goal: "Click the first Show HN post" }); + * if (state) { + * agent.addContext(state.promptBlock); // or however browser-use injects state + * } + */ + +import type { Element, Snapshot } from '../types'; +import type { BrowserBackend } from './protocol'; +import { BrowserUseAdapter } from './browser-use-adapter'; +import { snapshot, SnapshotOptions } from './snapshot'; + +/** + * Configuration for element selection strategy. + * + * The selector uses a 3-way merge to pick elements for the LLM context: + * 1. Top N by importance score (most actionable elements) + * 2. Top N from dominant group (for ordinal tasks like "click 3rd item") + * 3. Top N by position (elements at top of page, lowest doc_y) + * + * Elements are deduplicated across all three sources. + */ +export interface TopElementSelector { + /** Number of top elements to select by importance score (descending). Default: 60 */ + byImportance?: number; + /** Number of top elements to select from the dominant group (for ordinal tasks). Default: 15 */ + fromDominantGroup?: number; + /** Number of top elements to select by position (lowest doc_y = top of page). Default: 10 */ + byPosition?: number; +} + +/** + * Sentience context state with snapshot and formatted prompt block. 
/**
 * Sentience context state with snapshot and formatted prompt block.
 */
export interface SentienceContextState {
  /** URL taken from the snapshot (empty string when the snapshot has none). */
  url: string;
  /** Snapshot the prompt block was built from. */
  snapshot: Snapshot;
  /** Two header lines followed by one `ID|role|text|...` row per selected element. */
  promptBlock: string;
}

/**
 * Options for SentienceContext initialization.
 */
export interface SentienceContextOptions {
  /** Sentience API key for gateway mode */
  sentienceApiKey?: string;
  /** Force API vs extension mode (auto-detected if undefined) */
  useApi?: boolean;
  /** Maximum elements to fetch from snapshot. Default: 60 */
  maxElements?: number;
  /** Show visual overlay highlighting elements in browser. Default: false */
  showOverlay?: boolean;
  /** Configuration for element selection strategy */
  topElementSelector?: TopElementSelector;
}

/**
 * Options for the build() method.
 */
export interface BuildOptions {
  /** Optional goal/task description (passed to gateway for reranking) */
  goal?: string;
  /** Maximum time to wait for extension injection in milliseconds. Default: 5000 */
  waitForExtensionMs?: number;
  /**
   * Total number of snapshot attempts. Values below 1 are treated as 1 so that
   * a snapshot is always attempted at least once. Default: 2
   */
  retries?: number;
  /** Delay between retries in milliseconds. Default: 1000 */
  retryDelayMs?: number;
}

/** Interactive roles that should be included in the context */
const INTERACTIVE_ROLES: ReadonlySet<string> = new Set([
  'button',
  'link',
  'textbox',
  'searchbox',
  'combobox',
  'checkbox',
  'radio',
  'slider',
  'tab',
  'menuitem',
  'option',
  'switch',
  'cell',
  'a',
  'input',
  'select',
  'textarea',
]);

/**
 * Token-Slasher Context Middleware for browser-use.
 *
 * Creates a compact, ranked DOM context block using Sentience snapshots,
 * reducing tokens and improving reliability for LLM-based browser agents.
 *
 * Example:
 *   import { SentienceContext } from 'sentience/backends';
 *
 *   const ctx = new SentienceContext({ showOverlay: true });
 *   const state = await ctx.build(browserSession, { goal: "Click the first Show HN post" });
 *   if (state) {
 *     agent.addContext(state.promptBlock);
 *   }
 */
export class SentienceContext {
  private _apiKey: string | undefined;
  private _useApi: boolean | undefined;
  private _maxElements: number;
  private _showOverlay: boolean;
  private _selector: Required<TopElementSelector>;

  constructor(options: SentienceContextOptions = {}) {
    this._apiKey = options.sentienceApiKey;
    this._useApi = options.useApi;
    this._maxElements = options.maxElements ?? 60;
    this._showOverlay = options.showOverlay ?? false;
    this._selector = {
      byImportance: options.topElementSelector?.byImportance ?? 60,
      fromDominantGroup: options.topElementSelector?.fromDominantGroup ?? 15,
      byPosition: options.topElementSelector?.byPosition ?? 10,
    };
  }

  /**
   * Build context state from browser session.
   *
   * Creates a backend through BrowserUseAdapter, waits for the extension,
   * takes a snapshot (retrying on failure) and formats it for LLM consumption.
   * Every failure is logged and reported as `null`; this method does not throw.
   *
   * @param browserSession - Browser-use BrowserSession instance (or any object with getOrCreateCdpSession)
   * @param options - Build options
   * @returns SentienceContextState with snapshot and formatted prompt, or null if failed
   */
  async build(
    browserSession: unknown,
    options: BuildOptions = {}
  ): Promise<SentienceContextState | null> {
    const { goal, waitForExtensionMs = 5000, retries = 2, retryDelayMs = 1000 } = options;
    // Always try at least once: `retries: 0` (or NaN) used to skip the snapshot
    // entirely and then report a misleading "returned null".
    const maxAttempts = retries >= 1 ? retries : 1;

    try {
      // Create adapter and backend
      const adapter = new BrowserUseAdapter(browserSession);
      const backend = await adapter.createBackend();

      // Wait for extension to inject (poll until ready or timeout).
      // The result is intentionally not checked: snapshot() is attempted either way.
      await this._waitForExtension(backend, waitForExtensionMs);

      // Build snapshot options
      const snapshotOptions: SnapshotOptions = {
        limit: this._maxElements,
        showOverlay: this._showOverlay,
        goal,
      };

      // API options: an explicit useApi wins; otherwise an API key implies API mode.
      if (this._apiKey) {
        snapshotOptions.sentienceApiKey = this._apiKey;
      }
      if (this._useApi !== undefined) {
        snapshotOptions.useApi = this._useApi;
      } else if (this._apiKey) {
        snapshotOptions.useApi = true;
      }

      // Take snapshot with retry logic
      let snap: Snapshot | null = null;

      for (let attempt = 0; attempt < maxAttempts; attempt++) {
        try {
          snap = await snapshot(backend, snapshotOptions);
          break; // Success
        } catch (e) {
          const message = e instanceof Error ? e.message : String(e);
          if (attempt < maxAttempts - 1) {
            console.debug(
              `Sentience snapshot attempt ${attempt + 1} failed: ${message}, retrying...`
            );
            await this._sleep(retryDelayMs);
          } else {
            console.warn(`Sentience snapshot failed after ${maxAttempts} attempts: ${message}`);
            return null;
          }
        }
      }

      if (!snap) {
        console.warn('Sentience snapshot returned null');
        return null;
      }

      // Get URL from snapshot
      const url = snap.url || '';

      // Format for LLM
      const formatted = this._formatSnapshotForLLM(snap);

      // Build prompt block
      const promptBlock =
        'Elements: ID|role|text|imp|is_primary|docYq|ord|DG|href\n' +
        'Rules: ordinal→DG=1 then ord asc; otherwise imp desc. ' +
        'Use click(ID)/input_text(ID,...).\n' +
        formatted;

      console.info(`SentienceContext snapshot: ${snap.elements.length} elements URL=${url}`);

      return { url, snapshot: snap, promptBlock };
    } catch (e) {
      const error = e instanceof Error ? e : new Error(String(e));
      console.warn(`Sentience snapshot skipped: ${error.message}`);
      return null;
    }
  }

  /**
   * Format Sentience snapshot for LLM consumption.
   *
   * Keeps only elements whose role is in INTERACTIVE_ROLES, then merges three
   * selections (deduplicated by element id, in this order): top by importance,
   * top of the dominant group by group_index, and top by vertical position.
   *
   * @param snap - Sentience Snapshot object
   * @returns Formatted string with format: ID|role|text|imp|is_primary|docYq|ord|DG|href
   */
  private _formatSnapshotForLLM(snap: Snapshot): string {
    // Interactive elements only, most important first. filter() returns a fresh
    // array, so sorting in place does not touch snap.elements.
    const interactive: Element[] = snap.elements
      .filter(el => INTERACTIVE_ROLES.has((el.role || '').toLowerCase()))
      .sort((a, b) => (b.importance || 0) - (a.importance || 0));

    const selectedIds = new Set<Element['id']>();
    const selected: Element[] = [];
    const select = (candidates: Element[]): void => {
      for (const el of candidates) {
        if (!selectedIds.has(el.id)) {
          selectedIds.add(el.id);
          selected.push(el);
        }
      }
    };

    // 1. Top N by importance
    select(interactive.slice(0, this._selector.byImportance));

    // 2. Top N from the dominant group (for ordinal tasks).
    // Prefer the in_dominant_group flag; fall back to an exact group_key match
    // when no element carries the flag.
    let dominantGroup = interactive.filter(el => el.in_dominant_group === true);
    if (dominantGroup.length === 0 && snap.dominant_group_key) {
      dominantGroup = interactive.filter(el => el.group_key === snap.dominant_group_key);
    }
    const byGroupIndex = [...dominantGroup].sort(
      (a, b) => (a.group_index ?? 999) - (b.group_index ?? 999)
    );
    select(byGroupIndex.slice(0, this._selector.fromDominantGroup));

    // 3. Top N by position (lowest doc_y = top of page)
    const yOf = (el: Element): number => el.doc_y ?? el.bbox?.y ?? Infinity;
    const byPosition = [...interactive].sort((a, b) => {
      const ya = yOf(a);
      const yb = yOf(b);
      // Compare before subtracting: Infinity - Infinity would be NaN.
      if (ya !== yb) return ya - yb;
      // Tie-breaker: higher importance first
      return (b.importance || 0) - (a.importance || 0);
    });
    select(byPosition.slice(0, this._selector.byPosition));

    // Local rank within the dominant group, ordered by
    // (doc_y, bbox.y, bbox.x, -importance).
    const rankInGroup = new Map<Element['id'], number>();
    [...dominantGroup]
      .sort((a, b) => {
        const docYA = a.doc_y ?? Infinity;
        const docYB = b.doc_y ?? Infinity;
        if (docYA !== docYB) return docYA - docYB;

        const bboxYA = a.bbox?.y ?? Infinity;
        const bboxYB = b.bbox?.y ?? Infinity;
        if (bboxYA !== bboxYB) return bboxYA - bboxYB;

        const bboxXA = a.bbox?.x ?? Infinity;
        const bboxXB = b.bbox?.x ?? Infinity;
        if (bboxXA !== bboxXB) return bboxXA - bboxXB;

        return (b.importance || 0) - (a.importance || 0);
      })
      .forEach((el, rank) => {
        rankInGroup.set(el.id, rank);
      });

    // Format lines
    const lines: string[] = [];
    for (const el of selected) {
      // Role: anything with an href is reported as "link"; empty role -> "element"
      let role = el.role || '';
      if (el.href) {
        role = 'link';
      } else if (!role) {
        role = 'element';
      }

      // Text: collapse whitespace/newlines and truncate to 30 characters
      let name = (el.text || '').replace(/\s+/g, ' ').trim();
      if (name.length > 30) {
        name = name.slice(0, 27) + '...';
      }

      const importance = el.importance || 0;
      const docY = el.doc_y || 0;

      // is_primary: from visual_cues.is_primary (boolean)
      const isPrimaryFlag = el.visual_cues?.is_primary ? '1' : '0';

      // docYq: doc_y bucketed into 200px steps
      const docYq = docY ? Math.round(docY / 200) : 0;

      // Dominant-group membership; fall back to group_key when the flag is absent
      let inDg = el.in_dominant_group;
      if (inDg == null && snap.dominant_group_key) {
        inDg = el.group_key === snap.dominant_group_key;
      }

      // ord: rank within the dominant group, "-" for everything else
      const rank = inDg ? rankInGroup.get(el.id) : undefined;
      const ordVal: string | number = rank ?? '-';

      // DG: 1 if dominant group, else 0
      const dgFlag = inDg ? '1' : '0';

      // href: short token (domain or last path segment, or blank)
      const href = this._compressHref(el.href);

      // Ultra-compact format: ID|role|text|imp|is_primary|docYq|ord|DG|href
      lines.push(
        `${el.id}|${role}|${name}|${importance}|${isPrimaryFlag}|${docYq}|${ordVal}|${dgFlag}|${href}`
      );
    }

    console.debug(
      `Formatted ${lines.length} elements (top ${this._selector.byImportance} by importance + top ${this._selector.fromDominantGroup} from dominant group + top ${this._selector.byPosition} by position)`
    );

    return lines.join('\n');
  }

  /**
   * Wait for Sentience extension to be ready in the browser.
   *
   * Polls `typeof window.sentience !== 'undefined'` until it evaluates to true
   * or the timeout is reached. Elapsed time is the larger of the accumulated
   * poll intervals and the wall-clock time, so slow eval() calls cannot stretch
   * the wait and a non-positive poll interval cannot loop forever.
   *
   * @param backend - Browser backend with eval() method
   * @param timeoutMs - Maximum time to wait in milliseconds
   * @param pollIntervalMs - Interval between polls in milliseconds
   * @returns true if extension is ready, false if timeout
   */
  private async _waitForExtension(
    backend: BrowserBackend,
    timeoutMs: number = 5000,
    pollIntervalMs: number = 100
  ): Promise<boolean> {
    const startedAt = Date.now();
    let elapsedMs = 0;

    while (elapsedMs < timeoutMs) {
      try {
        const result = await backend.eval("typeof window.sentience !== 'undefined'");
        if (result === true) {
          console.debug(`Sentience extension ready after ${elapsedMs}ms`);
          return true;
        }
      } catch {
        // Extension not ready yet, continue polling
      }

      await this._sleep(pollIntervalMs);
      elapsedMs = Math.max(elapsedMs + pollIntervalMs, Date.now() - startedAt);
    }

    console.warn(`Sentience extension not ready after ${timeoutMs}ms timeout`);
    return false;
  }

  /**
   * Compress href into a short token for minimal tokens.
   *
   * Absolute http(s) URLs yield the second-to-last hostname label (or the whole
   * hostname when it has a single label); anything else yields its last
   * non-empty path segment. Results are cut to 10 characters. Returns '' for a
   * missing href and 'item' when nothing usable is found or parsing throws.
   *
   * @param href - Full URL or undefined
   * @returns Short token (domain second-level or last path segment)
   */
  private _compressHref(href: string | undefined): string {
    if (!href) {
      return '';
    }

    try {
      // Check if it's a full URL
      if (href.startsWith('http://') || href.startsWith('https://')) {
        const url = new URL(href);
        if (url.hostname) {
          // Extract second-level domain (e.g., "github" from "github.com")
          const parts = url.hostname.split('.');
          if (parts.length >= 2) {
            return parts[parts.length - 2].slice(0, 10);
          }
          return url.hostname.slice(0, 10);
        }
      }

      // Handle relative URLs - use last path segment
      const segments = href.split('/').filter(s => s);
      if (segments.length > 0) {
        return segments[segments.length - 1].slice(0, 10);
      }

      return 'item';
    } catch {
      return 'item';
    }
  }

  /** Resolve after `ms` milliseconds. */
  private _sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  // Expose selector for testing
  get selector(): Required<TopElementSelector> {
    return this._selector;
  }
}

/*
diff --git a/src/backends/snapshot.ts b/src/backends/snapshot.ts
new file mode 100644
index 00000000..d5dccb5f
--- /dev/null
+++ b/src/backends/snapshot.ts
@@ -0,0 +1,403 @@
*/
/**
 * Backend-agnostic snapshot for browser-use integration.
 *
 * Takes Sentience snapshots using BrowserBackend protocol,
 * enabling element grounding with browser-use or other frameworks.
 */
/**
 * Usage with browser-use:
 *   import { BrowserUseAdapter } from './backends/browser-use-adapter';
 *   import { snapshot, CachedSnapshot } from './backends/snapshot';
 *
 *   const adapter = new BrowserUseAdapter(session);
 *   const backend = await adapter.createBackend();
 *
 *   // Take snapshot
 *   const snap = await snapshot(backend);
 *   console.log(`Found ${snap.elements.length} elements`);
 *
 *   // With caching (reuse if fresh)
 *   const cache = new CachedSnapshot(backend, 2000);
 *   const snap1 = await cache.get(); // Fresh snapshot
 *   const snap2 = await cache.get(); // Returns cached if < 2s old
 *   cache.invalidate(); // Force refresh on next get()
 */

import type { Snapshot } from '../types';
import type { BrowserBackend } from './protocol';

/**
 * Error thrown when Sentience extension is not loaded.
 *
 * Carries the timeout that elapsed and, when available, the diagnostics
 * gathered from the page.
 */
export class ExtensionNotLoadedError extends Error {
  constructor(
    message: string,
    public timeoutMs?: number,
    public diagnostics?: ExtensionDiagnostics
  ) {
    super(message);
    this.name = 'ExtensionNotLoadedError';
    // Keep `instanceof` working when the build down-levels classes to ES5.
    Object.setPrototypeOf(this, new.target.prototype);
  }

  /**
   * Build the error for a wait that timed out.
   *
   * @param timeoutMs - Time waited, in milliseconds
   * @param diagnostics - Optional page diagnostics; appended to the message as pretty-printed JSON
   * @returns Error whose message names the timeout and the launch flag to check
   */
  static fromTimeout(
    timeoutMs: number,
    diagnostics?: ExtensionDiagnostics
  ): ExtensionNotLoadedError {
    let message = `Sentience extension not loaded after ${timeoutMs}ms. `;
    // The hint previously ended in a dangling "=", leaving the flag value unexplained.
    message += 'Make sure to launch browser with --load-extension=<path>';

    if (diagnostics) {
      message += `\n\nDiagnostics:\n${JSON.stringify(diagnostics, null, 2)}`;
    }

    return new ExtensionNotLoadedError(message, timeoutMs, diagnostics);
  }
}

/**
 * Error thrown when snapshot operation fails.
 */
export class SnapshotError extends Error {
  constructor(
    message: string,
    public url?: string
  ) {
    super(message);
    this.name = 'SnapshotError';
    // Keep `instanceof` working when the build down-levels classes to ES5.
    Object.setPrototypeOf(this, new.target.prototype);
  }

  /**
   * Build the error for a snapshot call that produced no result.
   *
   * @param url - Page URL, appended to the message when provided
   */
  static fromNullResult(url?: string): SnapshotError {
    let message = 'window.sentience.snapshot() returned null.';
    if (url) {
      message += ` URL: ${url}`;
    }
    return new SnapshotError(message, url);
  }
}

/**
 * Extension diagnostics for debugging.
 *
 * All fields are optional: either the page-side probe fills the first five,
 * or `error` explains why the probe itself failed.
 */
export interface ExtensionDiagnostics {
  sentienceDefined?: boolean;
  sentienceSnapshot?: boolean;
  url?: string;
  extensionId?: string | null;
  hasContentScript?: boolean;
  error?: string;
}

/**
 * Options for snapshot operations.
 */
export interface SnapshotOptions {
  /** Maximum number of elements to return (default: 50) */
  limit?: number;
  /** Whether to capture screenshot (default: false) */
  screenshot?: boolean | ScreenshotOptions;
  /** Filter options for elements */
  filter?: SnapshotFilter;
  /** Show visual overlay on page */
  showOverlay?: boolean;
  /** Use server-side API (Pro/Enterprise tier) */
  useApi?: boolean;
  /** API key for server-side processing */
  sentienceApiKey?: string;
  /** Goal/task description for ordinal support and gateway reranking */
  goal?: string;
}

/**
 * Screenshot options.
 */
export interface ScreenshotOptions {
  format?: 'png' | 'jpeg';
  quality?: number;
}

/**
 * Filter options for snapshot.
 */
export interface SnapshotFilter {
  clickable?: boolean;
  visible?: boolean;
  inViewport?: boolean;
}

/**
 * Snapshot cache with staleness detection.
 *
 * Caches snapshots and returns cached version if still fresh.
 * Useful for reducing redundant snapshot calls in action loops.
 */
/**
 * Snapshot cache: reuses the last snapshot while it is younger than `maxAgeMs`.
 *
 * Usage:
 *   const cache = new CachedSnapshot(backend, 2000);
 *
 *   // First call takes fresh snapshot
 *   const snap1 = await cache.get();
 *
 *   // Second call returns cached if < 2s old
 *   const snap2 = await cache.get();
 *
 *   // Invalidate after actions that change DOM
 *   await click(backend, element.bbox);
 *   cache.invalidate();
 *
 *   // Next get() will take fresh snapshot
 *   const snap3 = await cache.get();
 */
export class CachedSnapshot {
  private backend: BrowserBackend;
  private maxAgeMs: number;
  private defaultOptions?: SnapshotOptions;
  private cached: Snapshot | null = null;
  private cachedAt: number = 0;
  // Recorded on refresh and cleared on invalidate(); staleness is decided by
  // age only, so nothing in this class reads it.
  private cachedUrl: string | null = null;

  /**
   * Initialize cached snapshot.
   *
   * @param backend - BrowserBackend implementation
   * @param maxAgeMs - Maximum cache age in milliseconds (default: 2000)
   * @param options - Default snapshot options
   */
  constructor(backend: BrowserBackend, maxAgeMs: number = 2000, options?: SnapshotOptions) {
    this.backend = backend;
    this.maxAgeMs = maxAgeMs;
    this.defaultOptions = options;
  }

  /**
   * Get snapshot, using cache if fresh.
   *
   * Per-call `options` replace the default options entirely (they are not
   * merged) and are only used when a fresh snapshot is actually taken.
   *
   * @param options - Options for this call; used instead of the defaults when provided
   * @param forceRefresh - If true, always take fresh snapshot
   * @returns Snapshot (cached or fresh)
   */
  async get(options?: SnapshotOptions, forceRefresh: boolean = false): Promise<Snapshot> {
    if (forceRefresh || this.cached === null || this.isStale()) {
      const fresh = await snapshot(this.backend, options || this.defaultOptions);
      this.cached = fresh;
      this.cachedAt = Date.now();
      this.cachedUrl = fresh.url;
      return fresh;
    }

    return this.cached;
  }

  /**
   * Invalidate cache, forcing refresh on next get().
   *
   * Call this after actions that modify the DOM.
   */
  invalidate(): void {
    this.cached = null;
    this.cachedAt = 0;
    this.cachedUrl = null;
  }

  /**
   * Check if cache is stale and needs refresh: true when nothing is cached or
   * the cached snapshot is older than `maxAgeMs`.
   */
  private isStale(): boolean {
    if (this.cached === null) {
      return true;
    }

    return Date.now() - this.cachedAt > this.maxAgeMs;
  }

  /**
   * Check if a cached snapshot exists.
   */
  get isCached(): boolean {
    return this.cached !== null;
  }

  /**
   * Get age of cached snapshot in milliseconds (Infinity when nothing is cached).
   */
  get ageMs(): number {
    if (this.cached === null) {
      return Infinity;
    }
    return Date.now() - this.cachedAt;
  }
}

/**
 * Take a Sentience snapshot using the backend protocol.
 *
 * Requires:
 * - Sentience extension loaded in browser (via --load-extension)
 * - Extension injected window.sentience API
 *
 * NOTE(review): this function always delegates to the local-extension path;
 * `useApi` and `sentienceApiKey` are not consulted here. Confirm whether an
 * API path is still to be ported.
 *
 * @param backend - BrowserBackend implementation (CDPBackend, PlaywrightBackend, etc.)
 * @param options - Snapshot options (limit, filter, screenshot, etc.)
 * @returns Snapshot with elements, viewport, and optional screenshot
 *
 * @example
 * import { BrowserUseAdapter } from './backends/browser-use-adapter';
 * import { snapshot } from './backends/snapshot';
 *
 * const adapter = new BrowserUseAdapter(session);
 * const backend = await adapter.createBackend();
 *
 * // Basic snapshot (uses local extension)
 * const snap = await snapshot(backend);
 *
 * // With options
 * const snap = await snapshot(backend, {
 *   limit: 100,
 *   screenshot: true
 * });
 */
export async function snapshot(
  backend: BrowserBackend,
  options?: SnapshotOptions
): Promise<Snapshot> {
  const opts = options || {};

  // Use local extension (Free tier)
  return await snapshotViaExtension(backend, opts);
}

/**
 * Wait for Sentience extension to inject window.sentience API.
 */
/**
 * Wait for Sentience extension to inject window.sentience API.
 *
 * Polls the page until `window.sentience.snapshot` is a function. eval()
 * failures while polling are ignored and polling continues. On timeout a
 * diagnostics probe is evaluated in the page and attached to the thrown error.
 *
 * @param backend - BrowserBackend implementation
 * @param timeoutMs - Maximum wait time
 * @throws ExtensionNotLoadedError if extension not injected within timeout
 */
async function waitForExtension(backend: BrowserBackend, timeoutMs: number = 5000): Promise<void> {
  const pollIntervalMs = 100;
  const startTime = Date.now();

  while (true) {
    const elapsed = Date.now() - startTime;

    if (elapsed >= timeoutMs) {
      // Gather diagnostics
      let diagnostics: ExtensionDiagnostics | undefined;
      try {
        diagnostics = (await backend.eval(`
                    (() => ({
                        sentienceDefined: typeof window.sentience !== 'undefined',
                        sentienceSnapshot: typeof window.sentience?.snapshot === 'function',
                        url: window.location.href,
                        extensionId: document.documentElement.dataset.sentienceExtensionId || null,
                        hasContentScript: !!document.documentElement.dataset.sentienceExtensionId
                    }))()
                `)) as ExtensionDiagnostics;
      } catch (e) {
        diagnostics = {
          error: `Could not gather diagnostics: ${e instanceof Error ? e.message : String(e)}`,
        };
      }

      throw ExtensionNotLoadedError.fromTimeout(timeoutMs, diagnostics);
    }

    // Check if extension is ready
    try {
      const ready = await backend.eval(
        "typeof window.sentience !== 'undefined' && " +
          "typeof window.sentience.snapshot === 'function'"
      );
      if (ready) {
        return;
      }
    } catch {
      // Keep polling
    }

    // Never sleep past the deadline; `timeoutMs - elapsed` is > 0 here.
    await sleep(Math.min(pollIntervalMs, timeoutMs - elapsed));
  }
}

/**
 * Take snapshot using local extension (Free tier).
 */
/**
 * Take snapshot using local extension (Free tier).
 *
 * Waits up to 5000ms for the extension, calls `window.sentience.snapshot`
 * in the page with the options produced by buildExtensionOptions, and
 * optionally asks the page to draw the overlay for the result's `raw_elements`.
 *
 * @param backend - BrowserBackend implementation
 * @param options - Snapshot options
 * @returns The value the page-side snapshot call evaluated to
 * @throws SnapshotError when the page-side call yields null or undefined
 */
async function snapshotViaExtension(
  backend: BrowserBackend,
  options: SnapshotOptions
): Promise<Snapshot> {
  // Wait for extension injection
  await waitForExtension(backend, 5000);

  // Build options dict for extension API
  const extOptions = buildExtensionOptions(options);

  // Call extension's snapshot function
  const result = (await backend.eval(`
        (() => {
            const options = ${JSON.stringify(extOptions)};
            return window.sentience.snapshot(options);
        })()
    `)) as Snapshot | null | undefined;

  // `== null` also catches undefined, which would otherwise crash on
  // `.raw_elements` below or be handed to the caller as a Snapshot.
  if (result == null) {
    // Try to get URL for better error message
    let url: string | undefined;
    try {
      url = (await backend.eval('window.location.href')) as string;
    } catch {
      // Ignore
    }
    throw SnapshotError.fromNullResult(url);
  }

  // Show overlay if requested
  if (options.showOverlay) {
    const rawElements = (result as unknown as Record<string, unknown>).raw_elements;
    if (rawElements) {
      await backend.eval(`
                (() => {
                    if (window.sentience && window.sentience.showOverlay) {
                        window.sentience.showOverlay(${JSON.stringify(rawElements)}, null);
                    }
                })()
            `);
    }
  }

  return result;
}

/**
 * Build options dict for extension API call.
 *
 * Only `screenshot` (when set and not false), `limit` (when set and not 50)
 * and `filter` (when set) are forwarded; every other option is left out.
 *
 * @param options - Snapshot options supplied by the caller
 * @returns Plain object that is JSON-serialised into the page-side call
 */
function buildExtensionOptions(options: SnapshotOptions): Record<string, unknown> {
  const extOptions: Record<string, unknown> = {};

  // Screenshot config
  if (options.screenshot !== false && options.screenshot !== undefined) {
    extOptions.screenshot = options.screenshot;
  }

  // Limit (only if not default)
  if (options.limit !== undefined && options.limit !== 50) {
    extOptions.limit = options.limit;
  }

  // Filter
  if (options.filter !== undefined) {
    extOptions.filter = options.filter;
  }

  return extOptions;
}

/**
 * Helper sleep function.
 */
+ */ +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} diff --git a/src/index.ts b/src/index.ts index c17354ed..68713998 100644 --- a/src/index.ts +++ b/src/index.ts @@ -67,3 +67,6 @@ export { selectByOrdinal, boostOrdinalElements, } from './ordinal'; + +// Backends (browser-use integration) +export * as backends from './backends'; diff --git a/tests/backends/actions.test.ts b/tests/backends/actions.test.ts new file mode 100644 index 00000000..f2d0c7e7 --- /dev/null +++ b/tests/backends/actions.test.ts @@ -0,0 +1,254 @@ +/** + * Tests for backend actions + */ + +import { + click, + typeText, + scroll, + scrollToElement, + waitForStable, + ClickTarget, +} from '../../src/backends/actions'; +import { BrowserBackend } from '../../src/backends/protocol'; + +describe('backends/actions', () => { + let mockBackend: jest.Mocked; + + beforeEach(() => { + mockBackend = { + refreshPageInfo: jest.fn().mockResolvedValue({ + width: 1920, + height: 1080, + scrollX: 0, + scrollY: 0, + }), + eval: jest.fn().mockResolvedValue(null), + call: jest.fn().mockResolvedValue(null), + getLayoutMetrics: jest.fn().mockResolvedValue({ + viewportX: 0, + viewportY: 0, + viewportWidth: 1920, + viewportHeight: 1080, + contentWidth: 1920, + contentHeight: 5000, + deviceScaleFactor: 1.0, + }), + screenshotPng: jest.fn().mockResolvedValue('base64data'), + mouseMove: jest.fn().mockResolvedValue(undefined), + mouseClick: jest.fn().mockResolvedValue(undefined), + wheel: jest.fn().mockResolvedValue(undefined), + typeText: jest.fn().mockResolvedValue(undefined), + waitReadyState: jest.fn().mockResolvedValue(undefined), + getUrl: jest.fn().mockResolvedValue('https://example.com'), + }; + }); + + describe('click', () => { + it('should click at tuple coordinates', async () => { + const result = await click(mockBackend, [100, 200]); + + expect(mockBackend.mouseMove).toHaveBeenCalledWith(100, 200); + expect(mockBackend.mouseClick).toHaveBeenCalledWith(100, 200, 
'left', 1); + expect(result.success).toBe(true); + expect(result.outcome).toBe('dom_updated'); + }); + + it('should click at BBox center', async () => { + const bbox = { x: 100, y: 100, width: 50, height: 30 }; + const result = await click(mockBackend, bbox); + + // Center should be (125, 115) + expect(mockBackend.mouseMove).toHaveBeenCalledWith(125, 115); + expect(mockBackend.mouseClick).toHaveBeenCalledWith(125, 115, 'left', 1); + expect(result.success).toBe(true); + }); + + it('should click at object coordinates', async () => { + const result = await click(mockBackend, { x: 200, y: 300 }); + + expect(mockBackend.mouseMove).toHaveBeenCalledWith(200, 300); + expect(mockBackend.mouseClick).toHaveBeenCalledWith(200, 300, 'left', 1); + expect(result.success).toBe(true); + }); + + it('should support double-click', async () => { + const result = await click(mockBackend, [100, 200], 'left', 2); + + expect(mockBackend.mouseClick).toHaveBeenCalledWith(100, 200, 'left', 2); + expect(result.success).toBe(true); + }); + + it('should support right-click', async () => { + const result = await click(mockBackend, [100, 200], 'right'); + + expect(mockBackend.mouseClick).toHaveBeenCalledWith(100, 200, 'right', 1); + expect(result.success).toBe(true); + }); + + it('should skip mouse move when moveFirst is false', async () => { + const result = await click(mockBackend, [100, 200], 'left', 1, false); + + expect(mockBackend.mouseMove).not.toHaveBeenCalled(); + expect(mockBackend.mouseClick).toHaveBeenCalled(); + expect(result.success).toBe(true); + }); + + it('should return error result on failure', async () => { + mockBackend.mouseClick.mockRejectedValue(new Error('Click failed')); + + const result = await click(mockBackend, [100, 200]); + + expect(result.success).toBe(false); + expect(result.outcome).toBe('error'); + expect(result.error?.code).toBe('click_failed'); + expect(result.error?.reason).toContain('Click failed'); + }); + }); + + describe('typeText', () => { + it('should 
type text without target', async () => { + const result = await typeText(mockBackend, 'Hello World'); + + expect(mockBackend.mouseClick).not.toHaveBeenCalled(); + expect(mockBackend.typeText).toHaveBeenCalledWith('Hello World'); + expect(result.success).toBe(true); + }); + + it('should click target before typing', async () => { + const result = await typeText(mockBackend, 'Hello', [100, 200]); + + expect(mockBackend.mouseClick).toHaveBeenCalledWith(100, 200); + expect(mockBackend.typeText).toHaveBeenCalledWith('Hello'); + expect(result.success).toBe(true); + }); + + it('should clear before typing when clearFirst is true', async () => { + const result = await typeText(mockBackend, 'New text', [100, 200], true); + + expect(mockBackend.eval).toHaveBeenCalledWith("document.execCommand('selectAll')"); + expect(mockBackend.typeText).toHaveBeenCalledWith('New text'); + expect(result.success).toBe(true); + }); + + it('should return error result on failure', async () => { + mockBackend.typeText.mockRejectedValue(new Error('Type failed')); + + const result = await typeText(mockBackend, 'Hello'); + + expect(result.success).toBe(false); + expect(result.error?.code).toBe('type_failed'); + }); + }); + + describe('scroll', () => { + it('should scroll without target', async () => { + const result = await scroll(mockBackend, 300); + + expect(mockBackend.wheel).toHaveBeenCalledWith(300, undefined, undefined); + expect(result.success).toBe(true); + }); + + it('should scroll at target position', async () => { + const result = await scroll(mockBackend, 300, [500, 400]); + + expect(mockBackend.wheel).toHaveBeenCalledWith(300, 500, 400); + expect(result.success).toBe(true); + }); + + it('should scroll up with negative deltaY', async () => { + const result = await scroll(mockBackend, -500); + + expect(mockBackend.wheel).toHaveBeenCalledWith(-500, undefined, undefined); + expect(result.success).toBe(true); + }); + + it('should return error result on failure', async () => { + 
mockBackend.wheel.mockRejectedValue(new Error('Scroll failed')); + + const result = await scroll(mockBackend, 300); + + expect(result.success).toBe(false); + expect(result.error?.code).toBe('scroll_failed'); + }); + }); + + describe('scrollToElement', () => { + it('should scroll element into view', async () => { + mockBackend.eval.mockResolvedValue(true); + + const result = await scrollToElement(mockBackend, 42); + + expect(mockBackend.eval).toHaveBeenCalled(); + expect(result.success).toBe(true); + }); + + it('should fail when element not found', async () => { + mockBackend.eval.mockResolvedValue(false); + + const result = await scrollToElement(mockBackend, 42); + + expect(result.success).toBe(false); + expect(result.error?.reason).toContain('Element not found'); + }); + + it('should support smooth scrolling', async () => { + mockBackend.eval.mockResolvedValue(true); + + const result = await scrollToElement(mockBackend, 42, 'smooth'); + + expect(mockBackend.eval).toHaveBeenCalledWith(expect.stringContaining("behavior: 'smooth'")); + expect(result.success).toBe(true); + }); + + it('should support different block alignments', async () => { + mockBackend.eval.mockResolvedValue(true); + + const result = await scrollToElement(mockBackend, 42, 'instant', 'start'); + + expect(mockBackend.eval).toHaveBeenCalledWith(expect.stringContaining("block: 'start'")); + expect(result.success).toBe(true); + }); + }); + + describe('waitForStable', () => { + it('should wait for complete state', async () => { + const result = await waitForStable(mockBackend, 'complete'); + + expect(mockBackend.waitReadyState).toHaveBeenCalledWith('complete', 10000); + expect(result.success).toBe(true); + }); + + it('should wait for interactive state', async () => { + const result = await waitForStable(mockBackend, 'interactive'); + + expect(mockBackend.waitReadyState).toHaveBeenCalledWith('interactive', 10000); + expect(result.success).toBe(true); + }); + + it('should use custom timeout', async () => { 
+ const result = await waitForStable(mockBackend, 'complete', 5000); + + expect(mockBackend.waitReadyState).toHaveBeenCalledWith('complete', 5000); + expect(result.success).toBe(true); + }); + + it('should return timeout error', async () => { + mockBackend.waitReadyState.mockRejectedValue(new Error('Timed out waiting')); + + const result = await waitForStable(mockBackend, 'complete', 100); + + expect(result.success).toBe(false); + expect(result.error?.code).toBe('timeout'); + }); + + it('should return wait_failed error for other failures', async () => { + mockBackend.waitReadyState.mockRejectedValue(new Error('Network error')); + + const result = await waitForStable(mockBackend, 'complete'); + + expect(result.success).toBe(false); + expect(result.error?.code).toBe('wait_failed'); + }); + }); +}); diff --git a/tests/backends/browser-use-adapter.test.ts b/tests/backends/browser-use-adapter.test.ts new file mode 100644 index 00000000..bcfbac62 --- /dev/null +++ b/tests/backends/browser-use-adapter.test.ts @@ -0,0 +1,239 @@ +/** + * Tests for BrowserUseAdapter + */ + +import { BrowserUseAdapter, BrowserUseCDPTransport } from '../../src/backends/browser-use-adapter'; + +describe('BrowserUseCDPTransport', () => { + it('should send CDP commands correctly', async () => { + const mockMethod = jest.fn().mockResolvedValue({ result: 'success' }); + const mockCdpClient = { + send: { + Runtime: { + evaluate: mockMethod, + }, + }, + }; + + const transport = new BrowserUseCDPTransport(mockCdpClient, 'session-123'); + + const result = await transport.send('Runtime.evaluate', { + expression: '1 + 1', + }); + + expect(mockMethod).toHaveBeenCalledWith({ + params: { expression: '1 + 1' }, + session_id: 'session-123', + }); + expect(result).toEqual({ result: 'success' }); + }); + + it('should throw on invalid method format', async () => { + const mockCdpClient = { + send: {}, + }; + + const transport = new BrowserUseCDPTransport(mockCdpClient, 'session-123'); + + await 
expect(transport.send('InvalidFormat')).rejects.toThrow('Invalid CDP method format'); + }); + + it('should throw on unknown domain', async () => { + const mockCdpClient = { + send: { + Runtime: {}, + }, + }; + + const transport = new BrowserUseCDPTransport(mockCdpClient, 'session-123'); + + await expect(transport.send('Unknown.method')).rejects.toThrow('Unknown CDP domain'); + }); + + it('should throw on unknown method', async () => { + const mockCdpClient = { + send: { + Runtime: {}, + }, + }; + + const transport = new BrowserUseCDPTransport(mockCdpClient, 'session-123'); + + await expect(transport.send('Runtime.unknownMethod')).rejects.toThrow('Unknown CDP method'); + }); + + it('should return empty object when method returns null', async () => { + const mockMethod = jest.fn().mockResolvedValue(null); + const mockCdpClient = { + send: { + Page: { + reload: mockMethod, + }, + }, + }; + + const transport = new BrowserUseCDPTransport(mockCdpClient, 'session-123'); + + const result = await transport.send('Page.reload'); + + expect(result).toEqual({}); + }); +}); + +describe('BrowserUseAdapter', () => { + describe('page access', () => { + it('should access page directly when available', () => { + const mockPage = { goto: jest.fn() }; + const mockSession = { page: mockPage }; + + const adapter = new BrowserUseAdapter(mockSession); + + expect(adapter.page).toBe(mockPage); + }); + + it('should access _page when page not available', () => { + const mockPage = { goto: jest.fn() }; + const mockSession = { _page: mockPage }; + + const adapter = new BrowserUseAdapter(mockSession); + + expect(adapter.page).toBe(mockPage); + }); + + it('should throw when only async method available', () => { + const mockSession = { + getCurrentPage: jest.fn(), + }; + + const adapter = new BrowserUseAdapter(mockSession); + + expect(() => adapter.page).toThrow('Use await adapter.getPageAsync()'); + }); + + it('should throw when page not found', () => { + const mockSession = {}; + + const adapter 
= new BrowserUseAdapter(mockSession); + + expect(() => adapter.page).toThrow('Could not find page'); + }); + }); + + describe('getPageAsync', () => { + it('should call getCurrentPage when available', async () => { + const mockPage = { goto: jest.fn() }; + const mockSession = { + getCurrentPage: jest.fn().mockResolvedValue(mockPage), + }; + + const adapter = new BrowserUseAdapter(mockSession); + const page = await adapter.getPageAsync(); + + expect(mockSession.getCurrentPage).toHaveBeenCalled(); + expect(page).toBe(mockPage); + }); + + it('should fallback to sync page access', async () => { + const mockPage = { goto: jest.fn() }; + const mockSession = { page: mockPage }; + + const adapter = new BrowserUseAdapter(mockSession); + const page = await adapter.getPageAsync(); + + expect(page).toBe(mockPage); + }); + }); + + describe('apiKey and apiUrl', () => { + it('should return null for apiKey', () => { + const adapter = new BrowserUseAdapter({}); + expect(adapter.apiKey).toBeNull(); + }); + + it('should return null for apiUrl', () => { + const adapter = new BrowserUseAdapter({}); + expect(adapter.apiUrl).toBeNull(); + }); + }); + + describe('createBackend', () => { + it('should create backend from session', async () => { + const mockCdpClient = { + send: { + Runtime: { + evaluate: jest.fn().mockResolvedValue({ result: { value: 1 } }), + }, + }, + }; + const mockCdpSession = { + cdpClient: mockCdpClient, + sessionId: 'session-123', + }; + const mockSession = { + getOrCreateCdpSession: jest.fn().mockResolvedValue(mockCdpSession), + }; + + const adapter = new BrowserUseAdapter(mockSession); + const backend = await adapter.createBackend(); + + expect(mockSession.getOrCreateCdpSession).toHaveBeenCalled(); + expect(backend).toBeDefined(); + }); + + it('should return cached backend on subsequent calls', async () => { + const mockCdpClient = { + send: { + Runtime: { + evaluate: jest.fn().mockResolvedValue({ result: { value: 1 } }), + }, + }, + }; + const mockCdpSession = { + 
cdpClient: mockCdpClient, + sessionId: 'session-123', + }; + const mockSession = { + getOrCreateCdpSession: jest.fn().mockResolvedValue(mockCdpSession), + }; + + const adapter = new BrowserUseAdapter(mockSession); + const backend1 = await adapter.createBackend(); + const backend2 = await adapter.createBackend(); + + expect(backend1).toBe(backend2); + expect(mockSession.getOrCreateCdpSession).toHaveBeenCalledTimes(1); + }); + + it('should throw when getOrCreateCdpSession not available', async () => { + const mockSession = {}; + + const adapter = new BrowserUseAdapter(mockSession); + + await expect(adapter.createBackend()).rejects.toThrow('does not have getOrCreateCdpSession'); + }); + }); + + describe('getTransport', () => { + it('should return transport after creating backend', async () => { + const mockCdpClient = { + send: { + Runtime: { + evaluate: jest.fn().mockResolvedValue({ result: { value: 1 } }), + }, + }, + }; + const mockCdpSession = { + cdpClient: mockCdpClient, + sessionId: 'session-123', + }; + const mockSession = { + getOrCreateCdpSession: jest.fn().mockResolvedValue(mockCdpSession), + }; + + const adapter = new BrowserUseAdapter(mockSession); + const transport = await adapter.getTransport(); + + expect(transport).toBeInstanceOf(BrowserUseCDPTransport); + }); + }); +}); diff --git a/tests/backends/cdp-backend.test.ts b/tests/backends/cdp-backend.test.ts new file mode 100644 index 00000000..34c734de --- /dev/null +++ b/tests/backends/cdp-backend.test.ts @@ -0,0 +1,380 @@ +/** + * Tests for CDP backend implementation + */ + +import { CDPBackend, CDPTransport } from '../../src/backends/cdp-backend'; + +describe('CDPBackend', () => { + let mockTransport: jest.Mocked<CDPTransport>; + let backend: CDPBackend; + + beforeEach(() => { + mockTransport = { + send: jest.fn(), + }; + backend = new CDPBackend(mockTransport); + }); + + describe('refreshPageInfo', () => { + it('should return viewport info from JavaScript evaluation', async () => { + 
mockTransport.send.mockResolvedValue({ + result: { + type: 'object', + value: { + width: 1920, + height: 1080, + scrollX: 0, + scrollY: 100, + contentWidth: 1920, + contentHeight: 5000, + }, + }, + }); + + const viewport = await backend.refreshPageInfo(); + + expect(viewport.width).toBe(1920); + expect(viewport.height).toBe(1080); + expect(viewport.scrollX).toBe(0); + expect(viewport.scrollY).toBe(100); + expect(viewport.contentWidth).toBe(1920); + expect(viewport.contentHeight).toBe(5000); + }); + }); + + describe('eval', () => { + it('should evaluate JavaScript expression', async () => { + mockTransport.send.mockResolvedValue({ + result: { + type: 'number', + value: 42, + }, + }); + + const result = await backend.eval('1 + 41'); + + expect(mockTransport.send).toHaveBeenCalledWith('Runtime.evaluate', { + expression: '1 + 41', + returnByValue: true, + awaitPromise: true, + }); + expect(result).toBe(42); + }); + + it('should return null for undefined result', async () => { + mockTransport.send.mockResolvedValue({ + result: { + type: 'undefined', + }, + }); + + const result = await backend.eval('undefined'); + + expect(result).toBeNull(); + }); + + it('should throw on JavaScript exceptions', async () => { + mockTransport.send.mockResolvedValue({ + exceptionDetails: { + text: 'ReferenceError: foo is not defined', + }, + }); + + await expect(backend.eval('foo')).rejects.toThrow('JavaScript evaluation failed'); + }); + }); + + describe('call', () => { + it('should call function with arguments', async () => { + // Mock globalThis lookup + mockTransport.send.mockResolvedValueOnce({ + result: { + type: 'object', + objectId: 'global-object-id', + }, + }); + + // Mock callFunctionOn + mockTransport.send.mockResolvedValueOnce({ + result: { + type: 'number', + value: 15, + }, + }); + + const result = await backend.call('(x, y) => x + y', [5, 10]); + + expect(mockTransport.send).toHaveBeenCalledWith('Runtime.callFunctionOn', { + functionDeclaration: '(x, y) => x + y', + 
objectId: 'global-object-id', + arguments: [{ value: 5 }, { value: 10 }], + returnByValue: true, + awaitPromise: true, + }); + expect(result).toBe(15); + }); + + it('should fallback to eval when no objectId available', async () => { + // Mock globalThis lookup without objectId + mockTransport.send.mockResolvedValueOnce({ + result: { + type: 'object', + }, + }); + + // Mock fallback eval + mockTransport.send.mockResolvedValueOnce({ + result: { + type: 'number', + value: 15, + }, + }); + + const result = await backend.call('(x, y) => x + y', [5, 10]); + + expect(result).toBe(15); + }); + }); + + describe('getLayoutMetrics', () => { + it('should return layout metrics from CDP', async () => { + mockTransport.send.mockResolvedValue({ + layoutViewport: { + clientWidth: 1920, + clientHeight: 1080, + }, + contentSize: { + width: 1920, + height: 5000, + }, + visualViewport: { + pageX: 0, + pageY: 100, + clientWidth: 1920, + clientHeight: 1080, + scale: 1.5, + }, + }); + + const metrics = await backend.getLayoutMetrics(); + + expect(mockTransport.send).toHaveBeenCalledWith('Page.getLayoutMetrics'); + expect(metrics.viewportX).toBe(0); + expect(metrics.viewportY).toBe(100); + expect(metrics.viewportWidth).toBe(1920); + expect(metrics.viewportHeight).toBe(1080); + expect(metrics.contentWidth).toBe(1920); + expect(metrics.contentHeight).toBe(5000); + expect(metrics.deviceScaleFactor).toBe(1.5); + }); + }); + + describe('screenshotPng', () => { + it('should capture and return screenshot data', async () => { + mockTransport.send.mockResolvedValue({ + data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', + }); + + const screenshot = await backend.screenshotPng(); + + expect(mockTransport.send).toHaveBeenCalledWith('Page.captureScreenshot', { + format: 'png', + captureBeyondViewport: false, + }); + expect(screenshot).toContain('iVBORw0KGgo'); + }); + }); + + describe('mouseMove', () => { + it('should dispatch mouse move event', 
async () => { + mockTransport.send.mockResolvedValue({}); + + await backend.mouseMove(100, 200); + + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchMouseEvent', { + type: 'mouseMoved', + x: 100, + y: 200, + }); + }); + }); + + describe('mouseClick', () => { + it('should dispatch mouse press and release events', async () => { + mockTransport.send.mockResolvedValue({}); + + await backend.mouseClick(100, 200, 'left', 1); + + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchMouseEvent', { + type: 'mousePressed', + x: 100, + y: 200, + button: 'left', + clickCount: 1, + }); + + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchMouseEvent', { + type: 'mouseReleased', + x: 100, + y: 200, + button: 'left', + clickCount: 1, + }); + }); + + it('should support double-click', async () => { + mockTransport.send.mockResolvedValue({}); + + await backend.mouseClick(100, 200, 'left', 2); + + expect(mockTransport.send).toHaveBeenCalledWith( + 'Input.dispatchMouseEvent', + expect.objectContaining({ + clickCount: 2, + }) + ); + }); + + it('should support right-click', async () => { + mockTransport.send.mockResolvedValue({}); + + await backend.mouseClick(100, 200, 'right', 1); + + expect(mockTransport.send).toHaveBeenCalledWith( + 'Input.dispatchMouseEvent', + expect.objectContaining({ + button: 'right', + }) + ); + }); + }); + + describe('wheel', () => { + it('should dispatch wheel event with coordinates', async () => { + mockTransport.send.mockResolvedValue({}); + + await backend.wheel(300, 500, 400); + + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchMouseEvent', { + type: 'mouseWheel', + x: 500, + y: 400, + deltaX: 0, + deltaY: 300, + }); + }); + + it('should use viewport center when coordinates not provided', async () => { + // First call for refreshPageInfo + mockTransport.send.mockResolvedValueOnce({ + result: { + type: 'object', + value: { + width: 1000, + height: 800, + scrollX: 0, + scrollY: 0, + }, + }, + }); + + // 
Second call for wheel + mockTransport.send.mockResolvedValueOnce({}); + + await backend.wheel(300); + + expect(mockTransport.send).toHaveBeenLastCalledWith('Input.dispatchMouseEvent', { + type: 'mouseWheel', + x: 500, // width / 2 + y: 400, // height / 2 + deltaX: 0, + deltaY: 300, + }); + }); + }); + + describe('typeText', () => { + it('should dispatch key events for each character', async () => { + mockTransport.send.mockResolvedValue({}); + + await backend.typeText('Hi'); + + // Should have 3 events per character: keyDown, char, keyUp + expect(mockTransport.send).toHaveBeenCalledTimes(6); + + // Check first character 'H' + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchKeyEvent', { + type: 'keyDown', + text: 'H', + }); + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchKeyEvent', { + type: 'char', + text: 'H', + }); + expect(mockTransport.send).toHaveBeenCalledWith('Input.dispatchKeyEvent', { + type: 'keyUp', + text: 'H', + }); + }); + }); + + describe('waitReadyState', () => { + it('should return immediately when already in target state', async () => { + mockTransport.send.mockResolvedValue({ + result: { + type: 'string', + value: 'complete', + }, + }); + + await backend.waitReadyState('complete', 5000); + + expect(mockTransport.send).toHaveBeenCalledWith('Runtime.evaluate', { + expression: 'document.readyState', + returnByValue: true, + awaitPromise: true, + }); + }); + + it('should accept interactive when waiting for interactive', async () => { + mockTransport.send.mockResolvedValue({ + result: { + type: 'string', + value: 'interactive', + }, + }); + + await backend.waitReadyState('interactive', 5000); + + // Should succeed + }); + + it('should timeout when state not reached', async () => { + mockTransport.send.mockResolvedValue({ + result: { + type: 'string', + value: 'loading', + }, + }); + + await expect(backend.waitReadyState('complete', 200)).rejects.toThrow( + 'Timed out waiting for document.readyState' + ); + }, 10000); + 
}); + + describe('getUrl', () => { + it('should return current page URL', async () => { + mockTransport.send.mockResolvedValue({ + result: { + type: 'string', + value: 'https://example.com/page', + }, + }); + + const url = await backend.getUrl(); + + expect(url).toBe('https://example.com/page'); + }); + }); +}); diff --git a/tests/backends/protocol.test.ts b/tests/backends/protocol.test.ts new file mode 100644 index 00000000..fbe6e421 --- /dev/null +++ b/tests/backends/protocol.test.ts @@ -0,0 +1,127 @@ +/** + * Tests for backends protocol types + */ + +import { + BrowserBackend, + ViewportInfo, + LayoutMetrics, + MouseButton, + ReadyState, +} from '../../src/backends/protocol'; + +describe('backends/protocol types', () => { + describe('ViewportInfo', () => { + it('should define required viewport properties', () => { + const viewport: ViewportInfo = { + width: 1920, + height: 1080, + scrollX: 0, + scrollY: 100, + }; + + expect(viewport.width).toBe(1920); + expect(viewport.height).toBe(1080); + expect(viewport.scrollX).toBe(0); + expect(viewport.scrollY).toBe(100); + }); + + it('should allow optional content dimensions', () => { + const viewport: ViewportInfo = { + width: 1920, + height: 1080, + scrollX: 0, + scrollY: 0, + contentWidth: 1920, + contentHeight: 5000, + }; + + expect(viewport.contentWidth).toBe(1920); + expect(viewport.contentHeight).toBe(5000); + }); + }); + + describe('LayoutMetrics', () => { + it('should define layout metric properties', () => { + const metrics: LayoutMetrics = { + viewportX: 0, + viewportY: 0, + viewportWidth: 1920, + viewportHeight: 1080, + contentWidth: 1920, + contentHeight: 5000, + deviceScaleFactor: 2.0, + }; + + expect(metrics.viewportWidth).toBe(1920); + expect(metrics.viewportHeight).toBe(1080); + expect(metrics.deviceScaleFactor).toBe(2.0); + }); + }); + + describe('MouseButton type', () => { + it('should accept valid mouse button values', () => { + const left: MouseButton = 'left'; + const right: MouseButton = 'right'; + 
const middle: MouseButton = 'middle'; + + expect(left).toBe('left'); + expect(right).toBe('right'); + expect(middle).toBe('middle'); + }); + }); + + describe('ReadyState type', () => { + it('should accept valid ready state values', () => { + const interactive: ReadyState = 'interactive'; + const complete: ReadyState = 'complete'; + + expect(interactive).toBe('interactive'); + expect(complete).toBe('complete'); + }); + }); + + describe('BrowserBackend interface', () => { + it('should be implementable with required methods', () => { + // Create a mock implementation to verify interface is correctly defined + const mockBackend: BrowserBackend = { + refreshPageInfo: jest.fn().mockResolvedValue({ + width: 1920, + height: 1080, + scrollX: 0, + scrollY: 0, + }), + eval: jest.fn().mockResolvedValue('result'), + call: jest.fn().mockResolvedValue('call result'), + getLayoutMetrics: jest.fn().mockResolvedValue({ + viewportX: 0, + viewportY: 0, + viewportWidth: 1920, + viewportHeight: 1080, + contentWidth: 1920, + contentHeight: 5000, + deviceScaleFactor: 1.0, + }), + screenshotPng: jest.fn().mockResolvedValue('base64data'), + mouseMove: jest.fn().mockResolvedValue(undefined), + mouseClick: jest.fn().mockResolvedValue(undefined), + wheel: jest.fn().mockResolvedValue(undefined), + typeText: jest.fn().mockResolvedValue(undefined), + waitReadyState: jest.fn().mockResolvedValue(undefined), + getUrl: jest.fn().mockResolvedValue('https://example.com'), + }; + + expect(mockBackend.refreshPageInfo).toBeDefined(); + expect(mockBackend.eval).toBeDefined(); + expect(mockBackend.call).toBeDefined(); + expect(mockBackend.getLayoutMetrics).toBeDefined(); + expect(mockBackend.screenshotPng).toBeDefined(); + expect(mockBackend.mouseMove).toBeDefined(); + expect(mockBackend.mouseClick).toBeDefined(); + expect(mockBackend.wheel).toBeDefined(); + expect(mockBackend.typeText).toBeDefined(); + expect(mockBackend.waitReadyState).toBeDefined(); + expect(mockBackend.getUrl).toBeDefined(); + }); + }); 
+}); diff --git a/tests/backends/sentience-context.test.ts b/tests/backends/sentience-context.test.ts new file mode 100644 index 00000000..2993660d --- /dev/null +++ b/tests/backends/sentience-context.test.ts @@ -0,0 +1,559 @@ +/** + * Tests for SentienceContext (Token-Slasher Context Middleware). + * + * These tests verify the formatting logic and element selection strategy + * without requiring a real browser or extension. + */ + +import { + SentienceContext, + SentienceContextState, + TopElementSelector, +} from '../../src/backends/sentience-context'; +import type { Element, Snapshot, BBox, VisualCues, Viewport } from '../../src/types'; + +/** + * Helper to create test elements with defaults. + */ +function makeElement(params: { + id: number; + role?: string; + text?: string; + importance?: number; + bbox?: BBox; + visual_cues?: VisualCues; + doc_y?: number; + group_key?: string; + group_index?: number; + in_dominant_group?: boolean; + href?: string; +}): Element { + return { + id: params.id, + role: params.role ?? 'button', + text: params.text ?? null, + importance: params.importance ?? 50, + bbox: params.bbox ?? { x: 0, y: 0, width: 100, height: 30 }, + visual_cues: params.visual_cues ?? { + is_primary: false, + background_color_name: null, + is_clickable: true, + }, + in_viewport: true, + is_occluded: false, + z_index: 1, + doc_y: params.doc_y, + group_key: params.group_key, + group_index: params.group_index, + in_dominant_group: params.in_dominant_group, + href: params.href, + }; +} + +/** + * Helper to create test snapshots. 
+ */ +function makeSnapshot(elements: Element[], dominant_group_key?: string): Snapshot { + return { + status: 'success', + url: 'https://example.com', + viewport: { width: 1920, height: 1080 }, + elements, + dominant_group_key, + }; +} + +describe('SentienceContext', () => { + describe('initialization', () => { + it('should use default values', () => { + const ctx = new SentienceContext(); + + expect(ctx.selector.byImportance).toBe(60); + expect(ctx.selector.fromDominantGroup).toBe(15); + expect(ctx.selector.byPosition).toBe(10); + }); + + it('should use custom values', () => { + const ctx = new SentienceContext({ + sentienceApiKey: 'test-key', + maxElements: 100, + showOverlay: true, + topElementSelector: { + byImportance: 30, + fromDominantGroup: 10, + byPosition: 5, + }, + }); + + expect(ctx.selector.byImportance).toBe(30); + expect(ctx.selector.fromDominantGroup).toBe(10); + expect(ctx.selector.byPosition).toBe(5); + }); + + it('should have correct default selector values', () => { + const ctx = new SentienceContext({ + topElementSelector: {}, + }); + expect(ctx.selector.byImportance).toBe(60); + expect(ctx.selector.fromDominantGroup).toBe(15); + expect(ctx.selector.byPosition).toBe(10); + }); + }); + + describe('_formatSnapshotForLLM', () => { + it('should format basic elements', () => { + const ctx = new SentienceContext({ + topElementSelector: { + byImportance: 10, + fromDominantGroup: 5, + byPosition: 5, + }, + }); + + const elements = [ + makeElement({ id: 1, role: 'button', text: 'Click me', importance: 80 }), + makeElement({ + id: 2, + role: 'link', + text: 'Go home', + importance: 60, + href: 'https://example.com', + }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + expect(lines).toHaveLength(2); + // Check format: ID|role|text|imp|is_primary|docYq|ord|DG|href + const parts = lines[0].split('|'); 
+ expect(parts[0]).toBe('1'); // id + expect(parts[1]).toBe('button'); // role + expect(parts[2]).toBe('Click me'); // text + expect(parts[3]).toBe('80'); // importance + expect(parts[4]).toBe('0'); // is_primary (False) + }); + + it('should set is_primary flag correctly from visual_cues', () => { + const ctx = new SentienceContext({ + topElementSelector: { + byImportance: 10, + fromDominantGroup: 5, + byPosition: 5, + }, + }); + + const elements = [ + makeElement({ + id: 1, + role: 'button', + text: 'Primary CTA', + importance: 90, + visual_cues: { is_primary: true, background_color_name: null, is_clickable: true }, + }), + makeElement({ + id: 2, + role: 'button', + text: 'Secondary', + importance: 70, + visual_cues: { is_primary: false, background_color_name: null, is_clickable: true }, + }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // First element should have is_primary=1 + const parts1 = lines[0].split('|'); + expect(parts1[4]).toBe('1'); + + // Second element should have is_primary=0 + const parts2 = lines[1].split('|'); + expect(parts2[4]).toBe('0'); + }); + + it('should override role to link when element has href', () => { + const ctx = new SentienceContext(); + + const elements = [ + makeElement({ + id: 1, + role: 'button', + text: 'Button with href', + importance: 80, + href: 'https://example.com', + }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const parts = result.trim().split('|'); + + expect(parts[1]).toBe('link'); + }); + + it('should normalize whitespace in text', () => { + const ctx = new SentienceContext({ topElementSelector: { byImportance: 10 } }); + + const elements = [ + makeElement({ + id: 1, + role: 'button', + text: 'Line1\nLine2\tTabbed Spaces', + importance: 80, + 
}), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const parts = result.trim().split('|'); + + // All whitespace should be normalized to single spaces + expect(parts[2]).toBe('Line1 Line2 Tabbed Spaces'); + }); + + it('should truncate long text to 30 chars', () => { + const ctx = new SentienceContext({ topElementSelector: { byImportance: 10 } }); + + const longText = 'A'.repeat(50); // 50 characters + const elements = [makeElement({ id: 1, role: 'button', text: longText, importance: 80 })]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const parts = result.trim().split('|'); + + // Should be truncated to 27 chars + "..." + expect(parts[2].length).toBe(30); + expect(parts[2].endsWith('...')).toBe(true); + }); + + it('should set DG flag correctly for dominant group elements', () => { + const ctx = new SentienceContext({ + topElementSelector: { byImportance: 10, fromDominantGroup: 5 }, + }); + + const elements = [ + makeElement({ + id: 1, + role: 'link', + text: 'In DG', + importance: 80, + in_dominant_group: true, + }), + makeElement({ + id: 2, + role: 'link', + text: 'Not in DG', + importance: 70, + in_dominant_group: false, + }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // DG flag is at index 7 (after ord at index 6) + const parts1 = lines[0].split('|'); + expect(parts1[7]).toBe('1'); + + const parts2 = lines[1].split('|'); + expect(parts2[7]).toBe('0'); + }); + + it('should compute rank_in_group locally for dominant group elements', () => { + const ctx = new SentienceContext({ + topElementSelector: { byImportance: 10, fromDominantGroup: 10 }, + }); + + const elements = [ + makeElement({ + 
id: 1, + role: 'link', + text: 'Third', + importance: 70, + doc_y: 300, + in_dominant_group: true, + }), + makeElement({ + id: 2, + role: 'link', + text: 'First', + importance: 80, + doc_y: 100, + in_dominant_group: true, + }), + makeElement({ + id: 3, + role: 'link', + text: 'Second', + importance: 90, + doc_y: 200, + in_dominant_group: true, + }), + makeElement({ + id: 4, + role: 'button', + text: 'Not in DG', + importance: 95, + doc_y: 50, + in_dominant_group: false, + }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // Find elements and check ord values + const ordValues: Record<number, string> = {}; + for (const line of lines) { + const parts = line.split('|'); + const elId = parseInt(parts[0], 10); + const ordVal = parts[6]; + ordValues[elId] = ordVal; + } + + // Element 2 (doc_y=100) should be rank 0 + expect(ordValues[2]).toBe('0'); + // Element 3 (doc_y=200) should be rank 1 + expect(ordValues[3]).toBe('1'); + // Element 1 (doc_y=300) should be rank 2 + expect(ordValues[1]).toBe('2'); + // Element 4 (not in DG) should have "-" + expect(ordValues[4]).toBe('-'); + }); + }); + + describe('_compressHref', () => { + it('should extract domain from full URL', () => { + const ctx = new SentienceContext(); + + // @ts-expect-error - accessing private method for testing + expect(ctx._compressHref('https://github.com/user/repo')).toBe('github'); + // @ts-expect-error - accessing private method for testing + expect(ctx._compressHref('https://www.example.com/page')).toBe('example'); + }); + + it('should extract last segment from relative URL', () => { + const ctx = new SentienceContext(); + + // @ts-expect-error - accessing private method for testing + expect(ctx._compressHref('/api/items/123')).toBe('123'); + // @ts-expect-error - accessing private method for testing + 
expect(ctx._compressHref('/products/widget')).toBe('widget'); + }); + + it('should return empty string for empty href', () => { + const ctx = new SentienceContext(); + + // @ts-expect-error - accessing private method for testing + expect(ctx._compressHref('')).toBe(''); + // @ts-expect-error - accessing private method for testing + expect(ctx._compressHref(undefined)).toBe(''); + }); + + it('should truncate long domain to 10 chars', () => { + const ctx = new SentienceContext(); + + // @ts-expect-error - accessing private method for testing + const result = ctx._compressHref('https://verylongdomainname.com/page'); + expect(result.length).toBeLessThanOrEqual(10); + }); + }); + + describe('element selection', () => { + it('should select top elements by importance', () => { + const ctx = new SentienceContext({ + topElementSelector: { + byImportance: 2, + fromDominantGroup: 0, + byPosition: 0, + }, + }); + + const elements = [ + makeElement({ id: 1, role: 'button', importance: 50 }), + makeElement({ id: 2, role: 'button', importance: 100 }), + makeElement({ id: 3, role: 'button', importance: 75 }), + makeElement({ id: 4, role: 'button', importance: 25 }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // Should only have 2 elements (top by importance) + expect(lines).toHaveLength(2); + + // Should be elements 2 and 3 (highest importance) + const ids = lines.map(line => parseInt(line.split('|')[0], 10)); + expect(ids).toContain(2); + expect(ids).toContain(3); + }); + + it('should include elements from dominant group', () => { + const ctx = new SentienceContext({ + topElementSelector: { + byImportance: 1, + fromDominantGroup: 2, + byPosition: 0, + }, + }); + + const elements = [ + makeElement({ id: 1, role: 'button', importance: 100 }), // Top by importance + makeElement({ + id: 2, + role: 'link', + importance: 30, + 
in_dominant_group: true, + group_index: 0, + }), + makeElement({ + id: 3, + role: 'link', + importance: 20, + in_dominant_group: true, + group_index: 1, + }), + makeElement({ id: 4, role: 'link', importance: 40, in_dominant_group: false }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // Should have 3 elements: 1 (importance) + 2 (dominant group) + expect(lines).toHaveLength(3); + + const ids = lines.map(line => parseInt(line.split('|')[0], 10)); + expect(ids).toContain(1); // top by importance + expect(ids).toContain(2); // dominant group + expect(ids).toContain(3); // dominant group + expect(ids).not.toContain(4); // not in dominant group + }); + + it('should include top elements by position (lowest doc_y)', () => { + const ctx = new SentienceContext({ + topElementSelector: { + byImportance: 0, + fromDominantGroup: 0, + byPosition: 2, + }, + }); + + const elements = [ + makeElement({ id: 1, role: 'button', importance: 50, doc_y: 500 }), + makeElement({ id: 2, role: 'button', importance: 30, doc_y: 100 }), + makeElement({ id: 3, role: 'button', importance: 40, doc_y: 200 }), + makeElement({ id: 4, role: 'button', importance: 60, doc_y: 800 }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // Should have 2 elements with lowest doc_y + expect(lines).toHaveLength(2); + + const ids = lines.map(line => parseInt(line.split('|')[0], 10)); + expect(ids).toContain(2); // doc_y=100 + expect(ids).toContain(3); // doc_y=200 + }); + + it('should deduplicate elements selected by multiple criteria', () => { + const ctx = new SentienceContext({ + topElementSelector: { + byImportance: 2, + fromDominantGroup: 2, + byPosition: 2, + }, + }); + + // Element 1 qualifies for 
all three criteria + const elements = [ + makeElement({ + id: 1, + role: 'button', + importance: 100, + doc_y: 50, + in_dominant_group: true, + group_index: 0, + }), + makeElement({ id: 2, role: 'button', importance: 80, doc_y: 100 }), + makeElement({ + id: 3, + role: 'link', + importance: 30, + doc_y: 200, + in_dominant_group: true, + group_index: 1, + }), + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + // Element 1 should appear only once despite qualifying for all criteria + const ids = lines.map(line => parseInt(line.split('|')[0], 10)); + expect(ids.filter(id => id === 1)).toHaveLength(1); + }); + }); + + describe('interactive role filtering', () => { + it('should only include interactive roles', () => { + const ctx = new SentienceContext({ topElementSelector: { byImportance: 10 } }); + + const elements = [ + makeElement({ id: 1, role: 'button', importance: 80 }), + makeElement({ id: 2, role: 'link', importance: 70 }), + makeElement({ id: 3, role: 'heading', importance: 90 }), // Not interactive + makeElement({ id: 4, role: 'textbox', importance: 60 }), + makeElement({ id: 5, role: 'paragraph', importance: 85 }), // Not interactive + ]; + const snap = makeSnapshot(elements); + + // @ts-expect-error - accessing private method for testing + const result = ctx._formatSnapshotForLLM(snap); + const lines = result.trim().split('\n'); + + const ids = lines.map(line => parseInt(line.split('|')[0], 10)); + expect(ids).toContain(1); // button + expect(ids).toContain(2); // link + expect(ids).not.toContain(3); // heading - not interactive + expect(ids).toContain(4); // textbox + expect(ids).not.toContain(5); // paragraph - not interactive + }); + }); + + describe('SentienceContextState', () => { + it('should have correct structure', () => { + const mockSnap = makeSnapshot([makeElement({ id: 1, role: 'button', 
importance: 80 })]); + + const state: SentienceContextState = { + url: 'https://test.com', + snapshot: mockSnap, + promptBlock: 'test prompt', + }; + + expect(state.url).toBe('https://test.com'); + expect(state.snapshot).toBe(mockSnap); + expect(state.promptBlock).toBe('test prompt'); + }); + }); +}); diff --git a/tests/backends/snapshot.test.ts b/tests/backends/snapshot.test.ts new file mode 100644 index 00000000..04ad2458 --- /dev/null +++ b/tests/backends/snapshot.test.ts @@ -0,0 +1,249 @@ +/** + * Tests for backend snapshot functionality + */ + +import { + CachedSnapshot, + snapshot, + ExtensionNotLoadedError, + SnapshotError, +} from '../../src/backends/snapshot'; +import { BrowserBackend } from '../../src/backends/protocol'; +import { Snapshot } from '../../src/types'; + +describe('backends/snapshot', () => { + let mockBackend: jest.Mocked; + + const createMockSnapshot = (): Snapshot => ({ + status: 'success', + url: 'https://example.com', + elements: [ + { + id: 1, + role: 'button', + text: 'Click me', + importance: 100, + bbox: { x: 100, y: 100, width: 80, height: 30 }, + visual_cues: { + is_primary: false, + background_color_name: 'blue', + is_clickable: true, + }, + in_viewport: true, + is_occluded: false, + z_index: 1, + }, + ], + viewport: { width: 1920, height: 1080 }, + }); + + beforeEach(() => { + mockBackend = { + refreshPageInfo: jest.fn().mockResolvedValue({ + width: 1920, + height: 1080, + scrollX: 0, + scrollY: 0, + }), + eval: jest.fn(), + call: jest.fn().mockResolvedValue(null), + getLayoutMetrics: jest.fn().mockResolvedValue({ + viewportX: 0, + viewportY: 0, + viewportWidth: 1920, + viewportHeight: 1080, + contentWidth: 1920, + contentHeight: 5000, + deviceScaleFactor: 1.0, + }), + screenshotPng: jest.fn().mockResolvedValue('base64data'), + mouseMove: jest.fn().mockResolvedValue(undefined), + mouseClick: jest.fn().mockResolvedValue(undefined), + wheel: jest.fn().mockResolvedValue(undefined), + typeText: jest.fn().mockResolvedValue(undefined), 
+ waitReadyState: jest.fn().mockResolvedValue(undefined), + getUrl: jest.fn().mockResolvedValue('https://example.com'), + }; + }); + + describe('ExtensionNotLoadedError', () => { + it('should create error with timeout info', () => { + const error = ExtensionNotLoadedError.fromTimeout(5000); + + expect(error.message).toContain('5000ms'); + expect(error.message).toContain('--load-extension'); + expect(error.timeoutMs).toBe(5000); + }); + + it('should include diagnostics in message', () => { + const diagnostics = { + sentienceDefined: false, + sentienceSnapshot: false, + url: 'https://example.com', + }; + + const error = ExtensionNotLoadedError.fromTimeout(5000, diagnostics); + + expect(error.message).toContain('sentienceDefined'); + expect(error.diagnostics).toEqual(diagnostics); + }); + }); + + describe('SnapshotError', () => { + it('should create error for null result', () => { + const error = SnapshotError.fromNullResult('https://example.com'); + + expect(error.message).toContain('returned null'); + expect(error.message).toContain('https://example.com'); + expect(error.url).toBe('https://example.com'); + }); + + it('should work without URL', () => { + const error = SnapshotError.fromNullResult(); + + expect(error.message).toContain('returned null'); + expect(error.url).toBeUndefined(); + }); + }); + + describe('CachedSnapshot', () => { + it('should take fresh snapshot on first call', async () => { + // Mock extension ready check + mockBackend.eval + .mockResolvedValueOnce(true) // Extension ready check + .mockResolvedValueOnce(createMockSnapshot()); // Snapshot call + + const cache = new CachedSnapshot(mockBackend, 2000); + const snap = await cache.get(); + + expect(snap.elements).toHaveLength(1); + expect(cache.isCached).toBe(true); + }); + + it('should return cached snapshot if fresh', async () => { + // Mock extension ready check and snapshot + mockBackend.eval.mockResolvedValueOnce(true).mockResolvedValueOnce(createMockSnapshot()); + + const cache = new 
CachedSnapshot(mockBackend, 5000); + + const snap1 = await cache.get(); + const snap2 = await cache.get(); + + // eval should only be called twice (once for ready check, once for snapshot) + expect(mockBackend.eval).toHaveBeenCalledTimes(2); + expect(snap1).toBe(snap2); + }); + + it('should take fresh snapshot when cache is stale', async () => { + mockBackend.eval + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(createMockSnapshot()) + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(createMockSnapshot()); + + const cache = new CachedSnapshot(mockBackend, 10); // 10ms max age + + await cache.get(); + + // Wait for cache to become stale + await new Promise(resolve => setTimeout(resolve, 20)); + + await cache.get(); + + // Should have taken two snapshots + expect(mockBackend.eval).toHaveBeenCalledTimes(4); + }); + + it('should take fresh snapshot when invalidated', async () => { + mockBackend.eval + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(createMockSnapshot()) + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(createMockSnapshot()); + + const cache = new CachedSnapshot(mockBackend, 60000); + + await cache.get(); + cache.invalidate(); + await cache.get(); + + // Should have taken two snapshots + expect(mockBackend.eval).toHaveBeenCalledTimes(4); + expect(cache.isCached).toBe(true); + }); + + it('should force refresh when requested', async () => { + mockBackend.eval + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(createMockSnapshot()) + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(createMockSnapshot()); + + const cache = new CachedSnapshot(mockBackend, 60000); + + await cache.get(); + await cache.get(undefined, true); // Force refresh + + expect(mockBackend.eval).toHaveBeenCalledTimes(4); + }); + + it('should report age correctly', async () => { + mockBackend.eval.mockResolvedValueOnce(true).mockResolvedValueOnce(createMockSnapshot()); + + const cache = new CachedSnapshot(mockBackend, 5000); + + // Before any snapshot + 
expect(cache.ageMs).toBe(Infinity); + + await cache.get(); + + // After snapshot + expect(cache.ageMs).toBeLessThan(100); + }); + }); + + describe('snapshot', () => { + it('should throw ExtensionNotLoadedError when extension not ready', async () => { + // Always return false for extension check + mockBackend.eval.mockResolvedValue(false); + + await expect(snapshot(mockBackend)).rejects.toThrow(ExtensionNotLoadedError); + }, 10000); + + it('should throw SnapshotError when snapshot returns null', async () => { + mockBackend.eval + .mockResolvedValueOnce(true) // Extension ready + .mockResolvedValueOnce(null) // Snapshot returns null + .mockResolvedValueOnce('https://example.com'); // URL for error + + await expect(snapshot(mockBackend)).rejects.toThrow(SnapshotError); + }); + + it('should return snapshot when extension is ready', async () => { + const mockSnap = createMockSnapshot(); + mockBackend.eval + .mockResolvedValueOnce(true) // Extension ready + .mockResolvedValueOnce(mockSnap); // Snapshot result + + const result = await snapshot(mockBackend); + + expect(result.status).toBe('success'); + expect(result.elements).toHaveLength(1); + }); + + it('should pass options to extension', async () => { + const mockSnap = createMockSnapshot(); + mockBackend.eval.mockResolvedValueOnce(true).mockResolvedValueOnce(mockSnap); + + await snapshot(mockBackend, { + limit: 100, + screenshot: true, + }); + + // Second call should include options + const evalCall = mockBackend.eval.mock.calls[1][0]; + expect(evalCall).toContain('limit'); + expect(evalCall).toContain('screenshot'); + }); + }); +});