diff --git a/README.md b/README.md index 53308ba4..bd950a62 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ npx playwright install chromium ``` **For local development:** + ```bash npm install npm run build @@ -44,7 +45,7 @@ const response = await agent.execute( console.log(response); // "I found the top result for wireless mouse on Amazon. It's priced at $24.99..." // Follow-up questions maintain context -const followUp = await agent.chat("Add it to cart"); +const followUp = await agent.chat('Add it to cart'); console.log(followUp); await browser.close(); @@ -144,7 +145,9 @@ await browser.close(); ## 🆕 What's New (2026-01-06) ### Human-like Typing + Add realistic delays between keystrokes to mimic human typing: + ```typescript // Type instantly (default) await typeText(browser, elementId, 'Hello World'); @@ -154,7 +157,9 @@ await typeText(browser, elementId, 'Hello World', false, 10); ``` ### Scroll to Element + Scroll elements into view with smooth animation: + ```typescript const snap = await snapshot(browser); const button = find(snap, 'role=button text~"Submit"'); @@ -181,7 +186,7 @@ import { SentienceAgent, OpenAIProvider, Tracer, - JsonlTraceSink + JsonlTraceSink, } from 'sentienceapi'; import { randomUUID } from 'crypto'; @@ -235,6 +240,38 @@ Traces are **100% compatible** with Python SDK traces - use the same tools to an +
+

🔍 Agent Runtime Verification

+ +`AgentRuntime` provides assertion predicates for runtime verification in agent loops, enabling programmatic verification of browser state during execution. + +```typescript +import { SentienceBrowser } from 'sentienceapi'; +import { AgentRuntime, urlContains, exists, allOf } from 'sentienceapi'; +import { createTracer } from 'sentienceapi'; + +const browser = new SentienceBrowser(); +await browser.start(); +const tracer = await createTracer({ runId: 'my-run', uploadTrace: false }); + +// AgentRuntime calls snapshot(page, options); SentienceBrowser.snapshot() takes only options, so wrap it in a small adapter +const browserAdapter = { + snapshot: (_page: unknown, options?: Record<string, any>) => browser.snapshot(options), +}; +const runtime = new AgentRuntime(browserAdapter, browser.getPage(), tracer); + +// Navigate and take snapshot +await browser.getPage().goto('https://example.com'); +runtime.beginStep('Verify page'); +await runtime.snapshot(); + +// Run assertions +runtime.assert(urlContains('example.com'), 'on_correct_domain'); +runtime.assert(exists('role=heading'), 'has_heading'); +runtime.assertDone(exists("text~'Example'"), 'task_complete'); + +console.log(`Task done: ${runtime.isTaskDone}`); +``` + +**See example:** [examples/agent-runtime-verification.ts](examples/agent-runtime-verification.ts) + +
+ ---
@@ -261,14 +298,14 @@ async function main() { console.log(`Found ${snap.elements.length} elements`); // Find first product in viewport using spatial filtering - const products = snap.elements - .filter(el => + const products = snap.elements.filter( + el => el.role === 'link' && el.visual_cues.is_clickable && el.in_viewport && !el.is_occluded && - el.bbox.y < 600 // First row - ); + el.bbox.y < 600 // First row + ); if (products.length > 0) { // Sort by position (left to right, top to bottom) @@ -323,12 +360,14 @@ main(); **`snapshot(browser, options?)`** - Capture page state with AI-ranked elements Features: + - Returns semantic elements with roles, text, importance scores, and bounding boxes - Optional screenshot capture (PNG/JPEG) - Optional visual overlay to see what elements are detected - TypeScript types for type safety **Example:** + ```typescript const snap = await snapshot(browser, { screenshot: true, show_overlay: true }); @@ -353,6 +392,7 @@ for (const element of snap.elements) { - Powerful query DSL with multiple operators **Query Examples:** + ```typescript // Find by role and text const button = find(snap, 'role=button text="Sign in"'); @@ -393,12 +433,13 @@ All actions return `ActionResult` with success status, timing, and outcome: const result = await click(browser, element.id); console.log(`Success: ${result.success}`); -console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error" +console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error" console.log(`Duration: ${result.duration_ms}ms`); console.log(`URL changed: ${result.url_changed}`); ``` **Coordinate-based clicking:** + ```typescript import { clickRect } from './src'; @@ -416,7 +457,7 @@ if (element) { x: element.bbox.x, y: element.bbox.y, w: element.bbox.width, - h: element.bbox.height + h: element.bbox.height, }); } ``` @@ -430,6 +471,7 @@ if (element) { - **`expect(browser, selector)`** - Assertion helper with fluent API **Examples:** + 
```typescript // Wait for element (auto-detects optimal interval based on API usage) const result = await waitFor(browser, 'role=button text="Submit"', 10000); @@ -447,9 +489,9 @@ const result = await waitFor(browser, 'role=button', 5000, undefined, true); const result = await waitFor(browser, 'role=button', 5000, 500, false); // Semantic wait conditions -await waitFor(browser, 'clickable=true', 5000); // Wait for clickable element -await waitFor(browser, 'importance>100', 5000); // Wait for important element -await waitFor(browser, 'role=link visible=true', 5000); // Wait for visible link +await waitFor(browser, 'clickable=true', 5000); // Wait for clickable element +await waitFor(browser, 'importance>100', 5000); // Wait for important element +await waitFor(browser, 'role=link visible=true', 5000); // Wait for visible link // Assertions await expect(browser, 'role=button text="Submit"').toExist(5000); @@ -475,7 +517,7 @@ import { showOverlay, clearOverlay } from 'sentienceapi'; const snap = await snapshot(browser); // Show overlay anytime without re-snapshotting -await showOverlay(browser, snap); // Auto-clears after 5 seconds +await showOverlay(browser, snap); // Auto-clears after 5 seconds // Highlight specific target element in red const button = find(snap, 'role=button text~"Submit"'); @@ -487,11 +529,13 @@ await clearOverlay(browser); ``` **Color Coding:** + - 🔴 Red: Target element - 🔵 Blue: Primary elements (`is_primary=true`) - 🟢 Green: Regular interactive elements **Visual Indicators:** + - Border thickness/opacity scales with importance - Semi-transparent fill - Importance badges @@ -504,21 +548,23 @@ await clearOverlay(browser);

📄 Content Reading

**`read(browser, options?)`** - Extract page content + - `format: "text"` - Plain text extraction - `format: "markdown"` - High-quality markdown conversion (uses Turndown) - `format: "raw"` - Cleaned HTML (default) **Example:** + ```typescript import { read } from './src'; // Get markdown content const result = await read(browser, { format: 'markdown' }); -console.log(result.content); // Markdown text +console.log(result.content); // Markdown text // Get plain text const result = await read(browser, { format: 'text' }); -console.log(result.content); // Plain text +console.log(result.content); // Plain text ```
@@ -527,11 +573,13 @@ console.log(result.content); // Plain text

📷 Screenshots

**`screenshot(browser, options?)`** - Standalone screenshot capture + - Returns base64-encoded data URL - PNG or JPEG format - Quality control for JPEG (1-100) **Example:** + ```typescript import { screenshot } from './src'; import { writeFileSync } from 'fs'; @@ -558,6 +606,7 @@ const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 }); Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match. **Example:** + ```typescript import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi'; @@ -565,8 +614,8 @@ const browser = await SentienceBrowser.create(); await browser.getPage().goto('https://example.com'); // Find "Sign In" button (simple string syntax) -const result = await findTextRect(browser.getPage(), "Sign In"); -if (result.status === "success" && result.results) { +const result = await findTextRect(browser.getPage(), 'Sign In'); +if (result.status === 'success' && result.results) { const firstMatch = result.results[0]; console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`); console.log(`In viewport: ${firstMatch.in_viewport}`); @@ -577,30 +626,31 @@ if (result.status === "success" && result.results) { x: firstMatch.rect.x, y: firstMatch.rect.y, w: firstMatch.rect.width, - h: firstMatch.rect.height + h: firstMatch.rect.height, }); } } ``` **Advanced Options:** + ```typescript // Case-sensitive search const result = await findTextRect(browser.getPage(), { - text: "LOGIN", - caseSensitive: true + text: 'LOGIN', + caseSensitive: true, }); // Whole word only (won't match "login" as part of "loginButton") const result = await findTextRect(browser.getPage(), { - text: "log", - wholeWord: true + text: 'log', + wholeWord: true, }); // Find multiple matches const result = await findTextRect(browser.getPage(), { - text: "Buy", - maxResults: 10 + text: 'Buy', + maxResults: 10, }); for (const match of result.results || []) { if 
(match.in_viewport) { @@ -611,6 +661,7 @@ for (const match of result.results || []) { ``` **Returns:** Promise with: + - **`status`**: "success" or "error" - **`results`**: Array of `TextMatch` objects with: - `text` - The matched text @@ -620,6 +671,7 @@ for (const match of result.results || []) { - `in_viewport` - Whether visible in current viewport **Use Cases:** + - Find buttons/links by visible text without CSS selectors - Get exact pixel coordinates for click automation - Verify text visibility and position on page @@ -641,15 +693,15 @@ for (const match of result.results || []) { Elements returned by `snapshot()` have the following properties: ```typescript -element.id // Unique identifier for interactions -element.role // ARIA role (button, link, textbox, heading, etc.) -element.text // Visible text content -element.importance // AI importance score (0-1000) -element.bbox // Bounding box (x, y, width, height) -element.visual_cues // Visual analysis (is_primary, is_clickable, background_color) -element.in_viewport // Is element visible in current viewport? -element.is_occluded // Is element covered by other elements? -element.z_index // CSS stacking order +element.id; // Unique identifier for interactions +element.role; // ARIA role (button, link, textbox, heading, etc.) +element.text; // Visible text content +element.importance; // AI importance score (0-1000) +element.bbox; // Bounding box (x, y, width, height) +element.visual_cues; // Visual analysis (is_primary, is_clickable, background_color) +element.in_viewport; // Is element visible in current viewport? +element.is_occluded; // Is element covered by other elements? 
+element.z_index; // CSS stacking order ``` @@ -659,15 +711,15 @@ element.z_index // CSS stacking order ### Basic Operators -| Operator | Description | Example | -|----------|-------------|---------| -| `=` | Exact match | `role=button` | -| `!=` | Exclusion | `role!=link` | -| `~` | Substring (case-insensitive) | `text~"sign in"` | -| `^=` | Prefix match | `text^="Add"` | -| `$=` | Suffix match | `text$="Cart"` | -| `>`, `>=` | Greater than | `importance>500` | -| `<`, `<=` | Less than | `bbox.y<600` | +| Operator | Description | Example | +| --------- | ---------------------------- | ---------------- | +| `=` | Exact match | `role=button` | +| `!=` | Exclusion | `role!=link` | +| `~` | Substring (case-insensitive) | `text~"sign in"` | +| `^=` | Prefix match | `text^="Add"` | +| `$=` | Suffix match | `text$="Cart"` | +| `>`, `>=` | Greater than | `importance>500` | +| `<`, `<=` | Less than | `bbox.y<600` | ### Supported Fields @@ -712,7 +764,7 @@ const browser = new SentienceBrowser(undefined, undefined, false); const browser = new SentienceBrowser(undefined, undefined, true); // Auto-detect based on environment (default) -const browser = new SentienceBrowser(); // headless=true if CI=true, else false +const browser = new SentienceBrowser(); // headless=true if CI=true, else false ``` @@ -723,6 +775,7 @@ const browser = new SentienceBrowser(); // headless=true if CI=true, else false For users running from datacenters (AWS, DigitalOcean, etc.), you can configure a residential proxy to prevent IP-based detection by Cloudflare, Akamai, and other anti-bot services. 
**Supported Formats:** + - HTTP: `http://username:password@host:port` - HTTPS: `https://username:password@host:port` - SOCKS5: `socks5://username:password@host:port` @@ -787,9 +840,9 @@ await saveStorageState(browser.getContext(), 'auth.json'); const browser2 = new SentienceBrowser( undefined, // apiKey undefined, // apiUrl - false, // headless - undefined, // proxy - undefined, // userDataDir + false, // headless + undefined, // proxy + undefined, // userDataDir 'auth.json' // storageState - inject saved session ); await browser2.start(); @@ -797,12 +850,12 @@ await browser2.start(); // Workflow 2: Persistent sessions (cookies persist across runs) const browser3 = new SentienceBrowser( - undefined, // apiKey - undefined, // apiUrl - false, // headless - undefined, // proxy + undefined, // apiKey + undefined, // apiUrl + false, // headless + undefined, // proxy './chrome_profile', // userDataDir - persist cookies - undefined // storageState + undefined // storageState ); await browser3.start(); // First run: Log in @@ -810,6 +863,7 @@ await browser3.start(); ``` **Benefits:** + - Bypass login screens and CAPTCHAs with valid sessions - Save 5-10 agent steps and hundreds of tokens per run - Maintain stateful sessions for accessing authenticated pages @@ -827,13 +881,15 @@ See `examples/auth-injection-agent.ts` for complete examples. Click to expand best practices ### 1. Wait for Dynamic Content + ```typescript await browser.goto('https://example.com'); await browser.getPage().waitForLoadState('networkidle'); -await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer +await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer ``` ### 2. Use Multiple Strategies for Finding Elements + ```typescript // Try exact match first let btn = find(snap, 'role=button text="Add to Cart"'); @@ -845,6 +901,7 @@ if (!btn) { ``` ### 3. 
Check Element Visibility Before Clicking + ```typescript if (element.in_viewport && !element.is_occluded) { await click(browser, element.id); @@ -852,6 +909,7 @@ if (element.in_viewport && !element.is_occluded) { ``` ### 4. Handle Navigation + ```typescript const result = await click(browser, linkId); if (result.url_changed) { @@ -860,6 +918,7 @@ if (result.url_changed) { ``` ### 5. Use Screenshots Sparingly + ```typescript // Fast - no screenshot (only element data) const snap = await snapshot(browser); @@ -869,6 +928,7 @@ const snap = await snapshot(browser, { screenshot: true }); ``` ### 6. Always Close Browser + ```typescript const browser = new SentienceBrowser(); @@ -876,7 +936,7 @@ try { await browser.start(); // ... your automation code } finally { - await browser.close(); // Always clean up + await browser.close(); // Always clean up } ``` @@ -890,14 +950,18 @@ try { Click to expand common issues and solutions ### "Extension failed to load" + **Solution:** Build the extension first: + ```bash cd sentience-chrome ./build.sh ``` ### "Cannot use import statement outside a module" + **Solution:** Don't use `node` directly. Use `ts-node` or npm scripts: + ```bash npx ts-node examples/hello.ts # or @@ -905,13 +969,17 @@ npm run example:hello ``` ### "Element not found" + **Solutions:** + - Ensure page is loaded: `await browser.getPage().waitForLoadState('networkidle')` - Use `waitFor()`: `await waitFor(browser, 'role=button', 10000)` - Debug elements: `console.log(snap.elements.map(el => el.text))` ### Button not clickable + **Solutions:** + - Check visibility: `element.in_viewport && !element.is_occluded` - Scroll to element: ``await browser.getPage().evaluate(`window.sentience_registry[${element.id}].scrollIntoView()`)`` @@ -948,6 +1016,7 @@ npm run example:hello **⚠️ Important**: You cannot use `node` directly to run TypeScript files. 
Use one of these methods: ### Option 1: Using npm scripts (recommended) + ```bash npm run example:hello npm run example:basic @@ -956,6 +1025,7 @@ npm run example:wait ``` ### Option 2: Using ts-node directly + ```bash npx ts-node examples/hello.ts # or if ts-node is installed globally: @@ -963,6 +1033,7 @@ ts-node examples/hello.ts ``` ### Option 3: Compile then run + ```bash npm run build # Then use compiled JavaScript from dist/ @@ -1001,7 +1072,7 @@ npm test -- snapshot.test.ts This project is licensed under either of: -* Apache License, Version 2.0, ([LICENSE-APACHE](./LICENSE-APACHE)) -* MIT license ([LICENSE-MIT](./LICENSE-MIT)) +- Apache License, Version 2.0, ([LICENSE-APACHE](./LICENSE-APACHE)) +- MIT license ([LICENSE-MIT](./LICENSE-MIT)) at your option. diff --git a/examples/agent-runtime-verification.ts b/examples/agent-runtime-verification.ts new file mode 100644 index 00000000..4d13e7b6 --- /dev/null +++ b/examples/agent-runtime-verification.ts @@ -0,0 +1,155 @@ +/** + * Example: Agent Runtime with Verification Loop + * + * Demonstrates how to use AgentRuntime for runtime verification in agent loops. + * The AgentRuntime provides assertion predicates to verify browser state during execution. 
/**
 * Example: Agent Runtime with Verification Loop
 *
 * Demonstrates how to use AgentRuntime for runtime verification in agent loops.
 * The AgentRuntime provides assertion predicates to verify browser state during execution.
 *
 * Key features:
 * - Predicate helpers: urlMatches, urlContains, exists, notExists, elementCount
 * - Combinators: allOf, anyOf for complex conditions
 * - Task completion: assertDone() for goal verification
 * - Trace integration: Assertions emitted to trace for Studio timeline
 *
 * Requirements:
 * - SENTIENCE_API_KEY (Pro or Enterprise tier)
 *
 * Usage:
 *   ts-node examples/agent-runtime-verification.ts
 *   or
 *   npm run example:agent-runtime
 */

import { Page } from 'playwright';
import { SentienceBrowser } from '../src/browser';
import { Snapshot } from '../src/types';
import { AgentRuntime, urlContains, urlMatches, exists, notExists, allOf } from '../src';
import { createTracer } from '../src/tracing/tracer-factory';

/**
 * Adapter to make SentienceBrowser compatible with AgentRuntime's BrowserLike interface.
 * AgentRuntime expects snapshot(page, options) but SentienceBrowser has snapshot(options).
 *
 * @param browser - Started SentienceBrowser whose snapshot() is delegated to
 * @returns Object exposing snapshot(page, options); the page argument is ignored
 */
function createBrowserAdapter(browser: SentienceBrowser) {
  return {
    // NOTE(review): the option type's arguments were lost in the reviewed patch;
    // Record<string, any> is the assumed original - confirm against BrowserLike.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    snapshot: async (_page: Page, options?: Record<string, any>): Promise<Snapshot> => {
      return await browser.snapshot(options);
    },
  };
}

/**
 * Run the verification demo: open example.com, take a snapshot, evaluate a set
 * of assertions through AgentRuntime and print a summary.
 *
 * Exits the process with code 1 when SENTIENCE_API_KEY is not set. Errors
 * raised while the browser is open are logged and rethrown after cleanup.
 */
async function main(): Promise<void> {
  // Get API key from environment
  const sentienceKey = process.env.SENTIENCE_API_KEY;

  if (!sentienceKey) {
    console.error('Error: SENTIENCE_API_KEY not set');
    process.exit(1);
  }

  console.log('Starting Agent Runtime Verification Demo\n');

  // 1. Create tracer for verification event emission
  const runId = 'verification-demo';
  const tracer = await createTracer({
    apiKey: sentienceKey,
    runId: runId,
    uploadTrace: false,
  });
  console.log(`Run ID: ${runId}\n`);

  // 2. Create and start browser
  const browser = new SentienceBrowser(sentienceKey, undefined, false);
  await browser.start();
  const page = browser.getPage();

  try {
    // 3. Create AgentRuntime with browser adapter, page, and tracer
    const browserAdapter = createBrowserAdapter(browser);
    const runtime = new AgentRuntime(browserAdapter, page, tracer);

    // 4. Navigate to a page
    console.log('Navigating to example.com...\n');
    await page.goto('https://example.com');
    await page.waitForLoadState('networkidle');

    // Wait for extension to inject; best effort, the demo continues without it
    try {
      await page.waitForFunction(
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        () => typeof (window as any).sentience !== 'undefined',
        { timeout: 10000 }
      );
    } catch {
      console.warn('Extension not ready, continuing anyway...');
    }

    // 5. Begin a verification step
    runtime.beginStep('Verify page loaded correctly');

    // 6. Take a snapshot (required for element assertions)
    const snapshot = await runtime.snapshot();
    console.log(`Snapshot taken: ${snapshot.elements.length} elements found\n`);

    // 7. Run assertions against current state
    console.log('Running assertions:\n');

    // URL assertions
    const urlOk = runtime.assert(urlContains('example.com'), 'on_example_domain');
    console.log(` [${urlOk ? 'PASS' : 'FAIL'}] on_example_domain`);

    const urlMatch = runtime.assert(urlMatches(/https:\/\/.*example\.com/), 'url_is_https');
    console.log(` [${urlMatch ? 'PASS' : 'FAIL'}] url_is_https`);

    // Element assertions
    const hasHeading = runtime.assert(exists('role=heading'), 'has_heading');
    console.log(` [${hasHeading ? 'PASS' : 'FAIL'}] has_heading`);

    const noError = runtime.assert(notExists("text~'Error'"), 'no_error_message');
    console.log(` [${noError ? 'PASS' : 'FAIL'}] no_error_message`);

    // Combined assertion with allOf
    const pageReady = runtime.assert(
      allOf(urlContains('example'), exists('role=link')),
      'page_fully_ready'
    );
    console.log(` [${pageReady ? 'PASS' : 'FAIL'}] page_fully_ready`);

    // 8. Check if task is done (required assertion)
    const taskComplete = runtime.assertDone(
      exists("text~'Example Domain'"),
      'reached_example_page'
    );
    console.log(`\n [${taskComplete ? 'DONE' : 'NOT DONE'}] reached_example_page`);

    // 9. Get accumulated assertions for step_end event
    const assertionsData = runtime.getAssertionsForStepEnd();
    console.log(`\nTotal assertions: ${assertionsData.assertions.length}`);
    console.log(`Task done: ${assertionsData.task_done ?? false}`);

    // 10. Check overall status
    console.log('\nVerification Summary:');
    console.log(` All passed: ${runtime.allAssertionsPassed()}`);
    console.log(` Required passed: ${runtime.requiredAssertionsPassed()}`);
    console.log(` Task complete: ${runtime.isTaskDone}`);
  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`
    const message = error instanceof Error ? error.message : String(error);
    console.error(`\nError during execution: ${message}`);
    throw error;
  } finally {
    // Close tracer and browser. The nested finally guarantees the browser is
    // released even when closing the tracer fails.
    console.log('\nClosing tracer...');
    try {
      await tracer.close(true);
      console.log(`Trace saved to: ~/.sentience/traces/${runId}.jsonl`);
    } finally {
      await browser.close();
      console.log('Done!');
    }
  }
}

main().catch((error: unknown) => {
  console.error(error);
  // Report failure to the shell instead of exiting with status 0
  process.exitCode = 1;
});
/**
 * Agent runtime for verification loop support.
 *
 * This module provides a thin runtime wrapper that combines:
 * 1. Browser session management
 * 2. Snapshot/query helpers
 * 3. Tracer for event emission
 * 4. Assertion/verification methods
 *
 * The AgentRuntime is designed to be used in agent verification loops where
 * you need to repeatedly take snapshots, execute actions, and verify results.
 *
 * @example
 * ```typescript
 * import { SentienceBrowser } from './browser';
 * import { AgentRuntime } from './agent-runtime';
 * import { urlMatches, exists } from './verification';
 * import { Tracer, JsonlTraceSink } from './tracing';
 *
 * const browser = await SentienceBrowser.create();
 * const page = await browser.newPage();
 * await page.goto("https://example.com");
 *
 * const sink = new JsonlTraceSink("trace.jsonl");
 * const tracer = new Tracer("test-run", sink);
 *
 * // NOTE: the first argument must expose snapshot(page, options); wrap objects
 * // whose snapshot() has a different signature in a small adapter.
 * const runtime = new AgentRuntime(browser, page, tracer);
 *
 * // Take snapshot and run assertions
 * await runtime.snapshot();
 * runtime.assert(urlMatches(/example\.com/), "on_homepage");
 * runtime.assert(exists("role=button"), "has_buttons");
 *
 * // Check if task is done
 * if (runtime.assertDone(exists("text~'Success'"), "task_complete")) {
 *   console.log("Task completed!");
 * }
 * ```
 */

import { Page } from 'playwright';
import { v4 as uuidv4 } from 'uuid';
import { Snapshot } from './types';
import { AssertContext, Predicate } from './verification';
import { Tracer } from './tracing/tracer';

// NOTE(review): the type arguments of every `Record`/`Promise` in this module
// were lost in the reviewed patch. `Promise<Snapshot>` follows from how the
// result is used; `Record<string, any>` is the assumed original - confirm.

// Define a minimal browser interface to avoid circular dependencies
interface BrowserLike {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  snapshot(page: Page, options?: Record<string, any>): Promise<Snapshot>;
}

/**
 * Assertion record for accumulation and step_end emission.
 */
export interface AssertionRecord {
  /** Human-readable label passed to assert()/assertDone() */
  label: string;
  /** Whether the predicate reported success */
  passed: boolean;
  /** Whether the assertion was registered as required */
  required: boolean;
  /** Reason reported by the predicate outcome */
  reason: string;
  /** Details reported by the predicate outcome */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  details: Record<string, any>;
}

/**
 * Runtime wrapper for agent verification loops.
 *
 * Provides ergonomic methods for:
 * - snapshot(): Take page snapshot
 * - assert(): Evaluate assertion predicates
 * - assertDone(): Assert task completion (required assertion)
 *
 * The runtime manages assertion state per step and emits verification events
 * to the tracer for Studio timeline display.
 */
export class AgentRuntime {
  /** Browser instance for taking snapshots */
  readonly browser: BrowserLike;
  /** Playwright Page for browser interaction */
  readonly page: Page;
  /** Tracer for event emission */
  readonly tracer: Tracer;

  /** Current step identifier (null until beginStep() is called) */
  stepId: string | null = null;
  /**
   * Current step index. Starts at 0 and is incremented by every beginStep()
   * call that does not pass an explicit index, so the first automatically
   * numbered step has index 1.
   */
  stepIndex: number = 0;
  /** Most recent snapshot (for assertion context) */
  lastSnapshot: Snapshot | null = null;

  /** Assertions accumulated during current step */
  private assertionsThisStep: AssertionRecord[] = [];
  /** Task completion tracking */
  private taskDone: boolean = false;
  private taskDoneLabel: string | null = null;

  /**
   * Create a new AgentRuntime.
   *
   * @param browser - Browser instance for taking snapshots
   * @param page - Playwright Page for browser interaction
   * @param tracer - Tracer for emitting verification events
   */
  constructor(browser: BrowserLike, page: Page, tracer: Tracer) {
    this.browser = browser;
    this.page = page;
    this.tracer = tracer;
  }

  /**
   * Build assertion context from current state.
   *
   * The URL is taken from the last snapshot when one exists, otherwise from
   * the live page.
   */
  private ctx(): AssertContext {
    let url: string | null = null;
    if (this.lastSnapshot) {
      url = this.lastSnapshot.url;
    } else if (this.page) {
      url = this.page.url();
    }

    return {
      snapshot: this.lastSnapshot,
      url,
      stepId: this.stepId,
    };
  }

  /**
   * Take a snapshot of the current page state.
   *
   * This updates lastSnapshot which is used as context for assertions.
   *
   * @param options - Options passed through to browser.snapshot()
   * @returns Snapshot of current page state
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async snapshot(options?: Record<string, any>): Promise<Snapshot> {
    this.lastSnapshot = await this.browser.snapshot(this.page, options);
    return this.lastSnapshot;
  }

  /**
   * Begin a new step in the verification loop.
   *
   * This:
   * - Generates a new stepId
   * - Clears assertions from previous step
   * - Increments stepIndex (or uses provided value)
   *
   * lastSnapshot and the task-done state are left untouched.
   *
   * @param goal - Description of what this step aims to achieve (accepted for
   *   API symmetry; not recorded or emitted by this method)
   * @param stepIndex - Optional explicit step index (otherwise auto-increments)
   * @returns Generated stepId
   */
  beginStep(goal: string, stepIndex?: number): string {
    // Clear previous step state
    this.assertionsThisStep = [];

    // Generate new stepId
    this.stepId = uuidv4();

    // Update step index
    if (stepIndex !== undefined) {
      this.stepIndex = stepIndex;
    } else {
      this.stepIndex += 1;
    }

    return this.stepId;
  }

  /**
   * Evaluate an assertion against current snapshot state.
   *
   * The assertion result is:
   * 1. Accumulated for inclusion in step_end.data.verify.signals.assertions
   * 2. Emitted as a dedicated 'verification' event for Studio timeline
   *
   * @param predicate - Predicate function to evaluate
   * @param label - Human-readable label for this assertion
   * @param required - If true, this assertion gates step success (default: false)
   * @returns True if assertion passed, false otherwise
   */
  assert(predicate: Predicate, label: string, required: boolean = false): boolean {
    const outcome = predicate(this.ctx());

    const record: AssertionRecord = {
      label,
      passed: outcome.passed,
      required,
      reason: outcome.reason,
      details: outcome.details,
    };
    this.assertionsThisStep.push(record);

    // Emit dedicated verification event (Option B from design doc)
    // This makes assertions visible in Studio timeline
    this.tracer.emit(
      'verification',
      {
        kind: 'assert',
        passed: outcome.passed,
        label,
        required,
        reason: outcome.reason,
        details: outcome.details,
      },
      this.stepId ?? undefined
    );

    return outcome.passed;
  }

  /**
   * Assert task completion (required assertion).
   *
   * This is a convenience wrapper for assert() with required=true.
   * When the assertion passes, it marks the task as done and emits an
   * additional 'verification' event of kind 'task_done'.
   *
   * Use this for final verification that the agent's goal is complete.
   *
   * @param predicate - Predicate function to evaluate
   * @param label - Human-readable label for this assertion
   * @returns True if task is complete (assertion passed), false otherwise
   */
  assertDone(predicate: Predicate, label: string): boolean {
    const ok = this.assert(predicate, label, true);

    if (ok) {
      this.taskDone = true;
      this.taskDoneLabel = label;

      // Emit task_done verification event
      this.tracer.emit(
        'verification',
        {
          kind: 'task_done',
          passed: true,
          label,
        },
        this.stepId ?? undefined
      );
    }

    return ok;
  }

  /**
   * Get assertions data for inclusion in step_end.data.verify.signals.
   *
   * This is called when building the step_end event to include
   * assertion results in the trace. The accumulated assertions are copied,
   * not cleared; use flushAssertions() to clear them.
   *
   * @returns Object with 'assertions' and, once the task is done,
   *   'task_done' and 'task_done_label' keys
   */
  getAssertionsForStepEnd(): {
    assertions: AssertionRecord[];
    task_done?: boolean;
    task_done_label?: string;
  } {
    const result: {
      assertions: AssertionRecord[];
      task_done?: boolean;
      task_done_label?: string;
    } = {
      assertions: [...this.assertionsThisStep],
    };

    if (this.taskDone) {
      result.task_done = true;
      result.task_done_label = this.taskDoneLabel ?? undefined;
    }

    return result;
  }

  /**
   * Get and clear assertions for current step.
   *
   * Call this at step end to get accumulated assertions
   * for the step_end event, then clear for next step.
   *
   * @returns List of assertion records from this step
   */
  flushAssertions(): AssertionRecord[] {
    const assertions = [...this.assertionsThisStep];
    this.assertionsThisStep = [];
    return assertions;
  }

  /**
   * Check if task has been marked as done via assertDone().
   */
  get isTaskDone(): boolean {
    return this.taskDone;
  }

  /**
   * Reset task_done state (for multi-task runs).
   */
  resetTaskDone(): void {
    this.taskDone = false;
    this.taskDoneLabel = null;
  }

  /**
   * Check if all assertions in current step passed.
   *
   * @returns True if all assertions passed (or no assertions made)
   */
  allAssertionsPassed(): boolean {
    return this.assertionsThisStep.every(a => a.passed);
  }

  /**
   * Check if all required assertions in current step passed.
   *
   * @returns True if all required assertions passed (or no required assertions)
   */
  requiredAssertionsPassed(): boolean {
    const required = this.assertionsThisStep.filter(a => a.required);
    return required.every(a => a.passed);
  }
}
/**
 * Shared canonicalization utilities for snapshot comparison and indexing.
 *
 * This module provides consistent normalization functions used by both:
 * - tracing/indexer.ts (for computing stable digests)
 * - snapshot-diff.ts (for computing diff_status labels)
 *
 * By sharing these helpers, we ensure consistent behavior:
 * - Same text normalization (whitespace, case, length)
 * - Same bbox rounding (2px precision)
 * - Same change detection thresholds
 */

export interface BBox {
  x: number;
  y: number;
  width: number;
  height: number;
}

export interface VisualCues {
  is_primary?: boolean;
  is_clickable?: boolean;
}

export interface ElementData {
  id?: number;
  role?: string;
  text?: string | null;
  bbox?: BBox;
  visual_cues?: VisualCues;
  is_primary?: boolean;
  is_clickable?: boolean;
}

export interface CanonicalElement {
  id: number | undefined;
  role: string;
  text_norm: string;
  bbox: BBox;
  is_primary: boolean;
  is_clickable: boolean;
}

/**
 * Normalize text for canonical comparison.
 *
 * Transforms, in order:
 * - Collapses every whitespace run to a single space and trims both ends
 * - Lowercases
 * - Truncates to `maxLen` characters
 *
 * @param text - Input text (may be undefined/null)
 * @param maxLen - Maximum length to retain (default: 80)
 * @returns Normalized text string (empty string if input is falsy)
 *
 * @example
 * normalizeText("  Hello   World  ") // "hello world"
 * normalizeText(undefined) // ""
 */
export function normalizeText(text: string | undefined | null, maxLen: number = 80): string {
  if (!text) return '';

  // Collapse whitespace runs, then drop the leading/trailing space they leave behind.
  let normalized = text.split(/\s+/).join(' ').trim();

  normalized = normalized.toLowerCase();

  if (normalized.length > maxLen) {
    normalized = normalized.substring(0, maxLen);
  }

  return normalized;
}

/**
 * Round bbox coordinates to reduce noise.
 *
 * Snaps each coordinate to the nearest multiple of `precision` so that
 * sub-pixel rendering differences are ignored. Missing coordinates are
 * treated as 0. Ties follow `Math.round`, i.e. they round towards +Infinity.
 *
 * @param bbox - Bounding box with (optionally missing) x, y, width, height
 * @param precision - Grid size in pixels (default: 2); must be positive and finite
 * @returns Bbox whose coordinates are multiples of `precision`
 * @throws RangeError if `precision` is zero, negative, NaN or infinite
 *   (these values previously produced NaN coordinates)
 *
 * @example
 * roundBBox({x: 101, y: 203, width: 50, height: 25})
 * // {x: 102, y: 204, width: 50, height: 26}
 */
export function roundBBox(bbox: Partial<BBox>, precision: number = 2): BBox {
  if (!Number.isFinite(precision) || precision <= 0) {
    throw new RangeError(`precision must be a positive finite number, got ${precision}`);
  }

  const snap = (value: number | undefined): number =>
    Math.round((value || 0) / precision) * precision;

  return {
    x: snap(bbox.x),
    y: snap(bbox.y),
    width: snap(bbox.width),
    height: snap(bbox.height),
  };
}

/**
 * Check if two bboxes are equal within a threshold.
 *
 * Missing coordinates are treated as 0. The comparison is inclusive: a
 * difference of exactly `threshold` still counts as equal.
 *
 * @param bbox1 - First bounding box
 * @param bbox2 - Second bounding box
 * @param threshold - Maximum allowed difference in pixels (default: 5.0)
 * @returns True if every bbox property differs by at most `threshold`
 */
export function bboxEqual(
  bbox1: Partial<BBox>,
  bbox2: Partial<BBox>,
  threshold: number = 5.0
): boolean {
  return (
    Math.abs((bbox1.x || 0) - (bbox2.x || 0)) <= threshold &&
    Math.abs((bbox1.y || 0) - (bbox2.y || 0)) <= threshold &&
    Math.abs((bbox1.width || 0) - (bbox2.width || 0)) <= threshold &&
    Math.abs((bbox1.height || 0) - (bbox2.height || 0)) <= threshold
  );
}

/**
 * Check if two bboxes differ beyond the threshold.
 *
 * This is the inverse of bboxEqual, provided for semantic clarity
 * in diff detection code.
 *
 * @param bbox1 - First bounding box
 * @param bbox2 - Second bounding box
 * @param threshold - Maximum allowed difference in pixels (default: 5.0)
 * @returns True if any bbox property differs by more than `threshold`
 */
export function bboxChanged(
  bbox1: Partial<BBox>,
  bbox2: Partial<BBox>,
  threshold: number = 5.0
): boolean {
  return !bboxEqual(bbox1, bbox2, threshold);
}

/**
 * Create canonical representation of an element for comparison/hashing.
 *
 * Extracts and normalizes the fields that matter for identity:
 * - id, role, normalized text, rounded bbox
 * - is_primary / is_clickable, read from `visual_cues` when that object is
 *   present, otherwise from the element's top-level fields
 *
 * @param elem - Raw element object
 * @returns Canonical element object with normalized fields
 */
export function canonicalizeElement(elem: ElementData): CanonicalElement {
  // Only fall back to the top-level flags when there is no visual_cues object.
  // (Defaulting visual_cues to {} first would make that fallback unreachable.)
  const cues =
    typeof elem.visual_cues === 'object' && elem.visual_cues !== null
      ? elem.visual_cues
      : undefined;
  const isPrimary = (cues ? cues.is_primary : elem.is_primary) || false;
  const isClickable = (cues ? cues.is_clickable : elem.is_clickable) || false;

  return {
    id: elem.id,
    role: elem.role || '',
    text_norm: normalizeText(elem.text),
    bbox: roundBBox(elem.bbox || { x: 0, y: 0, width: 0, height: 0 }),
    is_primary: isPrimary,
    is_clickable: isClickable,
  };
}

/**
 * Check if two elements have equal content (ignoring position and id).
 *
 * Both elements are canonicalized first, then role, normalized text and the
 * two visual-cue flags are compared. Pass raw elements: the text is read from
 * the `text` field, so an already-canonical object (which carries `text_norm`
 * instead) would be compared with empty text.
 *
 * @param elem1 - First raw element
 * @param elem2 - Second raw element
 * @returns True if content is equal after normalization
 */
export function contentEqual(elem1: ElementData, elem2: ElementData): boolean {
  const c1 = canonicalizeElement(elem1);
  const c2 = canonicalizeElement(elem2);

  return (
    c1.role === c2.role &&
    c1.text_norm === c2.text_norm &&
    c1.is_primary === c2.is_primary &&
    c1.is_clickable === c2.is_clickable
  );
}
+ * + * @param elem1 - First element + * @param elem2 - Second element + * @returns True if content differs after normalization + */ +export function contentChanged(elem1: ElementData, elem2: ElementData): boolean { + return !contentEqual(elem1, elem2); +} diff --git a/src/index.ts b/src/index.ts index 17a41f28..9a6edd84 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,3 +42,19 @@ export { // Tracing Layer (v0.3.1+) export { Tracer, TraceSink, JsonlTraceSink, TraceEvent, TraceEventData } from './tracing'; + +// Verification Layer (agent assertion loop) +export { + AssertOutcome, + AssertContext, + Predicate, + urlMatches, + urlContains, + exists, + notExists, + elementCount, + allOf, + anyOf, + custom, +} from './verification'; +export { AgentRuntime, AssertionRecord } from './agent-runtime'; diff --git a/src/snapshot-diff.ts b/src/snapshot-diff.ts index 793655a0..0f8d4c27 100644 --- a/src/snapshot-diff.ts +++ b/src/snapshot-diff.ts @@ -1,57 +1,43 @@ /** * Snapshot comparison utilities for diff_status detection. * Implements change detection logic for the Diff Overlay feature. + * + * Uses shared canonicalization helpers from canonicalization.ts to ensure + * consistent comparison behavior with tracing/indexer.ts. */ +import { bboxChanged, contentChanged, ElementData } from './canonicalization'; import { Element, Snapshot } from './types'; -export class SnapshotDiff { - /** - * Check if element's bounding box has changed significantly. 
- * @param el1 - First element - * @param el2 - Second element - * @param threshold - Position change threshold in pixels (default: 5.0) - * @returns True if position or size changed beyond threshold - */ - private static hasBboxChanged(el1: Element, el2: Element, threshold: number = 5.0): boolean { - return ( - Math.abs(el1.bbox.x - el2.bbox.x) > threshold || - Math.abs(el1.bbox.y - el2.bbox.y) > threshold || - Math.abs(el1.bbox.width - el2.bbox.width) > threshold || - Math.abs(el1.bbox.height - el2.bbox.height) > threshold - ); - } - - /** - * Check if element's content has changed. - * @param el1 - First element - * @param el2 - Second element - * @returns True if text, role, or visual properties changed - */ - private static hasContentChanged(el1: Element, el2: Element): boolean { - // Compare text content - if (el1.text !== el2.text) { - return true; - } - - // Compare role - if (el1.role !== el2.role) { - return true; - } - - // Compare visual cues - if (el1.visual_cues.is_primary !== el2.visual_cues.is_primary) { - return true; - } - if (el1.visual_cues.is_clickable !== el2.visual_cues.is_clickable) { - return true; - } - - return false; - } +/** + * Convert Element to ElementData for canonicalization helpers. + */ +function elementToData(el: Element): ElementData { + return { + id: el.id, + role: el.role, + text: el.text, + bbox: { + x: el.bbox.x, + y: el.bbox.y, + width: el.bbox.width, + height: el.bbox.height, + }, + visual_cues: { + is_primary: el.visual_cues.is_primary, + is_clickable: el.visual_cues.is_clickable, + }, + }; +} +export class SnapshotDiff { /** * Compare current snapshot with previous and set diff_status on elements. 
+ * + * Uses canonicalized comparisons: + * - Text is normalized (trimmed, collapsed whitespace, lowercased) + * - Bbox changes are detected on raw coordinates with a 5px threshold (no grid rounding) + * + * @param current - Current snapshot * @param previous - Previous snapshot (undefined if this is the first snapshot) * @returns List of elements with diff_status set (includes REMOVED elements from previous) @@ -83,25 +69,29 @@ export class SnapshotDiff { diff_status: 'ADDED', }); } else { - // Element existed before - check for changes + // Element existed before - check for changes using canonicalized comparisons const prevEl = previousById.get(el.id)!; - const bboxChanged = SnapshotDiff.hasBboxChanged(el, prevEl); - const contentChanged = SnapshotDiff.hasContentChanged(el, prevEl); + // Convert to ElementData for canonicalization helpers + const elData = elementToData(el); + const prevElData = elementToData(prevEl); + + const hasBboxChanged = bboxChanged(elData.bbox!, prevElData.bbox!); + const hasContentChanged = contentChanged(elData, prevElData); - if (bboxChanged && contentChanged) { + if (hasBboxChanged && hasContentChanged) { // Both position and content changed - mark as MODIFIED result.push({ ...el, diff_status: 'MODIFIED', }); - } else if (bboxChanged) { + } else if (hasBboxChanged) { // Only position changed - mark as MOVED result.push({ ...el, diff_status: 'MOVED', }); - } else if (contentChanged) { + } else if (hasContentChanged) { // Only content changed - mark as MODIFIED result.push({ ...el, diff --git a/src/tracing/indexer.ts b/src/tracing/indexer.ts index 2c38e358..fdc8e0ab 100644 --- a/src/tracing/indexer.ts +++ b/src/tracing/indexer.ts @@ -5,6 +5,7 @@ import * as fs from 'fs'; import * as crypto from 'crypto'; import * as path from 'path'; +import { canonicalizeElement } from '../canonicalization'; import { TraceIndex, StepIndex, @@ -16,38 +17,6 @@ import { StepStatus, } from './index-schema'; -/** - * Normalize text for digest: trim, collapse whitespace, lowercase, 
cap length - */ -function normalizeText(text: string | undefined, maxLen: number = 80): string { - if (!text) return ''; - - // Trim and collapse whitespace - let normalized = text.split(/\s+/).join(' ').trim(); - - // Lowercase - normalized = normalized.toLowerCase(); - - // Cap length - if (normalized.length > maxLen) { - normalized = normalized.substring(0, maxLen); - } - - return normalized; -} - -/** - * Round bbox coordinates to reduce noise (default: 2px precision) - */ -function roundBBox(bbox: any, precision: number = 2): any { - return { - x: Math.round((bbox.x || 0) / precision) * precision, - y: Math.round((bbox.y || 0) / precision) * precision, - width: Math.round((bbox.width || 0) / precision) * precision, - height: Math.round((bbox.height || 0) / precision) * precision, - }; -} - /** * Compute stable digest of snapshot for diffing */ @@ -56,28 +25,8 @@ function computeSnapshotDigest(snapshotData: any): string { const viewport = snapshotData.viewport || {}; const elements = snapshotData.elements || []; - // Canonicalize elements - const canonicalElements = elements.map((elem: any) => { - // Extract is_primary and is_clickable from visual_cues if present - const visualCues = elem.visual_cues || {}; - const isPrimary = - typeof visualCues === 'object' && visualCues !== null - ? visualCues.is_primary || false - : elem.is_primary || false; - const isClickable = - typeof visualCues === 'object' && visualCues !== null - ? 
visualCues.is_clickable || false - : elem.is_clickable || false; - - return { - id: elem.id, - role: elem.role || '', - text_norm: normalizeText(elem.text), - bbox: roundBBox(elem.bbox || { x: 0, y: 0, width: 0, height: 0 }), - is_primary: isPrimary, - is_clickable: isClickable, - }; - }); + // Canonicalize elements using shared helper + const canonicalElements = elements.map((elem: any) => canonicalizeElement(elem)); // Sort by element id for determinism canonicalElements.sort((a: { id?: number }, b: { id?: number }) => (a.id || 0) - (b.id || 0)); diff --git a/src/tracing/types.ts b/src/tracing/types.ts index 0847cbb3..7dcac36b 100644 --- a/src/tracing/types.ts +++ b/src/tracing/types.ts @@ -95,6 +95,17 @@ export interface ElementFound { bounding_box: { x: number; y: number; width: number; height: number }; } +/** + * Assertion result for verification events + */ +export interface AssertionResult { + label: string; + passed: boolean; + required?: boolean; + reason?: string; + details?: Record; +} + /** * Verify signals for step_end events */ @@ -102,6 +113,10 @@ export interface VerifySignals { url_changed?: boolean; error?: string; elements_found?: ElementFound[]; + // Assertion results from agent verification loop + assertions?: AssertionResult[]; + task_done?: boolean; + task_done_label?: string; } /** @@ -163,6 +178,14 @@ export interface TraceEventData { exec?: ExecutionData; post?: SnapshotInfo; verify?: VerifyData; + + // Verification event fields (for assertion loop) + kind?: 'assert' | 'task_done'; + label?: string; + passed?: boolean; + required?: boolean; + reason?: string; + details?: Record; } /** diff --git a/src/verification.ts b/src/verification.ts new file mode 100644 index 00000000..90ea3e73 --- /dev/null +++ b/src/verification.ts @@ -0,0 +1,364 @@ +/** + * Verification primitives for agent assertion loops. + * + * This module provides assertion predicates and outcome types for runtime verification + * in agent loops. 
/**
 * Verification primitives for agent assertion loops.
 *
 * This module provides assertion predicates and outcome types for runtime verification
 * in agent loops. Assertions evaluate against the current browser state (snapshot/url)
 * and record results into the trace.
 *
 * Key concepts:
 * - AssertOutcome: Result of evaluating an assertion
 * - AssertContext: Context provided to assertion predicates (snapshot, url, stepId)
 * - Predicate: Callable that takes context and returns outcome
 *
 * @example
 * ```typescript
 * import { urlMatches, exists, AssertContext } from './verification';
 *
 * // Create predicates
 * const onSearchPage = urlMatches(/\/s\?k=/);
 * const resultsLoaded = exists("text~'Results'");
 *
 * // Evaluate against context (all three fields are required)
 * const ctx: AssertContext = {
 *   snapshot,
 *   url: "https://example.com/s?k=shoes",
 *   stepId: null,
 * };
 * const outcome = onSearchPage(ctx);
 * console.log(outcome.passed); // true
 * ```
 */

import { Snapshot, QuerySelector } from './types';
import { query } from './query';

/**
 * Convert QuerySelector to string for display/logging.
 *
 * String selectors are returned unchanged; any other selector value is
 * serialized with JSON.stringify.
 */
function selectorToString(selector: QuerySelector): string {
  return typeof selector === 'string' ? selector : JSON.stringify(selector);
}

/**
 * Result of evaluating an assertion predicate.
 */
export interface AssertOutcome {
  /** Whether the assertion passed */
  passed: boolean;
  /** Human-readable explanation (especially useful when failed) */
  reason: string;
  /** Additional structured data for debugging/display */
  // Free-form bag: each predicate stores its own keys here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  details: Record<string, any>;
}

/**
 * Context provided to assertion predicates.
 *
 * Provides access to current browser state without requiring
 * the predicate to know about browser internals.
 */
export interface AssertContext {
  /** Current page snapshot (may be null if not taken) */
  snapshot: Snapshot | null;
  /** Current page URL */
  url: string | null;
  /** Current step identifier (for trace correlation) */
  stepId: string | null;
}
/**
 * Type alias for assertion predicates.
 * A predicate takes context and returns an outcome.
 */
export type Predicate = (ctx: AssertContext) => AssertOutcome;

/**
 * Create a predicate that checks if current URL matches a regex pattern.
 *
 * A null URL in the context is treated as the empty string. The predicate
 * works on a private copy of the pattern and rewinds it before every check,
 * so a RegExp carrying the `g` or `y` flag gives the same answer on every
 * call and the caller's RegExp object is never mutated.
 *
 * @param pattern - Regular expression pattern or string to match against URL
 * @returns Predicate function that evaluates URL matching
 * @throws SyntaxError if a string pattern is not a valid regular expression
 *
 * @example
 * ```typescript
 * const pred = urlMatches(/\/search\?q=/);
 * const ctx = { snapshot: null, url: "https://example.com/search?q=shoes", stepId: null };
 * const outcome = pred(ctx);
 * console.log(outcome.passed); // true
 * ```
 */
export function urlMatches(pattern: string | RegExp): Predicate {
  // new RegExp(x) compiles a string and clones a RegExp (same source and flags).
  const rx = new RegExp(pattern);

  return (ctx: AssertContext): AssertOutcome => {
    const url = ctx.url || '';
    // test() on a global/sticky regex resumes from lastIndex; always start at 0.
    rx.lastIndex = 0;
    const ok = rx.test(url);
    return {
      passed: ok,
      reason: ok ? '' : `url did not match pattern: ${pattern}`,
      // URL is truncated to keep trace payloads small.
      details: { pattern: String(pattern), url: url.substring(0, 200) },
    };
  };
}

/**
 * Create a predicate that checks if current URL contains a substring.
 *
 * A null URL in the context is treated as the empty string.
 *
 * @param substring - String to search for in URL
 * @returns Predicate function that evaluates URL containment
 *
 * @example
 * ```typescript
 * const pred = urlContains("/cart");
 * const ctx = { snapshot: null, url: "https://example.com/cart/checkout", stepId: null };
 * const outcome = pred(ctx);
 * console.log(outcome.passed); // true
 * ```
 */
export function urlContains(substring: string): Predicate {
  return (ctx: AssertContext): AssertOutcome => {
    const url = ctx.url || '';
    const ok = url.includes(substring);
    return {
      passed: ok,
      reason: ok ? '' : `url does not contain: ${substring}`,
      // URL is truncated to keep trace payloads small.
      details: { substring, url: url.substring(0, 200) },
    };
  };
}
/**
 * Build a predicate that passes when at least one snapshot element matches
 * `selector`.
 *
 * Matching is delegated to the SDK's `query` function. If the context carries
 * no snapshot the predicate fails with reason `'no snapshot available'`.
 *
 * @param selector - Semantic selector (e.g., "role=button text~'Sign in'")
 * @returns Predicate function that evaluates element existence
 *
 * @example
 * ```typescript
 * const pred = exists("text~'Results'");
 * // Will check if snapshot contains elements with "Results" in text
 * ```
 */
export function exists(selector: QuerySelector): Predicate {
  const label = selectorToString(selector);

  return ({ snapshot }: AssertContext): AssertOutcome => {
    if (!snapshot) {
      return {
        passed: false,
        reason: 'no snapshot available',
        details: { selector: label },
      };
    }

    const matched = query(snapshot, selector).length;
    if (matched > 0) {
      return { passed: true, reason: '', details: { selector: label, matched } };
    }

    return {
      passed: false,
      reason: `no elements matched selector: ${label}`,
      details: { selector: label, matched },
    };
  };
}

/**
 * Build a predicate that passes only when NO snapshot element matches
 * `selector`.
 *
 * Handy for asserting that error messages, loading spinners, etc. are gone.
 * If the context carries no snapshot the predicate fails with reason
 * `'no snapshot available'`.
 *
 * @param selector - Semantic selector
 * @returns Predicate function that evaluates element non-existence
 *
 * @example
 * ```typescript
 * const pred = notExists("text~'Loading'");
 * // Will pass if no elements contain "Loading" text
 * ```
 */
export function notExists(selector: QuerySelector): Predicate {
  const label = selectorToString(selector);

  return ({ snapshot }: AssertContext): AssertOutcome => {
    if (!snapshot) {
      return {
        passed: false,
        reason: 'no snapshot available',
        details: { selector: label },
      };
    }

    const matched = query(snapshot, selector).length;
    if (matched === 0) {
      return { passed: true, reason: '', details: { selector: label, matched } };
    }

    return {
      passed: false,
      reason: `found ${matched} elements matching: ${label}`,
      details: { selector: label, matched },
    };
  };
}
/**
 * Build a predicate that constrains how many snapshot elements match
 * `selector`.
 *
 * The count must be at least `minCount` (default 0) and, when `maxCount` is
 * given, at most `maxCount`. If the context carries no snapshot the predicate
 * fails with reason `'no snapshot available'`.
 *
 * @param selector - Semantic selector
 * @param options - Count constraints (`minCount`, `maxCount`, both inclusive)
 * @returns Predicate function that evaluates element count
 *
 * @example
 * ```typescript
 * const pred = elementCount("role=button", { minCount: 1, maxCount: 5 });
 * // Will pass if 1-5 buttons found
 * ```
 */
export function elementCount(
  selector: QuerySelector,
  options: { minCount?: number; maxCount?: number } = {}
): Predicate {
  const { minCount: lower = 0, maxCount: upper } = options;
  const label = selectorToString(selector);

  return (ctx: AssertContext): AssertOutcome => {
    if (!ctx.snapshot) {
      return {
        passed: false,
        reason: 'no snapshot available',
        details: { selector: label, minCount: lower, maxCount: upper },
      };
    }

    const found = query(ctx.snapshot, selector).length;
    const withinRange = found >= lower && (upper === undefined || found <= upper);

    // The expectation wording depends on whether an upper bound was supplied.
    const expectation =
      upper !== undefined
        ? `expected ${lower}-${upper} elements`
        : `expected at least ${lower} elements`;

    return {
      passed: withinRange,
      reason: withinRange ? '' : `${expectation}, found ${found}`,
      details: {
        selector: label,
        matched: found,
        minCount: lower,
        maxCount: upper,
      },
    };
  };
}
/**
 * Create a predicate that passes only if ALL sub-predicates pass.
 *
 * Every sub-predicate is evaluated (no short-circuit), so `details` always
 * holds one entry per sub-predicate and `reason` lists every failure joined
 * by `'; '`. With no sub-predicates the result passes.
 *
 * @param predicates - Predicate functions to combine with AND logic
 * @returns Combined predicate
 *
 * @example
 * ```typescript
 * const pred = allOf(urlContains("/cart"), exists("text~'Checkout'"));
 * // Will pass only if both conditions are true
 * ```
 */
export function allOf(...predicates: Predicate[]): Predicate {
  return (ctx: AssertContext): AssertOutcome => {
    const failedReasons: string[] = [];
    const allDetails: AssertOutcome['details'][] = [];

    for (const predicate of predicates) {
      const outcome = predicate(ctx);
      allDetails.push(outcome.details);
      if (!outcome.passed) {
        failedReasons.push(outcome.reason);
      }
    }

    return {
      passed: failedReasons.length === 0,
      reason: failedReasons.join('; '),
      details: { subPredicates: allDetails, failedCount: failedReasons.length },
    };
  };
}

/**
 * Create a predicate that passes if ANY sub-predicate passes.
 *
 * Sub-predicates are evaluated in order and evaluation stops at the first one
 * that passes; its position is reported as `details.matchedAtIndex`. With no
 * sub-predicates the result fails.
 *
 * @param predicates - Predicate functions to combine with OR logic
 * @returns Combined predicate
 *
 * @example
 * ```typescript
 * const pred = anyOf(exists("text~'Success'"), exists("text~'Complete'"));
 * // Will pass if either condition is true
 * ```
 */
export function anyOf(...predicates: Predicate[]): Predicate {
  return (ctx: AssertContext): AssertOutcome => {
    const allReasons: string[] = [];
    const allDetails: AssertOutcome['details'][] = [];

    for (const [index, predicate] of predicates.entries()) {
      const outcome = predicate(ctx);
      allDetails.push(outcome.details);
      if (outcome.passed) {
        // Short-circuit: later predicates are not evaluated.
        return {
          passed: true,
          reason: '',
          details: { subPredicates: allDetails, matchedAtIndex: index },
        };
      }
      allReasons.push(outcome.reason);
    }

    return {
      passed: false,
      reason: `none of ${predicates.length} predicates passed: ${allReasons.join('; ')}`,
      details: { subPredicates: allDetails },
    };
  };
}
/**
 * Wrap an arbitrary boolean check as a predicate.
 *
 * The check's return value becomes `passed`. If the check throws, the error
 * is caught and reported as a failed outcome whose reason and `details.error`
 * carry the stringified error.
 *
 * @param checkFn - Function that takes AssertContext and returns boolean
 * @param label - Label for debugging/display
 * @returns Predicate wrapping the custom function
 *
 * @example
 * ```typescript
 * const pred = custom(
 *   (ctx) => ctx.snapshot !== null && ctx.snapshot.elements.length > 10,
 *   "has_many_elements"
 * );
 * ```
 */
export function custom(
  checkFn: (ctx: AssertContext) => boolean,
  label: string = 'custom'
): Predicate {
  return (ctx: AssertContext): AssertOutcome => {
    let verdict: boolean;

    try {
      verdict = checkFn(ctx);
    } catch (e) {
      // A throwing check is a failed assertion, not a crash of the agent loop.
      return {
        passed: false,
        reason: `custom check '${label}' raised exception: ${e}`,
        details: { label, error: String(e) },
      };
    }

    if (verdict) {
      return { passed: verdict, reason: '', details: { label } };
    }

    return {
      passed: verdict,
      reason: `custom check '${label}' returned false`,
      details: { label },
    };
  };
}
+ */ +function makeSnapshot(elements: Element[], url: string = 'https://example.com'): Snapshot { + return { + status: 'success', + url, + elements, + viewport: { width: 1920, height: 1080 } as Viewport, + } as Snapshot; +} + +describe('urlMatches', () => { + it('matches string pattern', () => { + const pred = urlMatches('/search\\?q='); + const ctx: AssertContext = { + snapshot: null, + url: 'https://example.com/search?q=shoes', + stepId: null, + }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + expect(outcome.reason).toBe(''); + }); + + it('matches regex pattern', () => { + const pred = urlMatches(/\/search\?q=/); + const ctx: AssertContext = { + snapshot: null, + url: 'https://example.com/search?q=shoes', + stepId: null, + }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('returns false when no match', () => { + const pred = urlMatches('/cart'); + const ctx: AssertContext = { + snapshot: null, + url: 'https://example.com/search?q=shoes', + stepId: null, + }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('did not match'); + }); + + it('handles null url', () => { + const pred = urlMatches('/search'); + const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + }); + + it('includes pattern and url in details', () => { + const pred = urlMatches('/test'); + const ctx: AssertContext = { snapshot: null, url: 'https://example.com/test', stepId: null }; + const outcome = pred(ctx); + expect(outcome.details.pattern).toBe('/test'); + expect(outcome.details.url).toContain('example.com'); + }); +}); + +describe('urlContains', () => { + it('finds substring', () => { + const pred = urlContains('/cart'); + const ctx: AssertContext = { + snapshot: null, + url: 'https://example.com/cart/checkout', + stepId: null, + }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + 
it('returns false when substring not found', () => { + const pred = urlContains('/orders'); + const ctx: AssertContext = { snapshot: null, url: 'https://example.com/cart', stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('does not contain'); + }); + + it('handles null url', () => { + const pred = urlContains('/test'); + const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + }); +}); + +describe('exists', () => { + it('finds element by role', () => { + const elements = [makeElement(1, 'button', 'Click me')]; + const snap = makeSnapshot(elements); + const pred = exists('role=button'); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + expect(outcome.details.matched).toBe(1); + }); + + it('returns false when element not found', () => { + const elements = [makeElement(1, 'button', 'Click me')]; + const snap = makeSnapshot(elements); + const pred = exists('role=link'); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('no elements matched'); + }); + + it('finds element by text', () => { + const elements = [makeElement(1, 'button', 'Submit Form')]; + const snap = makeSnapshot(elements); + const pred = exists("text~'Submit'"); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('handles null snapshot', () => { + const pred = exists('role=button'); + const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('no snapshot available'); + }); +}); + +describe('notExists', () => { + 
it('passes when element absent', () => { + const elements = [makeElement(1, 'button')]; + const snap = makeSnapshot(elements); + const pred = notExists("text~'Loading'"); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('fails when element present', () => { + const elements = [makeElement(1, 'button', 'Loading...')]; + const snap = makeSnapshot(elements); + const pred = notExists("text~'Loading'"); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('found 1 elements'); + }); +}); + +describe('elementCount', () => { + it('passes when min count satisfied', () => { + const elements = [makeElement(0, 'button'), makeElement(1, 'button'), makeElement(2, 'button')]; + const snap = makeSnapshot(elements); + const pred = elementCount('role=button', { minCount: 2 }); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('fails when min count not satisfied', () => { + const elements = [makeElement(1, 'button')]; + const snap = makeSnapshot(elements); + const pred = elementCount('role=button', { minCount: 5 }); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('expected at least 5'); + }); + + it('passes when within min-max range', () => { + const elements = [makeElement(0, 'button'), makeElement(1, 'button'), makeElement(2, 'button')]; + const snap = makeSnapshot(elements); + const pred = elementCount('role=button', { minCount: 1, maxCount: 5 }); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('fails when 
max count exceeded', () => { + const elements = Array.from({ length: 10 }, (_, i) => makeElement(i, 'button')); + const snap = makeSnapshot(elements); + const pred = elementCount('role=button', { minCount: 1, maxCount: 5 }); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('expected 1-5'); + }); +}); + +describe('allOf', () => { + it('passes when all predicates pass', () => { + const elements = [makeElement(1, 'button', 'Checkout')]; + const snap = makeSnapshot(elements, 'https://example.com/cart'); + const pred = allOf(urlContains('/cart'), exists('role=button')); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + expect(outcome.details.failedCount).toBe(0); + }); + + it('fails when one predicate fails', () => { + const elements = [makeElement(1, 'button')]; + const snap = makeSnapshot(elements, 'https://example.com/home'); + const pred = allOf(urlContains('/cart'), exists('role=button')); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.details.failedCount).toBe(1); + }); + + it('fails when all predicates fail', () => { + const elements = [makeElement(1, 'link')]; + const snap = makeSnapshot(elements, 'https://example.com/home'); + const pred = allOf(urlContains('/cart'), exists('role=button')); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.details.failedCount).toBe(2); + }); +}); + +describe('anyOf', () => { + it('passes when first predicate passes', () => { + const elements = [makeElement(1, 'button', 'Success')]; + const snap = makeSnapshot(elements); + const pred = anyOf(exists("text~'Success'"), 
exists("text~'Complete'")); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('passes when second predicate passes', () => { + const elements = [makeElement(1, 'button', 'Complete')]; + const snap = makeSnapshot(elements); + const pred = anyOf(exists("text~'Success'"), exists("text~'Complete'")); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('fails when no predicates pass', () => { + const elements = [makeElement(1, 'button', 'Error')]; + const snap = makeSnapshot(elements); + const pred = anyOf(exists("text~'Success'"), exists("text~'Complete'")); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('none of 2 predicates passed'); + }); +}); + +describe('custom', () => { + it('passes when function returns true', () => { + const pred = custom(ctx => ctx.url !== null, 'has_url'); + const ctx: AssertContext = { snapshot: null, url: 'https://example.com', stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('fails when function returns false', () => { + const pred = custom(ctx => ctx.url === null, 'no_url'); + const ctx: AssertContext = { snapshot: null, url: 'https://example.com', stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('returned false'); + }); + + it('can check snapshot data', () => { + const elements = Array.from({ length: 15 }, (_, i) => makeElement(i, 'button')); + const snap = makeSnapshot(elements); + const pred = custom( + ctx => ctx.snapshot !== null && ctx.snapshot.elements.length > 10, + 'has_many_elements' + ); + const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null }; + const 
outcome = pred(ctx); + expect(outcome.passed).toBe(true); + }); + + it('handles exceptions gracefully', () => { + const badCheck = (_ctx: AssertContext): boolean => { + throw new Error('Something went wrong'); + }; + const pred = custom(badCheck, 'bad_check'); + const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; + const outcome = pred(ctx); + expect(outcome.passed).toBe(false); + expect(outcome.reason).toContain('raised exception'); + expect(outcome.reason).toContain('Something went wrong'); + }); +});