diff --git a/README.md b/README.md
index 53308ba4..bd950a62 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ npx playwright install chromium
```
**For local development:**
+
```bash
npm install
npm run build
@@ -44,7 +45,7 @@ const response = await agent.execute(
console.log(response); // "I found the top result for wireless mouse on Amazon. It's priced at $24.99..."
// Follow-up questions maintain context
-const followUp = await agent.chat("Add it to cart");
+const followUp = await agent.chat('Add it to cart');
console.log(followUp);
await browser.close();
@@ -144,7 +145,9 @@ await browser.close();
## 🆕 What's New (2026-01-06)
### Human-like Typing
+
Add realistic delays between keystrokes to mimic human typing:
+
```typescript
// Type instantly (default)
await typeText(browser, elementId, 'Hello World');
@@ -154,7 +157,9 @@ await typeText(browser, elementId, 'Hello World', false, 10);
```
### Scroll to Element
+
Scroll elements into view with smooth animation:
+
```typescript
const snap = await snapshot(browser);
const button = find(snap, 'role=button text~"Submit"');
@@ -181,7 +186,7 @@ import {
SentienceAgent,
OpenAIProvider,
Tracer,
- JsonlTraceSink
+ JsonlTraceSink,
} from 'sentienceapi';
import { randomUUID } from 'crypto';
@@ -235,6 +240,38 @@ Traces are **100% compatible** with Python SDK traces - use the same tools to an
+
+🔍 Agent Runtime Verification
+
+`AgentRuntime` provides assertion predicates for runtime verification in agent loops, enabling programmatic verification of browser state during execution.
+
+```typescript
+import { SentienceBrowser } from 'sentienceapi';
+import { AgentRuntime, urlContains, exists, allOf } from 'sentienceapi';
+import { createTracer } from 'sentienceapi';
+
+const browser = new SentienceBrowser();
+await browser.start();
+const tracer = await createTracer({ runId: 'my-run', uploadTrace: false });
+const runtime = new AgentRuntime({ snapshot: (_page, options) => browser.snapshot(options) }, browser.getPage(), tracer);
+
+// Navigate and take snapshot
+await browser.getPage().goto('https://example.com');
+runtime.beginStep('Verify page');
+await runtime.snapshot();
+
+// Run assertions
+runtime.assert(urlContains('example.com'), 'on_correct_domain');
+runtime.assert(exists('role=heading'), 'has_heading');
+runtime.assertDone(exists("text~'Example'"), 'task_complete');
+
+console.log(`Task done: ${runtime.isTaskDone}`);
+```
+
+**See example:** [examples/agent-runtime-verification.ts](examples/agent-runtime-verification.ts)
+
+
+
---
@@ -261,14 +298,14 @@ async function main() {
console.log(`Found ${snap.elements.length} elements`);
// Find first product in viewport using spatial filtering
- const products = snap.elements
- .filter(el =>
+ const products = snap.elements.filter(
+ el =>
el.role === 'link' &&
el.visual_cues.is_clickable &&
el.in_viewport &&
!el.is_occluded &&
- el.bbox.y < 600 // First row
- );
+ el.bbox.y < 600 // First row
+ );
if (products.length > 0) {
// Sort by position (left to right, top to bottom)
@@ -323,12 +360,14 @@ main();
**`snapshot(browser, options?)`** - Capture page state with AI-ranked elements
Features:
+
- Returns semantic elements with roles, text, importance scores, and bounding boxes
- Optional screenshot capture (PNG/JPEG)
- Optional visual overlay to see what elements are detected
- TypeScript types for type safety
**Example:**
+
```typescript
const snap = await snapshot(browser, { screenshot: true, show_overlay: true });
@@ -353,6 +392,7 @@ for (const element of snap.elements) {
- Powerful query DSL with multiple operators
**Query Examples:**
+
```typescript
// Find by role and text
const button = find(snap, 'role=button text="Sign in"');
@@ -393,12 +433,13 @@ All actions return `ActionResult` with success status, timing, and outcome:
const result = await click(browser, element.id);
console.log(`Success: ${result.success}`);
-console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error"
+console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error"
console.log(`Duration: ${result.duration_ms}ms`);
console.log(`URL changed: ${result.url_changed}`);
```
**Coordinate-based clicking:**
+
```typescript
import { clickRect } from './src';
@@ -416,7 +457,7 @@ if (element) {
x: element.bbox.x,
y: element.bbox.y,
w: element.bbox.width,
- h: element.bbox.height
+ h: element.bbox.height,
});
}
```
@@ -430,6 +471,7 @@ if (element) {
- **`expect(browser, selector)`** - Assertion helper with fluent API
**Examples:**
+
```typescript
// Wait for element (auto-detects optimal interval based on API usage)
const result = await waitFor(browser, 'role=button text="Submit"', 10000);
@@ -447,9 +489,9 @@ const result = await waitFor(browser, 'role=button', 5000, undefined, true);
const result = await waitFor(browser, 'role=button', 5000, 500, false);
// Semantic wait conditions
-await waitFor(browser, 'clickable=true', 5000); // Wait for clickable element
-await waitFor(browser, 'importance>100', 5000); // Wait for important element
-await waitFor(browser, 'role=link visible=true', 5000); // Wait for visible link
+await waitFor(browser, 'clickable=true', 5000); // Wait for clickable element
+await waitFor(browser, 'importance>100', 5000); // Wait for important element
+await waitFor(browser, 'role=link visible=true', 5000); // Wait for visible link
// Assertions
await expect(browser, 'role=button text="Submit"').toExist(5000);
@@ -475,7 +517,7 @@ import { showOverlay, clearOverlay } from 'sentienceapi';
const snap = await snapshot(browser);
// Show overlay anytime without re-snapshotting
-await showOverlay(browser, snap); // Auto-clears after 5 seconds
+await showOverlay(browser, snap); // Auto-clears after 5 seconds
// Highlight specific target element in red
const button = find(snap, 'role=button text~"Submit"');
@@ -487,11 +529,13 @@ await clearOverlay(browser);
```
**Color Coding:**
+
- 🔴 Red: Target element
- 🔵 Blue: Primary elements (`is_primary=true`)
- 🟢 Green: Regular interactive elements
**Visual Indicators:**
+
- Border thickness/opacity scales with importance
- Semi-transparent fill
- Importance badges
@@ -504,21 +548,23 @@ await clearOverlay(browser);
📄 Content Reading
**`read(browser, options?)`** - Extract page content
+
- `format: "text"` - Plain text extraction
- `format: "markdown"` - High-quality markdown conversion (uses Turndown)
- `format: "raw"` - Cleaned HTML (default)
**Example:**
+
```typescript
import { read } from './src';
// Get markdown content
const result = await read(browser, { format: 'markdown' });
-console.log(result.content); // Markdown text
+console.log(result.content); // Markdown text
// Get plain text
const result = await read(browser, { format: 'text' });
-console.log(result.content); // Plain text
+console.log(result.content); // Plain text
```
@@ -527,11 +573,13 @@ console.log(result.content); // Plain text
📷 Screenshots
**`screenshot(browser, options?)`** - Standalone screenshot capture
+
- Returns base64-encoded data URL
- PNG or JPEG format
- Quality control for JPEG (1-100)
**Example:**
+
```typescript
import { screenshot } from './src';
import { writeFileSync } from 'fs';
@@ -558,6 +606,7 @@ const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 });
Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
**Example:**
+
```typescript
import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi';
@@ -565,8 +614,8 @@ const browser = await SentienceBrowser.create();
await browser.getPage().goto('https://example.com');
// Find "Sign In" button (simple string syntax)
-const result = await findTextRect(browser.getPage(), "Sign In");
-if (result.status === "success" && result.results) {
+const result = await findTextRect(browser.getPage(), 'Sign In');
+if (result.status === 'success' && result.results) {
const firstMatch = result.results[0];
console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
console.log(`In viewport: ${firstMatch.in_viewport}`);
@@ -577,30 +626,31 @@ if (result.status === "success" && result.results) {
x: firstMatch.rect.x,
y: firstMatch.rect.y,
w: firstMatch.rect.width,
- h: firstMatch.rect.height
+ h: firstMatch.rect.height,
});
}
}
```
**Advanced Options:**
+
```typescript
// Case-sensitive search
const result = await findTextRect(browser.getPage(), {
- text: "LOGIN",
- caseSensitive: true
+ text: 'LOGIN',
+ caseSensitive: true,
});
// Whole word only (won't match "login" as part of "loginButton")
const result = await findTextRect(browser.getPage(), {
- text: "log",
- wholeWord: true
+ text: 'log',
+ wholeWord: true,
});
// Find multiple matches
const result = await findTextRect(browser.getPage(), {
- text: "Buy",
- maxResults: 10
+ text: 'Buy',
+ maxResults: 10,
});
for (const match of result.results || []) {
if (match.in_viewport) {
@@ -611,6 +661,7 @@ for (const match of result.results || []) {
```
**Returns:** Promise with:
+
- **`status`**: "success" or "error"
- **`results`**: Array of `TextMatch` objects with:
- `text` - The matched text
@@ -620,6 +671,7 @@ for (const match of result.results || []) {
- `in_viewport` - Whether visible in current viewport
**Use Cases:**
+
- Find buttons/links by visible text without CSS selectors
- Get exact pixel coordinates for click automation
- Verify text visibility and position on page
@@ -641,15 +693,15 @@ for (const match of result.results || []) {
Elements returned by `snapshot()` have the following properties:
```typescript
-element.id // Unique identifier for interactions
-element.role // ARIA role (button, link, textbox, heading, etc.)
-element.text // Visible text content
-element.importance // AI importance score (0-1000)
-element.bbox // Bounding box (x, y, width, height)
-element.visual_cues // Visual analysis (is_primary, is_clickable, background_color)
-element.in_viewport // Is element visible in current viewport?
-element.is_occluded // Is element covered by other elements?
-element.z_index // CSS stacking order
+element.id; // Unique identifier for interactions
+element.role; // ARIA role (button, link, textbox, heading, etc.)
+element.text; // Visible text content
+element.importance; // AI importance score (0-1000)
+element.bbox; // Bounding box (x, y, width, height)
+element.visual_cues; // Visual analysis (is_primary, is_clickable, background_color)
+element.in_viewport; // Is element visible in current viewport?
+element.is_occluded; // Is element covered by other elements?
+element.z_index; // CSS stacking order
```
@@ -659,15 +711,15 @@ element.z_index // CSS stacking order
### Basic Operators
-| Operator | Description | Example |
-|----------|-------------|---------|
-| `=` | Exact match | `role=button` |
-| `!=` | Exclusion | `role!=link` |
-| `~` | Substring (case-insensitive) | `text~"sign in"` |
-| `^=` | Prefix match | `text^="Add"` |
-| `$=` | Suffix match | `text$="Cart"` |
-| `>`, `>=` | Greater than | `importance>500` |
-| `<`, `<=` | Less than | `bbox.y<600` |
+| Operator | Description | Example |
+| --------- | ---------------------------- | ---------------- |
+| `=` | Exact match | `role=button` |
+| `!=` | Exclusion | `role!=link` |
+| `~` | Substring (case-insensitive) | `text~"sign in"` |
+| `^=` | Prefix match | `text^="Add"` |
+| `$=` | Suffix match | `text$="Cart"` |
+| `>`, `>=` | Greater than | `importance>500` |
+| `<`, `<=` | Less than | `bbox.y<600` |
### Supported Fields
@@ -712,7 +764,7 @@ const browser = new SentienceBrowser(undefined, undefined, false);
const browser = new SentienceBrowser(undefined, undefined, true);
// Auto-detect based on environment (default)
-const browser = new SentienceBrowser(); // headless=true if CI=true, else false
+const browser = new SentienceBrowser(); // headless=true if CI=true, else false
```
@@ -723,6 +775,7 @@ const browser = new SentienceBrowser(); // headless=true if CI=true, else false
For users running from datacenters (AWS, DigitalOcean, etc.), you can configure a residential proxy to prevent IP-based detection by Cloudflare, Akamai, and other anti-bot services.
**Supported Formats:**
+
- HTTP: `http://username:password@host:port`
- HTTPS: `https://username:password@host:port`
- SOCKS5: `socks5://username:password@host:port`
@@ -787,9 +840,9 @@ await saveStorageState(browser.getContext(), 'auth.json');
const browser2 = new SentienceBrowser(
undefined, // apiKey
undefined, // apiUrl
- false, // headless
- undefined, // proxy
- undefined, // userDataDir
+ false, // headless
+ undefined, // proxy
+ undefined, // userDataDir
'auth.json' // storageState - inject saved session
);
await browser2.start();
@@ -797,12 +850,12 @@ await browser2.start();
// Workflow 2: Persistent sessions (cookies persist across runs)
const browser3 = new SentienceBrowser(
- undefined, // apiKey
- undefined, // apiUrl
- false, // headless
- undefined, // proxy
+ undefined, // apiKey
+ undefined, // apiUrl
+ false, // headless
+ undefined, // proxy
'./chrome_profile', // userDataDir - persist cookies
- undefined // storageState
+ undefined // storageState
);
await browser3.start();
// First run: Log in
@@ -810,6 +863,7 @@ await browser3.start();
```
**Benefits:**
+
- Bypass login screens and CAPTCHAs with valid sessions
- Save 5-10 agent steps and hundreds of tokens per run
- Maintain stateful sessions for accessing authenticated pages
@@ -827,13 +881,15 @@ See `examples/auth-injection-agent.ts` for complete examples.
Click to expand best practices
### 1. Wait for Dynamic Content
+
```typescript
await browser.goto('https://example.com');
await browser.getPage().waitForLoadState('networkidle');
-await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer
+await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer
```
### 2. Use Multiple Strategies for Finding Elements
+
```typescript
// Try exact match first
let btn = find(snap, 'role=button text="Add to Cart"');
@@ -845,6 +901,7 @@ if (!btn) {
```
### 3. Check Element Visibility Before Clicking
+
```typescript
if (element.in_viewport && !element.is_occluded) {
await click(browser, element.id);
@@ -852,6 +909,7 @@ if (element.in_viewport && !element.is_occluded) {
```
### 4. Handle Navigation
+
```typescript
const result = await click(browser, linkId);
if (result.url_changed) {
@@ -860,6 +918,7 @@ if (result.url_changed) {
```
### 5. Use Screenshots Sparingly
+
```typescript
// Fast - no screenshot (only element data)
const snap = await snapshot(browser);
@@ -869,6 +928,7 @@ const snap = await snapshot(browser, { screenshot: true });
```
### 6. Always Close Browser
+
```typescript
const browser = new SentienceBrowser();
@@ -876,7 +936,7 @@ try {
await browser.start();
// ... your automation code
} finally {
- await browser.close(); // Always clean up
+ await browser.close(); // Always clean up
}
```
@@ -890,14 +950,18 @@ try {
Click to expand common issues and solutions
### "Extension failed to load"
+
**Solution:** Build the extension first:
+
```bash
cd sentience-chrome
./build.sh
```
### "Cannot use import statement outside a module"
+
**Solution:** Don't use `node` directly. Use `ts-node` or npm scripts:
+
```bash
npx ts-node examples/hello.ts
# or
@@ -905,13 +969,17 @@ npm run example:hello
```
### "Element not found"
+
**Solutions:**
+
- Ensure page is loaded: `await browser.getPage().waitForLoadState('networkidle')`
- Use `waitFor()`: `await waitFor(browser, 'role=button', 10000)`
- Debug elements: `console.log(snap.elements.map(el => el.text))`
### Button not clickable
+
**Solutions:**
+
- Check visibility: `element.in_viewport && !element.is_occluded`
- Scroll to element: ``await browser.getPage().evaluate(`window.sentience_registry[${element.id}].scrollIntoView()`)``
@@ -948,6 +1016,7 @@ npm run example:hello
**⚠️ Important**: You cannot use `node` directly to run TypeScript files. Use one of these methods:
### Option 1: Using npm scripts (recommended)
+
```bash
npm run example:hello
npm run example:basic
@@ -956,6 +1025,7 @@ npm run example:wait
```
### Option 2: Using ts-node directly
+
```bash
npx ts-node examples/hello.ts
# or if ts-node is installed globally:
@@ -963,6 +1033,7 @@ ts-node examples/hello.ts
```
### Option 3: Compile then run
+
```bash
npm run build
# Then use compiled JavaScript from dist/
@@ -1001,7 +1072,7 @@ npm test -- snapshot.test.ts
This project is licensed under either of:
-* Apache License, Version 2.0, ([LICENSE-APACHE](./LICENSE-APACHE))
-* MIT license ([LICENSE-MIT](./LICENSE-MIT))
+- Apache License, Version 2.0, ([LICENSE-APACHE](./LICENSE-APACHE))
+- MIT license ([LICENSE-MIT](./LICENSE-MIT))
at your option.
diff --git a/examples/agent-runtime-verification.ts b/examples/agent-runtime-verification.ts
new file mode 100644
index 00000000..4d13e7b6
--- /dev/null
+++ b/examples/agent-runtime-verification.ts
@@ -0,0 +1,155 @@
+/**
+ * Example: Agent Runtime with Verification Loop
+ *
+ * Demonstrates how to use AgentRuntime for runtime verification in agent loops.
+ * The AgentRuntime provides assertion predicates to verify browser state during execution.
+ *
+ * Key features:
+ * - Predicate helpers: urlMatches, urlContains, exists, notExists, elementCount
+ * - Combinators: allOf, anyOf for complex conditions
+ * - Task completion: assertDone() for goal verification
+ * - Trace integration: Assertions emitted to trace for Studio timeline
+ *
+ * Requirements:
+ * - SENTIENCE_API_KEY (Pro or Enterprise tier)
+ *
+ * Usage:
+ * ts-node examples/agent-runtime-verification.ts
+ * or
+ * npm run example:agent-runtime
+ */
+
+import { Page } from 'playwright';
+import { SentienceBrowser } from '../src/browser';
+import { Snapshot } from '../src/types';
+import {
+ AgentRuntime,
+ urlContains,
+ urlMatches,
+ exists,
+ notExists,
+ allOf,
+} from '../src';
+import { createTracer } from '../src/tracing/tracer-factory';
+
+/**
+ * Wraps a SentienceBrowser in the shape AgentRuntime calls: snapshot(page, options).
+ * SentienceBrowser.snapshot() only takes options, so `_page` is accepted and ignored.
+ */
+function createBrowserAdapter(browser: SentienceBrowser) {
+  return {
+    snapshot: async (_page: Page, options?: Record<string, any>): Promise<Snapshot> => {
+      return await browser.snapshot(options);
+    },
+  };
+}
+
+/** Runs the demo end to end: tracer, browser, one verification step, summary, cleanup. */
+async function main() {
+  // Get API key from environment
+  const sentienceKey = process.env.SENTIENCE_API_KEY;
+
+  if (!sentienceKey) {
+    console.error('Error: SENTIENCE_API_KEY not set');
+    process.exit(1);
+  }
+
+  console.log('Starting Agent Runtime Verification Demo\n');
+
+  // 1. Create tracer for verification event emission
+  const runId = 'verification-demo';
+  const tracer = await createTracer({ apiKey: sentienceKey, runId, uploadTrace: false });
+  console.log(`Run ID: ${runId}\n`);
+
+  // 2. Create and start browser
+  const browser = new SentienceBrowser(sentienceKey, undefined, false);
+  await browser.start();
+  const page = browser.getPage();
+
+  try {
+    // 3. Create AgentRuntime with browser adapter, page, and tracer
+    const browserAdapter = createBrowserAdapter(browser);
+    const runtime = new AgentRuntime(browserAdapter, page, tracer);
+
+    // 4. Navigate to a page
+    console.log('Navigating to example.com...\n');
+    await page.goto('https://example.com');
+    await page.waitForLoadState('networkidle');
+
+    // Wait for extension to inject (waitForFunction: 2nd param is the page-function arg, 3rd is options)
+    try {
+      await page.waitForFunction(
+        () => typeof (window as any).sentience !== 'undefined',
+        undefined,
+        { timeout: 10000 }
+      );
+    } catch {
+      console.warn('Extension not ready, continuing anyway...');
+    }
+
+    // 5. Begin a verification step
+    runtime.beginStep('Verify page loaded correctly');
+
+    // 6. Take a snapshot (required for element assertions)
+    const snapshot = await runtime.snapshot();
+    console.log(`Snapshot taken: ${snapshot.elements.length} elements found\n`);
+
+    // 7. Run assertions against current state
+    console.log('Running assertions:\n');
+
+    // URL assertions
+    const urlOk = runtime.assert(urlContains('example.com'), 'on_example_domain');
+    console.log(` [${urlOk ? 'PASS' : 'FAIL'}] on_example_domain`);
+
+    const urlMatch = runtime.assert(urlMatches(/https:\/\/.*example\.com/), 'url_is_https');
+    console.log(` [${urlMatch ? 'PASS' : 'FAIL'}] url_is_https`);
+
+    // Element assertions
+    const hasHeading = runtime.assert(exists('role=heading'), 'has_heading');
+    console.log(` [${hasHeading ? 'PASS' : 'FAIL'}] has_heading`);
+
+    const noError = runtime.assert(notExists("text~'Error'"), 'no_error_message');
+    console.log(` [${noError ? 'PASS' : 'FAIL'}] no_error_message`);
+
+    // Combined assertion with allOf
+    const pageReady = runtime.assert(
+      allOf(urlContains('example'), exists('role=link')),
+      'page_fully_ready'
+    );
+    console.log(` [${pageReady ? 'PASS' : 'FAIL'}] page_fully_ready`);
+
+    // 8. Check if task is done (required assertion)
+    const taskComplete = runtime.assertDone(
+      exists("text~'Example Domain'"),
+      'reached_example_page'
+    );
+    console.log(`\n [${taskComplete ? 'DONE' : 'NOT DONE'}] reached_example_page`);
+
+    // 9. Get accumulated assertions for step_end event
+    const assertionsData = runtime.getAssertionsForStepEnd();
+    console.log(`\nTotal assertions: ${assertionsData.assertions.length}`);
+    console.log(`Task done: ${assertionsData.task_done ?? false}`);
+
+    // 10. Check overall status
+    console.log('\nVerification Summary:');
+    console.log(` All passed: ${runtime.allAssertionsPassed()}`);
+    console.log(` Required passed: ${runtime.requiredAssertionsPassed()}`);
+    console.log(` Task complete: ${runtime.isTaskDone}`);
+  } catch (error: unknown) {
+    const message = error instanceof Error ? error.message : String(error);
+    console.error(`\nError during execution: ${message}`);
+    throw error;
+  } finally {
+    console.log('\nClosing tracer...');
+    try {
+      await tracer.close(true);
+      console.log(`Trace saved to: ~/.sentience/traces/${runId}.jsonl`);
+    } finally {
+      // Close the browser even when closing the tracer throws
+      await browser.close();
+      console.log('Done!');
+    }
+  }
+}
+
+main().catch(console.error);
diff --git a/package-lock.json b/package-lock.json
index 78694626..4aa0fb61 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "sentienceapi",
- "version": "0.92.2",
+ "version": "0.92.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "sentienceapi",
- "version": "0.92.2",
+ "version": "0.92.3",
"license": "(MIT OR Apache-2.0)",
"dependencies": {
"playwright": "^1.40.0",
diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts
new file mode 100644
index 00000000..845ca35a
--- /dev/null
+++ b/src/agent-runtime.ts
@@ -0,0 +1,320 @@
+/**
+ * Agent runtime for verification loop support.
+ *
+ * This module provides a thin runtime wrapper that combines:
+ * 1. Browser session management
+ * 2. Snapshot/query helpers
+ * 3. Tracer for event emission
+ * 4. Assertion/verification methods
+ *
+ * The AgentRuntime is designed to be used in agent verification loops where
+ * you need to repeatedly take snapshots, execute actions, and verify results.
+ *
+ * @example
+ * ```typescript
+ * import { SentienceBrowser } from './browser';
+ * import { AgentRuntime } from './agent-runtime';
+ * import { urlMatches, exists } from './verification';
+ * import { Tracer, JsonlTraceSink } from './tracing';
+ *
+ * const browser = await SentienceBrowser.create();
+ * const page = await browser.newPage();
+ * await page.goto("https://example.com");
+ *
+ * const sink = new JsonlTraceSink("trace.jsonl");
+ * const tracer = new Tracer("test-run", sink);
+ *
+ * const runtime = new AgentRuntime({ snapshot: (_page, opts) => browser.snapshot(opts) }, page, tracer);
+ *
+ * // Take snapshot and run assertions
+ * await runtime.snapshot();
+ * runtime.assert(urlMatches(/example\.com/), "on_homepage");
+ * runtime.assert(exists("role=button"), "has_buttons");
+ *
+ * // Check if task is done
+ * if (runtime.assertDone(exists("text~'Success'"), "task_complete")) {
+ * console.log("Task completed!");
+ * }
+ * ```
+ */
+
+import { Page } from 'playwright';
+import { v4 as uuidv4 } from 'uuid';
+import { Snapshot } from './types';
+import { AssertContext, Predicate } from './verification';
+import { Tracer } from './tracing/tracer';
+
+// Minimal browser contract (just snapshot) declared locally to avoid circular dependencies
+interface BrowserLike {
+  snapshot(page: Page, options?: Record<string, any>): Promise<Snapshot>;
+}
+
+/**
+ * One evaluated assertion, accumulated per step and reported via step_end.
+ */
+export interface AssertionRecord {
+  label: string; // human-readable label passed to assert()/assertDone()
+  passed: boolean; // outcome.passed returned by the predicate
+  required: boolean; // true when asserted with required=true (assertDone always does)
+  reason: string; // outcome.reason returned by the predicate
+  details: Record<string, any>; // outcome.details returned by the predicate
+}
+
+/**
+ * Runtime wrapper for agent verification loops.
+ *
+ * Provides ergonomic methods for:
+ * - snapshot(): Take page snapshot
+ * - assert(): Evaluate assertion predicates
+ * - assertDone(): Assert task completion (required assertion)
+ *
+ * The runtime manages assertion state per step and emits verification events
+ * to the tracer for Studio timeline display.
+ */
+export class AgentRuntime {
+  /** Browser instance for taking snapshots */
+  readonly browser: BrowserLike;
+  /** Playwright Page for browser interaction */
+  readonly page: Page;
+  /** Tracer for event emission */
+  readonly tracer: Tracer;
+
+  /** Current step identifier */
+  stepId: string | null = null;
+  /** Step counter: 0 before the first beginStep(), which auto-increments it unless given an index */
+  stepIndex: number = 0;
+  /** Most recent snapshot (for assertion context) */
+  lastSnapshot: Snapshot | null = null;
+
+  /** Assertions accumulated during current step */
+  private assertionsThisStep: AssertionRecord[] = [];
+  /** Task completion tracking */
+  private taskDone: boolean = false;
+  private taskDoneLabel: string | null = null;
+
+  /**
+   * Create a new AgentRuntime.
+   *
+   * @param browser - Browser instance for taking snapshots
+   * @param page - Playwright Page for browser interaction
+   * @param tracer - Tracer for emitting verification events
+   */
+  constructor(browser: BrowserLike, page: Page, tracer: Tracer) {
+    this.browser = browser;
+    this.page = page;
+    this.tracer = tracer;
+  }
+
+  /**
+   * Build assertion context: URL comes from lastSnapshot when present, else from the live page.
+   */
+  private ctx(): AssertContext {
+    let url: string | null = null;
+    if (this.lastSnapshot) {
+      url = this.lastSnapshot.url;
+    } else if (this.page) {
+      url = this.page.url();
+    }
+
+    return {
+      snapshot: this.lastSnapshot,
+      url,
+      stepId: this.stepId,
+    };
+  }
+
+  /**
+   * Take a snapshot of the current page state.
+   *
+   * This updates lastSnapshot which is used as context for assertions.
+   *
+   * @param options - Options passed through to browser.snapshot()
+   * @returns Snapshot of current page state
+   */
+  async snapshot(options?: Record<string, any>): Promise<Snapshot> {
+    this.lastSnapshot = await this.browser.snapshot(this.page, options);
+    return this.lastSnapshot;
+  }
+
+  /**
+   * Begin a new step in the verification loop.
+   *
+   * This:
+   * - Generates a new stepId
+   * - Clears assertions from previous step
+   * - Increments stepIndex (or uses provided value)
+   *
+   * @param goal - Description of what this step aims to achieve (currently unused by this method)
+   * @param stepIndex - Optional explicit step index (otherwise auto-increments)
+   * @returns Generated stepId
+   */
+  beginStep(goal: string, stepIndex?: number): string {
+    // Clear previous step state
+    this.assertionsThisStep = [];
+
+    // Generate new stepId
+    this.stepId = uuidv4();
+
+    // Update step index
+    if (stepIndex !== undefined) {
+      this.stepIndex = stepIndex;
+    } else {
+      this.stepIndex += 1;
+    }
+
+    return this.stepId;
+  }
+
+  /**
+   * Evaluate an assertion against current snapshot state.
+   *
+   * The assertion result is:
+   * 1. Accumulated for inclusion in step_end.data.verify.signals.assertions
+   * 2. Emitted as a dedicated 'verification' event for Studio timeline
+   *
+   * @param predicate - Predicate function to evaluate
+   * @param label - Human-readable label for this assertion
+   * @param required - If true, this assertion gates step success (default: false)
+   * @returns True if assertion passed, false otherwise
+   */
+  assert(predicate: Predicate, label: string, required: boolean = false): boolean {
+    const outcome = predicate(this.ctx());
+
+    const record: AssertionRecord = {
+      label,
+      passed: outcome.passed,
+      required,
+      reason: outcome.reason,
+      details: outcome.details,
+    };
+    this.assertionsThisStep.push(record);
+
+    // Emit dedicated verification event (Option B from design doc)
+    // This makes assertions visible in Studio timeline
+    this.tracer.emit(
+      'verification',
+      {
+        kind: 'assert',
+        passed: outcome.passed,
+        label,
+        required,
+        reason: outcome.reason,
+        details: outcome.details,
+      },
+      this.stepId ?? undefined
+    );
+
+    return outcome.passed;
+  }
+
+  /**
+   * Assert task completion (required assertion).
+   *
+   * This is a convenience wrapper for assert() with required=true.
+   * When the assertion passes, it marks the task as done.
+   *
+   * Use this for final verification that the agent's goal is complete.
+   *
+   * @param predicate - Predicate function to evaluate
+   * @param label - Human-readable label for this assertion
+   * @returns True if task is complete (assertion passed), false otherwise
+   */
+  assertDone(predicate: Predicate, label: string): boolean {
+    const ok = this.assert(predicate, label, true);
+
+    if (ok) {
+      this.taskDone = true;
+      this.taskDoneLabel = label;
+
+      // Emit task_done verification event
+      this.tracer.emit(
+        'verification',
+        {
+          kind: 'task_done',
+          passed: true,
+          label,
+        },
+        this.stepId ?? undefined
+      );
+    }
+
+    return ok;
+  }
+
+  /**
+   * Get assertions data for inclusion in step_end.data.verify.signals.
+   *
+   * This is called when building the step_end event to include
+   * assertion results in the trace.
+   *
+   * @returns Object with 'assertions', 'task_done', 'task_done_label' keys
+   */
+  getAssertionsForStepEnd(): {
+    assertions: AssertionRecord[];
+    task_done?: boolean;
+    task_done_label?: string;
+  } {
+    const result: {
+      assertions: AssertionRecord[];
+      task_done?: boolean;
+      task_done_label?: string;
+    } = {
+      assertions: [...this.assertionsThisStep],
+    };
+
+    if (this.taskDone) {
+      result.task_done = true;
+      result.task_done_label = this.taskDoneLabel ?? undefined;
+    }
+
+    return result;
+  }
+
+  /**
+   * Get and clear assertions for current step.
+   *
+   * Call this at step end to get accumulated assertions
+   * for the step_end event, then clear for next step.
+   *
+   * @returns List of assertion records from this step
+   */
+  flushAssertions(): AssertionRecord[] {
+    const assertions = [...this.assertionsThisStep];
+    this.assertionsThisStep = [];
+    return assertions;
+  }
+
+  /**
+   * Check if task has been marked as done via assertDone().
+   */
+  get isTaskDone(): boolean {
+    return this.taskDone;
+  }
+
+  /**
+   * Reset task_done state (for multi-task runs).
+   */
+  resetTaskDone(): void {
+    this.taskDone = false;
+    this.taskDoneLabel = null;
+  }
+
+  /**
+   * Check if all assertions in current step passed.
+   *
+   * @returns True if all assertions passed (or no assertions made)
+   */
+  allAssertionsPassed(): boolean {
+    return this.assertionsThisStep.every(a => a.passed);
+  }
+
+  /**
+   * Check if all required assertions in current step passed.
+   *
+   * @returns True if all required assertions passed (or no required assertions)
+   */
+  requiredAssertionsPassed(): boolean {
+    const required = this.assertionsThisStep.filter(a => a.required);
+    return required.every(a => a.passed);
+  }
+}
diff --git a/src/canonicalization.ts b/src/canonicalization.ts
new file mode 100644
index 00000000..8335a139
--- /dev/null
+++ b/src/canonicalization.ts
@@ -0,0 +1,208 @@
+/**
+ * Shared canonicalization utilities for snapshot comparison and indexing.
+ *
+ * This module provides consistent normalization functions used by both:
+ * - tracing/indexer.ts (for computing stable digests)
+ * - snapshot-diff.ts (for computing diff_status labels)
+ *
+ * By sharing these helpers, we ensure consistent behavior:
+ * - Same text normalization (whitespace, case, length)
+ * - Same bbox rounding (2px precision)
+ * - Same change detection thresholds
+ */
+
+/**
+ * Rectangle described by an `x`/`y` position and a `width`/`height` size.
+ * The helpers in this module treat all four values as pixel quantities.
+ */
+export interface BBox {
+  /** Horizontal position. */
+  x: number;
+  /** Vertical position. */
+  y: number;
+  /** Horizontal extent. */
+  width: number;
+  /** Vertical extent. */
+  height: number;
+}
+
+/**
+ * Subset of an element's visual cues that takes part in canonical comparison.
+ * Both flags are optional; canonicalizeElement treats a missing flag as false.
+ */
+export interface VisualCues {
+  /** Primary-element flag. */
+  is_primary?: boolean;
+  /** Clickable-element flag. */
+  is_clickable?: boolean;
+}
+
+/**
+ * Loosely-typed element shape accepted by the canonicalization helpers.
+ * Every field is optional; canonicalizeElement supplies a default for each
+ * missing one.
+ */
+export interface ElementData {
+  /** Element identifier. */
+  id?: number;
+  /** Element role. */
+  role?: string;
+  /** Raw text; null and undefined both normalize to an empty string. */
+  text?: string | null;
+  /** Raw bounding box. */
+  bbox?: BBox;
+  /** Nested flags; preferred source for is_primary / is_clickable. */
+  visual_cues?: VisualCues;
+  /** Top-level flag, read only when `visual_cues` is not an object. */
+  is_primary?: boolean;
+  /** Top-level flag, read only when `visual_cues` is not an object. */
+  is_clickable?: boolean;
+}
+
+/**
+ * Normalized element produced by canonicalizeElement.
+ * Unlike ElementData, every field is always present.
+ */
+export interface CanonicalElement {
+  /** Identifier copied from the input; undefined when the input had none. */
+  id: number | undefined;
+  /** Role, or an empty string when the input had none. */
+  role: string;
+  /** Text after normalizeText. */
+  text_norm: string;
+  /** Bounding box after roundBBox. */
+  bbox: BBox;
+  /** Resolved primary flag. */
+  is_primary: boolean;
+  /** Resolved clickable flag. */
+  is_clickable: boolean;
+}
+
+/**
+ * Produce the canonical form of a text value used for comparison and hashing.
+ *
+ * Steps, in order:
+ * 1. Every run of whitespace becomes a single space
+ * 2. Leading and trailing whitespace is removed
+ * 3. The result is lowercased
+ * 4. The result is truncated to `maxLen` characters
+ *
+ * @param text - Input text (may be undefined/null)
+ * @param maxLen - Maximum length to retain (default: 80)
+ * @returns Normalized text string (empty string if input is falsy)
+ *
+ * @example
+ * normalizeText("  Hello   World  ") // "hello world"
+ * normalizeText(undefined) // ""
+ */
+export function normalizeText(text: string | undefined | null, maxLen: number = 80): string {
+  if (!text) {
+    return '';
+  }
+
+  const canonical = text.replace(/\s+/g, ' ').trim().toLowerCase();
+
+  return canonical.length > maxLen ? canonical.substring(0, maxLen) : canonical;
+}
+
+/**
+ * Round bbox coordinates to reduce noise.
+ *
+ * Snaps each coordinate to the nearest multiple of `precision` pixels to
+ * ignore sub-pixel rendering differences. Missing coordinates count as 0.
+ * Rounding uses `Math.round`, so exact halves round toward +Infinity.
+ *
+ * @param bbox - Bounding box with x, y, width, height (any may be missing)
+ * @param precision - Grid size in pixels (default: 2). Must be non-zero:
+ *   a precision of 0 yields NaN coordinates.
+ * @returns Bbox whose coordinates are multiples of `precision`
+ *
+ * @example
+ * roundBBox({x: 101, y: 203, width: 50, height: 25})
+ * // {x: 102, y: 204, width: 50, height: 26}
+ */
+export function roundBBox(bbox: Partial<BBox>, precision: number = 2): BBox {
+  // Snap one coordinate to the grid; undefined (and 0/NaN) inputs count as 0.
+  const snap = (value: number | undefined): number =>
+    Math.round((value || 0) / precision) * precision;
+
+  return {
+    x: snap(bbox.x),
+    y: snap(bbox.y),
+    width: snap(bbox.width),
+    height: snap(bbox.height),
+  };
+}
+
+/**
+ * Check if two bboxes are equal within a threshold.
+ *
+ * The comparison is inclusive: a field that differs by exactly `threshold`
+ * still counts as equal. Missing fields are treated as 0.
+ *
+ * @param bbox1 - First bounding box
+ * @param bbox2 - Second bounding box
+ * @param threshold - Maximum allowed difference in pixels (default: 5.0)
+ * @returns True if every bbox property differs by at most `threshold`
+ */
+export function bboxEqual(
+  bbox1: Partial<BBox>,
+  bbox2: Partial<BBox>,
+  threshold: number = 5.0
+): boolean {
+  const fields = ['x', 'y', 'width', 'height'] as const;
+
+  return fields.every(
+    field => Math.abs((bbox1[field] || 0) - (bbox2[field] || 0)) <= threshold
+  );
+}
+
+/**
+ * Check if two bboxes differ beyond the threshold.
+ *
+ * This is the exact negation of bboxEqual, provided for semantic clarity
+ * in diff detection code.
+ *
+ * @param bbox1 - First bounding box
+ * @param bbox2 - Second bounding box
+ * @param threshold - Maximum allowed difference in pixels (default: 5.0)
+ * @returns True if any bbox property differs by more than `threshold`
+ */
+export function bboxChanged(
+  bbox1: Partial<BBox>,
+  bbox2: Partial<BBox>,
+  threshold: number = 5.0
+): boolean {
+  const withinTolerance = bboxEqual(bbox1, bbox2, threshold);
+  return !withinTolerance;
+}
+
+/**
+ * Build the canonical form of an element for comparison and hashing.
+ *
+ * The result carries only the identity-relevant fields:
+ * - id (copied as-is) and role (empty string when missing)
+ * - text after normalizeText, bbox after roundBBox (all-zero box when missing)
+ * - is_primary / is_clickable, read from visual_cues when it is an object and
+ *   from the element's own top-level flags otherwise; missing flags are false
+ *
+ * @param elem - Raw element object
+ * @returns Canonical element object with normalized fields
+ */
+export function canonicalizeElement(elem: ElementData): CanonicalElement {
+  const cues = elem.visual_cues || {};
+  // `cues` can no longer be null/undefined here, so an object check is enough
+  // to decide where the flags come from.
+  const flagSource = typeof cues === 'object' ? cues : elem;
+  const primary = flagSource.is_primary || false;
+  const clickable = flagSource.is_clickable || false;
+
+  const fallbackBox = { x: 0, y: 0, width: 0, height: 0 };
+
+  return {
+    id: elem.id,
+    role: elem.role || '',
+    text_norm: normalizeText(elem.text),
+    bbox: roundBBox(elem.bbox || fallbackBox),
+    is_primary: primary,
+    is_clickable: clickable,
+  };
+}
+
+/**
+ * Decide whether two elements carry the same content, position aside.
+ *
+ * Both elements are canonicalized first; role, normalized text and the two
+ * visual-cue flags are then compared. The id and bbox play no part.
+ *
+ * @param elem1 - First element (raw or canonical)
+ * @param elem2 - Second element (raw or canonical)
+ * @returns True if content is equal after normalization
+ */
+export function contentEqual(elem1: ElementData, elem2: ElementData): boolean {
+  const left = canonicalizeElement(elem1);
+  const right = canonicalizeElement(elem2);
+
+  const comparedFields = ['role', 'text_norm', 'is_primary', 'is_clickable'] as const;
+
+  return comparedFields.every(field => left[field] === right[field]);
+}
+
+/**
+ * Decide whether two elements differ in content, position aside.
+ *
+ * Exact negation of contentEqual, kept as a separate name so diff
+ * detection code reads naturally.
+ *
+ * @param elem1 - First element
+ * @param elem2 - Second element
+ * @returns True if content differs after normalization
+ */
+export function contentChanged(elem1: ElementData, elem2: ElementData): boolean {
+  const sameContent = contentEqual(elem1, elem2);
+  return !sameContent;
+}
diff --git a/src/index.ts b/src/index.ts
index 17a41f28..9a6edd84 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -42,3 +42,19 @@ export {
// Tracing Layer (v0.3.1+)
export { Tracer, TraceSink, JsonlTraceSink, TraceEvent, TraceEventData } from './tracing';
+
+// Verification Layer (agent assertion loop)
+export {
+ AssertOutcome,
+ AssertContext,
+ Predicate,
+ urlMatches,
+ urlContains,
+ exists,
+ notExists,
+ elementCount,
+ allOf,
+ anyOf,
+ custom,
+} from './verification';
+export { AgentRuntime, AssertionRecord } from './agent-runtime';
diff --git a/src/snapshot-diff.ts b/src/snapshot-diff.ts
index 793655a0..0f8d4c27 100644
--- a/src/snapshot-diff.ts
+++ b/src/snapshot-diff.ts
@@ -1,57 +1,43 @@
/**
* Snapshot comparison utilities for diff_status detection.
* Implements change detection logic for the Diff Overlay feature.
+ *
+ * Uses shared canonicalization helpers from canonicalization.ts to ensure
+ * consistent comparison behavior with tracing/indexer.ts.
*/
+import { bboxChanged, contentChanged, ElementData } from './canonicalization';
import { Element, Snapshot } from './types';
-export class SnapshotDiff {
- /**
- * Check if element's bounding box has changed significantly.
- * @param el1 - First element
- * @param el2 - Second element
- * @param threshold - Position change threshold in pixels (default: 5.0)
- * @returns True if position or size changed beyond threshold
- */
- private static hasBboxChanged(el1: Element, el2: Element, threshold: number = 5.0): boolean {
- return (
- Math.abs(el1.bbox.x - el2.bbox.x) > threshold ||
- Math.abs(el1.bbox.y - el2.bbox.y) > threshold ||
- Math.abs(el1.bbox.width - el2.bbox.width) > threshold ||
- Math.abs(el1.bbox.height - el2.bbox.height) > threshold
- );
- }
-
- /**
- * Check if element's content has changed.
- * @param el1 - First element
- * @param el2 - Second element
- * @returns True if text, role, or visual properties changed
- */
- private static hasContentChanged(el1: Element, el2: Element): boolean {
- // Compare text content
- if (el1.text !== el2.text) {
- return true;
- }
-
- // Compare role
- if (el1.role !== el2.role) {
- return true;
- }
-
- // Compare visual cues
- if (el1.visual_cues.is_primary !== el2.visual_cues.is_primary) {
- return true;
- }
- if (el1.visual_cues.is_clickable !== el2.visual_cues.is_clickable) {
- return true;
- }
-
- return false;
- }
+/**
+ * Project an Element onto the ElementData shape expected by the
+ * canonicalization helpers. Only id, role, text, the four bbox fields and
+ * the two visual-cue flags are copied; everything else is dropped.
+ */
+function elementToData(el: Element): ElementData {
+  const { x, y, width, height } = el.bbox;
+  const { is_primary, is_clickable } = el.visual_cues;
+
+  return {
+    id: el.id,
+    role: el.role,
+    text: el.text,
+    bbox: { x, y, width, height },
+    visual_cues: { is_primary, is_clickable },
+  };
+}
+export class SnapshotDiff {
/**
* Compare current snapshot with previous and set diff_status on elements.
+ *
+ * Uses canonicalized comparisons:
+ * - Text is normalized (trimmed, collapsed whitespace, lowercased)
+ * - Bbox fields are compared with a 5px tolerance (no grid rounding) to ignore small layout shifts
+ *
* @param current - Current snapshot
* @param previous - Previous snapshot (undefined if this is the first snapshot)
* @returns List of elements with diff_status set (includes REMOVED elements from previous)
@@ -83,25 +69,29 @@ export class SnapshotDiff {
diff_status: 'ADDED',
});
} else {
- // Element existed before - check for changes
+ // Element existed before - check for changes using canonicalized comparisons
const prevEl = previousById.get(el.id)!;
- const bboxChanged = SnapshotDiff.hasBboxChanged(el, prevEl);
- const contentChanged = SnapshotDiff.hasContentChanged(el, prevEl);
+ // Convert to ElementData for canonicalization helpers
+ const elData = elementToData(el);
+ const prevElData = elementToData(prevEl);
+
+ const hasBboxChanged = bboxChanged(elData.bbox!, prevElData.bbox!);
+ const hasContentChanged = contentChanged(elData, prevElData);
- if (bboxChanged && contentChanged) {
+ if (hasBboxChanged && hasContentChanged) {
// Both position and content changed - mark as MODIFIED
result.push({
...el,
diff_status: 'MODIFIED',
});
- } else if (bboxChanged) {
+ } else if (hasBboxChanged) {
// Only position changed - mark as MOVED
result.push({
...el,
diff_status: 'MOVED',
});
- } else if (contentChanged) {
+ } else if (hasContentChanged) {
// Only content changed - mark as MODIFIED
result.push({
...el,
diff --git a/src/tracing/indexer.ts b/src/tracing/indexer.ts
index 2c38e358..fdc8e0ab 100644
--- a/src/tracing/indexer.ts
+++ b/src/tracing/indexer.ts
@@ -5,6 +5,7 @@
import * as fs from 'fs';
import * as crypto from 'crypto';
import * as path from 'path';
+import { canonicalizeElement } from '../canonicalization';
import {
TraceIndex,
StepIndex,
@@ -16,38 +17,6 @@ import {
StepStatus,
} from './index-schema';
-/**
- * Normalize text for digest: trim, collapse whitespace, lowercase, cap length
- */
-function normalizeText(text: string | undefined, maxLen: number = 80): string {
- if (!text) return '';
-
- // Trim and collapse whitespace
- let normalized = text.split(/\s+/).join(' ').trim();
-
- // Lowercase
- normalized = normalized.toLowerCase();
-
- // Cap length
- if (normalized.length > maxLen) {
- normalized = normalized.substring(0, maxLen);
- }
-
- return normalized;
-}
-
-/**
- * Round bbox coordinates to reduce noise (default: 2px precision)
- */
-function roundBBox(bbox: any, precision: number = 2): any {
- return {
- x: Math.round((bbox.x || 0) / precision) * precision,
- y: Math.round((bbox.y || 0) / precision) * precision,
- width: Math.round((bbox.width || 0) / precision) * precision,
- height: Math.round((bbox.height || 0) / precision) * precision,
- };
-}
-
/**
* Compute stable digest of snapshot for diffing
*/
@@ -56,28 +25,8 @@ function computeSnapshotDigest(snapshotData: any): string {
const viewport = snapshotData.viewport || {};
const elements = snapshotData.elements || [];
- // Canonicalize elements
- const canonicalElements = elements.map((elem: any) => {
- // Extract is_primary and is_clickable from visual_cues if present
- const visualCues = elem.visual_cues || {};
- const isPrimary =
- typeof visualCues === 'object' && visualCues !== null
- ? visualCues.is_primary || false
- : elem.is_primary || false;
- const isClickable =
- typeof visualCues === 'object' && visualCues !== null
- ? visualCues.is_clickable || false
- : elem.is_clickable || false;
-
- return {
- id: elem.id,
- role: elem.role || '',
- text_norm: normalizeText(elem.text),
- bbox: roundBBox(elem.bbox || { x: 0, y: 0, width: 0, height: 0 }),
- is_primary: isPrimary,
- is_clickable: isClickable,
- };
- });
+ // Canonicalize elements using shared helper
+ const canonicalElements = elements.map((elem: any) => canonicalizeElement(elem));
// Sort by element id for determinism
canonicalElements.sort((a: { id?: number }, b: { id?: number }) => (a.id || 0) - (b.id || 0));
diff --git a/src/tracing/types.ts b/src/tracing/types.ts
index 0847cbb3..7dcac36b 100644
--- a/src/tracing/types.ts
+++ b/src/tracing/types.ts
@@ -95,6 +95,17 @@ export interface ElementFound {
bounding_box: { x: number; y: number; width: number; height: number };
}
+/**
+ * Assertion result for verification events
+ */
+export interface AssertionResult {
+  /** Name identifying the assertion. */
+  label: string;
+  /** Whether the assertion passed. */
+  passed: boolean;
+  /** Whether the assertion was flagged as required. */
+  required?: boolean;
+  /** Human-readable explanation of the outcome. */
+  reason?: string;
+  /** Additional structured data; values must be narrowed before use. */
+  details?: Record<string, unknown>;
+}
+
/**
* Verify signals for step_end events
*/
@@ -102,6 +113,10 @@ export interface VerifySignals {
url_changed?: boolean;
error?: string;
elements_found?: ElementFound[];
+ // Assertion results from agent verification loop
+ assertions?: AssertionResult[];
+ task_done?: boolean;
+ task_done_label?: string;
}
/**
@@ -163,6 +178,14 @@ export interface TraceEventData {
exec?: ExecutionData;
post?: SnapshotInfo;
verify?: VerifyData;
+
+ // Verification event fields (for assertion loop)
+ kind?: 'assert' | 'task_done';
+ label?: string;
+ passed?: boolean;
+ required?: boolean;
+ reason?: string;
+ details?: Record;
}
/**
diff --git a/src/verification.ts b/src/verification.ts
new file mode 100644
index 00000000..90ea3e73
--- /dev/null
+++ b/src/verification.ts
@@ -0,0 +1,364 @@
+/**
+ * Verification primitives for agent assertion loops.
+ *
+ * This module provides assertion predicates and outcome types for runtime verification
+ * in agent loops. Assertions evaluate against the current browser state (snapshot/url)
+ * and record results into the trace.
+ *
+ * Key concepts:
+ * - AssertOutcome: Result of evaluating an assertion
+ * - AssertContext: Context provided to assertion predicates (snapshot, url, stepId)
+ * - Predicate: Callable that takes context and returns outcome
+ *
+ * @example
+ * ```typescript
+ * import { urlMatches, exists, AssertContext } from './verification';
+ *
+ * // Create predicates
+ * const onSearchPage = urlMatches(/\/s\?k=/);
+ * const resultsLoaded = exists("text~'Results'");
+ *
+ * // Evaluate against context
+ * const ctx: AssertContext = { snapshot, url: "https://example.com/s?k=shoes", stepId: null };
+ * const outcome = onSearchPage(ctx);
+ * console.log(outcome.passed); // true
+ * ```
+ */
+
+import { Snapshot, QuerySelector } from './types';
+import { query } from './query';
+
+/**
+ * Render a QuerySelector as text for reasons and details payloads.
+ * String selectors pass through unchanged; anything else is JSON-encoded.
+ */
+function selectorToString(selector: QuerySelector): string {
+  if (typeof selector === 'string') {
+    return selector;
+  }
+  return JSON.stringify(selector);
+}
+
+/**
+ * Result of evaluating an assertion predicate.
+ */
+export interface AssertOutcome {
+  /** Whether the assertion passed */
+  passed: boolean;
+  /** Human-readable explanation (especially useful when failed) */
+  reason: string;
+  /** Additional structured data for debugging/display; values must be narrowed before use */
+  details: Record<string, unknown>;
+}
+
+/**
+ * Read-only view of browser state handed to every assertion predicate.
+ *
+ * Predicates work from this object alone, so they need no knowledge of
+ * browser internals. All three fields are required but nullable.
+ */
+export interface AssertContext {
+  /** Page snapshot to query, or null when none has been taken */
+  snapshot: Snapshot | null;
+  /** URL of the current page, or null when unknown */
+  url: string | null;
+  /** Identifier of the current step (for trace correlation), or null */
+  stepId: string | null;
+}
+
+/**
+ * Signature shared by all assertion predicates: a function that receives
+ * the current AssertContext and returns the resulting AssertOutcome.
+ */
+export type Predicate = (ctx: AssertContext) => AssertOutcome;
+
+/**
+ * Create a predicate that checks if current URL matches a regex pattern.
+ *
+ * The pattern is compiled (string) or cloned (RegExp) once, when the
+ * predicate is created. The predicate is safe to evaluate repeatedly even
+ * when the pattern carries the `g` or `y` flag: matching always starts at
+ * the beginning of the URL. A null URL is matched as the empty string.
+ *
+ * @param pattern - Regular expression pattern or string to match against URL
+ * @returns Predicate function that evaluates URL matching
+ * @throws SyntaxError if a string pattern is not a valid regular expression
+ *
+ * @example
+ * ```typescript
+ * const pred = urlMatches(/\/search\?q=/);
+ * const ctx = { snapshot: null, url: "https://example.com/search?q=shoes", stepId: null };
+ * const outcome = pred(ctx);
+ * console.log(outcome.passed); // true
+ * ```
+ */
+export function urlMatches(pattern: string | RegExp): Predicate {
+  // `new RegExp(x)` compiles a string and clones an existing RegExp, so the
+  // caller's regex object is never touched by the predicate.
+  const rx = new RegExp(pattern);
+
+  return (ctx: AssertContext): AssertOutcome => {
+    const url = ctx.url || '';
+    // Global/sticky regexes remember `lastIndex` across test() calls, which
+    // made repeated evaluations alternate between pass and fail. Rewind first.
+    rx.lastIndex = 0;
+    const ok = rx.test(url);
+    return {
+      passed: ok,
+      reason: ok ? '' : `url did not match pattern: ${pattern}`,
+      // The URL is capped at 200 characters to keep trace payloads small.
+      details: { pattern: String(pattern), url: url.substring(0, 200) },
+    };
+  };
+}
+
+/**
+ * Build a predicate that passes when the current URL includes a substring.
+ *
+ * A null URL is treated as the empty string. The URL reported in the
+ * outcome details is capped at 200 characters.
+ *
+ * @param substring - String to search for in URL
+ * @returns Predicate function that evaluates URL containment
+ *
+ * @example
+ * ```typescript
+ * const pred = urlContains("/cart");
+ * const ctx = { snapshot: null, url: "https://example.com/cart/checkout", stepId: null };
+ * const outcome = pred(ctx);
+ * console.log(outcome.passed); // true
+ * ```
+ */
+export function urlContains(substring: string): Predicate {
+  return (ctx: AssertContext): AssertOutcome => {
+    const currentUrl = ctx.url || '';
+    const found = currentUrl.includes(substring);
+
+    let reason = '';
+    if (!found) {
+      reason = `url does not contain: ${substring}`;
+    }
+
+    return {
+      passed: found,
+      reason,
+      details: { substring, url: currentUrl.substring(0, 200) },
+    };
+  };
+}
+
+/**
+ * Build a predicate that passes when at least one element matches a selector.
+ *
+ * Matching is delegated to the SDK's query engine. Without a snapshot the
+ * predicate fails immediately and no query is run.
+ *
+ * @param selector - Semantic selector string (e.g., "role=button text~'Sign in'")
+ * @returns Predicate function that evaluates element existence
+ *
+ * @example
+ * ```typescript
+ * const pred = exists("text~'Results'");
+ * // Will check if snapshot contains elements with "Results" in text
+ * ```
+ */
+export function exists(selector: QuerySelector): Predicate {
+  const shownSelector = selectorToString(selector);
+
+  return ({ snapshot }: AssertContext): AssertOutcome => {
+    if (!snapshot) {
+      return {
+        passed: false,
+        reason: 'no snapshot available',
+        details: { selector: shownSelector },
+      };
+    }
+
+    const matchCount = query(snapshot, selector).length;
+    const found = matchCount > 0;
+
+    return {
+      passed: found,
+      reason: found ? '' : `no elements matched selector: ${shownSelector}`,
+      details: { selector: shownSelector, matched: matchCount },
+    };
+  };
+}
+
+/**
+ * Build a predicate that passes when NO element matches a selector.
+ *
+ * Handy for asserting that error messages, loading spinners, etc. are gone.
+ * Without a snapshot the predicate fails rather than assuming absence.
+ *
+ * @param selector - Semantic selector string
+ * @returns Predicate function that evaluates element non-existence
+ *
+ * @example
+ * ```typescript
+ * const pred = notExists("text~'Loading'");
+ * // Will pass if no elements contain "Loading" text
+ * ```
+ */
+export function notExists(selector: QuerySelector): Predicate {
+  const shownSelector = selectorToString(selector);
+
+  return ({ snapshot }: AssertContext): AssertOutcome => {
+    if (!snapshot) {
+      return {
+        passed: false,
+        reason: 'no snapshot available',
+        details: { selector: shownSelector },
+      };
+    }
+
+    const matchCount = query(snapshot, selector).length;
+    const absent = matchCount === 0;
+
+    return {
+      passed: absent,
+      reason: absent ? '' : `found ${matchCount} elements matching: ${shownSelector}`,
+      details: { selector: shownSelector, matched: matchCount },
+    };
+  };
+}
+
+/**
+ * Build a predicate that checks how many elements match a selector.
+ *
+ * The lower bound defaults to 0 and is always enforced; the upper bound is
+ * enforced only when `maxCount` is given. Both bounds are inclusive.
+ * Without a snapshot the predicate fails.
+ *
+ * @param selector - Semantic selector string
+ * @param options - Count constraints
+ * @returns Predicate function that evaluates element count
+ *
+ * @example
+ * ```typescript
+ * const pred = elementCount("role=button", { minCount: 1, maxCount: 5 });
+ * // Will pass if 1-5 buttons found
+ * ```
+ */
+export function elementCount(
+  selector: QuerySelector,
+  options: { minCount?: number; maxCount?: number } = {}
+): Predicate {
+  const { minCount = 0, maxCount } = options;
+  const shownSelector = selectorToString(selector);
+  const hasUpperBound = maxCount !== undefined;
+
+  return ({ snapshot }: AssertContext): AssertOutcome => {
+    if (!snapshot) {
+      return {
+        passed: false,
+        reason: 'no snapshot available',
+        details: { selector: shownSelector, minCount, maxCount },
+      };
+    }
+
+    const found = query(snapshot, selector).length;
+    const inRange = found >= minCount && (maxCount === undefined || found <= maxCount);
+
+    // The failure text names a range only when an upper bound was requested.
+    let reason: string;
+    if (inRange) {
+      reason = '';
+    } else if (hasUpperBound) {
+      reason = `expected ${minCount}-${maxCount} elements, found ${found}`;
+    } else {
+      reason = `expected at least ${minCount} elements, found ${found}`;
+    }
+
+    return {
+      passed: inRange,
+      reason,
+      details: {
+        selector: shownSelector,
+        matched: found,
+        minCount,
+        maxCount,
+      },
+    };
+  };
+}
+
+/**
+ * Create a predicate that passes only if ALL sub-predicates pass.
+ *
+ * Every sub-predicate is evaluated, even after one has failed, so the
+ * outcome reports the details of each and the reasons of all failures
+ * (joined with "; "). With no sub-predicates the result passes.
+ *
+ * @param predicates - Predicate functions to combine with AND logic
+ * @returns Combined predicate
+ *
+ * @example
+ * ```typescript
+ * const pred = allOf(urlContains("/cart"), exists("text~'Checkout'"));
+ * // Will pass only if both conditions are true
+ * ```
+ */
+export function allOf(...predicates: Predicate[]): Predicate {
+  return (ctx: AssertContext): AssertOutcome => {
+    const failedReasons: string[] = [];
+    const allDetails: Record<string, unknown>[] = [];
+
+    for (const predicate of predicates) {
+      const outcome = predicate(ctx);
+      allDetails.push(outcome.details);
+      if (!outcome.passed) {
+        failedReasons.push(outcome.reason);
+      }
+    }
+
+    return {
+      passed: failedReasons.length === 0,
+      reason: failedReasons.join('; '),
+      details: { subPredicates: allDetails, failedCount: failedReasons.length },
+    };
+  };
+}
+
+/**
+ * Create a predicate that passes if ANY sub-predicate passes.
+ *
+ * Sub-predicates are evaluated in order and evaluation stops at the first
+ * one that passes; its position is reported as `matchedAtIndex`. When none
+ * passes (including when none are given) the outcome fails and lists every
+ * collected reason.
+ *
+ * @param predicates - Predicate functions to combine with OR logic
+ * @returns Combined predicate
+ *
+ * @example
+ * ```typescript
+ * const pred = anyOf(exists("text~'Success'"), exists("text~'Complete'"));
+ * // Will pass if either condition is true
+ * ```
+ */
+export function anyOf(...predicates: Predicate[]): Predicate {
+  return (ctx: AssertContext): AssertOutcome => {
+    const allReasons: string[] = [];
+    const allDetails: Record<string, unknown>[] = [];
+
+    for (let i = 0; i < predicates.length; i++) {
+      const outcome = predicates[i](ctx);
+      allDetails.push(outcome.details);
+      if (outcome.passed) {
+        // Short-circuit: later predicates are not evaluated.
+        return {
+          passed: true,
+          reason: '',
+          details: { subPredicates: allDetails, matchedAtIndex: i },
+        };
+      }
+      allReasons.push(outcome.reason);
+    }
+
+    return {
+      passed: false,
+      reason: `none of ${predicates.length} predicates passed: ${allReasons.join('; ')}`,
+      details: { subPredicates: allDetails },
+    };
+  };
+}
+
+/**
+ * Wrap an arbitrary boolean check as a predicate.
+ *
+ * An exception thrown by the check is caught and reported as a failed
+ * outcome (with the stringified error in the details) instead of
+ * propagating to the caller.
+ *
+ * @param checkFn - Function that takes AssertContext and returns boolean
+ * @param label - Label for debugging/display
+ * @returns Predicate wrapping the custom function
+ *
+ * @example
+ * ```typescript
+ * const pred = custom(
+ *   (ctx) => ctx.snapshot !== null && ctx.snapshot.elements.length > 10,
+ *   "has_many_elements"
+ * );
+ * ```
+ */
+export function custom(
+  checkFn: (ctx: AssertContext) => boolean,
+  label: string = 'custom'
+): Predicate {
+  return (ctx: AssertContext): AssertOutcome => {
+    let verdict: boolean;
+    try {
+      verdict = checkFn(ctx);
+    } catch (e) {
+      return {
+        passed: false,
+        reason: `custom check '${label}' raised exception: ${e}`,
+        details: { label, error: String(e) },
+      };
+    }
+
+    if (verdict) {
+      return { passed: verdict, reason: '', details: { label } };
+    }
+    return {
+      passed: verdict,
+      reason: `custom check '${label}' returned false`,
+      details: { label },
+    };
+  };
+}
diff --git a/tests/verification.test.ts b/tests/verification.test.ts
new file mode 100644
index 00000000..d4b982d9
--- /dev/null
+++ b/tests/verification.test.ts
@@ -0,0 +1,336 @@
+/**
+ * Tests for verification module - assertion predicates for agent loops.
+ */
+
+import {
+ AssertContext,
+ AssertOutcome,
+ Predicate,
+ urlMatches,
+ urlContains,
+ exists,
+ notExists,
+ elementCount,
+ allOf,
+ anyOf,
+ custom,
+} from '../src/verification';
+import { Snapshot, Element, BBox, Viewport, VisualCues } from '../src/types';
+
+/**
+ * Build an Element fixture for the predicate tests.
+ * Every element gets the same bbox and visual cues; a null text is stored
+ * as undefined.
+ */
+function makeElement(
+  id: number,
+  role: string = 'button',
+  text?: string | null,
+  importance: number = 100
+): Element {
+  const bbox = { x: 0, y: 0, width: 100, height: 50 } as BBox;
+  const visual_cues = {
+    is_primary: false,
+    is_clickable: true,
+    background_color_name: null,
+  } as VisualCues;
+
+  const fixture = {
+    id,
+    role,
+    text: text === null ? undefined : text,
+    importance,
+    bbox,
+    visual_cues,
+  };
+  return fixture as Element;
+}
+
+/**
+ * Build a Snapshot fixture around the given elements.
+ * The status is always 'success' and the viewport is fixed at 1920x1080.
+ */
+function makeSnapshot(elements: Element[], url: string = 'https://example.com'): Snapshot {
+  const viewport = { width: 1920, height: 1080 } as Viewport;
+  const fixture = { status: 'success' as const, url, elements, viewport };
+  return fixture as Snapshot;
+}
+
+describe('urlMatches', () => { // URL predicate backed by a regular expression
+ it('matches string pattern', () => {
+ const pred = urlMatches('/search\\?q='); // string patterns are compiled to a RegExp, so `?` is escaped to match literally
+ const ctx: AssertContext = {
+ snapshot: null,
+ url: 'https://example.com/search?q=shoes',
+ stepId: null,
+ };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ expect(outcome.reason).toBe(''); // a passing outcome carries an empty reason
+ });
+
+ it('matches regex pattern', () => {
+ const pred = urlMatches(/\/search\?q=/);
+ const ctx: AssertContext = {
+ snapshot: null,
+ url: 'https://example.com/search?q=shoes',
+ stepId: null,
+ };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ });
+
+ it('returns false when no match', () => {
+ const pred = urlMatches('/cart');
+ const ctx: AssertContext = {
+ snapshot: null,
+ url: 'https://example.com/search?q=shoes',
+ stepId: null,
+ };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('did not match');
+ });
+
+ it('handles null url', () => {
+ const pred = urlMatches('/search');
+ const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; // the predicate matches a null url as ''
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ });
+
+ it('includes pattern and url in details', () => {
+ const pred = urlMatches('/test');
+ const ctx: AssertContext = { snapshot: null, url: 'https://example.com/test', stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.details.pattern).toBe('/test'); // details.pattern is String(pattern)
+ expect(outcome.details.url).toContain('example.com');
+ });
+});
+
+describe('urlContains', () => { // URL predicate based on plain substring search
+ it('finds substring', () => {
+ const pred = urlContains('/cart');
+ const ctx: AssertContext = {
+ snapshot: null,
+ url: 'https://example.com/cart/checkout',
+ stepId: null,
+ };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ });
+
+ it('returns false when substring not found', () => {
+ const pred = urlContains('/orders');
+ const ctx: AssertContext = { snapshot: null, url: 'https://example.com/cart', stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('does not contain');
+ });
+
+ it('handles null url', () => {
+ const pred = urlContains('/test');
+ const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; // the predicate searches a null url as ''
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ });
+});
+
+describe('exists', () => { // passes when the selector matches at least one snapshot element
+ it('finds element by role', () => {
+ const elements = [makeElement(1, 'button', 'Click me')];
+ const snap = makeSnapshot(elements);
+ const pred = exists('role=button');
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ expect(outcome.details.matched).toBe(1); // details.matched is the number of matching elements
+ });
+
+ it('returns false when element not found', () => {
+ const elements = [makeElement(1, 'button', 'Click me')];
+ const snap = makeSnapshot(elements);
+ const pred = exists('role=link');
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('no elements matched');
+ });
+
+ it('finds element by text', () => {
+ const elements = [makeElement(1, 'button', 'Submit Form')];
+ const snap = makeSnapshot(elements);
+ const pred = exists("text~'Submit'");
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ });
+
+ it('handles null snapshot', () => {
+ const pred = exists('role=button');
+ const ctx: AssertContext = { snapshot: null, url: null, stepId: null }; // without a snapshot the predicate fails before querying
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('no snapshot available');
+ });
+});
+
+describe('notExists', () => { // passes when the selector matches no snapshot element
+ it('passes when element absent', () => {
+ const elements = [makeElement(1, 'button')]; // fixture element has no text
+ const snap = makeSnapshot(elements);
+ const pred = notExists("text~'Loading'");
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ });
+
+ it('fails when element present', () => {
+ const elements = [makeElement(1, 'button', 'Loading...')];
+ const snap = makeSnapshot(elements);
+ const pred = notExists("text~'Loading'");
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('found 1 elements'); // the reason embeds the match count
+ });
+});
+
+describe('elementCount', () => { // bounds the number of elements matching a selector
+ it('passes when min count satisfied', () => {
+ const elements = [makeElement(0, 'button'), makeElement(1, 'button'), makeElement(2, 'button')];
+ const snap = makeSnapshot(elements);
+ const pred = elementCount('role=button', { minCount: 2 }); // no maxCount: only the lower bound is enforced
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ });
+
+ it('fails when min count not satisfied', () => {
+ const elements = [makeElement(1, 'button')];
+ const snap = makeSnapshot(elements);
+ const pred = elementCount('role=button', { minCount: 5 });
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('expected at least 5'); // "at least" wording is used when maxCount is absent
+ });
+
+ it('passes when within min-max range', () => {
+ const elements = [makeElement(0, 'button'), makeElement(1, 'button'), makeElement(2, 'button')];
+ const snap = makeSnapshot(elements);
+ const pred = elementCount('role=button', { minCount: 1, maxCount: 5 });
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ });
+
+ it('fails when max count exceeded', () => {
+ const elements = Array.from({ length: 10 }, (_, i) => makeElement(i, 'button')); // ten buttons with ids 0..9
+ const snap = makeSnapshot(elements);
+ const pred = elementCount('role=button', { minCount: 1, maxCount: 5 });
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.reason).toContain('expected 1-5'); // range wording is used when maxCount is set
+ });
+});
+
+describe('allOf', () => { // AND combinator: every sub-predicate is evaluated, failures are counted
+ it('passes when all predicates pass', () => {
+ const elements = [makeElement(1, 'button', 'Checkout')];
+ const snap = makeSnapshot(elements, 'https://example.com/cart');
+ const pred = allOf(urlContains('/cart'), exists('role=button'));
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(true);
+ expect(outcome.details.failedCount).toBe(0);
+ });
+
+ it('fails when one predicate fails', () => {
+ const elements = [makeElement(1, 'button')];
+ const snap = makeSnapshot(elements, 'https://example.com/home'); // url fails, button exists
+ const pred = allOf(urlContains('/cart'), exists('role=button'));
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.details.failedCount).toBe(1);
+ });
+
+ it('fails when all predicates fail', () => {
+ const elements = [makeElement(1, 'link')];
+ const snap = makeSnapshot(elements, 'https://example.com/home'); // url fails and no button exists
+ const pred = allOf(urlContains('/cart'), exists('role=button'));
+ const ctx: AssertContext = { snapshot: snap, url: snap.url, stepId: null };
+ const outcome = pred(ctx);
+ expect(outcome.passed).toBe(false);
+ expect(outcome.details.failedCount).toBe(2); // there is no short-circuit, so both failures are counted
+ });
+});
+
+describe('anyOf', () => {
+  it('passes when first predicate passes', () => {
+    // The element is built with 'Success', which the first alternative queries for.
+    const page = makeSnapshot([makeElement(1, 'button', 'Success')]);
+    const context: AssertContext = { snapshot: page, url: page.url, stepId: null };
+    const successOrComplete = anyOf(exists("text~'Success'"), exists("text~'Complete'"));
+    const { passed } = successOrComplete(context);
+    expect(passed).toBe(true);
+  });
+
+  it('passes when second predicate passes', () => {
+    // The element is built with 'Complete', which only the second alternative queries for.
+    const page = makeSnapshot([makeElement(1, 'button', 'Complete')]);
+    const context: AssertContext = { snapshot: page, url: page.url, stepId: null };
+    const successOrComplete = anyOf(exists("text~'Success'"), exists("text~'Complete'"));
+    const { passed } = successOrComplete(context);
+    expect(passed).toBe(true);
+  });
+
+  it('fails when no predicates pass', () => {
+    // The element is built with 'Error', which neither alternative queries for.
+    const page = makeSnapshot([makeElement(1, 'button', 'Error')]);
+    const context: AssertContext = { snapshot: page, url: page.url, stepId: null };
+    const successOrComplete = anyOf(exists("text~'Success'"), exists("text~'Complete'"));
+    const { passed, reason } = successOrComplete(context);
+    expect(passed).toBe(false);
+    expect(reason).toContain('none of 2 predicates passed');
+  });
+});
+
+describe('custom', () => {
+  it('passes when function returns true', () => {
+    // The check requires a non-null URL, and the context supplies one.
+    const context: AssertContext = { snapshot: null, url: 'https://example.com', stepId: null };
+    const hasUrl = custom(c => c.url !== null, 'has_url');
+    expect(hasUrl(context).passed).toBe(true);
+  });
+
+  it('fails when function returns false', () => {
+    // The check requires a null URL, but the context carries a real one.
+    const context: AssertContext = { snapshot: null, url: 'https://example.com', stepId: null };
+    const verdict = custom(c => c.url === null, 'no_url')(context);
+    expect(verdict.passed).toBe(false);
+    expect(verdict.reason).toContain('returned false');
+  });
+
+  it('can check snapshot data', () => {
+    // Fifteen elements go into the snapshot; the check wants more than ten.
+    const page = makeSnapshot(Array.from({ length: 15 }, (_, id) => makeElement(id, 'button')));
+    const context: AssertContext = { snapshot: page, url: page.url, stepId: null };
+    const hasManyElements = custom(
+      c => c.snapshot !== null && c.snapshot.elements.length > 10,
+      'has_many_elements'
+    );
+    const { passed } = hasManyElements(context);
+    expect(passed).toBe(true);
+  });
+
+  it('handles exceptions gracefully', () => {
+    // The check always throws; the outcome is expected to report it rather than propagate.
+    const emptyContext: AssertContext = { snapshot: null, url: null, stepId: null };
+    const alwaysThrows = (_unused: AssertContext): boolean => {
+      throw new Error('Something went wrong');
+    };
+    const { passed, reason } = custom(alwaysThrows, 'bad_check')(emptyContext);
+    expect(passed).toBe(false);
+    expect(reason).toContain('raised exception');
+    expect(reason).toContain('Something went wrong');
+  });
+});