/**
 * Clear the value of an input/textarea element (best-effort).
 *
 * The element is looked up in `window.sentience_registry`, focused, emptied, and
 * then bubbling `input` and `change` events are dispatched so that page scripts
 * observe the edit.
 *
 * @param browser - Browser wrapper; `getPage()` must return a live page.
 * @param elementId - Key of the target element in `window.sentience_registry`.
 * @param takeSnapshot - When true, a snapshot taken after the action is attached
 *   to the result (omitted if taking the snapshot fails).
 * @returns On success: `success: true` with `outcome` `'navigated'` or
 *   `'dom_updated'`. On failure: `success: false`, `outcome: 'error'` and
 *   `error.code === 'clear_failed'`.
 * @throws Error if the browser has no page (not started).
 */
export async function clear(
  browser: IBrowser,
  elementId: number,
  takeSnapshot: boolean = false
): Promise<ActionResult> {
  const page = browser.getPage();
  if (!page) throw new Error('Browser not started. Call start() first.');

  const startTime = Date.now();
  const urlBefore = page.url();

  const ok = await BrowserEvaluator.evaluate(
    page,
    id => {
      const el = (window as any).sentience_registry?.[id];
      if (!el) return false;
      try {
        el.focus?.();
      } catch {
        /* focusing is a nicety; ignore failures */
      }
      if (!('value' in el)) return false;

      // Some frameworks (e.g. React) shadow `value` on the element instance to
      // track changes; writing through the prototype's native setter keeps the
      // following `input` event from being treated as a no-op.
      const proto = Object.getPrototypeOf(el);
      const nativeSetter = proto ? Object.getOwnPropertyDescriptor(proto, 'value')?.set : undefined;
      if (nativeSetter) nativeSetter.call(el, '');
      else el.value = '';

      el.dispatchEvent(new Event('input', { bubbles: true }));
      el.dispatchEvent(new Event('change', { bubbles: true }));
      return true;
    },
    elementId
  );

  if (!ok) {
    return {
      success: false,
      duration_ms: Date.now() - startTime,
      outcome: 'error',
      error: { code: 'clear_failed', reason: 'Element not found or not clearable' },
    };
  }

  try {
    await page.waitForTimeout(250);
  } catch {
    /* ignore */
  }

  const durationMs = Date.now() - startTime;
  const urlChanged = urlBefore !== page.url();
  const outcome = urlChanged ? 'navigated' : 'dom_updated';

  let snapshotAfter: Snapshot | undefined;
  if (takeSnapshot) {
    try {
      snapshotAfter = await snapshot(browser);
    } catch {
      /* the clear already happened; a failed snapshot must not turn it into an exception */
    }
  }

  return {
    success: true,
    duration_ms: durationMs,
    outcome,
    url_changed: urlChanged,
    snapshot_after: snapshotAfter,
  };
}
/**
 * Ensure a checkbox/radio is checked (best-effort).
 *
 * If the element is already checked nothing is clicked. Otherwise it is clicked
 * once and the resulting `checked` state is verified, so a click that has no
 * effect (for example on a disabled control) is reported as a failure instead
 * of a silent success.
 *
 * @param browser - Browser wrapper; `getPage()` must return a live page.
 * @param elementId - Key of the target element in `window.sentience_registry`.
 * @param takeSnapshot - When true, a snapshot taken after the action is attached
 *   to the result (omitted if taking the snapshot fails).
 * @returns On success: `success: true` with `outcome` `'navigated'` or
 *   `'dom_updated'`. On failure: `success: false`, `outcome: 'error'` and
 *   `error.code === 'check_failed'`.
 * @throws Error if the browser has no page (not started).
 */
export async function check(
  browser: IBrowser,
  elementId: number,
  takeSnapshot: boolean = false
): Promise<ActionResult> {
  const page = browser.getPage();
  if (!page) throw new Error('Browser not started. Call start() first.');

  const startTime = Date.now();
  const urlBefore = page.url();

  const ok = await BrowserEvaluator.evaluate(
    page,
    id => {
      const el = (window as any).sentience_registry?.[id];
      if (!el) return false;
      try {
        el.focus?.();
      } catch {
        /* focusing is a nicety; ignore failures */
      }
      if (!('checked' in el)) return false;
      if (el.checked === true) return true;
      try {
        el.click();
      } catch {
        return false;
      }
      // Confirm the click actually flipped the state.
      return el.checked === true;
    },
    elementId
  );

  if (!ok) {
    return {
      success: false,
      duration_ms: Date.now() - startTime,
      outcome: 'error',
      error: { code: 'check_failed', reason: 'Element not found or not checkable' },
    };
  }

  try {
    await page.waitForTimeout(250);
  } catch {
    /* ignore */
  }

  const durationMs = Date.now() - startTime;
  const urlChanged = urlBefore !== page.url();
  const outcome = urlChanged ? 'navigated' : 'dom_updated';

  let snapshotAfter: Snapshot | undefined;
  if (takeSnapshot) {
    try {
      snapshotAfter = await snapshot(browser);
    } catch {
      /* the action already happened; a failed snapshot must not turn it into an exception */
    }
  }

  return {
    success: true,
    duration_ms: durationMs,
    outcome,
    url_changed: urlChanged,
    snapshot_after: snapshotAfter,
  };
}
/**
 * Ensure a checkbox/radio is unchecked (best-effort).
 *
 * If the element is already unchecked nothing is clicked. Otherwise it is
 * clicked once and the resulting `checked` state is verified, so a click that
 * leaves the control checked (for example a radio button, which cannot be
 * unchecked by clicking it) is reported as a failure instead of a silent success.
 *
 * @param browser - Browser wrapper; `getPage()` must return a live page.
 * @param elementId - Key of the target element in `window.sentience_registry`.
 * @param takeSnapshot - When true, a snapshot taken after the action is attached
 *   to the result (omitted if taking the snapshot fails).
 * @returns On success: `success: true` with `outcome` `'navigated'` or
 *   `'dom_updated'`. On failure: `success: false`, `outcome: 'error'` and
 *   `error.code === 'uncheck_failed'`.
 * @throws Error if the browser has no page (not started).
 */
export async function uncheck(
  browser: IBrowser,
  elementId: number,
  takeSnapshot: boolean = false
): Promise<ActionResult> {
  const page = browser.getPage();
  if (!page) throw new Error('Browser not started. Call start() first.');

  const startTime = Date.now();
  const urlBefore = page.url();

  const ok = await BrowserEvaluator.evaluate(
    page,
    id => {
      const el = (window as any).sentience_registry?.[id];
      if (!el) return false;
      try {
        el.focus?.();
      } catch {
        /* focusing is a nicety; ignore failures */
      }
      if (!('checked' in el)) return false;
      if (el.checked === false) return true;
      try {
        el.click();
      } catch {
        return false;
      }
      // Confirm the click actually flipped the state.
      return el.checked === false;
    },
    elementId
  );

  if (!ok) {
    return {
      success: false,
      duration_ms: Date.now() - startTime,
      outcome: 'error',
      error: { code: 'uncheck_failed', reason: 'Element not found or not uncheckable' },
    };
  }

  try {
    await page.waitForTimeout(250);
  } catch {
    /* ignore */
  }

  const durationMs = Date.now() - startTime;
  const urlChanged = urlBefore !== page.url();
  const outcome = urlChanged ? 'navigated' : 'dom_updated';

  let snapshotAfter: Snapshot | undefined;
  if (takeSnapshot) {
    try {
      snapshotAfter = await snapshot(browser);
    } catch {
      /* the action already happened; a failed snapshot must not turn it into an exception */
    }
  }

  return {
    success: true,
    duration_ms: durationMs,
    outcome,
    url_changed: urlChanged,
    snapshot_after: snapshotAfter,
  };
}
/**
 * Upload a local file through an `<input type="file">` element (best-effort).
 *
 * Two strategies are tried in order:
 * 1. Resolve the element handle from `window.sentience_registry` and call
 *    `setInputFiles()` on it.
 * 2. If that fails, build a CSS selector from the element's `id` (or `name`)
 *    and call `page.setInputFiles()`. Attribute values are escaped, so ids or
 *    names containing `.`, `:`, quotes or backslashes still match.
 *
 * When the fallback cannot build a selector, the error from the first attempt
 * is reported so the root cause is not hidden.
 *
 * @param browser - Browser wrapper; `getPage()` must return a live page.
 * @param elementId - Key of the target element in `window.sentience_registry`.
 * @param filePath - Path of the local file to attach.
 * @param takeSnapshot - When true, a snapshot taken after the action is attached
 *   to the result (omitted if taking the snapshot fails).
 * @returns Result whose `success` reflects whether either strategy worked; on
 *   failure `error.code === 'upload_failed'` and `error.reason` carries the message.
 * @throws Error if the browser has no page (not started).
 */
export async function uploadFile(
  browser: IBrowser,
  elementId: number,
  filePath: string,
  takeSnapshot: boolean = false
): Promise<ActionResult> {
  const page = browser.getPage();
  if (!page) throw new Error('Browser not started. Call start() first.');

  const startTime = Date.now();
  const urlBefore = page.url();

  // Escape a value for use inside a double-quoted CSS attribute selector.
  const cssQuote = (value: unknown): string =>
    String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"');

  let success = false;
  let errorMsg: string | undefined;
  try {
    // First try: grab the exact element handle from the sentience registry.
    try {
      const handle = await page.evaluateHandle(
        '(id) => (window.sentience_registry && window.sentience_registry[id]) || null',
        elementId
      );
      const el = (handle as any).asElement?.() ?? null;
      if (!el) throw new Error('Element not found');
      await el.setInputFiles(filePath);
      success = true;
    } catch (primaryErr) {
      // Fallback: resolve a selector from the element's attributes and use page.setInputFiles().
      const attrs = await BrowserEvaluator.evaluate(
        page,
        id => {
          const el = (window as any).sentience_registry?.[id];
          if (!el) return null;
          const tag = String(el.tagName || '').toUpperCase();
          const type = String(el.type || '').toLowerCase();
          const idAttr = el.id ? String(el.id) : null;
          const nameAttr = el.name ? String(el.name) : null;
          return { tag, type, id: idAttr, name: nameAttr };
        },
        elementId
      );

      let selector: string | null = null;
      if (attrs && attrs.tag === 'INPUT' && attrs.type === 'file') {
        // An attribute selector (not `#id`) stays valid for ids with CSS-special characters.
        if (attrs.id) selector = `input[id="${cssQuote(attrs.id)}"]`;
        else if (attrs.name) selector = `input[name="${cssQuote(attrs.name)}"]`;
      }
      // No usable selector: surface why the first attempt failed.
      if (!selector) throw primaryErr;
      await page.setInputFiles(selector, filePath);
      success = true;
    }
  } catch (e: any) {
    success = false;
    errorMsg = String(e?.message ?? e);
  }

  try {
    await page.waitForTimeout(250);
  } catch {
    /* ignore */
  }

  const durationMs = Date.now() - startTime;
  const urlChanged = urlBefore !== page.url();
  const outcome = urlChanged ? 'navigated' : success ? 'dom_updated' : 'error';

  let snapshotAfter: Snapshot | undefined;
  if (takeSnapshot) {
    try {
      snapshotAfter = await snapshot(browser);
    } catch {
      /* ignore */
    }
  }

  return {
    success,
    duration_ms: durationMs,
    outcome,
    url_changed: urlChanged,
    snapshot_after: snapshotAfter,
    error: success ? undefined : { code: 'upload_failed', reason: errorMsg ?? 'upload failed' },
  };
}
/**
 * Submit a form (best-effort) by clicking a submit control or calling requestSubmit().
 *
 * Resolution order for the registry element:
 * - a `<form>`: `requestSubmit()` if available, otherwise `submit()`;
 * - a control owned by a form that supports `requestSubmit`: the form is
 *   submitted, passing the control as submitter when it is a submit button so
 *   its name/value and `formaction`-style overrides take part in the submission;
 * - anything else: the element is clicked.
 *
 * @param browser - Browser wrapper; `getPage()` must return a live page.
 * @param elementId - Key of the target element in `window.sentience_registry`.
 * @param takeSnapshot - When true, a snapshot taken after the action is attached
 *   to the result (omitted if taking the snapshot fails).
 * @returns On success: `success: true` with `outcome` `'navigated'` or
 *   `'dom_updated'`. On failure: `success: false`, `outcome: 'error'` and
 *   `error.code === 'submit_failed'`.
 * @throws Error if the browser has no page (not started).
 */
export async function submit(
  browser: IBrowser,
  elementId: number,
  takeSnapshot: boolean = false
): Promise<ActionResult> {
  const page = browser.getPage();
  if (!page) throw new Error('Browser not started. Call start() first.');

  const startTime = Date.now();
  const urlBefore = page.url();

  const ok = await BrowserEvaluator.evaluate(
    page,
    id => {
      const el = (window as any).sentience_registry?.[id];
      if (!el) return false;
      try {
        el.focus?.();
      } catch {
        /* focusing is a nicety; ignore failures */
      }

      const tag = String(el.tagName || '').toUpperCase();
      if (tag === 'FORM') {
        if (typeof el.requestSubmit === 'function') {
          el.requestSubmit();
          return true;
        }
        try {
          el.submit();
          return true;
        } catch {
          return false;
        }
      }

      const form = el.form;
      if (form && typeof form.requestSubmit === 'function') {
        const type = String(el.type || '').toLowerCase();
        const isSubmitter =
          (tag === 'BUTTON' && type === 'submit') ||
          (tag === 'INPUT' && (type === 'submit' || type === 'image'));
        if (isSubmitter) {
          try {
            form.requestSubmit(el);
            return true;
          } catch {
            /* submitter rejected by the form; fall back to a plain requestSubmit() */
          }
        }
        form.requestSubmit();
        return true;
      }

      try {
        el.click();
        return true;
      } catch {
        return false;
      }
    },
    elementId
  );

  if (!ok) {
    return {
      success: false,
      duration_ms: Date.now() - startTime,
      outcome: 'error',
      error: { code: 'submit_failed', reason: 'Element not found or not submittable' },
    };
  }

  try {
    await page.waitForTimeout(500);
  } catch {
    /* ignore */
  }

  const durationMs = Date.now() - startTime;
  const urlChanged = urlBefore !== page.url();
  const outcome = urlChanged ? 'navigated' : 'dom_updated';

  let snapshotAfter: Snapshot | undefined;
  if (takeSnapshot) {
    try {
      snapshotAfter = await snapshot(browser);
    } catch {
      /* ignore */
    }
  }

  return {
    success: true,
    duration_ms: durationMs,
    outcome,
    url_changed: urlChanged,
    snapshot_after: snapshotAfter,
  };
}
/**
 * Navigate back in history (best-effort).
 *
 * Calls `page.goBack()`, waits briefly, then compares the URL before and after.
 * `goBack()` resolving to `null` is ambiguous: it can be a navigation without a
 * main-resource response, or it can mean there was no history entry to return
 * to. The two are told apart by the URL: a `null` result with an unchanged URL
 * is reported as a failure rather than as a successful `dom_updated`.
 *
 * @param browser - Browser wrapper; `getPage()` must return a live page.
 * @param takeSnapshot - When true, a snapshot taken after the action is attached
 *   to the result (omitted if taking the snapshot fails).
 * @returns Result whose `outcome` is `'navigated'` when the URL changed,
 *   `'dom_updated'` when the call succeeded without a URL change, and `'error'`
 *   otherwise (`error.code === 'back_failed'`).
 * @throws Error if the browser has no page (not started).
 */
export async function back(
  browser: IBrowser,
  takeSnapshot: boolean = false
): Promise<ActionResult> {
  const page = browser.getPage();
  if (!page) throw new Error('Browser not started. Call start() first.');

  const startTime = Date.now();
  const urlBefore = page.url();

  let success = false;
  let errorMsg: string | undefined;
  let response: unknown = null;
  try {
    response = await page.goBack();
    success = true;
  } catch (e: any) {
    success = false;
    errorMsg = String(e?.message ?? e);
  }

  try {
    await page.waitForTimeout(500);
  } catch {
    /* ignore */
  }

  const durationMs = Date.now() - startTime;
  let urlChanged = false;
  try {
    urlChanged = urlBefore !== page.url();
  } catch {
    // The URL cannot be read; treat that as evidence the page moved on.
    urlChanged = true;
  }

  // No response and no URL change: there was nothing to go back to.
  if (success && response == null && !urlChanged) {
    success = false;
    errorMsg = 'No previous history entry to go back to';
  }

  const outcome = urlChanged ? 'navigated' : success ? 'dom_updated' : 'error';

  let snapshotAfter: Snapshot | undefined;
  if (takeSnapshot) {
    try {
      snapshotAfter = await snapshot(browser);
    } catch {
      /* ignore */
    }
  }

  return {
    success,
    duration_ms: durationMs,
    outcome,
    url_changed: urlChanged,
    snapshot_after: snapshotAfter,
    error: success ? undefined : { code: 'back_failed', reason: errorMsg ?? 'back failed' },
  };
}
/**
 * Record one browser download in `this.downloads` (best-effort).
 *
 * A record is pushed immediately with `status: 'started'`, then updated in
 * place: `'completed'` once `download.path()` resolves (adding `path`,
 * `size_bytes` and, when the extension is known, `mime_type`), or `'failed'`
 * with an `error` message if it rejects. This method is invoked with `void`
 * from the `download` event handler, so it is written never to reject.
 *
 * @param download - The object emitted by the page's `download` event. Both
 *   method-style (`url()`, `suggestedFilename()`) and plain-property
 *   (`url`, `suggested_filename`) shapes are accepted.
 */
private async trackDownload(download: any): Promise<void> {
  // Read a field without letting an unexpected shape throw out of this method.
  const attempt = <T>(read: () => T): T | undefined => {
    try {
      return read();
    } catch {
      return undefined;
    }
  };

  const suggested = attempt(
    () =>
      (typeof download?.suggestedFilename === 'function'
        ? download.suggestedFilename()
        : undefined) ?? download?.suggested_filename
  );
  // `url` may be a method or a plain string; only call it when it is callable.
  const url = attempt(() =>
    typeof download?.url === 'function' ? download.url() : download?.url
  );

  const rec: Record<string, any> = {
    status: 'started',
    suggested_filename: suggested,
    url,
  };
  this.downloads.push(rec);

  try {
    const savedPath = (await download.path?.()) as string | null | undefined;
    rec.status = 'completed';
    if (!savedPath) return;
    rec.path = savedPath;

    try {
      // Best-effort size (no new deps).
      rec.size_bytes = Number(fs.statSync(savedPath).size);
    } catch {
      // ignore
    }

    // Playwright saves downloads under a random GUID name, so the extension is
    // taken from the suggested filename first and from the saved path second.
    const ext = (
      path.extname(String(suggested ?? '')) ||
      path.extname(savedPath) ||
      ''
    ).toLowerCase();
    const mimeByExt: Record<string, string> = {
      '.pdf': 'application/pdf',
      '.txt': 'text/plain',
      '.csv': 'text/csv',
      '.json': 'application/json',
      '.zip': 'application/zip',
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
    };
    if (mimeByExt[ext]) rec.mime_type = mimeByExt[ext];
  } catch (e: any) {
    rec.status = 'failed';
    rec.error = String(e?.message ?? e);
  }
}
* diff --git a/src/index.ts b/src/index.ts index 6a84a30..6359b7d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,7 +5,21 @@ export { SentienceBrowser } from './browser'; export { snapshot, SnapshotOptions } from './snapshot'; export { query, find, parseSelector } from './query'; -export { click, typeText, press, scrollTo, clickRect, ClickRect } from './actions'; +export { + back, + check, + clear, + click, + clickRect, + ClickRect, + press, + scrollTo, + selectOption, + submit, + typeText, + uncheck, + uploadFile, +} from './actions'; export { CursorPolicy, CursorMode, CursorMovementMetadata, CursorPathPoint } from './cursor-policy'; export { waitFor } from './wait'; export { expect, Expectation } from './expect'; @@ -52,6 +66,7 @@ export { AssertOutcome, AssertContext, Predicate, + downloadCompleted, urlMatches, urlContains, exists, @@ -72,6 +87,7 @@ export { export { AgentRuntime, AssertionHandle, AssertionRecord, EventuallyOptions } from './agent-runtime'; export { RuntimeAgent } from './runtime-agent'; export type { RuntimeStep, StepVerification } from './runtime-agent'; +export { parseVisionExecutorAction, executeVisionExecutorAction } from './vision-executor'; export * from './captcha/types'; export * from './captcha/strategies'; diff --git a/src/snapshot.ts b/src/snapshot.ts index 08e38aa..29b45f2 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -2,7 +2,6 @@ * Snapshot functionality - calls window.sentience.snapshot() or server-side API */ -import { SentienceBrowser } from './browser'; import { IBrowser } from './protocols/browser-protocol'; import { Snapshot } from './types'; import * as fs from 'fs'; @@ -212,7 +211,7 @@ async function snapshotViaApi( () => typeof (window as any).sentience !== 'undefined', 5000 ); - } catch (e) { + } catch (_e) { throw new Error( 'Sentience extension failed to inject. Cannot collect raw data for API processing.' 
); @@ -239,9 +238,15 @@ async function snapshotViaApi( // Use raw_elements (raw data) instead of elements (processed data) // Server validates API key and applies proprietary ranking logic const clientMetrics = rawResult?.diagnostics?.metrics; - const clientDiagnostics = rawResult?.diagnostics?.captcha - ? { captcha: rawResult.diagnostics.captcha } - : undefined; + const diag = rawResult?.diagnostics; + const clientDiagnostics = + diag?.captcha || diag?.requires_vision || diag?.requires_vision_reason + ? { + captcha: diag?.captcha, + requires_vision: diag?.requires_vision, + requires_vision_reason: diag?.requires_vision_reason, + } + : undefined; const payload = { raw_elements: rawResult.raw_elements || [], // Raw data needed for server processing url: rawResult.url || '', diff --git a/src/types.ts b/src/types.ts index 5132443..a786702 100644 --- a/src/types.ts +++ b/src/types.ts @@ -176,6 +176,9 @@ export interface SnapshotDiagnostics { reasons?: string[]; metrics?: SnapshotDiagnosticsMetrics; captcha?: CaptchaDiagnostics; + /** P1-01: forward-compatible vision recommendation signal (optional) */ + requires_vision?: boolean | null; + requires_vision_reason?: string | null; } /** diff --git a/src/verification.ts b/src/verification.ts index 1e0c8a7..50dee6a 100644 --- a/src/verification.ts +++ b/src/verification.ts @@ -60,6 +60,8 @@ export interface AssertContext { url: string | null; /** Current step identifier (for trace correlation) */ stepId: string | null; + /** Optional: non-snapshot state signals for verification (e.g., downloads). */ + downloads?: Array> | null; } /** @@ -68,6 +70,32 @@ export interface AssertContext { */ export type Predicate = (ctx: AssertContext) => AssertOutcome; +/** + * Predicate that passes if a browser download has completed. + * + * Notes: + * - This relies on `AssertContext.downloads` being populated by the runtime/browser. 
/**
 * Build a predicate that succeeds once a tracked download has finished.
 *
 * Only entries whose `status` is `'completed'` are considered. When
 * `filenameSubstring` is given (and non-empty), the entry's `filename` — or,
 * failing that, its `suggested_filename` — must contain it.
 *
 * @param filenameSubstring - Optional text the download's file name must contain.
 * @returns A predicate reading `ctx.downloads`; on success `details.download`
 *   holds the first matching entry, on failure `details` echoes the filter and
 *   the downloads that were inspected.
 */
export function downloadCompleted(filenameSubstring?: string): Predicate {
  return (ctx: AssertContext): AssertOutcome => {
    const downloads = ctx.downloads ?? [];

    const hitIndex = downloads.findIndex(entry => {
      const finished = String(entry?.status ?? '') === 'completed';
      if (!finished) return false;
      const name = String(entry?.filename ?? entry?.suggested_filename ?? '');
      return !filenameSubstring || name.includes(filenameSubstring);
    });

    if (hitIndex >= 0) {
      return { passed: true, reason: '', details: { download: downloads[hitIndex] } };
    }

    const reason = filenameSubstring
      ? `no completed download matched: ${filenameSubstring}`
      : 'no completed downloads';
    return { passed: false, reason, details: { filenameSubstring, downloads } };
  };
}
/** The action verbs a vision model may emit. */
export type VisionExecutorActionKind = 'click_xy' | 'click_rect' | 'press' | 'type' | 'finish';

/** A parsed vision-model action: the verb plus its decoded arguments. */
export interface VisionExecutorAction {
  kind: VisionExecutorActionKind;
  args: Record<string, any>;
}

/**
 * Parse a single textual action emitted by a vision model.
 *
 * Markdown code fences are stripped and the remainder is trimmed; it must then
 * be exactly one of (case-insensitive):
 * `FINISH()`, `PRESS("key")`, `TYPE("text")`, `CLICK_XY(x, y)`,
 * `CLICK_RECT(x, y, w, h)`. Quotes may be single or double; numbers may be
 * negative and may carry a decimal part.
 *
 * @param text - Raw model output.
 * @returns The decoded action.
 * @throws Error when the text matches none of the supported forms (the message
 *   includes at most the first 200 characters of the cleaned text).
 */
export function parseVisionExecutorAction(text: string): VisionExecutorAction {
  const cleaned = String(text || '')
    .replace(/```[\w]*\n?/g, '')
    .trim();

  if (/^FINISH\s*\(\s*\)\s*$/i.test(cleaned)) {
    return { kind: 'finish', args: {} };
  }

  const pressMatch = cleaned.match(/^PRESS\s*\(\s*["']([^"']+)["']\s*\)\s*$/i);
  if (pressMatch) {
    return { kind: 'press', args: { key: pressMatch[1] } };
  }

  const typeMatch = cleaned.match(/^TYPE\s*\(\s*["']([\s\S]*?)["']\s*\)\s*$/i);
  if (typeMatch) {
    return { kind: 'type', args: { text: typeMatch[1] } };
  }

  const pointMatch = cleaned.match(
    /^CLICK_XY\s*\(\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\)\s*$/i
  );
  if (pointMatch) {
    const [, x, y] = pointMatch;
    return { kind: 'click_xy', args: { x: Number(x), y: Number(y) } };
  }

  const rectMatch = cleaned.match(
    /^CLICK_RECT\s*\(\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\)\s*$/i
  );
  if (rectMatch) {
    const [, x, y, w, h] = rectMatch;
    return {
      kind: 'click_rect',
      args: { x: Number(x), y: Number(y), w: Number(w), h: Number(h) },
    };
  }

  throw new Error(`unrecognized vision action: ${cleaned.slice(0, 200)}`);
}
/**
 * Execute a parsed vision action against a backend (and, for key presses, a page).
 *
 * - `click_xy` clicks at `(x, y)`; `click_rect` clicks the centre of the rectangle.
 * - `press` sends the key through `page.keyboard.press`.
 * - `type` sends the text through `backend.type_text`.
 * - `finish` does nothing.
 *
 * Arguments are validated before anything is dispatched, so a malformed action
 * fails loudly instead of clicking at `NaN` or typing the literal `"undefined"`.
 *
 * @param params.backend - Object providing `mouse_click(x, y)` and `type_text(text)`.
 * @param params.page - Page providing `keyboard.press(key)`; required only for `press`.
 * @param params.action - The action to run.
 * @throws Error for an unknown kind, a `press` without a page, non-finite
 *   coordinates, or a missing key/text.
 */
export async function executeVisionExecutorAction(params: {
  backend: any;
  page?: any;
  action: VisionExecutorAction;
}): Promise<void> {
  const { backend, page, action } = params;

  // Read a numeric argument, rejecting anything that does not convert to a finite number.
  const numberArg = (name: string): number => {
    const value = Number(action.args?.[name]);
    if (!Number.isFinite(value)) {
      throw new Error(`${action.kind} requires a finite numeric "${name}"`);
    }
    return value;
  };

  switch (action.kind) {
    case 'click_xy': {
      const x = numberArg('x');
      const y = numberArg('y');
      await backend.mouse_click(x, y);
      return;
    }
    case 'click_rect': {
      const x = numberArg('x');
      const y = numberArg('y');
      const w = numberArg('w');
      const h = numberArg('h');
      await backend.mouse_click(x + w / 2, y + h / 2);
      return;
    }
    case 'press': {
      if (!page) throw new Error('PRESS requires a Playwright page');
      const key = action.args?.key;
      if (key == null || String(key) === '') throw new Error('press requires a non-empty "key"');
      await page.keyboard.press(String(key));
      return;
    }
    case 'type': {
      const text = action.args?.text;
      if (text == null) throw new Error('type requires a "text" argument');
      await backend.type_text(String(text));
      return;
    }
    case 'finish':
      return;
    default:
      throw new Error(`unknown vision action kind: ${(action as any).kind}`);
  }
}
+ + +
+ + + `); + + await snapshot(browser, { screenshot: false, limit: 50 }); + + const idOf = async (predSrc: string): Promise => { + const id = await page.evaluate((src: string) => { + const reg = (window as any).sentience_registry || {}; + const pred = eval(src) as (el: any) => boolean; // test-only + for (const [id, el] of Object.entries(reg)) { + try { + if (pred(el)) return Number(id); + } catch {} + } + return null; + }, predSrc); + if (typeof id !== 'number') throw new Error('id not found'); + return id; + }; + + const tid = await idOf("(el) => el && el.id === 't'"); + const cbid = await idOf("(el) => el && el.id === 'cb'"); + const selid = await idOf("(el) => el && el.id === 'sel'"); + const fileid = await idOf("(el) => el && el.id === 'file'"); + const btnid = await idOf("(el) => el && el.id === 'btn'"); + + expect((await clear(browser, tid)).success).toBe(true); + expect( + await page.evaluate(() => (document.getElementById('t') as HTMLInputElement).value) + ).toBe(''); + + expect((await check(browser, cbid)).success).toBe(true); + expect( + await page.evaluate(() => (document.getElementById('cb') as HTMLInputElement).checked) + ).toBe(true); + + expect((await uncheck(browser, cbid)).success).toBe(true); + expect( + await page.evaluate(() => (document.getElementById('cb') as HTMLInputElement).checked) + ).toBe(false); + + expect((await selectOption(browser, selid, 'b')).success).toBe(true); + expect( + await page.evaluate(() => (document.getElementById('sel') as HTMLSelectElement).value) + ).toBe('b'); + + const tmp = path.join(os.tmpdir(), `sentience-upload-${Date.now()}.txt`); + fs.writeFileSync(tmp, 'hi', 'utf8'); + expect((await uploadFile(browser, fileid, tmp)).success).toBe(true); + expect( + await page.evaluate( + () => (document.getElementById('file') as HTMLInputElement).files?.[0]?.name + ) + ).toBe(path.basename(tmp)); + + expect((await submit(browser, btnid)).success).toBe(true); + expect(await page.evaluate(() => (window as 
// Unit tests for the `downloadCompleted` predicate: completion status, the
// optional filename filter (hit and miss), the `filename` field taking
// precedence over `suggested_filename`, and nullish `downloads`.
describe('downloadCompleted', () => {
  // Build a minimal context carrying only the downloads under test.
  const ctxWith = (downloads?: AssertContext['downloads']): AssertContext => ({
    snapshot: null,
    url: null,
    stepId: null,
    downloads,
  });

  it('fails when no completed downloads', () => {
    const pred = downloadCompleted();
    const outcome = pred(ctxWith([]));
    expect(outcome.passed).toBe(false);
    expect(outcome.reason).toBe('no completed downloads');
  });

  it('fails when downloads are missing or null', () => {
    const pred = downloadCompleted();
    expect(pred(ctxWith(undefined)).passed).toBe(false);
    expect(pred(ctxWith(null)).passed).toBe(false);
  });

  it('passes when any download completed', () => {
    const pred = downloadCompleted();
    const ctx = ctxWith([
      { status: 'started' },
      { status: 'completed', suggested_filename: 'report.pdf' },
    ]);
    expect(pred(ctx).passed).toBe(true);
  });

  it('passes when filename substring matches', () => {
    const pred = downloadCompleted('report');
    const ctx = ctxWith([{ status: 'completed', suggested_filename: 'report.pdf' }]);
    expect(pred(ctx).passed).toBe(true);
  });

  it('fails when filename substring does not match any completed download', () => {
    const pred = downloadCompleted('invoice');
    const outcome = pred(ctxWith([{ status: 'completed', suggested_filename: 'report.pdf' }]));
    expect(outcome.passed).toBe(false);
    expect(outcome.reason).toBe('no completed download matched: invoice');
  });

  it('ignores matching filenames of downloads that have not completed', () => {
    const pred = downloadCompleted('report');
    expect(pred(ctxWith([{ status: 'started', suggested_filename: 'report.pdf' }])).passed).toBe(
      false
    );
  });

  it('prefers `filename` over `suggested_filename` when both are present', () => {
    const ctx = ctxWith([
      { status: 'completed', filename: 'final.csv', suggested_filename: 'draft.csv' },
    ]);
    expect(downloadCompleted('final')(ctx).passed).toBe(true);
    expect(downloadCompleted('draft')(ctx).passed).toBe(false);
  });
});