diff --git a/examples/human-cursor-click-demo.ts b/examples/human-cursor-click-demo.ts
new file mode 100644
index 00000000..45b313b7
--- /dev/null
+++ b/examples/human-cursor-click-demo.ts
@@ -0,0 +1,46 @@
+/**
+ * Human-like cursor movement demo (TypeScript SDK).
+ *
+ * This example shows how to opt into human-like mouse movement before clicking,
+ * and how to read the returned cursor metadata for tracing/debugging.
+ */
+
+import { CursorPolicy, SentienceBrowser, click, find, snapshot } from '../src';
+/** Demo entry point: clicks a 'role=link' element on example.com with a human cursor policy and logs the result. */
+async function main() {
+  const browser = new SentienceBrowser();
+  await browser.start();
+  const page = browser.getPage();
+  if (!page) throw new Error('Browser started but no page is available');
+
+  try {
+    await page.goto('https://example.com');
+    await page.waitForLoadState('networkidle', { timeout: 10_000 });
+
+    const snap = await snapshot(browser);
+    const link = find(snap, 'role=link');
+    if (!link) throw new Error('No link found on page');
+
+    const policy: CursorPolicy = {
+      mode: 'human',
+      steps: 18,
+      durationMs: 350,
+      jitterPx: 1.2,
+      overshootPx: 6.0,
+      pauseBeforeClickMs: 30,
+      seed: 123, // optional: deterministic for demos/tests
+    };
+
+    const result = await click(browser, link.id, true, false, policy); // useMouse = true, takeSnapshot = false
+    console.log('clicked:', result.success, 'outcome:', result.outcome);
+    console.log('cursor meta:', result.cursor); // undefined unless a human-like move actually ran
+  } finally {
+    await browser.close(); // always release the browser, even when the click throws
+  }
+}
+
+main().catch(err => {
+  console.error(err);
+  process.exit(1); // non-zero exit code so a failed demo run is detectable
+});
+
diff --git a/src/actions.ts b/src/actions.ts
index c07707e6..ce8169ab 100644
--- a/src/actions.ts
+++ b/src/actions.ts
@@ -6,6 +6,41 @@ import { IBrowser } from './protocols/browser-protocol';
 import { ActionResult, Snapshot, BBox } from './types';
 import { snapshot } from './snapshot';
 import { BrowserEvaluator } from './utils/browser-evaluator';
+import { CursorPolicy, buildHumanCursorPath } from './cursor-policy';
+// Last recorded cursor position per page; used as the start point of the next human-like move.
+const cursorPosByPage: WeakMap = new WeakMap();
+/** Moves the mouse to `target` along a generated path; returns the path metadata, or undefined unless mode is 'human'. */
+async function humanMoveIfEnabled(
+  page: any,
+  target: { x: number; y: number },
+  cursorPolicy?: CursorPolicy
+): Promise | undefined> {
+  if (!cursorPolicy || cursorPolicy.mode !== 'human') return undefined;
+  // Start from the last recorded cursor position for this page, if there is one.
+  const prev = cursorPosByPage.get(page);
+  let from: { x: number; y: number };
+  if (prev) {
+    from = prev;
+  } else {
+    const vp = page.viewportSize ? page.viewportSize() : null; // viewportSize may be missing on `page`
+    from = vp ? { x: vp.width / 2, y: vp.height / 2 } : { x: 0, y: 0 }; // viewport centre, else the origin
+  }
+  // Replay the generated path, spreading duration_ms evenly across its points.
+  const meta = buildHumanCursorPath([from.x, from.y], [target.x, target.y], cursorPolicy);
+  const pts = meta.path || [];
+  const durationMs = meta.duration_ms || 0;
+  const perStepMs = durationMs > 0 ? durationMs / Math.max(1, pts.length) : 0;
+  for (const p of pts) {
+    await page.mouse.move(p.x, p.y);
+    if (perStepMs > 0) await page.waitForTimeout(perStepMs);
+  }
+  if (meta.pause_before_click_ms > 0) {
+    await page.waitForTimeout(meta.pause_before_click_ms); // settle briefly before the caller clicks
+  }
+  // Record the end position so the next move on this page starts from here.
+  cursorPosByPage.set(page, { x: target.x, y: target.y });
+  return meta as any; // NOTE(review): the cast drops the CursorMovementMetadata type - confirm it is required
+}
 
 export interface ClickRect {
   x: number;
@@ -109,7 +144,8 @@ export async function click(
   browser: IBrowser,
   elementId: number,
   useMouse: boolean = true,
-  takeSnapshot: boolean = false
+  takeSnapshot: boolean = false,
+  cursorPolicy?: CursorPolicy
 ): Promise {
   const page = browser.getPage();
   if (!page) {
@@ -119,6 +155,7 @@ export async function click(
 
   const urlBefore = page.url();
   let success: boolean;
+  let cursorMeta: Record | undefined; // stays undefined unless a human-like move ran
 
   if (useMouse) {
     // Hybrid approach: Get element bbox from snapshot, calculate center, use mouse.click()
@@ -130,9 +167,12 @@ export async function click(
       // Calculate center of element bbox
       const centerX = element.bbox.x + element.bbox.width / 2;
       const centerY = element.bbox.y + element.bbox.height / 2;
+      cursorMeta = await humanMoveIfEnabled(page, { x: centerX, y: centerY }, cursorPolicy);
       // Use Playwright's native mouse click for realistic simulation
       await page.mouse.click(centerX, centerY);
       success = true;
+      // Keep cursor position even when not in human mode (for future moves)
+      cursorPosByPage.set(page, { x: centerX, y: centerY });
     } else {
       // Fallback to JS click if element not found in snapshot
       success = await BrowserEvaluator.evaluateWithNavigationFallback(
@@ -208,6 +248,7 @@ export async function click(
     outcome,
     url_changed: urlChanged,
     snapshot_after: snapshotAfter,
+    cursor: cursorMeta,
     error: success
       ? undefined
       : { code: 'click_failed', reason: 'Element not found or not clickable' },
@@ -486,7 +527,8 @@ export async function clickRect(
   rect: ClickRect | BBox,
   highlight: boolean = true,
   highlightDuration: number = 2.0,
-  takeSnapshot: boolean = false
+  takeSnapshot: boolean = false,
+  cursorPolicy?: CursorPolicy
 ): Promise {
   const page = browser.getPage();
   if (!page) {
@@ -529,6 +571,7 @@ export async function clickRect(
   // Calculate center of rectangle
   const centerX = x + w / 2;
   const centerY = y + h / 2;
+  let cursorMeta: Record | undefined; // stays undefined unless a human-like move ran
 
   // Show highlight before clicking (if enabled)
   if (highlight) {
@@ -541,8 +584,10 @@ export async function clickRect(
   let success: boolean;
   let errorMsg: string | undefined;
   try {
+    cursorMeta = await humanMoveIfEnabled(page, { x: centerX, y: centerY }, cursorPolicy);
     await page.mouse.click(centerX, centerY);
     success = true;
+    cursorPosByPage.set(page, { x: centerX, y: centerY }); // remembered for future moves, human mode or not
   } catch (error) {
     success = false;
     errorMsg = error instanceof Error ? error.message : String(error);
@@ -577,6 +622,7 @@ export async function clickRect(
     outcome,
     url_changed: urlChanged,
     snapshot_after: snapshotAfter,
+    cursor: cursorMeta,
     error: success ? undefined : { code: 'click_failed', reason: errorMsg || 'Click failed' },
   };
 }
diff --git a/src/agent.ts b/src/agent.ts
index 91f096cd..32c9323a 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -31,6 +31,8 @@ export interface AgentActResult {
   goal: string;
   error?: string;
   message?: string;
+  /** Optional action metadata (e.g., human-like cursor movement path) */
+  cursor?: Record;
 }
 
 /**
@@ -298,6 +300,7 @@ export class SentienceAgent {
           text: result.text,
           key: result.key,
           success: result.success,
+          cursor: result.cursor,
         },
         stepId
       );
diff --git a/src/backends/actions.ts b/src/backends/actions.ts
index 05a2fde0..51cc4a0c 100644
--- a/src/backends/actions.ts
+++ b/src/backends/actions.ts
@@ -19,6 +19,34 @@
 
 import type { ActionResult, BBox } from '../types';
 import type { BrowserBackend, MouseButton } from './protocol';
+import type { CursorPolicy } from '../cursor-policy';
+import { buildHumanCursorPath } from '../cursor-policy';
+// Last recorded cursor position per backend instance; start point of the next human-like move.
+const cursorPosByBackend: WeakMap = new WeakMap();
+/** Moves the cursor to `target` via backend.mouseMove along a generated path; undefined unless mode is 'human'. */
+async function humanMoveBackendIfEnabled(
+  backend: BrowserBackend,
+  target: { x: number; y: number },
+  cursorPolicy?: CursorPolicy
+): Promise | undefined> {
+  if (!cursorPolicy || cursorPolicy.mode !== 'human') return undefined;
+  const key = backend as unknown as object; // WeakMap keys must be objects
+  const prev = cursorPosByBackend.get(key);
+  const from = prev ? prev : { x: target.x, y: target.y }; // no previous position: zero-length move at the target
+  // Replay the generated path, spreading duration_ms evenly across its points.
+  const meta = buildHumanCursorPath([from.x, from.y], [target.x, target.y], cursorPolicy);
+  const pts = meta.path || [];
+  const durationMs = meta.duration_ms || 0;
+  const perStepMs = durationMs > 0 ? durationMs / Math.max(1, pts.length) : 0;
+  for (const p of pts) {
+    await backend.mouseMove(p.x, p.y);
+    if (perStepMs > 0) await sleep(perStepMs);
+  }
+  if (meta.pause_before_click_ms > 0) await sleep(meta.pause_before_click_ms);
+  // Record the end position so the next move on this backend starts from here.
+  cursorPosByBackend.set(key, { x: target.x, y: target.y });
+  return meta as any; // NOTE(review): the cast drops the CursorMovementMetadata type - confirm it is required
+}
 
 /**
  * Target type for coordinate resolution.
@@ -117,26 +145,37 @@ export async function click(
   target: ClickTarget,
   button: MouseButton = 'left',
   clickCount: number = 1,
-  moveFirst: boolean = true
+  moveFirst: boolean = true,
+  cursorPolicy?: CursorPolicy
 ): Promise {
   const startTime = Date.now();

   const [x, y] = resolveCoordinates(target);
+  let cursorMeta: Record | undefined;
 
   try {
     // Optional mouse move for hover effects
     if (moveFirst) {
-      await backend.mouseMove(x, y);
-      await sleep(20); // Brief pause for hover
+      cursorMeta = await humanMoveBackendIfEnabled(backend, { x, y }, cursorPolicy);
+      if (!cursorMeta) {
+        await backend.mouseMove(x, y);
+        await sleep(20); // Brief pause for hover
+      }
     }
 
     // Perform click
     await backend.mouseClick(x, y, button, clickCount);
+    // Remember where the cursor ended up so the next human-like move starts from here,
+    // even when this click did not use human mode (same rule as click() in src/actions.ts).
+    cursorPosByBackend.set(backend as unknown as object, { x, y });
 
-    return successResult(measureDuration(startTime));
+    return { ...successResult(measureDuration(startTime)), cursor: cursorMeta };
   } catch (e) {
     const reason = e instanceof Error ? e.message : String(e);
-    return errorResult(measureDuration(startTime), 'click_failed', reason);
+    return {
+      ...errorResult(measureDuration(startTime), 'click_failed', reason),
+      cursor: cursorMeta,
+    };
   }
 }
 
diff --git a/src/cursor-policy.ts b/src/cursor-policy.ts
new file mode 100644
index 00000000..8d115002
--- /dev/null
+++ b/src/cursor-policy.ts
@@ -0,0 +1,183 @@
+export type CursorMode = 'instant' | 'human';
+
+/**
+ * Policy for cursor movement.
+ *
+ * - mode="instant": current behavior (single click without multi-step motion)
+ * - mode="human": move with a curved path + optional jitter/overshoot
+ *
+ * Every field except `mode` is optional; `buildHumanCursorPath` fills in the defaults.
+ */
+export interface CursorPolicy {
+  mode: CursorMode;
+  /** Number of sampled path points; default floor(clamp(10 + dist / 25, 12, 40)), capped to fit 64 points. */
+  steps?: number;
+  /** Total movement time in ms; default floor(clamp(120 + dist * 0.9, 120, 700)). */
+  durationMs?: number;
+  /** Jitter amplitude in px; offsets shrink linearly to zero at the end of the path (default 1.0). */
+  jitterPx?: number;
+  /** Distance in px to travel past the target before settling on it; <= 0 disables (default 6.0). */
+  overshootPx?: number;
+  /** Pause in ms between the end of the move and the click (default 20). */
+  pauseBeforeClickMs?: number;
+  /** Determinism hook for tests/repro */
+  seed?: number;
+}
+
+/** One sampled point of a cursor path. */
+export interface CursorPathPoint {
+  x: number;
+  y: number;
+  /** Un-eased progress along the path in [0, 1], rounded to 4 decimals. */
+  t?: number;
+}
+
+/** Description of one generated cursor move, as returned by `buildHumanCursorPath`. */
+export interface CursorMovementMetadata {
+  mode: CursorMode;
+  from: { x: number; y: number };
+  to: { x: number; y: number };
+  steps: number;
+  duration_ms: number;
+  pause_before_click_ms: number;
+  jitter_px: number;
+  overshoot_px: number;
+  path: CursorPathPoint[];
+}
+
+/** Upper bound on the number of points in a generated path. */
+const MAX_PATH_POINTS = 64;
+
+/** Restrict `v` to the inclusive range [lo, hi]. */
+function clamp(v: number, lo: number, hi: number): number {
+  return Math.max(lo, Math.min(hi, v));
+}
+
+/** Smoothstep easing: maps 0 to 0 and 1 to 1 with zero slope at both ends. */
+function easeInOut(t: number): number {
+  return t * t * (3 - 2 * t);
+}
+
+/** Evaluate the cubic Bezier curve with control points p0..p3 at parameter `t`. */
+function bezier(
+  p0: [number, number],
+  p1: [number, number],
+  p2: [number, number],
+  p3: [number, number],
+  t: number
+): [number, number] {
+  const u = 1 - t;
+  const tt = t * t;
+  const uu = u * u;
+  const uuu = uu * u;
+  const ttt = tt * t;
+  const x = uuu * p0[0] + 3 * uu * t * p1[0] + 3 * u * tt * p2[0] + ttt * p3[0];
+  const y = uuu * p0[1] + 3 * uu * t * p1[1] + 3 * u * tt * p2[1] + ttt * p3[1];
+  return [x, y];
+}
+
+// Simple seeded RNG for reproducibility (mulberry32)
+function mulberry32(seed: number): () => number {
+  let t = seed >>> 0;
+  return () => {
+    t += 0x6d2b79f5;
+    let x = t;
+    x = Math.imul(x ^ (x >>> 15), x | 1);
+    x ^= x + Math.imul(x ^ (x >>> 7), x | 61);
+    return ((x ^ (x >>> 14)) >>> 0) / 4294967296;
+  };
+}
+
+/** Linear interpolation between `lo` and `hi` using one draw from `rng`. */
+function randBetween(rng: () => number, lo: number, hi: number): number {
+  return lo + (hi - lo) * rng();
+}
+
+/**
+ * Build a curved, eased and optionally jittered cursor path from `start` to `target`.
+ *
+ * The path samples a cubic Bezier whose two control points are pushed to opposite sides of
+ * the straight line. With a positive overshoot the curve ends slightly past the target and a
+ * final point settles back onto it. The returned `path` always ends exactly on `target` and
+ * holds at most MAX_PATH_POINTS points; `steps` reports the step count actually used.
+ *
+ * @param start - [x, y] the cursor starts from
+ * @param target - [x, y] the cursor must end on
+ * @param policy - movement tuning; unset fields fall back to the defaults listed on CursorPolicy
+ * @returns metadata for the move, including the sampled `path`
+ */
+export function buildHumanCursorPath(
+  start: [number, number],
+  target: [number, number],
+  policy: CursorPolicy
+): CursorMovementMetadata {
+  // Without an explicit seed the path differs from call to call.
+  const seed = policy.seed ?? (Date.now() & 0xffffffff);
+  const rng = mulberry32(seed);
+
+  const [x0, y0] = start;
+  const [x1, y1] = target;
+  const dx = x1 - x0;
+  const dy = y1 - y0;
+  const dist0 = Math.hypot(dx, dy);
+  // Avoid dividing by zero when start and target coincide.
+  const dist = dist0 < 1e-6 ? 1 : dist0;
+
+  const overshoot = policy.overshootPx ?? 6.0;
+
+  // Cap the step count so the whole path (plus the settle point after an overshoot) fits in
+  // MAX_PATH_POINTS; truncating afterwards would cut the path off before it reaches the target.
+  const maxSteps = overshoot > 0 ? MAX_PATH_POINTS - 1 : MAX_PATH_POINTS;
+  const defaultSteps = Math.floor(clamp(10 + dist / 25, 12, 40));
+  const requestedSteps = Math.floor(policy.steps ?? defaultSteps);
+  const steps = clamp(Number.isNaN(requestedSteps) ? defaultSteps : requestedSteps, 1, maxSteps);
+  const durationMs = Math.floor(policy.durationMs ?? clamp(120 + dist * 0.9, 120, 700));
+
+  // Unit vector along the move (ux, uy) and its perpendicular (px, py).
+  const ux = dx / dist;
+  const uy = dy / dist;
+  const px = -uy;
+  const py = ux;
+
+  let curveMag = clamp(dist / 3.5, 10, 140);
+  curveMag *= randBetween(rng, 0.5, 1.2);
+
+  const c1: [number, number] = [x0 + dx * 0.25 + px * curveMag, y0 + dy * 0.25 + py * curveMag];
+  const c2: [number, number] = [x0 + dx * 0.75 - px * curveMag, y0 + dy * 0.75 - py * curveMag];
+
+  const overshootPoint: [number, number] =
+    overshoot > 0 ? [x1 + ux * overshoot, y1 + uy * overshoot] : [x1, y1];
+
+  const jitterPx = policy.jitterPx ?? 1.0;
+  const pts: CursorPathPoint[] = [];
+
+  for (let i = 0; i < steps; i++) {
+    const tRaw = steps <= 1 ? 0 : i / (steps - 1);
+    const t = easeInOut(tRaw);
+    const [bx, by] = bezier([x0, y0], c1, c2, overshootPoint, t);
+    // Jitter fades out linearly, so the sample at tRaw = 1 carries no random offset.
+    const jitterScale = jitterPx * (1 - tRaw) * 0.9;
+    const jx = randBetween(rng, -jitterScale, jitterScale);
+    const jy = randBetween(rng, -jitterScale, jitterScale);
+    pts.push({ x: bx + jx, y: by + jy, t: Math.round(tRaw * 10_000) / 10_000 });
+  }
+
+  // Always finish exactly on the target: after an overshoot, or when the sampled path
+  // (for example a single-step path) did not end there by itself.
+  const last = pts[pts.length - 1];
+  if (overshoot > 0 || !last || last.x !== x1 || last.y !== y1) {
+    pts.push({ x: x1, y: y1, t: 1.0 });
+  }
+
+  return {
+    mode: 'human',
+    from: { x: x0, y: y0 },
+    to: { x: x1, y: y1 },
+    steps,
+    duration_ms: durationMs,
+    pause_before_click_ms: policy.pauseBeforeClickMs ?? 20,
+    jitter_px: jitterPx,
+    overshoot_px: overshoot,
+    path: pts,
+  };
+}
diff --git a/src/index.ts b/src/index.ts
index ad80abb0..02c9f268 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -6,6 +6,7 @@ export { SentienceBrowser } from './browser';
 export { snapshot, SnapshotOptions } from './snapshot';
 export { query, find, parseSelector } from './query';
 export { click, typeText, press, scrollTo, clickRect, ClickRect } from './actions';
+export { CursorPolicy, CursorMode, CursorMovementMetadata, CursorPathPoint } from './cursor-policy';
 export { waitFor } from './wait';
 export { expect, Expectation } from './expect';
 export { Inspector, inspect } from './inspector';
diff --git a/src/tracing/types.ts b/src/tracing/types.ts
index 7dcac36b..3c133ea8 100644
--- a/src/tracing/types.ts
+++ b/src/tracing/types.ts
@@ -85,6 +85,8 @@ export interface ExecutionData {
   text?: string;
   key?: string;
   error?: string;
+  /** Optional action metadata (e.g., human-like cursor movement path) */
+  cursor?: Record;
 }
 
 /**
@@ -160,6 +162,8 @@ export interface TraceEventData {
   text?: string;
   key?: string;
   success?: boolean;
+  /** Optional action metadata (e.g., human-like cursor movement path) */
+  cursor?: Record;
 
   // Error data
   error?: string;
diff --git a/src/types.ts b/src/types.ts
index 761af962..8a8c0fff 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -177,6 +177,8 @@ export interface ActionResult {
   outcome?: 'navigated' | 'dom_updated' | 'no_change' | 'error';
   url_changed?: boolean;
   snapshot_after?: Snapshot;
+  /** Optional: action metadata (e.g., human-like cursor movement path) */
+  cursor?: Record;
   error?: {
     code: string;
     reason: string;
diff --git a/src/utils/action-executor.ts b/src/utils/action-executor.ts
index 7ddd00f4..44b6fd23 100644
--- a/src/utils/action-executor.ts
+++ b/src/utils/action-executor.ts
@@ -68,6 +68,7 @@ export class ActionExecutor {
         success: result.success,
         action: 'click',
         elementId,
+        cursor: result.cursor,
         outcome: result.outcome ||
(result.success ? 'Clicked successfully' : 'Click failed'),
         durationMs: result.duration_ms,
         attempt: 0,
diff --git a/src/utils/trace-event-builder.ts b/src/utils/trace-event-builder.ts
index f3879515..fbdd4c3f 100644
--- a/src/utils/trace-event-builder.ts
+++ b/src/utils/trace-event-builder.ts
@@ -79,6 +79,10 @@ export class TraceEventBuilder {
       duration_ms: result.durationMs,
     };
 
+    if (result.cursor !== undefined) { // only attach cursor metadata when the action produced it
+      (execData as any).cursor = result.cursor; // NOTE(review): ExecutionData gains `cursor` in this patch; if execData has that type the cast is redundant - confirm
+    }
+
     // Add optional exec fields
     if (result.elementId !== undefined) {
       execData.element_id = result.elementId;
diff --git a/tests/actions.test.ts b/tests/actions.test.ts
index d04c9aa3..232e3e75 100644
--- a/tests/actions.test.ts
+++ b/tests/actions.test.ts
@@ -23,10 +23,9 @@ describe('Actions', () => {
 
       try {
         const page = getPageOrThrow(browser);
-        await page.goto('https://example.com');
-        await page.waitForLoadState('networkidle', { timeout: 10000 });
+        await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 });
 
-        const snap = await snapshot(browser);
+        const snap = await snapshot(browser, { screenshot: false, limit: 30 });
         const link = find(snap, 'role=link');
 

@@ -46,10 +45,9 @@ describe('Actions', () => {
 
       try {
         const page = getPageOrThrow(browser);
-        await page.goto('https://example.com');
-        await page.waitForLoadState('networkidle', { timeout: 10000 });
+        await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 });
 
-        const snap = await snapshot(browser);
+        const snap = await snapshot(browser, { screenshot: false, limit: 30 });
         const link = find(snap, 'role=link');
 
         if (link) {
@@ -70,10 +68,9 @@ describe('Actions', () => {
 
       try {
         const page = getPageOrThrow(browser);
-        await page.goto('https://example.com');
-        await page.waitForLoadState('networkidle', { timeout: 10000 });
+        await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 });
 
-        const snap = await snapshot(browser);
+        const snap = await snapshot(browser, {
screenshot: false, limit: 30 });
         const link = find(snap, 'role=link');
 
         if (link) {
diff --git a/tests/backends/actions.test.ts b/tests/backends/actions.test.ts
index f2d0c7e7..901ed377 100644
--- a/tests/backends/actions.test.ts
+++ b/tests/backends/actions.test.ts
@@ -104,6 +104,26 @@ describe('backends/actions', () => {
       expect(result.error?.code).toBe('click_failed');
       expect(result.error?.reason).toContain('Click failed');
     });
+
+    it('should support human-like cursor movement policy (opt-in)', async () => {
+      const result = await click(mockBackend, [100, 200], 'left', 1, true, {
+        mode: 'human',
+        steps: 6,
+        durationMs: 0, // zero duration: the move helper skips its per-step sleeps
+        pauseBeforeClickMs: 0, // zero pause: no wait before the click either
+        jitterPx: 0,
+        overshootPx: 0,
+        seed: 123, // fixed seed keeps the generated path reproducible
+      });
+
+      expect(result.success).toBe(true);
+      expect(result.cursor).toBeDefined();
+      expect(result.cursor?.mode).toBe('human');
+      // Multiple moves (not just one)
+      expect(mockBackend.mouseMove.mock.calls.length).toBeGreaterThan(1);
+      // Final click should still happen at the target coordinates
+      expect(mockBackend.mouseClick).toHaveBeenCalledWith(100, 200, 'left', 1);
+    });
   });
 
   describe('typeText', () => {
diff --git a/tests/browser.test.ts b/tests/browser.test.ts
index ac6ddc59..b3e7a34b 100644
--- a/tests/browser.test.ts
+++ b/tests/browser.test.ts
@@ -4,6 +4,9 @@
 
 import { SentienceBrowser } from '../src/browser';
 import { chromium, BrowserContext, Page } from 'playwright';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
 
 describe('Browser Proxy Support', () => {
   describe('Proxy Parsing', () => {
@@ -208,7 +211,8 @@ describe('Browser Proxy Support', () => {
     it('should create SentienceBrowser from existing context', async () => {
       // Auto-detect headless mode (headless in CI, headed locally)
       const isCI = process.env.CI === 'true' || process.env.CI === '1';
-      const context = await chromium.launchPersistentContext('', {
+      const userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-pw-')); // fresh temp profile dir for this test
+      const context = await
chromium.launchPersistentContext(userDataDir, { headless: isCI, viewport: { width: 1600, height: 900 }, }); @@ -224,8 +228,7 @@ describe('Browser Proxy Support', () => { if (!page) { throw new Error('Browser page is not available'); } - await page.goto('https://example.com'); - await page.waitForLoadState('networkidle', { timeout: 10000 }); + await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 }); const viewportSize = await page.evaluate(() => ({ width: window.innerWidth, @@ -236,13 +239,19 @@ describe('Browser Proxy Support', () => { expect(viewportSize.height).toBe(900); } finally { await context.close(); + try { + fs.rmSync(userDataDir, { recursive: true, force: true }); + } catch { + // ignore + } } - }, 30000); + }, 60000); it('should accept API key configuration', async () => { // Auto-detect headless mode (headless in CI, headed locally) const isCI = process.env.CI === 'true' || process.env.CI === '1'; - const context = await chromium.launchPersistentContext('', { + const userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-pw-')); + const context = await chromium.launchPersistentContext(userDataDir, { headless: isCI, }); @@ -258,8 +267,13 @@ describe('Browser Proxy Support', () => { expect(browser.getContext()).toBe(context); } finally { await context.close(); + try { + fs.rmSync(userDataDir, { recursive: true, force: true }); + } catch { + // ignore + } } - }, 30000); + }, 60000); }); describe('fromPage', () => { diff --git a/tests/snapshot.test.ts b/tests/snapshot.test.ts index a5f1833f..3db2fa3b 100644 --- a/tests/snapshot.test.ts +++ b/tests/snapshot.test.ts @@ -11,10 +11,10 @@ describe('Snapshot', () => { try { const page = getPageOrThrow(browser); - await page.goto('https://example.com'); - await page.waitForLoadState('networkidle', { timeout: 10000 }); + await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 }); - const snap = await snapshot(browser); + // CI hardening: 
avoid screenshot payload + reduce work + const snap = await snapshot(browser, { screenshot: false, limit: 30 }); expect(snap.status).toBe('success'); expect(snap.url).toContain('example.com'); @@ -30,10 +30,9 @@ describe('Snapshot', () => { try { const page = getPageOrThrow(browser); - await page.goto('https://example.com'); - await page.waitForLoadState('networkidle', { timeout: 10000 }); + await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 }); - const snap = await snapshot(browser); + const snap = await snapshot(browser, { screenshot: false, limit: 30 }); if (snap.elements.length > 0) { const element = snap.elements[0]; @@ -53,11 +52,14 @@ describe('Snapshot', () => { try { const page = getPageOrThrow(browser); - await page.goto('https://example.com'); - await page.waitForLoadState('networkidle', { timeout: 10000 }); + await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 }); // Test snapshot with goal - const snap = await snapshot(browser, { goal: 'Find the main heading' }); + const snap = await snapshot(browser, { + goal: 'Find the main heading', + screenshot: false, + limit: 30, + }); expect(snap.status).toBe('success'); expect(snap.url).toContain('example.com'); diff --git a/tests/video-recording.test.ts b/tests/video-recording.test.ts index 746a1218..c5e9b5ee 100644 --- a/tests/video-recording.test.ts +++ b/tests/video-recording.test.ts @@ -40,8 +40,9 @@ describe('video recording', () => { try { const page = getPageOrThrow(browser); - await page.goto('https://example.com'); - await page.waitForLoadState('domcontentloaded'); + await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 }); + // Give the recorder a moment to capture some frames (CI can be slow) + await page.waitForTimeout(750); const videoPath = await browser.close(); @@ -57,7 +58,7 @@ describe('video recording', () => { await browser.close(); throw error; } - }); + }, 120000); it('should record 
video with custom resolution', async () => { const videoDir = path.join(tempDir, 'recordings');