Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions examples/human-cursor-click-demo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
* Human-like cursor movement demo (TypeScript SDK).
*
* This example shows how to opt into human-like mouse movement before clicking,
* and how to read the returned cursor metadata for tracing/debugging.
*/

import { CursorPolicy, SentienceBrowser, click, find, snapshot } from '../src';

/**
 * Runs the demo end to end: starts a browser, opens example.com, finds the
 * first link in a snapshot, and clicks it using a human-like cursor policy.
 *
 * The click result and the cursor metadata returned with it are printed to the
 * console. The browser is closed on every path once it has been started.
 */
async function main() {
  const browser = new SentienceBrowser();
  await browser.start();

  try {
    // Checked inside the try block so that a started browser is still closed
    // by the finally clause when no page is available.
    const page = browser.getPage();
    if (!page) throw new Error('Browser started but no page is available');

    await page.goto('https://example.com');
    await page.waitForLoadState('networkidle', { timeout: 10_000 });

    const snap = await snapshot(browser);
    const link = find(snap, 'role=link');
    if (!link) throw new Error('No link found on page');

    const policy: CursorPolicy = {
      mode: 'human',
      steps: 18,
      durationMs: 350,
      jitterPx: 1.2,
      overshootPx: 6.0,
      pauseBeforeClickMs: 30,
      seed: 123, // optional: deterministic for demos/tests
    };

    // Positional arguments: useMouse = true, takeSnapshot = false, then the policy.
    const result = await click(browser, link.id, true, false, policy);
    console.log('clicked:', result.success, 'outcome:', result.outcome);
    console.log('cursor meta:', result.cursor);
  } finally {
    await browser.close();
  }
}

// Script entry point: log any failure raised by the demo and exit with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});

50 changes: 48 additions & 2 deletions src/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,41 @@ import { IBrowser } from './protocols/browser-protocol';
import { ActionResult, Snapshot, BBox } from './types';
import { snapshot } from './snapshot';
import { BrowserEvaluator } from './utils/browser-evaluator';
import { CursorPolicy, buildHumanCursorPath } from './cursor-policy';

// Last cursor position recorded for each page object. It is read as the starting
// point of the next human-like move and updated after moves/clicks. A WeakMap is
// used so an entry does not keep its page object alive.
const cursorPosByPage: WeakMap<any, { x: number; y: number }> = new WeakMap();

/**
 * Replays a generated human-like cursor path on `page` before a click.
 *
 * Does nothing (and returns `undefined`) unless `cursorPolicy.mode` is
 * `'human'`. The move starts from the last position recorded for this page;
 * when none is recorded it starts from the viewport center, or from (0, 0) if
 * the page reports no viewport size.
 *
 * @param page - Page object exposing `mouse.move`, `waitForTimeout` and,
 *   optionally, `viewportSize`.
 * @param target - Point the cursor should end on.
 * @param cursorPolicy - Movement policy; omitted or non-human means no move.
 * @returns The movement metadata produced by `buildHumanCursorPath`, or
 *   `undefined` when no human-like move was performed.
 */
async function humanMoveIfEnabled(
  page: any,
  target: { x: number; y: number },
  cursorPolicy?: CursorPolicy
): Promise<Record<string, any> | undefined> {
  if (!(cursorPolicy && cursorPolicy.mode === 'human')) {
    return undefined;
  }

  // Resolve the starting point: remembered position, viewport center, or origin.
  let origin = cursorPosByPage.get(page);
  if (!origin) {
    const viewport = page.viewportSize ? page.viewportSize() : null;
    origin = viewport ? { x: viewport.width / 2, y: viewport.height / 2 } : { x: 0, y: 0 };
  }

  const meta = buildHumanCursorPath([origin.x, origin.y], [target.x, target.y], cursorPolicy);
  const waypoints = meta.path || [];
  const totalMs = meta.duration_ms || 0;
  // Spread the total duration evenly across the waypoints.
  const stepDelayMs = totalMs > 0 ? totalMs / Math.max(1, waypoints.length) : 0;

  for (const { x, y } of waypoints) {
    await page.mouse.move(x, y);
    if (stepDelayMs > 0) {
      await page.waitForTimeout(stepDelayMs);
    }
  }

  const settleMs = meta.pause_before_click_ms;
  if (settleMs > 0) {
    await page.waitForTimeout(settleMs);
  }

  // Remember where the cursor ended up so the next move starts from here.
  cursorPosByPage.set(page, { x: target.x, y: target.y });
  return meta as any;
}

export interface ClickRect {
x: number;
Expand Down Expand Up @@ -109,7 +144,8 @@ export async function click(
browser: IBrowser,
elementId: number,
useMouse: boolean = true,
takeSnapshot: boolean = false
takeSnapshot: boolean = false,
cursorPolicy?: CursorPolicy
): Promise<ActionResult> {
const page = browser.getPage();
if (!page) {
Expand All @@ -119,6 +155,7 @@ export async function click(
const urlBefore = page.url();

let success: boolean;
let cursorMeta: Record<string, any> | undefined;

if (useMouse) {
// Hybrid approach: Get element bbox from snapshot, calculate center, use mouse.click()
Expand All @@ -130,9 +167,12 @@ export async function click(
// Calculate center of element bbox
const centerX = element.bbox.x + element.bbox.width / 2;
const centerY = element.bbox.y + element.bbox.height / 2;
cursorMeta = await humanMoveIfEnabled(page, { x: centerX, y: centerY }, cursorPolicy);
// Use Playwright's native mouse click for realistic simulation
await page.mouse.click(centerX, centerY);
success = true;
// Keep cursor position even when not in human mode (for future moves)
cursorPosByPage.set(page, { x: centerX, y: centerY });
} else {
// Fallback to JS click if element not found in snapshot
success = await BrowserEvaluator.evaluateWithNavigationFallback(
Expand Down Expand Up @@ -208,6 +248,7 @@ export async function click(
outcome,
url_changed: urlChanged,
snapshot_after: snapshotAfter,
cursor: cursorMeta,
error: success
? undefined
: { code: 'click_failed', reason: 'Element not found or not clickable' },
Expand Down Expand Up @@ -486,7 +527,8 @@ export async function clickRect(
rect: ClickRect | BBox,
highlight: boolean = true,
highlightDuration: number = 2.0,
takeSnapshot: boolean = false
takeSnapshot: boolean = false,
cursorPolicy?: CursorPolicy
): Promise<ActionResult> {
const page = browser.getPage();
if (!page) {
Expand Down Expand Up @@ -529,6 +571,7 @@ export async function clickRect(
// Calculate center of rectangle
const centerX = x + w / 2;
const centerY = y + h / 2;
let cursorMeta: Record<string, any> | undefined;

// Show highlight before clicking (if enabled)
if (highlight) {
Expand All @@ -541,8 +584,10 @@ export async function clickRect(
let success: boolean;
let errorMsg: string | undefined;
try {
cursorMeta = await humanMoveIfEnabled(page, { x: centerX, y: centerY }, cursorPolicy);
await page.mouse.click(centerX, centerY);
success = true;
cursorPosByPage.set(page, { x: centerX, y: centerY });
} catch (error) {
success = false;
errorMsg = error instanceof Error ? error.message : String(error);
Expand Down Expand Up @@ -577,6 +622,7 @@ export async function clickRect(
outcome,
url_changed: urlChanged,
snapshot_after: snapshotAfter,
cursor: cursorMeta,
error: success ? undefined : { code: 'click_failed', reason: errorMsg || 'Click failed' },
};
}
3 changes: 3 additions & 0 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export interface AgentActResult {
goal: string;
error?: string;
message?: string;
/** Optional action metadata (e.g., human-like cursor movement path) */
cursor?: Record<string, any>;
}

/**
Expand Down Expand Up @@ -298,6 +300,7 @@ export class SentienceAgent {
text: result.text,
key: result.key,
success: result.success,
cursor: result.cursor,
},
stepId
);
Expand Down
46 changes: 41 additions & 5 deletions src/backends/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,34 @@

import type { ActionResult, BBox } from '../types';
import type { BrowserBackend, MouseButton } from './protocol';
import type { CursorPolicy } from '../cursor-policy';
import { buildHumanCursorPath } from '../cursor-policy';

// Last cursor position recorded for each backend object; read as the starting
// point of the next human-like move. A WeakMap is used so an entry does not keep
// its backend object alive.
const cursorPosByBackend: WeakMap<object, { x: number; y: number }> = new WeakMap();

/**
 * Replays a generated human-like cursor path through `backend` before a click.
 *
 * Does nothing (and returns `undefined`) unless `cursorPolicy.mode` is
 * `'human'`. The move starts from the last position recorded for this backend;
 * when none is recorded the start point equals the target itself.
 *
 * @param backend - Backend used to issue `mouseMove` calls.
 * @param target - Point the cursor should end on.
 * @param cursorPolicy - Movement policy; omitted or non-human means no move.
 * @returns The movement metadata produced by `buildHumanCursorPath`, or
 *   `undefined` when no human-like move was performed.
 */
async function humanMoveBackendIfEnabled(
  backend: BrowserBackend,
  target: { x: number; y: number },
  cursorPolicy?: CursorPolicy
): Promise<Record<string, any> | undefined> {
  if (!(cursorPolicy && cursorPolicy.mode === 'human')) {
    return undefined;
  }

  const key = backend as unknown as object;
  let origin = cursorPosByBackend.get(key);
  if (!origin) {
    // No remembered position for this backend: start at the target.
    origin = { x: target.x, y: target.y };
  }

  const meta = buildHumanCursorPath([origin.x, origin.y], [target.x, target.y], cursorPolicy);
  const waypoints = meta.path || [];
  const totalMs = meta.duration_ms || 0;
  // Spread the total duration evenly across the waypoints.
  const stepDelayMs = totalMs > 0 ? totalMs / Math.max(1, waypoints.length) : 0;

  for (const { x, y } of waypoints) {
    await backend.mouseMove(x, y);
    if (stepDelayMs > 0) {
      await sleep(stepDelayMs);
    }
  }

  const settleMs = meta.pause_before_click_ms;
  if (settleMs > 0) {
    await sleep(settleMs);
  }

  // Remember where the cursor ended up so the next move starts from here.
  cursorPosByBackend.set(key, { x: target.x, y: target.y });
  return meta as any;
}

/**
* Target type for coordinate resolution.
Expand Down Expand Up @@ -117,26 +145,34 @@ export async function click(
target: ClickTarget,
button: MouseButton = 'left',
clickCount: number = 1,
moveFirst: boolean = true
moveFirst: boolean = true,
cursorPolicy?: CursorPolicy
): Promise<ActionResult> {
const startTime = Date.now();

const [x, y] = resolveCoordinates(target);
let cursorMeta: Record<string, any> | undefined;

try {
// Optional mouse move for hover effects
if (moveFirst) {
await backend.mouseMove(x, y);
await sleep(20); // Brief pause for hover
cursorMeta = await humanMoveBackendIfEnabled(backend, { x, y }, cursorPolicy);
if (!cursorMeta) {
await backend.mouseMove(x, y);
await sleep(20); // Brief pause for hover
}
}

// Perform click
await backend.mouseClick(x, y, button, clickCount);

return successResult(measureDuration(startTime));
return { ...successResult(measureDuration(startTime)), cursor: cursorMeta };
} catch (e) {
const reason = e instanceof Error ? e.message : String(e);
return errorResult(measureDuration(startTime), 'click_failed', reason);
return {
...errorResult(measureDuration(startTime), 'click_failed', reason),
cursor: cursorMeta,
};
}
}

Expand Down
140 changes: 140 additions & 0 deletions src/cursor-policy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/** Cursor movement mode: 'instant' (no multi-step motion) or 'human' (generated multi-point path). */
export type CursorMode = 'instant' | 'human';

/**
* Policy for cursor movement.
*
* - mode="instant": current behavior (single click without multi-step motion)
* - mode="human": move with a curved path + optional jitter/overshoot
*
* Every field except `mode` is optional. The defaults noted below are the ones
* applied by `buildHumanCursorPath` in this file when a field is omitted.
*/
export interface CursorPolicy {
/** Selects instant or human-like movement. */
mode: CursorMode;
/** Number of curve samples (floored). Default: clamp(10 + distance / 25, 12, 40). */
steps?: number;
/** Total movement duration in milliseconds (floored). Default: clamp(120 + distance * 0.9, 120, 700). */
durationMs?: number;
/** Maximum jitter scale applied to sampled points; it fades to zero at the end of the curve. Default: 1.0. */
jitterPx?: number;
/** Distance past the target at which the curve ends before landing on the target; values <= 0 disable overshoot. Default: 6.0. */
overshootPx?: number;
/** Pause in milliseconds after the move and before the click. Default: 20. */
pauseBeforeClickMs?: number;
/** Determinism hook for tests/repro: seeds the path RNG. When omitted, a seed is derived from Date.now(). */
seed?: number;
}

/** One point of a generated cursor path. */
export interface CursorPathPoint {
/** Horizontal coordinate, in the same coordinate space as the start/target passed to the path builder. */
x: number;
/** Vertical coordinate, in the same coordinate space as the start/target passed to the path builder. */
y: number;
/** Progress along the path; `buildHumanCursorPath` writes values from 0 to 1, rounded to 4 decimals. */
t?: number;
}

/** Description of a generated cursor movement, as returned by `buildHumanCursorPath`. */
export interface CursorMovementMetadata {
/** Movement mode that produced this metadata. */
mode: CursorMode;
/** Starting point of the movement. */
from: { x: number; y: number };
/** Target point of the movement. */
to: { x: number; y: number };
/** Step count used for the curve (policy value floored, or the distance-based default). */
steps: number;
/** Total movement duration in milliseconds. */
duration_ms: number;
/** Pause in milliseconds between the end of the move and the click. */
pause_before_click_ms: number;
/** Jitter scale that was applied while sampling the path. */
jitter_px: number;
/** Overshoot distance that was applied (values <= 0 mean no overshoot). */
overshoot_px: number;
/** Points to replay in order; holds at most 64 entries. */
path: CursorPathPoint[];
}

/**
 * Restricts `v` to the range bounded by `lo` and `hi`.
 *
 * @param v - Value to restrict.
 * @param lo - Lower bound.
 * @param hi - Upper bound.
 * @returns `v` capped at `hi` and then raised to at least `lo`.
 */
function clamp(v: number, lo: number, hi: number): number {
  const cappedAbove = Math.min(hi, v);
  return Math.max(lo, cappedAbove);
}

/**
 * Smoothstep easing: evaluates `3t^2 - 2t^3`, which maps 0 to 0 and 1 to 1.
 *
 * @param t - Linear progress value.
 * @returns The eased progress value.
 */
function easeInOut(t: number): number {
  const squared = t * t;
  return squared * (3 - 2 * t);
}

/**
 * Evaluates a cubic Bezier curve at parameter `t`.
 *
 * @param p0 - Start point.
 * @param p1 - First control point.
 * @param p2 - Second control point.
 * @param p3 - End point.
 * @param t - Curve parameter.
 * @returns The `[x, y]` point on the curve at `t`.
 */
function bezier(
  p0: [number, number],
  p1: [number, number],
  p2: [number, number],
  p3: [number, number],
  t: number
): [number, number] {
  const inv = 1 - t;
  const tSquared = t * t;
  const invSquared = inv * inv;

  // Bernstein basis weights for the four control points.
  const w0 = invSquared * inv;
  const w1 = 3 * invSquared * t;
  const w2 = 3 * inv * tSquared;
  const w3 = tSquared * t;

  const blend = (a: number, b: number, c: number, d: number): number =>
    w0 * a + w1 * b + w2 * c + w3 * d;

  return [blend(p0[0], p1[0], p2[0], p3[0]), blend(p0[1], p1[1], p2[1], p3[1])];
}

/**
 * Simple seeded RNG for reproducibility (mulberry32-style integer mixing).
 *
 * @param seed - Seed value; it is coerced to an unsigned 32-bit integer.
 * @returns A generator function producing numbers in the range [0, 1).
 */
function mulberry32(seed: number): () => number {
  let state = seed >>> 0;

  const next = (): number => {
    state += 0x6d2b79f5;
    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    const scrambled = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
    // Convert to unsigned 32-bit and scale by 2^32.
    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
  };

  return next;
}

/**
 * Draws one value from `rng` and maps it linearly onto the span from `lo` to `hi`.
 *
 * @param rng - Source of random fractions; it is called exactly once.
 * @param lo - Value returned when `rng` yields 0.
 * @param hi - Value approached as `rng` yields values near 1.
 * @returns `lo + (hi - lo) * rng()`.
 */
function randBetween(rng: () => number, lo: number, hi: number): number {
  const span = hi - lo;
  const fraction = rng();
  return lo + span * fraction;
}

/**
 * Builds a curved, human-like cursor path from `start` to `target`.
 *
 * The path is a cubic Bezier curve whose two control points are pushed to
 * opposite sides of the straight line between the endpoints. It is sampled with
 * smoothstep easing and perturbed by random jitter that fades to zero at the
 * end of the curve. When the overshoot distance is positive, the curve ends
 * slightly past the target and a final exact landing point is appended.
 *
 * Defaults for omitted policy fields: `steps` = clamp(10 + dist / 25, 12, 40),
 * `durationMs` = clamp(120 + dist * 0.9, 120, 700), `jitterPx` = 1.0,
 * `overshootPx` = 6.0, `pauseBeforeClickMs` = 20, `seed` = derived from
 * `Date.now()`.
 *
 * The returned `path` never exceeds 64 points and always ends exactly on the
 * target. When more steps are requested than fit, the curve is sampled with
 * fewer, evenly spaced samples instead of being cut off partway; `steps` in the
 * result still reports the requested (floored) value.
 *
 * @param start - Starting `[x, y]` position.
 * @param target - Destination `[x, y]` position.
 * @param policy - Movement tuning. `policy.mode` is not inspected here; the
 *   result is always tagged `'human'`.
 * @returns Metadata describing the movement, including the points to replay.
 */
export function buildHumanCursorPath(
  start: [number, number],
  target: [number, number],
  policy: CursorPolicy
): CursorMovementMetadata {
  // Upper bound on the number of points returned in `path`.
  const maxPathPoints = 64;

  const seed = policy.seed ?? (Date.now() & 0xffffffff);
  const rng = mulberry32(seed);

  const [x0, y0] = start;
  const [x1, y1] = target;
  const dx = x1 - x0;
  const dy = y1 - y0;
  const dist0 = Math.hypot(dx, dy);
  // Avoid dividing by zero when start and target coincide.
  const dist = dist0 < 1e-6 ? 1 : dist0;

  const steps = Math.floor(policy.steps ?? clamp(10 + dist / 25, 12, 40));
  const durationMs = Math.floor(policy.durationMs ?? clamp(120 + dist * 0.9, 120, 700));

  // Unit vector along the movement and its perpendicular.
  const ux = dx / dist;
  const uy = dy / dist;
  const px = -uy;
  const py = ux;

  let curveMag = clamp(dist / 3.5, 10, 140);
  curveMag *= randBetween(rng, 0.5, 1.2);

  const c1: [number, number] = [x0 + dx * 0.25 + px * curveMag, y0 + dy * 0.25 + py * curveMag];
  const c2: [number, number] = [x0 + dx * 0.75 - px * curveMag, y0 + dy * 0.75 - py * curveMag];

  const overshoot = policy.overshootPx ?? 6.0;
  const hasOvershoot = overshoot > 0;
  const overshootPoint: [number, number] = hasOvershoot
    ? [x1 + ux * overshoot, y1 + uy * overshoot]
    : [x1, y1];

  const jitterPx = policy.jitterPx ?? 1.0;
  const pts: CursorPathPoint[] = [];

  // Cap the sample count up front (reserving a slot for the landing point when
  // overshooting) so the curve is always sampled over its full length rather
  // than generated in full and then truncated partway.
  const sampleCount = Math.min(steps, maxPathPoints - (hasOvershoot ? 1 : 0));

  for (let i = 0; i < sampleCount; i++) {
    const tRaw = sampleCount <= 1 ? 0 : i / (sampleCount - 1);
    const t = easeInOut(tRaw);
    const [bx, by] = bezier([x0, y0], c1, c2, overshootPoint, t);
    // Jitter fades linearly to zero at the end of the curve.
    const jitterScale = jitterPx * (1 - tRaw) * 0.9;
    const jx = randBetween(rng, -jitterScale, jitterScale);
    const jy = randBetween(rng, -jitterScale, jitterScale);
    pts.push({ x: bx + jx, y: by + jy, t: Math.round(tRaw * 10_000) / 10_000 });
  }

  // Land exactly on the target: always after an overshoot, and otherwise
  // whenever the sampled curve did not already end there (e.g. steps <= 1).
  const last = pts[pts.length - 1];
  const endsOnTarget = last !== undefined && last.x === x1 && last.y === y1;
  if (hasOvershoot || !endsOnTarget) {
    if (pts.length >= maxPathPoints) pts.pop();
    pts.push({ x: x1, y: y1, t: 1.0 });
  }

  return {
    mode: 'human',
    from: { x: x0, y: y0 },
    to: { x: x1, y: y1 },
    steps,
    duration_ms: durationMs,
    pause_before_click_ms: policy.pauseBeforeClickMs ?? 20,
    jitter_px: jitterPx,
    overshoot_px: overshoot,
    path: pts,
  };
}
Loading
Loading