Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
522 changes: 522 additions & 0 deletions src/actions.ts

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions src/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
* ```
*/

import * as fs from 'fs';
import * as path from 'path';
import { Page } from 'playwright';
import { Snapshot } from './types';
import { AssertContext, Predicate } from './verification';
Expand Down Expand Up @@ -336,6 +338,8 @@ export class AgentRuntime {
stepIndex: number = 0;
/** Most recent snapshot (for assertion context) */
lastSnapshot: Snapshot | null = null;
/** Best-effort download records (Playwright downloads) */
private downloads: Array<Record<string, any>> = [];

/** Failure artifact buffer (Phase 1) */
private artifactBuffer: FailureArtifactBuffer | null = null;
Expand Down Expand Up @@ -438,6 +442,15 @@ export class AgentRuntime {
this.browser = browser;
this.page = page;
this.tracer = tracer;

// Best-effort download tracking (does not change behavior unless a download occurs).
try {
this.page.on('download', download => {
void this.trackDownload(download);
});
} catch {
// ignore
}
}

/**
Expand Down Expand Up @@ -466,9 +479,48 @@ export class AgentRuntime {
snapshot: this.lastSnapshot,
url,
stepId: this.stepId,
downloads: this.downloads,
};
}

/**
 * Record a download event and update the record once the download settles.
 *
 * A record is pushed to `this.downloads` immediately with `status: 'started'`,
 * then mutated in place to `'completed'` (with `path`, and best-effort
 * `size_bytes` / `mime_type`) or `'failed'` (with `error`). This method never
 * rejects, so it is safe to call fire-and-forget from an event handler.
 *
 * @param download - Download-like object; Playwright-style accessor methods
 *   (`suggestedFilename()`, `url()`, `path()`) are preferred, with plain
 *   `suggested_filename` / `url` properties used as a fallback.
 */
private async trackDownload(download: any): Promise<void> {
  const rec: Record<string, any> = {
    status: 'started',
    suggested_filename: download?.suggestedFilename?.() ?? download?.suggested_filename,
    url: download?.url?.() ?? download?.url,
  };
  this.downloads.push(rec);
  try {
    const p = (await download.path?.()) as string | null;
    rec.status = 'completed';
    if (!p) return;
    rec.path = p;

    // Best-effort size; a stat failure must not prevent MIME detection below.
    try {
      rec.size_bytes = Number(fs.statSync(p).size);
    } catch {
      // ignore
    }

    // Best-effort mime type (no new deps).
    const mimeByExt: Record<string, string> = {
      '.pdf': 'application/pdf',
      '.txt': 'text/plain',
      '.csv': 'text/csv',
      '.json': 'application/json',
      '.zip': 'application/zip',
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
    };
    const extensionOf = (name: unknown): string =>
      typeof name === 'string' ? path.extname(name).toLowerCase() : '';
    // Playwright documents that the saved file name is a random GUID, so the
    // on-disk path usually has no extension; fall back to the suggested
    // filename, which carries the real one.
    const ext = extensionOf(p) || extensionOf(rec.suggested_filename);
    const mime = mimeByExt[ext];
    if (mime) rec.mime_type = mime;
  } catch (e: any) {
    rec.status = 'failed';
    rec.error = String(e?.message ?? e);
  }
}

/**
* Take a snapshot of the current page state.
*
Expand Down
18 changes: 17 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,21 @@
export { SentienceBrowser } from './browser';
export { snapshot, SnapshotOptions } from './snapshot';
export { query, find, parseSelector } from './query';
export { click, typeText, press, scrollTo, clickRect, ClickRect } from './actions';
export {
back,
check,
clear,
click,
clickRect,
ClickRect,
press,
scrollTo,
selectOption,
submit,
typeText,
uncheck,
uploadFile,
} from './actions';
export { CursorPolicy, CursorMode, CursorMovementMetadata, CursorPathPoint } from './cursor-policy';
export { waitFor } from './wait';
export { expect, Expectation } from './expect';
Expand Down Expand Up @@ -52,6 +66,7 @@ export {
AssertOutcome,
AssertContext,
Predicate,
downloadCompleted,
urlMatches,
urlContains,
exists,
Expand All @@ -72,6 +87,7 @@ export {
export { AgentRuntime, AssertionHandle, AssertionRecord, EventuallyOptions } from './agent-runtime';
export { RuntimeAgent } from './runtime-agent';
export type { RuntimeStep, StepVerification } from './runtime-agent';
export { parseVisionExecutorAction, executeVisionExecutorAction } from './vision-executor';
export * from './captcha/types';
export * from './captcha/strategies';

Expand Down
15 changes: 10 additions & 5 deletions src/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
* Snapshot functionality - calls window.sentience.snapshot() or server-side API
*/

import { SentienceBrowser } from './browser';
import { IBrowser } from './protocols/browser-protocol';
import { Snapshot } from './types';
import * as fs from 'fs';
Expand Down Expand Up @@ -212,7 +211,7 @@
() => typeof (window as any).sentience !== 'undefined',
5000
);
} catch (e) {
} catch (_e) {

Check warning on line 214 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (macos-latest, 20)

'_e' is defined but never used

Check warning on line 214 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 20)

'_e' is defined but never used

Check warning on line 214 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (windows-latest, 20)

'_e' is defined but never used
throw new Error(
'Sentience extension failed to inject. Cannot collect raw data for API processing.'
);
Expand All @@ -239,9 +238,15 @@
// Use raw_elements (raw data) instead of elements (processed data)
// Server validates API key and applies proprietary ranking logic
const clientMetrics = rawResult?.diagnostics?.metrics;
const clientDiagnostics = rawResult?.diagnostics?.captcha
? { captcha: rawResult.diagnostics.captcha }
: undefined;
const diag = rawResult?.diagnostics;
const clientDiagnostics =
diag?.captcha || diag?.requires_vision || diag?.requires_vision_reason
? {
captcha: diag?.captcha,
requires_vision: diag?.requires_vision,
requires_vision_reason: diag?.requires_vision_reason,
}
: undefined;
const payload = {
raw_elements: rawResult.raw_elements || [], // Raw data needed for server processing
url: rawResult.url || '',
Expand Down
3 changes: 3 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ export interface SnapshotDiagnostics {
reasons?: string[];
metrics?: SnapshotDiagnosticsMetrics;
captcha?: CaptchaDiagnostics;
/** P1-01: forward-compatible vision recommendation signal (optional) */
requires_vision?: boolean | null;
requires_vision_reason?: string | null;
}

/**
Expand Down
28 changes: 28 additions & 0 deletions src/verification.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ export interface AssertContext {
url: string | null;
/** Current step identifier (for trace correlation) */
stepId: string | null;
/** Optional: non-snapshot state signals for verification (e.g., downloads). */
downloads?: Array<Record<string, any>> | null;
}

/**
Expand All @@ -68,6 +70,32 @@ export interface AssertContext {
*/
export type Predicate = (ctx: AssertContext) => AssertOutcome;

/**
 * Build a predicate that succeeds once at least one download has finished.
 *
 * The predicate scans `AssertContext.downloads` (treated as empty when absent)
 * for the first record whose `status` is `'completed'`. When
 * `filenameSubstring` is given and non-empty, the record's `filename` (or,
 * failing that, `suggested_filename`) must also contain it.
 *
 * Notes:
 * - The context's `downloads` list must be populated by the runtime/browser;
 *   this predicate only reads it.
 *
 * @param filenameSubstring - Optional substring the download's name must contain.
 * @returns A predicate reporting the matching record in `details.download` on
 *   success, or the filter and the inspected list on failure.
 */
export function downloadCompleted(filenameSubstring?: string): Predicate {
  return (ctx: AssertContext): AssertOutcome => {
    const downloads = ctx.downloads ?? [];

    const isMatch = (entry: Record<string, any>): boolean => {
      const finished = String(entry?.status ?? '') === 'completed';
      if (!finished) return false;
      if (!filenameSubstring) return true;
      const name = String(entry?.filename ?? entry?.suggested_filename ?? '');
      return name.includes(filenameSubstring);
    };

    const hit = downloads.find(isMatch);
    if (hit !== undefined) {
      return { passed: true, reason: '', details: { download: hit } };
    }

    let reason = 'no completed downloads';
    if (filenameSubstring) {
      reason = `no completed download matched: ${filenameSubstring}`;
    }
    return { passed: false, reason, details: { filenameSubstring, downloads } };
  };
}

/**
* Create a predicate that checks if current URL matches a regex pattern.
*
Expand Down
76 changes: 76 additions & 0 deletions src/vision-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/**
* Vision executor primitives (shared parsing/execution helpers).
*
* This is used by higher-level agents when falling back to a vision model to propose
* coordinate-based actions.
*/

/** The set of action names a vision executor response can be parsed into. */
export type VisionExecutorActionKind =
  | 'click_xy'
  | 'click_rect'
  | 'press'
  | 'type'
  | 'finish';

/**
 * A single parsed vision action: the action name plus its arguments.
 */
export interface VisionExecutorAction {
/** Which action to perform. */
kind: VisionExecutorActionKind;
/**
 * Kind-specific arguments. As produced by `parseVisionExecutorAction`:
 * `key` for `press`, `text` for `type`, `x`/`y` for `click_xy`,
 * `x`/`y`/`w`/`h` for `click_rect`, and an empty object for `finish`.
 */
args: Record<string, any>;
}

/**
 * Parse a vision model's textual reply into a structured action.
 *
 * Markdown code-fence markers are removed and the remainder is trimmed, then
 * matched (case-insensitively, whole string) against the supported forms:
 * `FINISH()`, `PRESS("key")`, `TYPE("text")`, `CLICK_XY(x, y)` and
 * `CLICK_RECT(x, y, w, h)`. Numeric arguments are converted with `Number`.
 *
 * @param text - Raw model output.
 * @returns The parsed action.
 * @throws Error if the text matches none of the supported forms; the message
 *   includes at most the first 200 characters of the cleaned text.
 */
export function parseVisionExecutorAction(text: string): VisionExecutorAction {
  const withoutFences = String(text || '').replace(/```[\w]*\n?/g, '');
  const cleaned = withoutFences.trim();

  if (/^FINISH\s*\(\s*\)\s*$/i.test(cleaned)) {
    return { kind: 'finish', args: {} };
  }

  const pressMatch = /^PRESS\s*\(\s*["']([^"']+)["']\s*\)\s*$/i.exec(cleaned);
  if (pressMatch) {
    return { kind: 'press', args: { key: pressMatch[1] } };
  }

  const typeMatch = /^TYPE\s*\(\s*["']([\s\S]*?)["']\s*\)\s*$/i.exec(cleaned);
  if (typeMatch) {
    return { kind: 'type', args: { text: typeMatch[1] } };
  }

  const pointMatch =
    /^CLICK_XY\s*\(\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\)\s*$/i.exec(cleaned);
  if (pointMatch) {
    const [, rawX, rawY] = pointMatch;
    return { kind: 'click_xy', args: { x: Number(rawX), y: Number(rawY) } };
  }

  const rectMatch =
    /^CLICK_RECT\s*\(\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\)\s*$/i.exec(
      cleaned
    );
  if (rectMatch) {
    const [, rawX, rawY, rawW, rawH] = rectMatch;
    return {
      kind: 'click_rect',
      args: { x: Number(rawX), y: Number(rawY), w: Number(rawW), h: Number(rawH) },
    };
  }

  throw new Error(`unrecognized vision action: ${cleaned.slice(0, 200)}`);
}

/**
 * Carry out a parsed vision action.
 *
 * - `click_xy` calls `backend.mouse_click(x, y)`.
 * - `click_rect` calls `backend.mouse_click` at the rectangle's center
 *   (`x + w / 2`, `y + h / 2`).
 * - `press` calls `page.keyboard.press(key)` and therefore needs `page`.
 * - `type` calls `backend.type_text(text)`.
 * - `finish` does nothing.
 *
 * Numeric arguments are coerced with `Number` and string arguments with
 * `String` before being passed on.
 *
 * @param params - The `backend` to drive, an optional `page` (needed for
 *   `press`), and the `action` to run.
 * @throws Error if `press` is requested without a `page`, or if the action
 *   kind is not one of the kinds listed above.
 */
export async function executeVisionExecutorAction(params: {
  backend: any;
  page?: any;
  action: VisionExecutorAction;
}): Promise<void> {
  const { backend, page, action } = params;

  switch (action.kind) {
    case 'click_xy': {
      await backend.mouse_click(Number(action.args.x), Number(action.args.y));
      return;
    }
    case 'click_rect': {
      // Click the middle of the rectangle.
      const centerX = Number(action.args.x) + Number(action.args.w) / 2;
      const centerY = Number(action.args.y) + Number(action.args.h) / 2;
      await backend.mouse_click(centerX, centerY);
      return;
    }
    case 'press': {
      if (!page) {
        throw new Error('PRESS requires a Playwright page');
      }
      await page.keyboard.press(String(action.args.key));
      return;
    }
    case 'type': {
      await backend.type_text(String(action.args.text));
      return;
    }
    case 'finish':
      return;
    default:
      throw new Error(`unknown vision action kind: ${(action as any).kind}`);
  }
}
Loading
Loading