Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@ All notable changes to `@predicatelabs/sdk` will be documented in this file.

## Unreleased

### 2026-02-13

#### Expanded deterministic verifications (adaptive resnapshotting)

You can now make `.eventually()` verifications more reliable on long / virtualized pages by **automatically increasing the snapshot `limit` across retries** (so later attempts see more elements).

- **AgentRuntime assertions**: `AssertionHandle.eventually({ snapshotLimitGrowth: ... })`
- **Expect-style verifications**: `expect(...).eventually({ snapshotLimitGrowth: ... })`
- **Commit**: `5f011b878c9a1dcb8c5976b365f0f80b7abe135c`

**Example**

```ts
await dbg.check(exists("text~'Checkout'"), 'checkout_visible', true).eventually({
timeoutMs: 12_000,
snapshotLimitGrowth: {
startLimit: 60,
step: 40,
maxLimit: 220,
applyOn: 'only_on_fail', // default; or "all"
},
});
```

### Deprecated

- Soft-deprecated legacy `Sentience*` class names in favor of `Predicate*` names:
Expand Down
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,24 @@ async function loginExample(): Promise<void> {
- Fluent assertion DSL via `expect(...)`
- Retrying verification via `runtime.check(...).eventually(...)`

### Scroll verification (prevent no-op scroll drift)

A common agent failure mode is “scrolling” without the UI actually advancing (for example because of overlays, nested scrollers, or focus issues). Use `AgentRuntime.scrollBy(...)` to deterministically verify that the scroll _had an effect_ by comparing `scrollTop` before and after.

```ts
runtime.beginStep('Scroll the page and verify it moved');
const ok = await runtime.scrollBy(600, {
verify: true,
minDeltaPx: 50,
label: 'scroll_effective',
required: true,
timeoutMs: 5_000,
});
if (!ok) {
throw new Error('Scroll had no effect (likely blocked by overlay or nested scroller).');
}
```

### Explained failure

- JSONL trace events (`Tracer` + `JsonlTraceSink`)
Expand Down
138 changes: 136 additions & 2 deletions src/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,8 @@ export class AgentRuntime {
label: string,
required: boolean,
extra: Record<string, any> | null,
recordInStep: boolean
recordInStep: boolean,
kind: 'assert' | 'task_done' | 'captcha' | 'scroll' = 'assert'
): void {
const details = { ...(outcome.details || {}) } as Record<string, any>;

Expand Down Expand Up @@ -541,7 +542,7 @@ export class AgentRuntime {
this.tracer.emit(
'verification',
{
kind: 'assert',
kind,
...record,
},
this.stepId || undefined
Expand Down Expand Up @@ -784,6 +785,139 @@ export class AgentRuntime {
}
}

/**
 * Best-effort read of the page's current vertical scroll offset.
 *
 * The in-page probe prefers `scrollTop` of `document.scrollingElement`
 * (then `document.documentElement`, then `document.body`) and falls back to
 * `window.scrollY`, yielding 0 when neither is numeric.
 *
 * @returns The offset as a finite number; 0 when evaluation throws or the
 *          result does not coerce to a finite number.
 */
private async getScrollTop(): Promise<number> {
  const probe =
    "(() => { const el = document.scrollingElement || document.documentElement || document.body; return (el && typeof el.scrollTop === 'number') ? el.scrollTop : (typeof window.scrollY === 'number' ? window.scrollY : 0); })()";

  let offset = 0;
  try {
    // Coercion stays inside the try so a value that cannot be converted is
    // treated the same way as a failed evaluation.
    const candidate = Number(await this.page.evaluate(probe));
    if (Number.isFinite(candidate)) {
      offset = candidate;
    }
  } catch {
    // Deliberately swallowed: callers treat "unknown" as offset 0.
  }
  return offset;
}

/**
 * Scroll and (optionally) verify the scroll had effect (delta in scrollTop).
 *
 * This targets a common drift failure mode: "we scrolled" but the UI didn't advance.
 *
 * The scroll is issued through `page.mouse.wheel` when that function exists,
 * otherwise through `window.scrollBy` evaluated in the page. With verification
 * enabled, the scroll offset is sampled before the scroll and then polled until
 * it has moved by at least `minDeltaPx` in either direction or `timeoutMs` has
 * elapsed. The result is recorded as a verification outcome of kind `scroll`.
 *
 * @param deltaY - Vertical scroll amount handed to the wheel / `window.scrollBy`.
 * @param opts - Optional settings:
 *   - `verify` (default `true`): when `false`, scroll once and return `true` unchecked.
 *   - `minDeltaPx` (default `50`): minimum absolute scroll-offset change that counts as success.
 *   - `label` (default `'scroll_effective'`): label attached to the recorded outcome.
 *   - `required` (default `true`): forwarded to the recorded outcome; on failure it also
 *     triggers a best-effort `persistFailureArtifacts` call.
 *   - `timeoutMs` (default `10_000`): how long to keep polling before recording a failure.
 *   - `pollMs` (default `250`): delay between polls (never sleeps past the timeout).
 *   - `jsFallback` (default `true`): if the offset has moved by less than 1px, issue
 *     `window.scrollBy` once as a fallback.
 *   - `x`: forwarded as the first (horizontal delta) argument of `mouse.wheel`, default 0.
 *   - `y`: not read by this method.
 *     NOTE(review): confirm whether `x`/`y` were intended as pointer coordinates.
 * @returns `true` when verification is disabled or the scroll offset moved enough;
 *          `false` when the timeout elapsed first.
 */
async scrollBy(
  deltaY: number,
  opts?: {
    verify?: boolean;
    minDeltaPx?: number;
    label?: string;
    required?: boolean;
    timeoutMs?: number;
    pollMs?: number;
    x?: number;
    y?: number;
    jsFallback?: boolean;
  }
): Promise<boolean> {
  const verify = opts?.verify ?? true;
  const minDeltaPx = opts?.minDeltaPx ?? 50;
  const label = opts?.label ?? 'scroll_effective';
  const required = opts?.required ?? true;
  const timeoutMs = opts?.timeoutMs ?? 10_000;
  const pollMs = opts?.pollMs ?? 250;
  const jsFallback = opts?.jsFallback ?? true;

  await this.recordAction(`scrollBy(deltaY=${deltaY})`, this.page?.url?.());

  // Number(...) keeps anything non-numeric out of the evaluated source text.
  const jsScrollScript = `window.scrollBy(0, ${Number(deltaY)})`;

  const doWheel = async (): Promise<void> => {
    const mouse: any = (this.page as any)?.mouse;
    if (mouse && typeof mouse.wheel === 'function') {
      // Playwright: mouse.wheel(deltaX, deltaY)
      await mouse.wheel(opts?.x ?? 0, deltaY);
      return;
    }
    // Fallback: request scroll via JS (best-effort)
    await this.page.evaluate(jsScrollScript);
  };

  if (!verify) {
    await doWheel();
    return true;
  }

  const beforeTop = await this.getScrollTop();
  await doWheel();

  const start = Date.now();
  let usedJsFallback = false;

  // Single place that shapes the recorded outcome, so the pass and fail
  // payloads cannot drift apart. `timeout_ms` is only reported on failure.
  const recordScrollOutcome = (passed: boolean, afterTop: number, delta: number): void => {
    const details: Record<string, any> = {
      deltaY,
      min_delta_px: minDeltaPx,
      before_top: beforeTop,
      after_top: afterTop,
      delta_px: delta,
      js_fallback_used: usedJsFallback,
    };
    if (!passed) {
      details.timeout_ms = timeoutMs;
    }
    this._recordOutcome(
      {
        passed,
        reason: passed
          ? ''
          : `scroll delta ${delta.toFixed(1)}px < min_delta_px=${minDeltaPx.toFixed(1)}px`,
        details,
      } as any,
      label,
      required,
      null,
      true,
      'scroll'
    );
  };

  while (true) {
    const afterTop = await this.getScrollTop();
    const delta = afterTop - beforeTop;

    // Direction-agnostic: any movement of at least minDeltaPx counts.
    if (Math.abs(delta) >= minDeltaPx) {
      recordScrollOutcome(true, afterTop, delta);
      return true;
    }

    if (Date.now() - start >= timeoutMs) {
      recordScrollOutcome(false, afterTop, delta);
      if (required) {
        // Fire-and-forget: artifact persistence must never mask the scroll failure.
        void this.persistFailureArtifacts(`scroll_failed:${label}`).catch(() => {
          // best-effort
        });
      }
      return false;
    }

    // Only fall back when the page has not moved at all (< 1px), and only once.
    // NOTE(review): this can fire on the first poll, right after the wheel event;
    // confirm that is acceptable if the wheel scroll lands late.
    if (jsFallback && !usedJsFallback && Math.abs(delta) < 1.0) {
      usedJsFallback = true;
      try {
        await this.page.evaluate(jsScrollScript);
      } catch {
        // ignore
      }
    }

    // Do not sleep past the deadline: with a short timeout and a long poll
    // interval the call would otherwise overshoot timeoutMs by up to pollMs.
    const remainingMs = timeoutMs - (Date.now() - start);
    const sleepMs = remainingMs < pollMs ? Math.max(0, remainingMs) : pollMs;
    await new Promise(resolve => setTimeout(resolve, sleepMs));
  }
}

/**
* List open tabs in the current browser context.
*/
Expand Down
2 changes: 1 addition & 1 deletion src/tracing/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ export interface TraceEventData {
verify?: VerifyData;

// Verification event fields (for assertion loop)
kind?: 'assert' | 'task_done' | 'captcha';
kind?: 'assert' | 'task_done' | 'captcha' | 'scroll';
label?: string;
passed?: boolean;
required?: boolean;
Expand Down
86 changes: 86 additions & 0 deletions tests/agent-runtime-scroll-verify.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { AgentRuntime } from '../src/agent-runtime';
import { TraceSink } from '../src/tracing/sink';
import { Tracer } from '../src/tracing/tracer';
import { MockPage } from './mocks/browser-mock';

/**
 * In-memory trace sink for these tests: emitted events are collected in
 * `events` so assertions can inspect them afterwards.
 */
class MockSink extends TraceSink {
  /** Every event passed to `emit`, in call order. */
  public events: any[] = [];

  /** Identifies this sink implementation. */
  getSinkType(): string {
    return 'MockSink';
  }

  /** Records the event instead of writing it anywhere. */
  emit(traceEvent: Record<string, any>): void {
    this.events.push(traceEvent);
  }

  /** Nothing to release; resolves immediately. */
  close(): Promise<void> {
    return Promise.resolve();
  }
}

describe('AgentRuntime.scrollBy() deterministic verification', () => {
  /** Stub browser whose snapshot is always the same empty success payload. */
  const stubBrowser = {
    snapshot: async () => ({
      status: 'success',
      url: 'https://example.com',
      elements: [],
      timestamp: 't1',
    }),
  };

  /**
   * Wires an AgentRuntime to the given page and a recording sink, and opens
   * the step the scroll verification is recorded under.
   */
  function startRuntime(page: any): { sink: MockSink; runtime: AgentRuntime } {
    const sink = new MockSink();
    const tracer = new Tracer('test-run', sink);
    const runtime = new AgentRuntime(stubBrowser as any, page as any, tracer);
    runtime.beginStep('scroll');
    return { sink, runtime };
  }

  /** True when the sink captured a `scroll` verification with the given result. */
  function sawScrollVerification(sink: MockSink, passed: boolean): boolean {
    return sink.events.some(
      e => e.type === 'verification' && e.data?.kind === 'scroll' && e.data?.passed === passed
    );
  }

  it('passes when scrollTop delta >= minDeltaPx', async () => {
    const page = new MockPage('https://example.com') as any;
    const { sink, runtime } = startRuntime(page);

    const ok = await runtime.scrollBy(200, {
      verify: true,
      minDeltaPx: 50,
      timeoutMs: 1000,
      pollMs: 1,
    });

    expect(ok).toBe(true);
    expect(sawScrollVerification(sink, true)).toBe(true);
  });

  it('fails when scrollTop delta stays below minDeltaPx', async () => {
    const page = new MockPage('https://example.com') as any;

    // Override wheel to be a no-op scroll (simulates blocked scroll).
    (page.mouse as any).wheel = async (_dx: number, _dy: number) => {
      // no-op
    };

    const { sink, runtime } = startRuntime(page);

    // jsFallback is disabled so nothing else can move the mocked scroll offset.
    const ok = await runtime.scrollBy(200, {
      verify: true,
      minDeltaPx: 50,
      timeoutMs: 30,
      pollMs: 1,
      jsFallback: false,
    });

    expect(ok).toBe(false);
    expect(sawScrollVerification(sink, false)).toBe(true);
  });
});
13 changes: 13 additions & 0 deletions tests/mocks/browser-mock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ import { Page } from 'playwright';
*/
export class MockPage implements IPage {
private _url: string = 'https://example.com';
private _scrollTop: number = 0;
public evaluateCalls: Array<{ script: string | Function; args: any[] }> = [];
public gotoCalls: Array<{ url: string; options?: any }> = [];
public waitForFunctionCalls: Array<{ fn: () => boolean | Promise<boolean>; options?: any }> = [];
public waitForTimeoutCalls: number[] = [];
public mouseClickCalls: Array<{ x: number; y: number }> = [];
public mouseWheelCalls: Array<{ dx: number; dy: number }> = [];
public keyboardTypeCalls: string[] = [];
public keyboardPressCalls: string[] = [];
public screenshotCalls: Array<{ options?: any }> = [];
Expand Down Expand Up @@ -51,6 +53,13 @@ export class MockPage implements IPage {
} as T;
}

if (
typeof script === 'string' &&
(script.includes('scrollTop') || script.includes('scrollY'))
) {
return this._scrollTop as any as T;
}

return {} as T;
}

Expand Down Expand Up @@ -79,6 +88,10 @@ export class MockPage implements IPage {
click: async (x: number, y: number): Promise<void> => {
this.mouseClickCalls.push({ x, y });
},
wheel: async (dx: number, dy: number): Promise<void> => {
this.mouseWheelCalls.push({ dx, dy });
this._scrollTop += dy;
},
};

keyboard = {
Expand Down
Loading