From b8f55023380a2de9c94705a8cc169db699cd8074 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sat, 14 Feb 2026 12:33:09 -0800 Subject: [PATCH] scroll verification --- CHANGELOG.md | 24 ++++ README.md | 18 +++ src/agent-runtime.ts | 138 +++++++++++++++++++++- src/tracing/types.ts | 2 +- tests/agent-runtime-scroll-verify.test.ts | 86 ++++++++++++++ tests/mocks/browser-mock.ts | 13 ++ 6 files changed, 278 insertions(+), 3 deletions(-) create mode 100644 tests/agent-runtime-scroll-verify.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0681884..68e1532 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,30 @@ All notable changes to `@predicatelabs/sdk` will be documented in this file. ## Unreleased +### 2026-02-13 + +#### Expanded deterministic verifications (adaptive resnapshotting) + +You can now make `.eventually()` verifications more reliable on long / virtualized pages by **automatically increasing the snapshot `limit` across retries** (so later attempts see more elements). + +- **AgentRuntime assertions**: `AssertionHandle.eventually({ snapshotLimitGrowth: ... })` +- **Expect-style verifications**: `expect(...).eventually({ snapshotLimitGrowth: ... 
})` +- **Commit**: `5f011b878c9a1dcb8c5976b365f0f80b7abe135c` + +**Example** + +```ts +await dbg.check(exists("text~'Checkout'"), 'checkout_visible', true).eventually({ + timeoutMs: 12_000, + snapshotLimitGrowth: { + startLimit: 60, + step: 40, + maxLimit: 220, + applyOn: 'only_on_fail', // default; or "all" + }, +}); +``` + ### Deprecated - Soft-deprecated legacy `Sentience*` class names in favor of `Predicate*` names: diff --git a/README.md b/README.md index 5f41e7c..ae1c37d 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,24 @@ async function loginExample(): Promise { - Fluent assertion DSL via `expect(...)` - Retrying verification via `runtime.check(...).eventually(...)` +### Scroll verification (prevent no-op scroll drift) + +A common agent failure mode is “scrolling” without the UI actually advancing (overlays, nested scrollers, focus issues). Use `AgentRuntime.scrollBy(...)` to deterministically verify scroll _had effect_ via before/after `scrollTop`. + +```ts +runtime.beginStep('Scroll the page and verify it moved'); +const ok = await runtime.scrollBy(600, { + verify: true, + minDeltaPx: 50, + label: 'scroll_effective', + required: true, + timeoutMs: 5_000, +}); +if (!ok) { + throw new Error('Scroll had no effect (likely blocked by overlay or nested scroller).'); +} +``` + ### Explained failure - JSONL trace events (`Tracer` + `JsonlTraceSink`) diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts index 610a67f..5fd2e8b 100644 --- a/src/agent-runtime.ts +++ b/src/agent-runtime.ts @@ -501,7 +501,8 @@ export class AgentRuntime { label: string, required: boolean, extra: Record | null, - recordInStep: boolean + recordInStep: boolean, + kind: 'assert' | 'task_done' | 'captcha' | 'scroll' = 'assert' ): void { const details = { ...(outcome.details || {}) } as Record; @@ -541,7 +542,7 @@ export class AgentRuntime { this.tracer.emit( 'verification', { - kind: 'assert', + kind, ...record, }, this.stepId || undefined @@ -784,6 +785,139 @@ export class 
AgentRuntime {
     }
   }
 
+  /**
+   * Read the page's current vertical scroll offset (best-effort).
+   * Returns 0 when the value cannot be read or is not a finite number.
+   */
+  private async getScrollTop(): Promise<number> {
+    try {
+      const v = await this.page.evaluate(
+        "(() => { const el = document.scrollingElement || document.documentElement || document.body; return (el && typeof el.scrollTop === 'number') ? el.scrollTop : (typeof window.scrollY === 'number' ? window.scrollY : 0); })()"
+      );
+      const n = Number(v);
+      return Number.isFinite(n) ? n : 0;
+    } catch {
+      // Deliberate best-effort: a failed evaluate is reported as offset 0.
+      return 0;
+    }
+  }
+
+  /**
+   * Scroll and (optionally) verify the scroll had effect (delta in scrollTop).
+   *
+   * This targets a common drift failure mode: "we scrolled" but the UI didn't advance.
+   *
+   * @param deltaY - Vertical wheel delta passed to `mouse.wheel` / `window.scrollBy`.
+   * @param opts - Defaults: `verify` true, `minDeltaPx` 50, `label` 'scroll_effective',
+   *   `required` true, `timeoutMs` 10_000, `pollMs` 250, `jsFallback` true.
+   *   NOTE(review): `x` is forwarded as the wheel's deltaX and `y` is never read — confirm.
+   * @returns true when verification is off or |scrollTop delta| >= `minDeltaPx` before timeout.
+   */
+  async scrollBy(
+    deltaY: number,
+    opts?: {
+      verify?: boolean;
+      minDeltaPx?: number;
+      label?: string;
+      required?: boolean;
+      timeoutMs?: number;
+      pollMs?: number;
+      x?: number;
+      y?: number;
+      jsFallback?: boolean;
+    }
+  ): Promise<boolean> {
+    const verify = opts?.verify ?? true;
+    const minDeltaPx = opts?.minDeltaPx ?? 50;
+    const label = opts?.label ?? 'scroll_effective';
+    const required = opts?.required ?? true;
+    const timeoutMs = opts?.timeoutMs ?? 10_000;
+    const pollMs = opts?.pollMs ?? 250;
+    const jsFallback = opts?.jsFallback ?? true;
+    // Shared by the no-mouse path and the one-shot JS fallback below.
+    const jsScroll = `window.scrollBy(0, ${Number(deltaY)})`;
+
+    await this.recordAction(`scrollBy(deltaY=${deltaY})`, this.page?.url?.());
+
+    const doWheel = async (): Promise<void> => {
+      const mouse: any = (this.page as any)?.mouse;
+      if (mouse && typeof mouse.wheel === 'function') {
+        // Playwright: mouse.wheel(deltaX, deltaY)
+        await mouse.wheel(opts?.x ?? 0, deltaY);
+        return;
+      }
+      // Fallback: request scroll via JS (best-effort)
+      await this.page.evaluate(jsScroll);
+    };
+
+    if (!verify) {
+      await doWheel();
+      return true;
+    }
+
+    const beforeTop = await this.getScrollTop();
+    await doWheel();
+
+    const start = Date.now();
+    let usedJsFallback = false;
+
+    // Records one 'scroll' verification; only failures carry a reason and timeout_ms.
+    const report = (passed: boolean, afterTop: number, delta: number): void => {
+      const details = {
+        deltaY,
+        min_delta_px: minDeltaPx,
+        before_top: beforeTop,
+        after_top: afterTop,
+        delta_px: delta,
+        js_fallback_used: usedJsFallback,
+        ...(passed ? {} : { timeout_ms: timeoutMs }),
+      };
+      const reason = passed
+        ? ''
+        : `scroll delta ${delta.toFixed(1)}px < min_delta_px=${minDeltaPx.toFixed(1)}px`;
+      this._recordOutcome(
+        { passed, reason, details } as any,
+        label,
+        required,
+        null,
+        true,
+        'scroll'
+      );
+    };
+
+    while (true) {
+      const afterTop = await this.getScrollTop();
+      const delta = afterTop - beforeTop;
+
+      if (Math.abs(delta) >= minDeltaPx) {
+        report(true, afterTop, delta);
+        return true;
+      }
+
+      if (Date.now() - start >= timeoutMs) {
+        report(false, afterTop, delta);
+        if (required) {
+          this.persistFailureArtifacts(`scroll_failed:${label}`).catch(() => {
+            // best-effort
+          });
+        }
+        return false;
+      }
+
+      // One-shot fallback when the wheel moved the page by less than 1px.
+      if (jsFallback && !usedJsFallback && Math.abs(delta) < 1.0) {
+        usedJsFallback = true;
+        try {
+          await this.page.evaluate(jsScroll);
+        } catch {
+          // ignore
+        }
+      }
+
+      await new Promise<void>(resolve => setTimeout(resolve, pollMs));
+    }
+  }
+
   /**
    * List open tabs in the current browser context.
*/
diff --git a/src/tracing/types.ts b/src/tracing/types.ts
index a942c92..e2171ca 100644
--- a/src/tracing/types.ts
+++ b/src/tracing/types.ts
@@ -184,7 +184,7 @@ export interface TraceEventData {
   verify?: VerifyData;
 
   // Verification event fields (for assertion loop)
-  kind?: 'assert' | 'task_done' | 'captcha';
+  kind?: 'assert' | 'task_done' | 'captcha' | 'scroll';
   label?: string;
   passed?: boolean;
   required?: boolean;
diff --git a/tests/agent-runtime-scroll-verify.test.ts b/tests/agent-runtime-scroll-verify.test.ts
new file mode 100644
index 0000000..9d0690b
--- /dev/null
+++ b/tests/agent-runtime-scroll-verify.test.ts
@@ -0,0 +1,86 @@
+import { AgentRuntime } from '../src/agent-runtime';
+import { TraceSink } from '../src/tracing/sink';
+import { Tracer } from '../src/tracing/tracer';
+import { MockPage } from './mocks/browser-mock';
+
+/** In-memory trace sink: keeps every emitted event so tests can inspect them. */
+class MockSink extends TraceSink {
+  public events: any[] = [];
+  emit(event: Record<string, any>): void {
+    this.events.push(event);
+  }
+  async close(): Promise<void> {
+    // no-op
+  }
+  getSinkType(): string {
+    return 'MockSink';
+  }
+}
+
+/**
+ * Build an AgentRuntime over a fresh MockPage, a stub browser whose snapshot is
+ * empty, and a tracer wired to a MockSink; a 'scroll' step is already begun.
+ */
+function makeRuntime(): { runtime: AgentRuntime; sink: MockSink; page: any } {
+  const sink = new MockSink();
+  const tracer = new Tracer('test-run', sink);
+  const page = new MockPage('https://example.com') as any;
+  const browserLike = {
+    snapshot: async () => ({
+      status: 'success',
+      url: 'https://example.com',
+      elements: [],
+      timestamp: 't1',
+    }),
+  };
+
+  const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
+  runtime.beginStep('scroll');
+  return { runtime, sink, page };
+}
+
+/** True when the sink captured a 'scroll' verification event with this outcome. */
+function sawScrollVerification(sink: MockSink, passed: boolean): boolean {
+  return sink.events.some(
+    e => e.type === 'verification' && e.data?.kind === 'scroll' && e.data?.passed === passed
+  );
+}
+
+describe('AgentRuntime.scrollBy() deterministic verification', () => {
+  it('passes when scrollTop delta >= minDeltaPx', async () => {
+    const { runtime, sink, page } = makeRuntime();
+
+    const ok = await runtime.scrollBy(200, {
+      verify: true,
+      minDeltaPx: 50,
+      timeoutMs: 1000,
+      pollMs: 1,
+    });
+
+    expect(ok).toBe(true);
+    expect(sawScrollVerification(sink, true)).toBe(true);
+    // The scroll must have gone through the mouse wheel exactly once.
+    expect(page.mouseWheelCalls).toEqual([{ dx: 0, dy: 200 }]);
+  });
+
+  it('fails when scrollTop delta stays below minDeltaPx', async () => {
+    const { runtime, sink, page } = makeRuntime();
+
+    // Override wheel to be a no-op scroll (simulates blocked scroll).
+    (page.mouse as any).wheel = async (_dx: number, _dy: number) => {
+      // no-op
+    };
+
+    const ok = await runtime.scrollBy(200, {
+      verify: true,
+      minDeltaPx: 50,
+      timeoutMs: 30,
+      pollMs: 1,
+      jsFallback: false,
+    });
+
+    // jsFallback is off, so nothing can move the mock's scrollTop before the timeout.
+    expect(ok).toBe(false);
+    expect(sawScrollVerification(sink, false)).toBe(true);
+  });
+});
diff --git a/tests/mocks/browser-mock.ts b/tests/mocks/browser-mock.ts
index cc1d4c3..fbbf114 100644
--- a/tests/mocks/browser-mock.ts
+++ b/tests/mocks/browser-mock.ts
@@ -14,11 +14,13 @@ import { Page } from 'playwright';
  */
 export class MockPage implements IPage {
   private _url: string = 'https://example.com';
+  private _scrollTop: number = 0;
   public evaluateCalls: Array<{ script: string | Function; args: any[] }> = [];
   public gotoCalls: Array<{ url: string; options?: any }> = [];
   public waitForFunctionCalls: Array<{ fn: () => boolean | Promise; options?: any }> = [];
   public waitForTimeoutCalls: number[] = [];
   public mouseClickCalls: Array<{ x: number; y: number }> = [];
+  public mouseWheelCalls: Array<{ dx: number; dy: number }> = [];
   public keyboardTypeCalls: string[] = [];
   public keyboardPressCalls: string[] = [];
   public screenshotCalls: Array<{ options?: any }> = [];
@@ -51,6 +53,13 @@ export class MockPage implements IPage {
       } as T;
     }
 
+    if (
+      typeof script === 'string' &&
+      (script.includes('scrollTop') ||
script.includes('scrollY')) + ) { + return this._scrollTop as any as T; + } + return {} as T; } @@ -79,6 +88,10 @@ export class MockPage implements IPage { click: async (x: number, y: number): Promise => { this.mouseClickCalls.push({ x, y }); }, + wheel: async (dx: number, dy: number): Promise => { + this.mouseWheelCalls.push({ dx, dy }); + this._scrollTop += dy; + }, }; keyboard = {