From e672b0773b2b0211e4aa6c5ceef98f61fe929b4d Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 22 Jan 2026 17:01:53 -0800 Subject: [PATCH 1/3] add verification/assertion results to trace --- src/agent.ts | 14 ++++++++++++++ src/utils/trace-event-builder.ts | 19 +++++++++++++++++-- src/visual-agent.ts | 14 ++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index a6f2754..c80706f 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -339,6 +339,19 @@ export class SentienceAgent { if (this.tracer) { const preUrl = snap.url; const postUrl = this.browser.getPage()?.url() || null; + let postSnapshotDigest: string | undefined; + try { + const postSnap = await snapshot(this.browser, { + goal: `${goal} (post)`, + limit: Math.min(this.snapshotLimit, 10), + show_overlay: this.showOverlay, + }); + if (postSnap.status === 'success') { + postSnapshotDigest = TraceEventBuilder.buildSnapshotDigest(postSnap); + } + } catch { + postSnapshotDigest = undefined; + } // Build step_end event using TraceEventBuilder // Use snapWithDiff to include elements with diff_status in pre field @@ -349,6 +362,7 @@ export class SentienceAgent { attempt, preUrl, postUrl, + postSnapshotDigest, snapshot: snapWithDiff, llmResponse, result, diff --git a/src/utils/trace-event-builder.ts b/src/utils/trace-event-builder.ts index ce0204f..a85cd82 100644 --- a/src/utils/trace-event-builder.ts +++ b/src/utils/trace-event-builder.ts @@ -169,12 +169,23 @@ export class TraceEventBuilder { attempt: number; preUrl: string; postUrl: string | null; + postSnapshotDigest?: string; snapshot: Snapshot; llmResponse: LLMResponse; result: AgentActResult; }): TraceEventData { - const { stepId, stepIndex, goal, attempt, preUrl, postUrl, snapshot, llmResponse, result } = - params; + const { + stepId, + stepIndex, + goal, + attempt, + preUrl, + postUrl, + postSnapshotDigest, + snapshot, + llmResponse, + result, + } = params; const snapshotDigest = this.buildSnapshotDigest(snapshot); const llmData = this.buildLLMData(llmResponse); @@ -231,6 +242,7 @@ export class TraceEventBuilder { exec: execData, post: { url: postUrl || undefined, + snapshot_digest: postSnapshotDigest, }, verify: verifyData, }; @@ -297,6 +309,7 @@ export class TraceEventBuilder { attempt: number; preUrl: string | null; postUrl: string | null; + postSnapshotDigest?: string; snapshot?: Snapshot | null; llmResponse?: LLMResponse | null; error: string; @@ -310,6 +323,7 @@ export class TraceEventBuilder { preUrl, postUrl, snapshot, + postSnapshotDigest, llmResponse, error, durationMs, @@ -390,6 +404,7 @@ export class TraceEventBuilder { exec: execData, post: { url: postUrl || undefined, + snapshot_digest: postSnapshotDigest, }, verify: verifyData, }; diff --git a/src/visual-agent.ts b/src/visual-agent.ts index f620e84..b330286 100644 --- a/src/visual-agent.ts +++ b/src/visual-agent.ts @@ -832,6 +832,19 @@ Return ONLY the integer ID number from the label, nothing else.`; const preUrl = snap.url; const page = (this as any).browser.getPage(); const postUrl = page ? page.url() || null : null; + let postSnapshotDigest: string | undefined; + try { + const postSnap = await snapshot((this as any).browser, { + goal: `${goal} (post)`, + limit: Math.min((this as any).snapshotLimit, 10), + show_overlay: (this as any).showOverlay, + }); + if (postSnap.status === 'success') { + postSnapshotDigest = TraceEventBuilder.buildSnapshotDigest(postSnap); + } + } catch { + postSnapshotDigest = undefined; + } // Build complete step_end event // Note: snapshotDigest, llmResponseText, execData, and verifyData are computed @@ -845,6 +858,7 @@ Return ONLY the integer ID number from the label, nothing else.`; attempt: 0, preUrl, postUrl: postUrl || preUrl, + postSnapshotDigest, snapshot: snapWithDiff, llmResponse, result, From 5042d40bf892f6ef4afc34f9f54a7380f367c8c1 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 22 Jan 2026 19:26:30 -0800 Subject: [PATCH 2/3] verification payment step_end in agent runtime --- src/agent-runtime.ts | 92 ++++++++++++++++++++++++++++++++ src/runtime-agent.ts | 55 +++++++++++++------ src/utils/trace-event-builder.ts | 68 +++++++++++++++++++++++ 3 files changed, 199 insertions(+), 16 deletions(-) diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts index 2566a7c..ca3aad9 100644 --- a/src/agent-runtime.ts +++ b/src/agent-runtime.ts @@ -44,6 +44,7 @@ import { Page } from 'playwright'; import { Snapshot } from './types'; import { AssertContext, Predicate } from './verification'; import { Tracer } from './tracing/tracer'; +import { TraceEventBuilder } from './utils/trace-event-builder'; import { LLMProvider } from './llm-provider'; import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts'; import { @@ -338,6 +339,8 @@ export class AgentRuntime { stepIndex: number = 0; /** Most recent snapshot (for assertion context) */ lastSnapshot: Snapshot | null = null; + private stepPreSnapshot: Snapshot | null = null; + private stepPreUrl: string | null = null; /** Best-effort download records (Playwright downloads) */ private downloads: Array> = []; @@ -347,6 +350,8 @@ export class AgentRuntime { /** Assertions accumulated during current step */ private assertionsThisStep: AssertionRecord[] = []; + private stepGoal: string | null = null; + private lastAction: string | null = null; /** Task completion tracking */ private taskDone: boolean = false; private taskDoneLabel: string | null = null; @@ -532,6 +537,10 @@ export class AgentRuntime { async snapshot(options?: Record): Promise { const { _skipCaptchaHandling, ...snapshotOptions } = options || {}; this.lastSnapshot = await this.browser.snapshot(this.page, snapshotOptions); + if (this.lastSnapshot && !this.stepPreSnapshot) { + this.stepPreSnapshot = this.lastSnapshot; + this.stepPreUrl = this.lastSnapshot.url; + } if (!_skipCaptchaHandling) { await this.handleCaptchaIfNeeded(this.lastSnapshot, 'gateway'); } @@ -713,6 +722,7 @@ export class AgentRuntime { * Record an action in the artifact timeline and capture a frame if enabled. */ async recordAction(action: string, url?: string): Promise { + this.lastAction = action; if (!this.artifactBuffer) { return; } @@ -722,6 +732,84 @@ export class AgentRuntime { } } + /** + * Emit a step_end event using TraceEventBuilder. + */ + emitStepEnd(opts: { + action?: string; + success?: boolean; + error?: string; + outcome?: string; + durationMs?: number; + attempt?: number; + verifyPassed?: boolean; + verifySignals?: Record; + postUrl?: string; + postSnapshotDigest?: string; + }): any { + const goal = this.stepGoal || ''; + const preSnap = this.stepPreSnapshot || this.lastSnapshot; + const preUrl = this.stepPreUrl || preSnap?.url || this.page?.url?.() || ''; + const postUrl = opts.postUrl || this.page?.url?.() || this.lastSnapshot?.url || preUrl; + + const preDigest = preSnap ? TraceEventBuilder.buildSnapshotDigest(preSnap) : undefined; + const postDigest = + opts.postSnapshotDigest || + (this.lastSnapshot ? TraceEventBuilder.buildSnapshotDigest(this.lastSnapshot) : undefined); + + const urlChanged = Boolean(preUrl && postUrl && String(preUrl) !== String(postUrl)); + const assertionsData = this.getAssertionsForStepEnd(); + + const signals = { ...(opts.verifySignals || {}) } as Record; + if (signals.url_changed === undefined) { + signals.url_changed = urlChanged; + } + if (opts.error && signals.error === undefined) { + signals.error = opts.error; + } + if (assertionsData.task_done !== undefined) { + signals.task_done = assertionsData.task_done; + } + if (assertionsData.task_done_label) { + signals.task_done_label = assertionsData.task_done_label; + } + + const verifyPassed = + opts.verifyPassed !== undefined ? opts.verifyPassed : this.requiredAssertionsPassed(); + + const execData = { + success: opts.success !== undefined ? opts.success : verifyPassed, + action: opts.action || this.lastAction || 'unknown', + outcome: opts.outcome || '', + duration_ms: opts.durationMs, + error: opts.error, + }; + + const verifyData = { + passed: Boolean(verifyPassed), + signals, + }; + + const stepEndData = TraceEventBuilder.buildRuntimeStepEndData({ + stepId: this.stepId || '', + stepIndex: this.stepIndex, + goal, + attempt: opts.attempt ?? 0, + preUrl, + postUrl, + preSnapshotDigest: preDigest, + postSnapshotDigest: postDigest, + execData, + verifyData, + assertions: assertionsData.assertions, + taskDone: assertionsData.task_done, + taskDoneLabel: assertionsData.task_done_label, + }); + + this.tracer.emit('step_end', stepEndData, this.stepId || undefined); + return stepEndData; + } + private async captureArtifactFrame(): Promise { if (!this.artifactBuffer) { return; @@ -797,6 +885,10 @@ export class AgentRuntime { beginStep(goal: string, stepIndex?: number): string { // Clear previous step state this.assertionsThisStep = []; + this.stepPreSnapshot = null; + this.stepPreUrl = null; + this.stepGoal = goal; + this.lastAction = null; // Update step index if (stepIndex !== undefined) { diff --git a/src/runtime-agent.ts b/src/runtime-agent.ts index ead51c0..1f1837c 100644 --- a/src/runtime-agent.ts +++ b/src/runtime-agent.ts @@ -78,26 +78,49 @@ export class RuntimeAgent { const { taskGoal, step } = opts; this.runtime.beginStep(step.goal); - const snap = await this.snapshotWithRamp(step); + let ok = false; + let emitted = false; + try { + const snap = await this.snapshotWithRamp(step); - if (await this.shouldShortCircuitToVision(step, snap)) { - return await this.visionExecutorAttempt({ taskGoal, step, snap }); - } + if (await this.shouldShortCircuitToVision(step, snap)) { + ok = await this.visionExecutorAttempt({ taskGoal, step, snap }); + return ok; + } - // 1) Structured executor attempt. - const action = await this.proposeStructuredAction({ taskGoal, step, snap }); - await this.executeAction(action, snap); - const ok = await this.applyVerifications(step); - if (ok) return true; + // 1) Structured executor attempt. + const action = await this.proposeStructuredAction({ taskGoal, step, snap }); + await this.executeAction(action, snap); + ok = await this.applyVerifications(step); + if (ok) return true; + + // 2) Optional vision executor fallback (bounded). + const enabled = step.visionExecutorEnabled ?? true; + const maxAttempts = step.maxVisionExecutorAttempts ?? 1; + if (enabled && maxAttempts > 0) { + ok = await this.visionExecutorAttempt({ taskGoal, step, snap }); + return ok; + } - // 2) Optional vision executor fallback (bounded). - const enabled = step.visionExecutorEnabled ?? true; - const maxAttempts = step.maxVisionExecutorAttempts ?? 1; - if (enabled && maxAttempts > 0) { - return await this.visionExecutorAttempt({ taskGoal, step, snap }); + return false; + } catch (error: any) { + this.runtime.emitStepEnd({ + success: false, + verifyPassed: false, + error: String(error?.message ?? error), + outcome: 'exception', + }); + emitted = true; + throw error; + } finally { + if (!emitted) { + this.runtime.emitStepEnd({ + success: ok, + verifyPassed: ok, + outcome: ok ? 'ok' : 'verification_failed', + }); + } } - - return false; } private async snapshotWithRamp(step: RuntimeStep): Promise { diff --git a/src/utils/trace-event-builder.ts b/src/utils/trace-event-builder.ts index a85cd82..566b93a 100644 --- a/src/utils/trace-event-builder.ts +++ b/src/utils/trace-event-builder.ts @@ -248,6 +248,74 @@ export class TraceEventBuilder { }; } + /** + * Build step_end event data for AgentRuntime (verification loop). + */ + static buildRuntimeStepEndData(params: { + stepId: string; + stepIndex: number; + goal: string; + attempt: number; + preUrl: string; + postUrl: string; + preSnapshotDigest?: string; + postSnapshotDigest?: string; + execData: TraceEventData['exec']; + verifyData: TraceEventData['verify']; + assertions?: TraceEventData['verify']['signals']['assertions']; + taskDone?: boolean; + taskDoneLabel?: string; + }): TraceEventData { + const { + stepId, + stepIndex, + goal, + attempt, + preUrl, + postUrl, + preSnapshotDigest, + postSnapshotDigest, + execData, + verifyData, + assertions, + taskDone, + taskDoneLabel, + } = params; + + const signals = { ...(verifyData.signals || {}) } as Record; + if (assertions && assertions.length > 0) { + signals.assertions = assertions; + } + if (typeof taskDone === 'boolean') { + signals.task_done = taskDone; + } + if (taskDoneLabel) { + signals.task_done_label = taskDoneLabel; + } + + return { + v: 1, + step_id: stepId, + step_index: stepIndex, + goal, + attempt, + pre: { + url: preUrl, + snapshot_digest: preSnapshotDigest, + }, + llm: {}, + exec: execData, + post: { + url: postUrl, + snapshot_digest: postSnapshotDigest, + }, + verify: { + passed: verifyData.passed, + signals, + }, + }; + } + /** * Build snapshot event data * From f5951d36735ae731bf43e2d8a33be033fe2e8f17 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 22 Jan 2026 19:47:08 -0800 Subject: [PATCH 3/3] update models with modal detection --- src/types.ts | 15 +++++ src/utils/trace-event-builder.ts | 6 +- tests/grid-bounds.test.ts | 108 ++++++++++++++++++++++++++++++- 3 files changed, 125 insertions(+), 4 deletions(-) diff --git a/src/types.ts b/src/types.ts index a786702..0b8bf94 100644 --- a/src/types.ts +++ b/src/types.ts @@ -131,6 +131,16 @@ export interface GridInfo { label?: string | null; /** Whether this grid is the dominant group (main content area) */ is_dominant?: boolean; + + // Z-index and modal detection fields (from gateway/sentience-core) + /** Z-index of this grid (max among elements in this grid) */ + z_index?: number; + /** Global max z-index across ALL grids (for comparison) */ + z_index_max?: number; + /** Whether this grid blocks interaction with content behind it */ + blocks_interaction?: boolean; + /** Ratio of grid area to viewport area (0.0-1.0) */ + viewport_coverage?: number; } export interface Snapshot { @@ -147,6 +157,11 @@ export interface Snapshot { dominant_group_key?: string; // The most common group_key (main content group) // Phase 2: Runtime stability/debug info (confidence/reasons/metrics) diagnostics?: SnapshotDiagnostics; + // Modal detection fields (from gateway) + /** True if a modal/overlay grid was detected */ + modal_detected?: boolean; + /** Array of GridInfo for detected modal grids */ + modal_grids?: GridInfo[]; } export interface SnapshotDiagnosticsMetrics { diff --git a/src/utils/trace-event-builder.ts b/src/utils/trace-event-builder.ts index 566b93a..c4a0790 100644 --- a/src/utils/trace-event-builder.ts +++ b/src/utils/trace-event-builder.ts @@ -262,7 +262,7 @@ export class TraceEventBuilder { postSnapshotDigest?: string; execData: TraceEventData['exec']; verifyData: TraceEventData['verify']; - assertions?: TraceEventData['verify']['signals']['assertions']; + assertions?: NonNullable['signals']['assertions']; taskDone?: boolean; taskDoneLabel?: string; }): TraceEventData { @@ -282,7 +282,7 @@ export class TraceEventBuilder { taskDoneLabel, } = params; - const signals = { ...(verifyData.signals || {}) } as Record; + const signals = { ...(verifyData?.signals || {}) } as Record; if (assertions && assertions.length > 0) { signals.assertions = assertions; } @@ -310,7 +310,7 @@ export class TraceEventBuilder { snapshot_digest: postSnapshotDigest, }, verify: { - passed: verifyData.passed, + passed: verifyData?.passed ?? false, signals, }, }; diff --git a/tests/grid-bounds.test.ts b/tests/grid-bounds.test.ts index d1e2e44..7728fa5 100644 --- a/tests/grid-bounds.test.ts +++ b/tests/grid-bounds.test.ts @@ -2,7 +2,15 @@ * Tests for getGridBounds functionality */ -import { getGridBounds, Snapshot, Element, BBox, LayoutHints, GridPosition } from '../src'; +import { + getGridBounds, + Snapshot, + Element, + BBox, + LayoutHints, + GridPosition, + GridInfo, +} from '../src'; /** * Helper to create test elements with layout data @@ -312,3 +320,101 @@ describe('getGridBounds', () => { expect(result[2].grid_id).toBe(2); }); }); + +describe('GridInfo modal detection fields', () => { + it('should accept GridInfo with z-index and modal fields', () => { + // Test that GridInfo type accepts the new optional fields + const gridInfo = { + grid_id: 1, + bbox: { x: 100, y: 100, width: 500, height: 400 } as BBox, + row_count: 2, + col_count: 3, + item_count: 6, + confidence: 0.95, + z_index: 1000, + z_index_max: 1000, + blocks_interaction: true, + viewport_coverage: 0.25, + }; + + expect(gridInfo.z_index).toBe(1000); + expect(gridInfo.z_index_max).toBe(1000); + expect(gridInfo.blocks_interaction).toBe(true); + expect(gridInfo.viewport_coverage).toBe(0.25); + }); + + it('should accept GridInfo without optional modal fields', () => { + // Test backward compatibility - new fields are optional + const gridInfo = { + grid_id: 0, + bbox: { x: 0, y: 0, width: 100, height: 100 } as BBox, + row_count: 1, + col_count: 1, + item_count: 1, + confidence: 1.0, + }; + + expect(gridInfo.grid_id).toBe(0); + expect(gridInfo.confidence).toBe(1.0); + // Optional fields should be undefined + expect((gridInfo as any).z_index).toBeUndefined(); + expect((gridInfo as any).z_index_max).toBeUndefined(); + expect((gridInfo as any).blocks_interaction).toBeUndefined(); + expect((gridInfo as any).viewport_coverage).toBeUndefined(); + }); +}); + +describe('Snapshot modal detection fields', () => { + it('should accept snapshot without modal fields', () => { + const snapshot: Snapshot = { + status: 'success', + url: 'https://example.com', + elements: [], + }; + + // modal_detected and modal_grids should be undefined by default + expect(snapshot.modal_detected).toBeUndefined(); + expect(snapshot.modal_grids).toBeUndefined(); + }); + + it('should accept snapshot with modal_detected true', () => { + const modalGrid = { + grid_id: 1, + bbox: { x: 200, y: 150, width: 600, height: 400 } as BBox, + row_count: 1, + col_count: 2, + item_count: 5, + confidence: 1.0, + z_index: 1000, + z_index_max: 1000, + blocks_interaction: true, + viewport_coverage: 0.2, + }; + + const snapshot: Snapshot = { + status: 'success', + url: 'https://example.com', + elements: [], + modal_detected: true, + modal_grids: [modalGrid], + }; + + expect(snapshot.modal_detected).toBe(true); + expect(snapshot.modal_grids).toBeDefined(); + expect(snapshot.modal_grids!.length).toBe(1); + expect(snapshot.modal_grids![0].z_index).toBe(1000); + expect(snapshot.modal_grids![0].blocks_interaction).toBe(true); + }); + + it('should accept snapshot with modal_detected false', () => { + const snapshot: Snapshot = { + status: 'success', + url: 'https://example.com', + elements: [], + modal_detected: false, + }; + + expect(snapshot.modal_detected).toBe(false); + expect(snapshot.modal_grids).toBeUndefined(); + }); +});