Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions src/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import { Page } from 'playwright';
import { Snapshot } from './types';
import { AssertContext, Predicate } from './verification';
import { Tracer } from './tracing/tracer';
import { TraceEventBuilder } from './utils/trace-event-builder';
import { LLMProvider } from './llm-provider';
import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts';
import {
Expand Down Expand Up @@ -338,6 +339,8 @@ export class AgentRuntime {
stepIndex: number = 0;
/** Most recent snapshot (for assertion context) */
lastSnapshot: Snapshot | null = null;
private stepPreSnapshot: Snapshot | null = null;
private stepPreUrl: string | null = null;
/** Best-effort download records (Playwright downloads) */
private downloads: Array<Record<string, any>> = [];

Expand All @@ -347,6 +350,8 @@ export class AgentRuntime {

/** Assertions accumulated during current step */
private assertionsThisStep: AssertionRecord[] = [];
private stepGoal: string | null = null;
private lastAction: string | null = null;
/** Task completion tracking */
private taskDone: boolean = false;
private taskDoneLabel: string | null = null;
Expand Down Expand Up @@ -532,6 +537,10 @@ export class AgentRuntime {
async snapshot(options?: Record<string, any>): Promise<Snapshot> {
const { _skipCaptchaHandling, ...snapshotOptions } = options || {};
this.lastSnapshot = await this.browser.snapshot(this.page, snapshotOptions);
if (this.lastSnapshot && !this.stepPreSnapshot) {
this.stepPreSnapshot = this.lastSnapshot;
this.stepPreUrl = this.lastSnapshot.url;
}
if (!_skipCaptchaHandling) {
await this.handleCaptchaIfNeeded(this.lastSnapshot, 'gateway');
}
Expand Down Expand Up @@ -713,6 +722,7 @@ export class AgentRuntime {
* Record an action in the artifact timeline and capture a frame if enabled.
*/
async recordAction(action: string, url?: string): Promise<void> {
this.lastAction = action;
if (!this.artifactBuffer) {
return;
}
Expand All @@ -722,6 +732,84 @@ export class AgentRuntime {
}
}

/**
 * Assemble the `step_end` payload for the current step via TraceEventBuilder,
 * emit it on the tracer, and hand the payload back to the caller.
 *
 * Anything supplied in `opts` wins; everything else is derived from runtime
 * state:
 *  - pre URL/digest come from the step's pre-snapshot, falling back to the
 *    most recent snapshot (and, for the URL, the live page URL);
 *  - post URL/digest come from `opts`, then the live page / latest snapshot;
 *  - `verifyPassed` defaults to `requiredAssertionsPassed()`;
 *  - `success` defaults to the resolved `verifyPassed`;
 *  - `action` defaults to the last recorded action, then `'unknown'`.
 *
 * @param opts Optional overrides for the execution and verification fields.
 * @returns The event data object that was passed to `tracer.emit`.
 */
emitStepEnd(opts: {
  action?: string;
  success?: boolean;
  error?: string;
  outcome?: string;
  durationMs?: number;
  attempt?: number;
  verifyPassed?: boolean;
  verifySignals?: Record<string, any>;
  postUrl?: string;
  postSnapshotDigest?: string;
}): any {
  const stepGoalText = this.stepGoal || '';

  // Pre/post state. Falsy values (including '') fall through to the next source.
  const snapshotBefore = this.stepPreSnapshot || this.lastSnapshot;
  const urlBefore = this.stepPreUrl || snapshotBefore?.url || this.page?.url?.() || '';
  const urlAfter = opts.postUrl || this.page?.url?.() || this.lastSnapshot?.url || urlBefore;

  let digestBefore: string | undefined;
  if (snapshotBefore) {
    digestBefore = TraceEventBuilder.buildSnapshotDigest(snapshotBefore);
  }

  let digestAfter = opts.postSnapshotDigest;
  if (!digestAfter) {
    digestAfter = this.lastSnapshot
      ? TraceEventBuilder.buildSnapshotDigest(this.lastSnapshot)
      : undefined;
  }

  // Only report a URL change when both sides are known and differ.
  const navigated = !!urlBefore && !!urlAfter && String(urlBefore) !== String(urlAfter);
  const stepAssertions = this.getAssertionsForStepEnd();

  // Start from a copy of the caller's signals so the input object is not mutated.
  const mergedSignals: Record<string, any> = { ...(opts.verifySignals || {}) };
  if (mergedSignals.url_changed === undefined) {
    mergedSignals.url_changed = navigated;
  }
  if (opts.error && mergedSignals.error === undefined) {
    mergedSignals.error = opts.error;
  }
  // Task-completion state from the runtime always overrides caller-provided signals.
  if (stepAssertions.task_done !== undefined) {
    mergedSignals.task_done = stepAssertions.task_done;
  }
  if (stepAssertions.task_done_label) {
    mergedSignals.task_done_label = stepAssertions.task_done_label;
  }

  let passed;
  if (opts.verifyPassed !== undefined) {
    passed = opts.verifyPassed;
  } else {
    passed = this.requiredAssertionsPassed();
  }

  const execData = {
    success: opts.success !== undefined ? opts.success : passed,
    action: opts.action || this.lastAction || 'unknown',
    outcome: opts.outcome || '',
    duration_ms: opts.durationMs,
    error: opts.error,
  };
  const verifyData = { passed: Boolean(passed), signals: mergedSignals };

  const payload = TraceEventBuilder.buildRuntimeStepEndData({
    stepId: this.stepId || '',
    stepIndex: this.stepIndex,
    goal: stepGoalText,
    attempt: opts.attempt ?? 0,
    preUrl: urlBefore,
    postUrl: urlAfter,
    preSnapshotDigest: digestBefore,
    postSnapshotDigest: digestAfter,
    execData,
    verifyData,
    assertions: stepAssertions.assertions,
    taskDone: stepAssertions.task_done,
    taskDoneLabel: stepAssertions.task_done_label,
  });

  this.tracer.emit('step_end', payload, this.stepId || undefined);
  return payload;
}

private async captureArtifactFrame(): Promise<void> {
if (!this.artifactBuffer) {
return;
Expand Down Expand Up @@ -797,6 +885,10 @@ export class AgentRuntime {
beginStep(goal: string, stepIndex?: number): string {
// Clear previous step state
this.assertionsThisStep = [];
this.stepPreSnapshot = null;
this.stepPreUrl = null;
this.stepGoal = goal;
this.lastAction = null;

// Update step index
if (stepIndex !== undefined) {
Expand Down
14 changes: 14 additions & 0 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,19 @@ export class SentienceAgent {
if (this.tracer) {
const preUrl = snap.url;
const postUrl = this.browser.getPage()?.url() || null;
let postSnapshotDigest: string | undefined;
try {
const postSnap = await snapshot(this.browser, {
goal: `${goal} (post)`,
limit: Math.min(this.snapshotLimit, 10),
show_overlay: this.showOverlay,
});
if (postSnap.status === 'success') {
postSnapshotDigest = TraceEventBuilder.buildSnapshotDigest(postSnap);
}
} catch {
postSnapshotDigest = undefined;
}

// Build step_end event using TraceEventBuilder
// Use snapWithDiff to include elements with diff_status in pre field
Expand All @@ -349,6 +362,7 @@ export class SentienceAgent {
attempt,
preUrl,
postUrl,
postSnapshotDigest,
snapshot: snapWithDiff,
llmResponse,
result,
Expand Down
55 changes: 39 additions & 16 deletions src/runtime-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,26 +78,49 @@ export class RuntimeAgent {
const { taskGoal, step } = opts;
this.runtime.beginStep(step.goal);

const snap = await this.snapshotWithRamp(step);
let ok = false;
let emitted = false;
try {
const snap = await this.snapshotWithRamp(step);

if (await this.shouldShortCircuitToVision(step, snap)) {
return await this.visionExecutorAttempt({ taskGoal, step, snap });
}
if (await this.shouldShortCircuitToVision(step, snap)) {
ok = await this.visionExecutorAttempt({ taskGoal, step, snap });
return ok;
}

// 1) Structured executor attempt.
const action = await this.proposeStructuredAction({ taskGoal, step, snap });
await this.executeAction(action, snap);
const ok = await this.applyVerifications(step);
if (ok) return true;
// 1) Structured executor attempt.
const action = await this.proposeStructuredAction({ taskGoal, step, snap });
await this.executeAction(action, snap);
ok = await this.applyVerifications(step);
if (ok) return true;

// 2) Optional vision executor fallback (bounded).
const enabled = step.visionExecutorEnabled ?? true;
const maxAttempts = step.maxVisionExecutorAttempts ?? 1;
if (enabled && maxAttempts > 0) {
ok = await this.visionExecutorAttempt({ taskGoal, step, snap });
return ok;
}

// 2) Optional vision executor fallback (bounded).
const enabled = step.visionExecutorEnabled ?? true;
const maxAttempts = step.maxVisionExecutorAttempts ?? 1;
if (enabled && maxAttempts > 0) {
return await this.visionExecutorAttempt({ taskGoal, step, snap });
return false;
} catch (error: any) {
this.runtime.emitStepEnd({
success: false,
verifyPassed: false,
error: String(error?.message ?? error),
outcome: 'exception',
});
emitted = true;
throw error;
} finally {
if (!emitted) {
this.runtime.emitStepEnd({
success: ok,
verifyPassed: ok,
outcome: ok ? 'ok' : 'verification_failed',
});
}
}

return false;
}

private async snapshotWithRamp(step: RuntimeStep): Promise<Snapshot> {
Expand Down
15 changes: 15 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ export interface GridInfo {
label?: string | null;
/** Whether this grid is the dominant group (main content area) */
is_dominant?: boolean;

// Z-index and modal detection fields (from gateway/sentience-core)
/** Z-index of this grid (max among elements in this grid) */
z_index?: number;
/** Global max z-index across ALL grids (for comparison) */
z_index_max?: number;
/** Whether this grid blocks interaction with content behind it */
blocks_interaction?: boolean;
/** Ratio of grid area to viewport area (0.0-1.0) */
viewport_coverage?: number;
}

export interface Snapshot {
Expand All @@ -147,6 +157,11 @@ export interface Snapshot {
dominant_group_key?: string; // The most common group_key (main content group)
// Phase 2: Runtime stability/debug info (confidence/reasons/metrics)
diagnostics?: SnapshotDiagnostics;
// Modal detection fields (from gateway)
/** True if a modal/overlay grid was detected */
modal_detected?: boolean;
/** Array of GridInfo for detected modal grids */
modal_grids?: GridInfo[];
}

export interface SnapshotDiagnosticsMetrics {
Expand Down
87 changes: 85 additions & 2 deletions src/utils/trace-event-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,23 @@ export class TraceEventBuilder {
attempt: number;
preUrl: string;
postUrl: string | null;
postSnapshotDigest?: string;
snapshot: Snapshot;
llmResponse: LLMResponse;
result: AgentActResult;
}): TraceEventData {
const { stepId, stepIndex, goal, attempt, preUrl, postUrl, snapshot, llmResponse, result } =
params;
const {
stepId,
stepIndex,
goal,
attempt,
preUrl,
postUrl,
postSnapshotDigest,
snapshot,
llmResponse,
result,
} = params;

const snapshotDigest = this.buildSnapshotDigest(snapshot);
const llmData = this.buildLLMData(llmResponse);
Expand Down Expand Up @@ -231,11 +242,80 @@ export class TraceEventBuilder {
exec: execData,
post: {
url: postUrl || undefined,
snapshot_digest: postSnapshotDigest,
},
verify: verifyData,
};
}

/**
 * Build `step_end` event data for AgentRuntime (verification loop).
 *
 * The verification signals are copied from `verifyData.signals` and then
 * extended with the step's assertions (only when non-empty), the task-done
 * flag (only when it is a boolean) and the task-done label (only when truthy).
 * The `llm` section is always an empty object in this event.
 *
 * @param params Step identity, pre/post URLs and digests, plus the
 *   already-built exec/verify sections and assertion bookkeeping.
 * @returns Event data with version marker `v: 1`.
 */
static buildRuntimeStepEndData(params: {
  stepId: string;
  stepIndex: number;
  goal: string;
  attempt: number;
  preUrl: string;
  postUrl: string;
  preSnapshotDigest?: string;
  postSnapshotDigest?: string;
  execData: TraceEventData['exec'];
  verifyData: TraceEventData['verify'];
  assertions?: NonNullable<TraceEventData['verify']>['signals']['assertions'];
  taskDone?: boolean;
  taskDoneLabel?: string;
}): TraceEventData {
  const verifySection = params.verifyData;
  const stepAssertions = params.assertions;
  const doneFlag = params.taskDone;
  const doneLabel = params.taskDoneLabel;

  // Copy so the caller's signals object is never mutated.
  const mergedSignals: Record<string, any> = { ...(verifySection?.signals || {}) };
  if ((stepAssertions?.length ?? 0) > 0) {
    mergedSignals.assertions = stepAssertions;
  }
  if (typeof doneFlag === 'boolean') {
    mergedSignals.task_done = doneFlag;
  }
  if (doneLabel) {
    mergedSignals.task_done_label = doneLabel;
  }

  const preSection = {
    url: params.preUrl,
    snapshot_digest: params.preSnapshotDigest,
  };
  const postSection = {
    url: params.postUrl,
    snapshot_digest: params.postSnapshotDigest,
  };

  return {
    v: 1,
    step_id: params.stepId,
    step_index: params.stepIndex,
    goal: params.goal,
    attempt: params.attempt,
    pre: preSection,
    llm: {},
    exec: params.execData,
    post: postSection,
    verify: {
      passed: verifySection?.passed ?? false,
      signals: mergedSignals,
    },
  };
}

/**
* Build snapshot event data
*
Expand Down Expand Up @@ -297,6 +377,7 @@ export class TraceEventBuilder {
attempt: number;
preUrl: string | null;
postUrl: string | null;
postSnapshotDigest?: string;
snapshot?: Snapshot | null;
llmResponse?: LLMResponse | null;
error: string;
Expand All @@ -310,6 +391,7 @@ export class TraceEventBuilder {
preUrl,
postUrl,
snapshot,
postSnapshotDigest,
llmResponse,
error,
durationMs,
Expand Down Expand Up @@ -390,6 +472,7 @@ export class TraceEventBuilder {
exec: execData,
post: {
url: postUrl || undefined,
snapshot_digest: postSnapshotDigest,
},
verify: verifyData,
};
Expand Down
Loading
Loading