Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,17 @@ export class SentienceAgent {
this.tracer.emitStepStart(stepId, this.stepCount, goal, 0, currentUrl);
}

// Track data collected during step execution for step_end emission on failure
let stepSnapWithDiff: Snapshot | null = null;
let stepPreUrl: string | null = null;
let stepLlmResponse: LLMResponse | null = null;
let stepStartTime: number = Date.now();

for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
// 1. OBSERVE: Get refined semantic snapshot
const startTime = Date.now();
stepStartTime = startTime;

const snapOpts: SnapshotOptions = {
...snapshotOptions,
Expand Down Expand Up @@ -246,6 +253,10 @@ export class SentienceAgent {
const snapWithDiff = processed.withDiff;
const filteredSnap = processed.filtered;

// Track for step_end emission on failure
stepSnapWithDiff = snapWithDiff;
stepPreUrl = snap.url;

// Emit snapshot event
if (this.tracer) {
const snapshotData = SnapshotEventBuilder.buildSnapshotEventData(snapWithDiff, stepId);
Expand All @@ -258,6 +269,9 @@ export class SentienceAgent {
// 3. THINK: Query LLM for next action
const llmResponse = await this.llmHandler.queryLLM(context, goal);

// Track for step_end emission on failure
stepLlmResponse = llmResponse;

if (this.verbose) {
console.log(`🧠 LLM Decision: ${llmResponse.content}`);
}
Expand Down Expand Up @@ -357,6 +371,28 @@ export class SentienceAgent {
await new Promise(resolve => setTimeout(resolve, 1000));
continue;
} else {
// Emit step_end with whatever data we collected before failure
// This ensures diff_status and other fields are preserved in traces
if (this.tracer && stepSnapWithDiff) {
const postUrl = this.browser.getPage()?.url() || null;
const durationMs = Date.now() - stepStartTime;

const stepEndData = TraceEventBuilder.buildPartialStepEndData({
stepId,
stepIndex: this.stepCount,
goal,
attempt,
preUrl: stepPreUrl,
postUrl,
snapshot: stepSnapWithDiff,
llmResponse: stepLlmResponse,
error: error.message,
durationMs,
});

this.tracer.emit('step_end', stepEndData, stepId);
}

const errorResult: AgentActResult = {
success: false,
goal,
Expand Down
48 changes: 43 additions & 5 deletions src/failure-artifacts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,31 @@ function isFfmpegAvailable(): boolean {
}
}

/**
 * Probe the `ffmpeg` executable for its version number.
 *
 * Runs `ffmpeg -version` synchronously (5 second timeout) and extracts the
 * first `major.minor` pair that follows the text "ffmpeg version", tolerating
 * an optional leading `n` (e.g. "ffmpeg version n7.0.1"). Callers use the
 * result to pick between the `-vsync` and `-fps_mode` flags.
 *
 * @returns `[major, minor]`, or `null` when the process does not exit with
 *   status 0, the output contains no recognizable version, or anything throws.
 */
function getFfmpegVersion(): [number, number] | null {
  try {
    const probe = spawnSync('ffmpeg', ['-version'], {
      timeout: 5000,
      stdio: 'pipe',
    });

    if (probe.status === 0) {
      // stdout may be absent; treat that the same as an empty banner.
      const banner = probe.stdout?.toString('utf-8') || '';
      const parsed = banner.match(/ffmpeg version [n]?(\d+)\.(\d+)/i);
      return parsed ? [parseInt(parsed[1], 10), parseInt(parsed[2], 10)] : null;
    }
  } catch {
    // Any failure while probing is reported as "version unknown" below.
  }
  return null;
}

/**
* Generate an MP4 video clip from a directory of frames using ffmpeg.
*/
Expand All @@ -123,7 +148,9 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
}

// Create a temporary file list for ffmpeg concat demuxer
const listFile = path.join(framesDir, 'frames_list.txt');
// Use relative path (just filename) since we run ffmpeg with cwd=framesDir
const listFile = 'frames_list.txt';
const listFilePath = path.join(framesDir, listFile);
const frameDuration = 1.0 / fps;

try {
Expand All @@ -132,7 +159,19 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
files.map(f => `file '${f}'\nduration ${frameDuration}`).join('\n') +
`\nfile '${files[files.length - 1]}'`; // ffmpeg concat quirk

fs.writeFileSync(listFile, listContent);
fs.writeFileSync(listFilePath, listContent);

// Determine which vsync/fps_mode flag to use based on ffmpeg version
// -vsync is deprecated in ffmpeg 7.0+, use -fps_mode instead (available since 5.1)
const version = getFfmpegVersion();
let syncArgs: string[];
if (version && (version[0] > 5 || (version[0] === 5 && version[1] >= 1))) {
// ffmpeg 5.1+: use -fps_mode
syncArgs = ['-fps_mode', 'vfr'];
} else {
// ffmpeg < 5.1: use legacy -vsync
syncArgs = ['-vsync', 'vfr'];
}

// Run ffmpeg to generate the clip
const result = spawnSync(
Expand All @@ -145,8 +184,7 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
'0',
'-i',
listFile,
'-vsync',
'vfr',
...syncArgs,
'-pix_fmt',
'yuv420p',
'-c:v',
Expand Down Expand Up @@ -175,7 +213,7 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
} finally {
// Clean up the list file
try {
fs.unlinkSync(listFile);
fs.unlinkSync(listFilePath);
} catch {
// ignore
}
Expand Down
115 changes: 115 additions & 0 deletions src/utils/trace-event-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -279,4 +279,119 @@ export class TraceEventBuilder {

return data;
}

/**
 * Build partial step_end event data for a failed step.
 *
 * Used when a step fails after some data (snapshot, LLM response, ...) has
 * already been collected but before execution completed, so that per-element
 * `diff_status` and the other collected fields still appear in the trace.
 *
 * The returned event always reports failure: `exec.success` and
 * `verify.passed` are `false`, `exec.action` is `'error'`, and the error
 * message is copied into `exec.outcome`, `exec.error` and
 * `verify.signals.error`.
 *
 * @param params - Data gathered for the step before it failed.
 *   `preUrl` / `postUrl` are emitted as `undefined` when falsy; `snapshot`
 *   and `llmResponse` are optional and their sections are omitted when absent.
 * @returns Partial step_end event data
 */
static buildPartialStepEndData(params: {
  stepId: string;
  stepIndex: number;
  goal: string;
  attempt: number;
  preUrl: string | null;
  postUrl: string | null;
  snapshot?: Snapshot | null;
  llmResponse?: LLMResponse | null;
  error: string;
  durationMs: number;
}): TraceEventData {
  const {
    stepId,
    stepIndex,
    goal,
    attempt,
    preUrl,
    postUrl,
    snapshot,
    llmResponse,
    error,
    durationMs,
  } = params;

  // Build pre data
  const preData: TraceEventData['pre'] = {
    url: preUrl || undefined,
    snapshot_digest: snapshot ? this.buildSnapshotDigest(snapshot) : undefined,
  };

  // Add elements with diff_status if snapshot is available
  if (snapshot && snapshot.elements.length > 0) {
    // Find min/max in one pass. Spreading the whole array into
    // Math.min/Math.max passes every element as a call argument and can throw
    // a RangeError on very large snapshots; pairwise calls give the same
    // result (including NaN propagation) without that limit.
    let minImportance = Infinity;
    let maxImportance = -Infinity;
    for (const el of snapshot.elements) {
      minImportance = Math.min(minImportance, el.importance);
      maxImportance = Math.max(maxImportance, el.importance);
    }
    const importanceRange = maxImportance - minImportance;

    preData.elements = snapshot.elements.map(el => ({
      id: el.id,
      role: el.role,
      text: el.text,
      bbox: el.bbox,
      importance: el.importance,
      // Min-max normalize to [0, 1]; when all elements share one importance
      // (or the range is not a positive number) fall back to the midpoint.
      importance_score:
        importanceRange > 0 ? (el.importance - minImportance) / importanceRange : 0.5,
      visual_cues: el.visual_cues,
      in_viewport: el.in_viewport,
      is_occluded: el.is_occluded,
      z_index: el.z_index,
      rerank_index: el.rerank_index,
      heuristic_index: el.heuristic_index,
      ml_probability: el.ml_probability,
      ml_score: el.ml_score,
      diff_status: el.diff_status,
    }));
  }

  // Build LLM data if available
  const llmData: TraceEventData['llm'] | undefined = llmResponse
    ? this.buildLLMData(llmResponse)
    : undefined;

  // Build exec data for failure
  const execData: TraceEventData['exec'] = {
    success: false,
    action: 'error',
    outcome: error,
    duration_ms: durationMs,
    error: error,
  };

  // Build verify data for failure
  const verifyData: TraceEventData['verify'] = {
    passed: false,
    signals: {
      error: error,
    },
  };

  return {
    v: 1,
    step_id: stepId,
    step_index: stepIndex,
    goal: goal,
    attempt: attempt,
    pre: preData,
    llm: llmData,
    exec: execData,
    post: {
      url: postUrl || undefined,
    },
    verify: verifyData,
  };
}
}
35 changes: 35 additions & 0 deletions src/visual-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,11 @@ Return ONLY the integer ID number from the label, nothing else.`;

const startTime = Date.now();

// Track data collected during step execution for step_end emission on failure
let stepSnapWithDiff: Snapshot | null = null;
let stepPreUrl: string | null = null;
let stepLlmResponse: LLMResponse | null = null;

try {
// Ensure screenshot is enabled
const snapOpts: SnapshotOptions = {
Expand Down Expand Up @@ -634,6 +639,10 @@ Return ONLY the integer ID number from the label, nothing else.`;

const snapWithDiff = processed.withDiff;

// Track for step_end emission on failure
stepSnapWithDiff = snapWithDiff;
stepPreUrl = snap.url;

// Emit snapshot event
if (tracer) {
const snapshotData = SnapshotEventBuilder.buildSnapshotEventData(snapWithDiff, stepId);
Expand Down Expand Up @@ -710,6 +719,9 @@ Return ONLY the integer ID number from the label, nothing else.`;

const llmResponse = await this.queryLLMWithVision(labeledImageDataUrl, goal);

// Track for step_end emission on failure
stepLlmResponse = llmResponse;

// Emit LLM query event
if (tracer) {
tracer.emit(
Expand Down Expand Up @@ -848,6 +860,29 @@ Return ONLY the integer ID number from the label, nothing else.`;
tracer.emitError(stepId, error.message, 0);
}

// Emit step_end with whatever data we collected before failure
// This ensures diff_status and other fields are preserved in traces
if (tracer && stepSnapWithDiff) {
const page = (this as any).browser.getPage();
const postUrl = page ? page.url() || null : null;
const durationMs = Date.now() - startTime;

const stepEndData = TraceEventBuilder.buildPartialStepEndData({
stepId,
stepIndex: stepCount,
goal,
attempt: 0,
preUrl: stepPreUrl,
postUrl,
snapshot: stepSnapWithDiff,
llmResponse: stepLlmResponse,
error: error.message,
durationMs,
});

tracer.emit('step_end', stepEndData, stepId);
}

if ((this as any).verbose) {
console.log(`❌ Error: ${error.message}`);
}
Expand Down
Loading