PredicateSystems · rcholic · Jan 20, 2026 · Jan 20, 2026
diff --git a/src/agent.ts b/src/agent.ts
@@ -211,10 +211,17 @@ export class SentienceAgent {
       this.tracer.emitStepStart(stepId, this.stepCount, goal, 0, currentUrl);
     }
 
+    // Track data collected during step execution for step_end emission on failure
+    let stepSnapWithDiff: Snapshot | null = null;
+    let stepPreUrl: string | null = null;
+    let stepLlmResponse: LLMResponse | null = null;
+    let stepStartTime: number = Date.now();
+
     for (let attempt = 0; attempt <= maxRetries; attempt++) {
       try {
         // 1. OBSERVE: Get refined semantic snapshot
         const startTime = Date.now();
+        stepStartTime = startTime;
 
         const snapOpts: SnapshotOptions = {
           ...snapshotOptions,
@@ -246,6 +253,10 @@ export class SentienceAgent {
         const snapWithDiff = processed.withDiff;
         const filteredSnap = processed.filtered;
 
+        // Track for step_end emission on failure
+        stepSnapWithDiff = snapWithDiff;
+        stepPreUrl = snap.url;
+
         // Emit snapshot event
         if (this.tracer) {
           const snapshotData = SnapshotEventBuilder.buildSnapshotEventData(snapWithDiff, stepId);
@@ -258,6 +269,9 @@ export class SentienceAgent {
         // 3. THINK: Query LLM for next action
         const llmResponse = await this.llmHandler.queryLLM(context, goal);
 
+        // Track for step_end emission on failure
+        stepLlmResponse = llmResponse;
+
         if (this.verbose) {
           console.log(`🧠 LLM Decision: ${llmResponse.content}`);
         }
@@ -357,6 +371,28 @@ export class SentienceAgent {
           await new Promise(resolve => setTimeout(resolve, 1000));
           continue;
         } else {
+          // Emit step_end with whatever data we collected before failure
+          // This ensures diff_status and other fields are preserved in traces
+          if (this.tracer && stepSnapWithDiff) {
+            const postUrl = this.browser.getPage()?.url() || null;
+            const durationMs = Date.now() - stepStartTime;
+
+            const stepEndData = TraceEventBuilder.buildPartialStepEndData({
+              stepId,
+              stepIndex: this.stepCount,
+              goal,
+              attempt,
+              preUrl: stepPreUrl,
+              postUrl,
+              snapshot: stepSnapWithDiff,
+              llmResponse: stepLlmResponse,
+              error: error.message,
+              durationMs,
+            });
+
+            this.tracer.emit('step_end', stepEndData, stepId);
+          }
+
           const errorResult: AgentActResult = {
             success: false,
             goal,

diff --git a/src/failure-artifacts.ts b/src/failure-artifacts.ts
@@ -104,6 +104,31 @@ function isFfmpegAvailable(): boolean {
   }
 }
 
+/**
+ * Run `ffmpeg -version` (5000 ms timeout) and return [major, minor] parsed from its stdout.
+ * Null on non-zero exit, thrown error, or no match. Used to choose -vsync vs -fps_mode.
+ */
+function getFfmpegVersion(): [number, number] | null {
+  try {
+    const result = spawnSync('ffmpeg', ['-version'], {
+      timeout: 5000,
+      stdio: 'pipe',
+    });
+    if (result.status !== 0) {
+      return null;
+    }
+    const output = result.stdout?.toString('utf-8') || '';
+    // Case-insensitive match of "ffmpeg version 7.0.1" or "ffmpeg version n7.0.1"; captures major, minor
+    const match = output.match(/ffmpeg version [n]?(\d+)\.(\d+)/i);
+    if (match) {
+      return [parseInt(match[1], 10), parseInt(match[2], 10)];
+    }
+    return null;
+  } catch {
+    return null;
+  }
+}
+
 /**
  * Generate an MP4 video clip from a directory of frames using ffmpeg.
  */
@@ -123,7 +148,9 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
   }
 
   // Create a temporary file list for ffmpeg concat demuxer
-  const listFile = path.join(framesDir, 'frames_list.txt');
+  // Use relative path (just filename) since we run ffmpeg with cwd=framesDir
+  const listFile = 'frames_list.txt';
+  const listFilePath = path.join(framesDir, listFile);
   const frameDuration = 1.0 / fps;
 
   try {
@@ -132,7 +159,19 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
       files.map(f => `file '${f}'\nduration ${frameDuration}`).join('\n') +
       `\nfile '${files[files.length - 1]}'`; // ffmpeg concat quirk
 
-    fs.writeFileSync(listFile, listContent);
+    fs.writeFileSync(listFilePath, listContent);
+
+    // Determine which vsync/fps_mode flag to use based on ffmpeg version
+    // -vsync is deprecated in ffmpeg 7.0+, use -fps_mode instead (available since 5.1)
+    const version = getFfmpegVersion();
+    let syncArgs: string[];
+    if (version && (version[0] > 5 || (version[0] === 5 && version[1] >= 1))) {
+      // ffmpeg 5.1+: use -fps_mode
+      syncArgs = ['-fps_mode', 'vfr'];
+    } else {
+      // ffmpeg < 5.1: use legacy -vsync
+      syncArgs = ['-vsync', 'vfr'];
+    }
 
     // Run ffmpeg to generate the clip
     const result = spawnSync(
@@ -145,8 +184,7 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
         '0',
         '-i',
         listFile,
-        '-vsync',
-        'vfr',
+        ...syncArgs,
         '-pix_fmt',
         'yuv420p',
         '-c:v',
@@ -175,7 +213,7 @@ function generateClipFromFrames(framesDir: string, outputPath: string, fps: numb
   } finally {
     // Clean up the list file
     try {
-      fs.unlinkSync(listFile);
+      fs.unlinkSync(listFilePath);
     } catch {
       // ignore
     }

diff --git a/src/utils/trace-event-builder.ts b/src/utils/trace-event-builder.ts
@@ -279,4 +279,119 @@ export class TraceEventBuilder {
 
     return data;
   }
+
+  /**
+   * Build step_end event data for a step that failed part-way through.
+   *
+   * Whatever was gathered before the failure (snapshot, LLM response) is included, so per-element
+   * diff_status and related fields still reach the trace. exec and verify are always marked as
+   * failed and carry the error message; llm is undefined when no LLM response was captured.
+   *
+   * @param params - Step identity, pre/post URLs, optional snapshot and LLM response, error, duration
+   * @returns step_end event data with exec.success=false and verify.passed=false
+   */
+  static buildPartialStepEndData(params: {
+    stepId: string;
+    stepIndex: number;
+    goal: string;
+    attempt: number;
+    preUrl: string | null;
+    postUrl: string | null;
+    snapshot?: Snapshot | null;
+    llmResponse?: LLMResponse | null;
+    error: string;
+    durationMs: number;
+  }): TraceEventData {
+    const {
+      stepId,
+      stepIndex,
+      goal,
+      attempt,
+      preUrl,
+      postUrl,
+      snapshot,
+      llmResponse,
+      error,
+      durationMs,
+    } = params;
+
+    // Pre-state: URL and snapshot digest (each left undefined when the input is missing)
+    const preData: TraceEventData['pre'] = {
+      url: preUrl || undefined,
+      snapshot_digest: snapshot ? this.buildSnapshotDigest(snapshot) : undefined,
+    };
+
+    // Attach elements (with diff_status); importance_score is min-max scaled, 0.5 when the range is zero
+    if (snapshot && snapshot.elements.length > 0) {
+      const importanceValues = snapshot.elements.map(el => el.importance);
+      const minImportance = importanceValues.length > 0 ? Math.min(...importanceValues) : 0;
+      const maxImportance = importanceValues.length > 0 ? Math.max(...importanceValues) : 0;
+      const importanceRange = maxImportance - minImportance;
+
+      preData.elements = snapshot.elements.map(el => {
+        let importanceScore: number;
+        if (importanceRange > 0) {
+          importanceScore = (el.importance - minImportance) / importanceRange;
+        } else {
+          importanceScore = 0.5;
+        }
+
+        return {
+          id: el.id,
+          role: el.role,
+          text: el.text,
+          bbox: el.bbox,
+          importance: el.importance,
+          importance_score: importanceScore,
+          visual_cues: el.visual_cues,
+          in_viewport: el.in_viewport,
+          is_occluded: el.is_occluded,
+          z_index: el.z_index,
+          rerank_index: el.rerank_index,
+          heuristic_index: el.heuristic_index,
+          ml_probability: el.ml_probability,
+          ml_score: el.ml_score,
+          diff_status: el.diff_status,
+        };
+      });
+    }
+
+    // LLM section is built only when a response was captured; otherwise it stays undefined
+    let llmData: TraceEventData['llm'] | undefined;
+    if (llmResponse) {
+      llmData = this.buildLLMData(llmResponse);
+    }
+
+    // Exec section: always a failed 'error' action; the message is reported as both outcome and error
+    const execData: TraceEventData['exec'] = {
+      success: false,
+      action: 'error',
+      outcome: error,
+      duration_ms: durationMs,
+      error: error,
+    };
+
+    // Verify section: never passed; the error message is its only signal
+    const verifyData: TraceEventData['verify'] = {
+      passed: false,
+      signals: {
+        error: error,
+      },
+    };
+
+    return {
+      v: 1,
+      step_id: stepId,
+      step_index: stepIndex,
+      goal: goal,
+      attempt: attempt,
+      pre: preData,
+      llm: llmData,
+      exec: execData,
+      post: {
+        url: postUrl || undefined,
+      },
+      verify: verifyData,
+    };
+  }
 }
diff --git a/src/visual-agent.ts b/src/visual-agent.ts
@@ -596,6 +596,11 @@ Return ONLY the integer ID number from the label, nothing else.`;
 
     const startTime = Date.now();
 
+    // Track data collected during step execution for step_end emission on failure
+    let stepSnapWithDiff: Snapshot | null = null;
+    let stepPreUrl: string | null = null;
+    let stepLlmResponse: LLMResponse | null = null;
+
     try {
       // Ensure screenshot is enabled
       const snapOpts: SnapshotOptions = {
@@ -634,6 +639,10 @@ Return ONLY the integer ID number from the label, nothing else.`;
 
       const snapWithDiff = processed.withDiff;
 
+      // Track for step_end emission on failure
+      stepSnapWithDiff = snapWithDiff;
+      stepPreUrl = snap.url;
+
       // Emit snapshot event
       if (tracer) {
         const snapshotData = SnapshotEventBuilder.buildSnapshotEventData(snapWithDiff, stepId);
@@ -710,6 +719,9 @@ Return ONLY the integer ID number from the label, nothing else.`;
 
       const llmResponse = await this.queryLLMWithVision(labeledImageDataUrl, goal);
 
+      // Track for step_end emission on failure
+      stepLlmResponse = llmResponse;
+
       // Emit LLM query event
       if (tracer) {
         tracer.emit(
@@ -848,6 +860,29 @@ Return ONLY the integer ID number from the label, nothing else.`;
         tracer.emitError(stepId, error.message, 0);
       }
 
+      // Emit step_end with whatever data we collected before failure
+      // This ensures diff_status and other fields are preserved in traces
+      if (tracer && stepSnapWithDiff) {
+        const page = (this as any).browser.getPage();
+        const postUrl = page ? page.url() || null : null;
+        const durationMs = Date.now() - startTime;
+
+        const stepEndData = TraceEventBuilder.buildPartialStepEndData({
+          stepId,
+          stepIndex: stepCount,
+          goal,
+          attempt: 0,
+          preUrl: stepPreUrl,
+          postUrl,
+          snapshot: stepSnapWithDiff,
+          llmResponse: stepLlmResponse,
+          error: error.message,
+          durationMs,
+        });
+
+        tracer.emit('step_end', stepEndData, stepId);
+      }
+
       if ((this as any).verbose) {
         console.log(`❌ Error: ${error.message}`);
       }