diff --git a/src/agent.ts b/src/agent.ts index 6f03701a..5a8f8eed 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -9,7 +9,7 @@ import { click, typeText, press } from './actions'; import { Snapshot, Element, ActionResult } from './types'; import { LLMProvider, LLMResponse } from './llm-provider'; import { Tracer } from './tracing/tracer'; -import { randomUUID } from 'crypto'; +import { randomUUID, createHash } from 'crypto'; /** * Execution result from agent.act() @@ -124,6 +124,28 @@ export class SentienceAgent { } + /** + * Compute SHA256 hash of text + */ + private computeHash(text: string): string { + return createHash('sha256').update(text, 'utf8').digest('hex'); + } + + /** + * Get bounding box for an element from snapshot + */ + private getElementBbox(elementId: number | undefined, snap: Snapshot): { x: number; y: number; width: number; height: number } | undefined { + if (elementId === undefined) return undefined; + const el = snap.elements.find(e => e.id === elementId); + if (!el) return undefined; + return { + x: el.bbox.x, + y: el.bbox.y, + width: el.bbox.width, + height: el.bbox.height, + }; + } + /** * Execute a high-level goal using observe → think → act loop * @param goal - Natural language instruction (e.g., "Click the Sign In button") @@ -288,6 +310,103 @@ export class SentienceAgent { console.log(`${status} Completed in ${durationMs}ms`); } + // Emit step_end event if tracer is enabled + if (this.tracer) { + const preUrl = snap.url; + const postUrl = this.browser.getPage()?.url() || null; + + // Compute snapshot digest (simplified - use URL + timestamp) + const snapshotDigest = `sha256:${this.computeHash(`${preUrl}${snap.timestamp}`)}`; + + // Build LLM data + const llmResponseText = llmResponse.content; + const llmResponseHash = `sha256:${this.computeHash(llmResponseText)}`; + const llmData = { + response_text: llmResponseText, + response_hash: llmResponseHash, + usage: { + prompt_tokens: llmResponse.promptTokens || 0, + completion_tokens: llmResponse.completionTokens || 0, + total_tokens: llmResponse.totalTokens || 0, + }, + }; + + // Build exec data + const execData: any = { + success: result.success, + action: result.action || 'unknown', + outcome: result.outcome || (result.success ? `Action ${result.action || 'unknown'} executed successfully` : `Action ${result.action || 'unknown'} failed`), + duration_ms: durationMs, + }; + + // Add optional exec fields + if (result.elementId !== undefined) { + execData.element_id = result.elementId; + // Add bounding box if element found + const bbox = this.getElementBbox(result.elementId, snap); + if (bbox) { + execData.bounding_box = bbox; + } + } + if (result.text !== undefined) { + execData.text = result.text; + } + if (result.key !== undefined) { + execData.key = result.key; + } + if (result.error !== undefined) { + execData.error = result.error; + } + + // Build verify data (simplified - based on success and url_changed) + const verifyPassed = result.success && (result.urlChanged || result.action !== 'click'); + const verifySignals: any = { + url_changed: result.urlChanged || false, + }; + if (result.error) { + verifySignals.error = result.error; + } + + // Add elements_found array if element was targeted + if (result.elementId !== undefined) { + const bbox = this.getElementBbox(result.elementId, snap); + if (bbox) { + verifySignals.elements_found = [ + { + label: `Element ${result.elementId}`, + bounding_box: bbox, + }, + ]; + } + } + + const verifyData = { + passed: verifyPassed, + signals: verifySignals, + }; + + // Build complete step_end event + const stepEndData = { + v: 1, + step_id: stepId, + step_index: this.stepCount, + goal: goal, + attempt: attempt, + pre: { + url: preUrl, + snapshot_digest: snapshotDigest, + }, + llm: llmData, + exec: execData, + post: { + url: postUrl, + }, + verify: verifyData, + }; + + this.tracer.emit('step_end', stepEndData, stepId); + } + return result; } catch (error: any) { diff --git a/src/tracing/index-schema.ts b/src/tracing/index-schema.ts index d5441ac4..1783331d 100644 --- a/src/tracing/index-schema.ts +++ b/src/tracing/index-schema.ts @@ -6,7 +6,8 @@ export class TraceFileInfo { constructor( public path: string, public size_bytes: number, - public sha256: string + public sha256: string, + public line_count: number | null = null // Number of lines in the trace file ) {} toJSON() { @@ -14,6 +15,7 @@ export class TraceFileInfo { path: this.path, size_bytes: this.size_bytes, sha256: this.sha256, + line_count: this.line_count, }; } } @@ -25,7 +27,11 @@ export class TraceSummary { public event_count: number, public step_count: number, public error_count: number, - public final_url: string | null + public final_url: string | null, + public status: 'success' | 'failure' | 'partial' | 'unknown' | null = null, + public agent_name: string | null = null, // Agent name from run_start event + public duration_ms: number | null = null, // Calculated duration in milliseconds + public counters: { snapshot_count: number; action_count: number; error_count: number } | null = null // Aggregated counters ) {} toJSON() { @@ -36,6 +42,10 @@ export class TraceSummary { step_count: this.step_count, error_count: this.error_count, final_url: this.final_url, + status: this.status, + agent_name: this.agent_name, + duration_ms: this.duration_ms, + counters: this.counters, }; } } @@ -92,7 +102,7 @@ export class StepCounters { } } -export type StepStatus = 'ok' | 'error' | 'partial'; +export type StepStatus = 'success' | 'failure' | 'partial' | 'unknown'; export class StepIndex { constructor( @@ -104,6 +114,7 @@ export class StepIndex { public ts_end: string, public offset_start: number, public offset_end: number, + public line_number: number | null = null, // Line number for byte-range fetching public url_before: string | null, public url_after: string | null, public snapshot_before: SnapshotInfo, @@ -122,6 +133,7 @@ export class StepIndex { ts_end: this.ts_end, offset_start: this.offset_start, offset_end: this.offset_end, + line_number: this.line_number, url_before: this.url_before, url_after: this.url_after, snapshot_before: this.snapshot_before.toJSON(), @@ -152,4 +164,73 @@ export class TraceIndex { steps: this.steps.map((s) => s.toJSON()), }; } + + /** + * Convert to SS format. + * + * Maps SDK field names to frontend expectations: + * - created_at -> generated_at + * - first_ts -> start_time + * - last_ts -> end_time + * - step_index -> step (already 1-based, good!) + * - ts_start -> timestamp + * - Filters out "unknown" status + */ + toSentienceStudioJSON(): any { + // Calculate duration if not already set + let durationMs = this.summary.duration_ms; + if (durationMs === null && this.summary.first_ts && this.summary.last_ts) { + const start = new Date(this.summary.first_ts); + const end = new Date(this.summary.last_ts); + durationMs = end.getTime() - start.getTime(); + } + + // Aggregate counters if not already set + let counters = this.summary.counters; + if (counters === null) { + const snapshotCount = this.steps.reduce((sum, s) => sum + s.counters.snapshots, 0); + const actionCount = this.steps.reduce((sum, s) => sum + s.counters.actions, 0); + counters = { + snapshot_count: snapshotCount, + action_count: actionCount, + error_count: this.summary.error_count, + }; + } + + return { + version: this.version, + run_id: this.run_id, + generated_at: this.created_at, // Renamed from created_at + trace_file: { + path: this.trace_file.path, + size_bytes: this.trace_file.size_bytes, + line_count: this.trace_file.line_count, // Added + }, + summary: { + agent_name: this.summary.agent_name, // Added + total_steps: this.summary.step_count, // Renamed from step_count + status: this.summary.status !== 'unknown' ? this.summary.status : null, // Filter out unknown + start_time: this.summary.first_ts, // Renamed from first_ts + end_time: this.summary.last_ts, // Renamed from last_ts + duration_ms: durationMs, // Added + counters: counters, // Added + }, + steps: this.steps.map((s) => ({ + step: s.step_index, // Already 1-based ✅ + byte_offset: s.offset_start, + line_number: s.line_number, // Added + timestamp: s.ts_start, // Use start time + action: { + type: s.action.type || '', + goal: s.goal, // Move goal into action + digest: s.action.args_digest, + }, + snapshot: s.snapshot_after.url ? { + url: s.snapshot_after.url, + digest: s.snapshot_after.digest, + } : undefined, + status: s.status !== 'unknown' ? s.status : undefined, // Filter out unknown + })), + }; + } } diff --git a/src/tracing/indexer.ts b/src/tracing/indexer.ts index bbde2d36..77c9fef5 100644 --- a/src/tracing/indexer.ts +++ b/src/tracing/indexer.ts @@ -140,16 +140,22 @@ export function buildTraceIndex(tracePath: string): TraceIndex { let eventCount = 0; let errorCount = 0; let finalUrl: string | null = null; + let runEndStatus: string | null = null; // Track status from run_end event + let agentName: string | null = null; // Extract from run_start event + let lineCount = 0; // Track total line count const stepsById: Map = new Map(); const stepOrder: string[] = []; - // Stream through file, tracking byte offsets + // Stream through file, tracking byte offsets and line numbers const fileBuffer = fs.readFileSync(tracePath); let byteOffset = 0; const lines = fileBuffer.toString('utf-8').split('\n'); + let lineNumber = 0; // Track line number for each event for (const line of lines) { + lineNumber++; + lineCount++; const lineBytes = Buffer.byteLength(line + '\n', 'utf-8'); if (!line.trim()) { @@ -183,6 +189,11 @@ export function buildTraceIndex(tracePath: string): TraceIndex { errorCount++; } + // Extract agent_name from run_start event + if (eventType === 'run_start') { + agentName = data.agent || null; + } + // Initialize step if first time seeing this step_id if (!stepsById.has(stepId)) { stepOrder.push(stepId); @@ -192,11 +203,12 @@ export function buildTraceIndex(tracePath: string): TraceIndex { stepOrder.length, stepId, null, - 'partial', + 'failure', // Default to failure (will be updated by step_end event) ts, ts, byteOffset, byteOffset + lineBytes, + lineNumber, // Track line number null, null, new SnapshotInfo(), @@ -212,13 +224,15 @@ export function buildTraceIndex(tracePath: string): TraceIndex { // Update step metadata step.ts_end = ts; step.offset_end = byteOffset + lineBytes; + step.line_number = lineNumber; // Update line number on each event step.counters.events++; // Handle specific event types if (eventType === 'step_start') { step.goal = data.goal; step.url_before = data.pre_url; - } else if (eventType === 'snapshot') { + } else if (eventType === 'snapshot' || eventType === 'snapshot_taken') { + // Handle both "snapshot" (current) and "snapshot_taken" (schema) for backward compatibility const snapshotId = data.snapshot_id; const url = data.url; const digest = computeSnapshotDigest(data); @@ -233,7 +247,8 @@ export function buildTraceIndex(tracePath: string): TraceIndex { step.url_after = url; step.counters.snapshots++; finalUrl = url; - } else if (eventType === 'action') { + } else if (eventType === 'action' || eventType === 'action_executed') { + // Handle both "action" (current) and "action_executed" (schema) for backward compatibility step.action = new ActionInfo( data.type, data.target_element_id, @@ -241,19 +256,91 @@ export function buildTraceIndex(tracePath: string): TraceIndex { data.success !== false ); step.counters.actions++; - } else if (eventType === 'llm_response') { + } else if (eventType === 'llm_response' || eventType === 'llm_called') { + // Handle both "llm_response" (current) and "llm_called" (schema) for backward compatibility step.counters.llm_calls++; } else if (eventType === 'error') { - step.status = 'error'; + step.status = 'failure'; } else if (eventType === 'step_end') { - if (step.status !== 'error') { - step.status = 'ok'; + // Determine status from step_end event data + // Frontend expects: success, failure, or partial + // Logic: success = exec.success && verify.passed + // partial = exec.success && !verify.passed + // failure = !exec.success + const execData = data.exec || {}; + const verifyData = data.verify || {}; + + const execSuccess = execData.success === true; + const verifyPassed = verifyData.passed === true; + + if (execSuccess && verifyPassed) { + step.status = 'success'; + } else if (execSuccess && !verifyPassed) { + step.status = 'partial'; + } else if (!execSuccess) { + step.status = 'failure'; + } else { + // Fallback: if step_end exists but no exec/verify data, default to failure + step.status = 'failure'; + } + } else if (eventType === 'run_end') { + // Extract status from run_end event (will be used for summary) + runEndStatus = data.status; + // Validate status value + if (runEndStatus && !['success', 'failure', 'partial', 'unknown'].includes(runEndStatus)) { + runEndStatus = null; } } byteOffset += lineBytes; } + // Use run_end status if available, otherwise infer from step statuses + let summaryStatus: 'success' | 'failure' | 'partial' | 'unknown' | null = null; + if (runEndStatus) { + summaryStatus = runEndStatus as 'success' | 'failure' | 'partial' | 'unknown'; + } else { + const stepStatuses = Array.from(stepsById.values()).map(s => s.status); + if (stepStatuses.length > 0) { + // Infer overall status from step statuses + if (stepStatuses.every(s => s === 'success')) { + summaryStatus = 'success'; + } else if (stepStatuses.some(s => s === 'failure')) { + // If any failure and no successes, it's failure; otherwise partial + if (stepStatuses.some(s => s === 'success')) { + summaryStatus = 'partial'; + } else { + summaryStatus = 'failure'; + } + } else if (stepStatuses.some(s => s === 'partial')) { + summaryStatus = 'partial'; + } else { + summaryStatus = 'failure'; // Default to failure instead of unknown + } + } else { + summaryStatus = 'failure'; // Default to failure instead of unknown + } + } + + // Calculate duration + let durationMs: number | null = null; + if (firstTs && lastTs) { + const start = new Date(firstTs); + const end = new Date(lastTs); + durationMs = end.getTime() - start.getTime(); + } + + // Aggregate counters + const snapshotCount = Array.from(stepsById.values()) + .reduce((sum, s) => sum + s.counters.snapshots, 0); + const actionCount = Array.from(stepsById.values()) + .reduce((sum, s) => sum + s.counters.actions, 0); + const counters = { + snapshot_count: snapshotCount, + action_count: actionCount, + error_count: errorCount, + }; + // Build summary const summary = new TraceSummary( firstTs, @@ -261,7 +348,11 @@ export function buildTraceIndex(tracePath: string): TraceIndex { eventCount, stepsById.size, errorCount, - finalUrl + finalUrl, + summaryStatus, + agentName, + durationMs, + counters ); // Build steps list in order @@ -271,7 +362,8 @@ export function buildTraceIndex(tracePath: string): TraceIndex { const traceFile = new TraceFileInfo( tracePath, fs.statSync(tracePath).size, - computeFileSha256(tracePath) + computeFileSha256(tracePath), + lineCount ); // Build final index @@ -289,15 +381,26 @@ export function buildTraceIndex(tracePath: string): TraceIndex { /** * Build index and write to file + * @param tracePath - Path to trace JSONL file + * @param indexPath - Optional custom path for index file + * @param frontendFormat - If true, write in frontend-compatible format (default: false) */ -export function writeTraceIndex(tracePath: string, indexPath?: string): string { +export function writeTraceIndex( + tracePath: string, + indexPath?: string, + frontendFormat: boolean = false +): string { if (!indexPath) { indexPath = tracePath.replace(/\.jsonl$/, '.index.json'); } const index = buildTraceIndex(tracePath); - fs.writeFileSync(indexPath, JSON.stringify(index.toJSON(), null, 2)); + if (frontendFormat) { + fs.writeFileSync(indexPath, JSON.stringify(index.toSentienceStudioJSON(), null, 2)); + } else { + fs.writeFileSync(indexPath, JSON.stringify(index.toJSON(), null, 2)); + } return indexPath; } diff --git a/tests/actions.test.ts b/tests/actions.test.ts index 7f2442f4..f609107c 100644 --- a/tests/actions.test.ts +++ b/tests/actions.test.ts @@ -12,7 +12,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const snap = await snapshot(browser); const link = find(snap, 'role=link'); @@ -33,7 +33,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const snap = await snapshot(browser); const link = find(snap, 'role=link'); @@ -56,7 +56,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const snap = await snapshot(browser); const link = find(snap, 'role=link'); @@ -81,7 +81,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const snap = await snapshot(browser); const textbox = find(snap, 'role=textbox'); @@ -103,7 +103,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const result = await press(browser, 'Enter'); expect(result.success).toBe(true); @@ -120,7 +120,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); // Click at a specific rectangle (top-left area) const result = await clickRect(browser, { x: 100, y: 100, w: 50, h: 30 }); @@ -137,7 +137,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); // Get an element and click its bbox const snap = await snapshot(browser); @@ -164,7 +164,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const result = await clickRect(browser, { x: 100, y: 100, w: 50, h: 30 }, false); expect(result.success).toBe(true); @@ -179,7 +179,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); // Invalid: zero width const result1 = await clickRect(browser, { x: 100, y: 100, w: 0, h: 30 }); @@ -202,7 +202,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const result = await clickRect(browser, { x: 100, y: 100, w: 50, h: 30 }, true, 2.0, true); expect(result.success).toBe(true); @@ -219,7 +219,7 @@ describe('Actions', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const result = await clickRect(browser, { x: 100, y: 100, width: 50, height: 30 }); expect(result.success).toBe(true); diff --git a/tests/browser.test.ts b/tests/browser.test.ts index c52de0f2..29811af0 100644 --- a/tests/browser.test.ts +++ b/tests/browser.test.ts @@ -177,7 +177,7 @@ describe('Browser Proxy Support', () => { // Verify viewport is preserved const page = browser.getPage(); await page.goto('https://example.com'); - await page.waitForLoadState('networkidle'); + await page.waitForLoadState('networkidle', { timeout: 10000 }); const viewportSize = await page.evaluate(() => ({ width: window.innerWidth, @@ -228,7 +228,7 @@ describe('Browser Proxy Support', () => { // Test that we can use it await page.goto('https://example.com'); - await page.waitForLoadState('networkidle'); + await page.waitForLoadState('networkidle', { timeout: 10000 }); // Verify viewport is preserved const viewportSize = await page.evaluate(() => ({ diff --git a/tests/generator.test.ts b/tests/generator.test.ts index 508e1ed4..022854bd 100644 --- a/tests/generator.test.ts +++ b/tests/generator.test.ts @@ -14,7 +14,7 @@ describe('ScriptGenerator', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -41,7 +41,7 @@ describe('ScriptGenerator', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -67,7 +67,7 @@ describe('ScriptGenerator', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -94,7 +94,7 @@ describe('ScriptGenerator', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); diff --git a/tests/inspector.test.ts b/tests/inspector.test.ts index 45a133d8..94c346ad 100644 --- a/tests/inspector.test.ts +++ b/tests/inspector.test.ts @@ -11,7 +11,7 @@ describe('Inspector', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const inspector = inspect(browser); await inspector.start(); diff --git a/tests/read.test.ts b/tests/read.test.ts index cc55e7db..ba8445c8 100644 --- a/tests/read.test.ts +++ b/tests/read.test.ts @@ -10,7 +10,7 @@ describe('read', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const result = await read(browser, { format: 'text' }); @@ -29,7 +29,7 @@ describe('read', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const result = await read(browser, { format: 'markdown' }); @@ -48,7 +48,7 @@ describe('read', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); // Test with enhancement (default) const resultEnhanced = await read(browser, { diff --git a/tests/recorder.test.ts b/tests/recorder.test.ts index 498d0e2f..6eae550b 100644 --- a/tests/recorder.test.ts +++ b/tests/recorder.test.ts @@ -14,7 +14,7 @@ describe('Recorder', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -33,7 +33,7 @@ describe('Recorder', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -57,7 +57,7 @@ describe('Recorder', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -81,7 +81,7 @@ describe('Recorder', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); @@ -103,7 +103,7 @@ describe('Recorder', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const rec = record(browser); rec.start(); diff --git a/tests/screenshot.test.ts b/tests/screenshot.test.ts index 101e5f82..7252dbc8 100644 --- a/tests/screenshot.test.ts +++ b/tests/screenshot.test.ts @@ -10,7 +10,7 @@ describe('screenshot', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const dataUrl = await screenshot(browser, { format: 'png' }); @@ -29,7 +29,7 @@ describe('screenshot', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 80 }); @@ -48,7 +48,7 @@ describe('screenshot', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const dataUrl = await screenshot(browser); @@ -62,7 +62,7 @@ describe('screenshot', () => { const browser = await createTestBrowser(); try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); // Valid quality await screenshot(browser, { format: 'jpeg', quality: 50 }); // Should not throw diff --git a/tests/snapshot.test.ts b/tests/snapshot.test.ts index 2101bbb4..42be93e0 100644 --- a/tests/snapshot.test.ts +++ b/tests/snapshot.test.ts @@ -11,7 +11,7 @@ describe('Snapshot', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const snap = await snapshot(browser); @@ -29,7 +29,7 @@ describe('Snapshot', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const snap = await snapshot(browser); @@ -51,7 +51,7 @@ describe('Snapshot', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); // Test snapshot with goal const snap = await snapshot(browser, { goal: 'Find the main heading' }); diff --git a/tests/tracing/indexer.test.ts b/tests/tracing/indexer.test.ts index 81561475..c6ff1dfd 100644 --- a/tests/tracing/indexer.test.ts +++ b/tests/tracing/indexer.test.ts @@ -60,7 +60,18 @@ describe('Trace Indexing', () => { type: 'step_end', ts: '2025-12-29T10:00:02.000Z', step_id: 'step-1', - data: {}, + data: { + v: 1, + step_id: 'step-1', + step_index: 1, + goal: 'Test goal', + attempt: 0, + pre: { url: 'https://example.com', snapshot_digest: 'sha256:test' }, + llm: { response_text: 'CLICK(42)', response_hash: 'sha256:test' }, + exec: { success: true, action: 'click', outcome: 'Action executed', duration_ms: 100 }, + post: { url: 'https://example.com' }, + verify: { passed: true, signals: {} }, + }, }, ]; @@ -77,7 +88,7 @@ describe('Trace Indexing', () => { expect(step.step_id).toBe('step-1'); expect(step.step_index).toBe(1); expect(step.goal).toBe('Test goal'); - expect(step.status).toBe('ok'); + expect(step.status).toBe('success'); expect(step.counters.events).toBe(3); expect(step.counters.actions).toBe(1); expect(step.offset_start).toBe(0); @@ -394,6 +405,24 @@ describe('Trace Indexing', () => { step_id: 'step-1', data: { message: 'Something failed' }, }, + { + v: 1, + type: 'step_end', + ts: '2025-12-29T10:00:02.000Z', + step_id: 'step-1', + data: { + v: 1, + step_id: 'step-1', + step_index: 1, + goal: 'Test goal', + attempt: 0, + pre: { url: 'https://example.com', snapshot_digest: 'sha256:test' }, + llm: { response_text: 'CLICK(42)', response_hash: 'sha256:test' }, + exec: { success: false, action: 'click', outcome: 'Action failed', duration_ms: 100 }, + post: { url: 'https://example.com' }, + verify: { passed: false, signals: {} }, + }, + }, ]; fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); @@ -401,7 +430,7 @@ describe('Trace Indexing', () => { const index = buildTraceIndex(tracePath); expect(index.summary.error_count).toBe(1); - expect(index.steps[0].status).toBe('error'); + expect(index.steps[0].status).toBe('failure'); // Updated to match new status format }); it('should count LLM calls correctly', () => { @@ -488,5 +517,309 @@ describe('Trace Indexing', () => { expect(indexData.summary).toBeDefined(); expect(indexData.steps).toBeDefined(); }); + + it('should track line numbers for each step', () => { + const tracePath = path.join(tmpDir, 'line-numbers.jsonl'); + + const events = [ + { + v: 1, + type: 'run_start', + ts: '2025-12-29T10:00:00.000Z', + data: { agent: 'TestAgent', llm_model: 'gpt-4' }, + }, + { + v: 1, + type: 'step_start', + ts: '2025-12-29T10:00:01.000Z', + step_id: 'step-1', + data: { goal: 'Test goal' }, + }, + { + v: 1, + type: 'action', + ts: '2025-12-29T10:00:02.000Z', + step_id: 'step-1', + data: { type: 'CLICK' }, + }, + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + + // run_start creates synthetic step-0 on line 1, step-1 has events on lines 2-3 + expect(index.steps.length).toBeGreaterThanOrEqual(2); + // Find step-1 (skip synthetic step-0 from run_start) + const step1 = index.steps.find((s) => s.step_id === 'step-1'); + expect(step1).toBeDefined(); + expect(step1!.line_number).toBe(3); // Last event (action) is on line 3 + expect(index.trace_file.line_count).toBeGreaterThanOrEqual(3); // May include trailing newline + }); + + it('should extract agent name from run_start event', () => { + const tracePath = path.join(tmpDir, 'agent-name.jsonl'); + + const events = [ + { + v: 1, + type: 'run_start', + ts: '2025-12-29T10:00:00.000Z', + data: { agent: 'MyTestAgent', llm_model: 'gpt-4' }, + }, + { + v: 1, + type: 'run_end', + ts: '2025-12-29T10:00:01.000Z', + data: {}, + }, + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + + expect(index.summary.agent_name).toBe('MyTestAgent'); + }); + + it('should calculate duration_ms from timestamps', () => { + const tracePath = path.join(tmpDir, 'duration.jsonl'); + + const events = [ + { + v: 1, + type: 'run_start', + ts: '2025-12-29T10:00:00.000Z', + data: {}, + }, + { + v: 1, + type: 'run_end', + ts: '2025-12-29T10:01:30.000Z', // 90 seconds later + data: {}, + }, + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + + expect(index.summary.duration_ms).toBe(90000); // 90 seconds = 90000ms + }); + + it('should aggregate counters across all steps', () => { + const tracePath = path.join(tmpDir, 'counters.jsonl'); + + const events = [ + { + v: 1, + type: 'step_start', + ts: '2025-12-29T10:00:00.000Z', + step_id: 'step-1', + data: {}, + }, + { + v: 1, + type: 'snapshot', + ts: '2025-12-29T10:00:01.000Z', + step_id: 'step-1', + data: { url: 'https://example.com' }, + }, + { + v: 1, + type: 'action', + ts: '2025-12-29T10:00:02.000Z', + step_id: 'step-1', + data: { type: 'CLICK' }, + }, + { + v: 1, + type: 'step_start', + ts: '2025-12-29T10:00:03.000Z', + step_id: 'step-2', + data: {}, + }, + { + v: 1, + type: 'snapshot', + ts: '2025-12-29T10:00:04.000Z', + step_id: 'step-2', + data: { url: 'https://example.com' }, + }, + { + v: 1, + type: 'action', + ts: '2025-12-29T10:00:05.000Z', + step_id: 'step-2', + data: { type: 'TYPE' }, + }, + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + + expect(index.summary.counters).toBeDefined(); + expect(index.summary.counters?.snapshot_count).toBe(2); + expect(index.summary.counters?.action_count).toBe(2); + expect(index.summary.counters?.error_count).toBe(0); + }); + + it('should default step status to failure when no step_end', () => { + const tracePath = path.join(tmpDir, 'default-status.jsonl'); + + const events = [ + { + v: 1, + type: 'step_start', + ts: '2025-12-29T10:00:00.000Z', + step_id: 'step-1', + data: {}, + }, + // No step_end event - should default to failure + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + + expect(index.steps[0].status).toBe('failure'); + }); + + it('should produce frontend-compatible format with toFrontendJSON', () => { + const tracePath = path.join(tmpDir, 'frontend-format.jsonl'); + + const events = [ + { + v: 1, + type: 'run_start', + ts: '2025-12-29T10:00:00.000Z', + data: { agent: 'TestAgent' }, + }, + { + v: 1, + type: 'step_start', + ts: '2025-12-29T10:00:01.000Z', + step_id: 'step-1', + data: { goal: 'Test goal' }, + }, + { + v: 1, + type: 'step_end', + ts: '2025-12-29T10:00:02.000Z', + step_id: 'step-1', + data: { + v: 1, + step_id: 'step-1', + step_index: 1, + goal: 'Test goal', + attempt: 0, + pre: { snapshot_digest: 'sha256:test' }, + llm: { response_text: 'CLICK(42)', response_hash: 'sha256:test' }, + exec: { success: true, action: 'click', outcome: 'Action executed', duration_ms: 100 }, + post: { url: 'https://example.com' }, + verify: { passed: true, signals: {} }, + }, + }, + { + v: 1, + type: 'run_end', + ts: '2025-12-29T10:00:03.000Z', + data: { steps: 1, status: 'success' }, + }, + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + const frontendJSON = index.toSentienceStudioJSON(); + + // Check field name mappings + expect(frontendJSON.generated_at).toBeDefined(); // Renamed from created_at + expect(frontendJSON.trace_file).toBeDefined(); + expect(frontendJSON.trace_file.line_count).toBeGreaterThanOrEqual(4); + expect(frontendJSON.summary).toBeDefined(); + expect(frontendJSON.summary.agent_name).toBe('TestAgent'); + expect(frontendJSON.summary.total_steps).toBeGreaterThanOrEqual(1); // Includes synthetic step + expect(frontendJSON.summary.start_time).toBeDefined(); // Renamed from first_ts + expect(frontendJSON.summary.end_time).toBeDefined(); // Renamed from last_ts + expect(frontendJSON.summary.duration_ms).toBeGreaterThan(0); + expect(frontendJSON.summary.counters).toBeDefined(); + expect(frontendJSON.steps).toBeDefined(); + // Find step-1 (skip synthetic step-0 from run_start) + const step1 = frontendJSON.steps.find((s: any) => s.action?.goal === 'Test goal'); + expect(step1).toBeDefined(); + expect(step1.step).toBeGreaterThanOrEqual(1); // Converted from 0-based to 1-based + expect(step1.line_number).toBeGreaterThanOrEqual(2); + expect(step1.status).toBe('success'); + expect(step1.action).toBeDefined(); + expect(step1.action.goal).toBe('Test goal'); // Goal moved into action + }); + + it('should write index in frontend format when frontendFormat=true', () => { + const tracePath = path.join(tmpDir, 'test.jsonl'); + + const event = { + v: 1, + type: 'run_start', + ts: '2025-12-29T10:00:00.000Z', + data: { agent: 'TestAgent' }, + }; + + fs.writeFileSync(tracePath, JSON.stringify(event) + '\n'); + + const indexPath = writeTraceIndex(tracePath, undefined, true); // frontendFormat = true + + const indexData = JSON.parse(fs.readFileSync(indexPath, 'utf-8')); + + // Check frontend format fields + expect(indexData.generated_at).toBeDefined(); // Frontend format + expect(indexData.summary).toBeDefined(); + expect(indexData.summary.agent_name).toBe('TestAgent'); + }); + + it('should handle both old and new event type names', () => { + const tracePath = path.join(tmpDir, 'event-types.jsonl'); + + const events = [ + { + v: 1, + type: 'step_start', + ts: '2025-12-29T10:00:00.000Z', + step_id: 'step-1', + data: {}, + }, + { + v: 1, + type: 'snapshot_taken', // New schema name + ts: '2025-12-29T10:00:01.000Z', + step_id: 'step-1', + data: { url: 'https://example.com' }, + }, + { + v: 1, + type: 'action_executed', // New schema name + ts: '2025-12-29T10:00:02.000Z', + step_id: 'step-1', + data: { type: 'CLICK' }, + }, + { + v: 1, + type: 'llm_called', // New schema name + ts: '2025-12-29T10:00:03.000Z', + step_id: 'step-1', + data: {}, + }, + ]; + + fs.writeFileSync(tracePath, events.map((e) => JSON.stringify(e)).join('\n') + '\n'); + + const index = buildTraceIndex(tracePath); + + // Should process all events correctly + expect(index.steps[0].counters.snapshots).toBe(1); + expect(index.steps[0].counters.actions).toBe(1); + expect(index.steps[0].counters.llm_calls).toBe(1); + }); }); }); diff --git a/tests/video-recording.test.ts b/tests/video-recording.test.ts index 4d5fb5ca..4b28d23b 100644 --- a/tests/video-recording.test.ts +++ b/tests/video-recording.test.ts @@ -201,7 +201,7 @@ describe('video recording', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const outputPath = path.join(videoDir, `video_${i}.webm`); const videoPath = await browser.close(outputPath); @@ -250,7 +250,7 @@ describe('video recording', () => { try { await browser.getPage().goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); + await browser.getPage().waitForLoadState('networkidle', { timeout: 10000 }); const videoPath = await browser.close();