diff --git a/package.json b/package.json index ed90a2a4..3c52481f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sentienceapi", - "version": "0.90.17", + "version": "0.90.19", "description": "TypeScript SDK for Sentience AI Agent Browser Automation", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/tracing/cloud-sink.ts b/src/tracing/cloud-sink.ts index bef4b4c8..7dbbfdde 100644 --- a/src/tracing/cloud-sink.ts +++ b/src/tracing/cloud-sink.ts @@ -79,6 +79,7 @@ export class CloudTraceSink extends TraceSink { private traceFileSizeBytes: number = 0; private screenshotTotalSizeBytes: number = 0; private screenshotCount: number = 0; // Track number of screenshots extracted + private indexFileSizeBytes: number = 0; // Track index file size // Upload success flag private uploadSuccessful: boolean = false; @@ -332,7 +333,164 @@ export class CloudTraceSink extends TraceSink { } /** - * Call /v1/traces/complete to report file sizes to gateway. + * Infer final status from trace events by reading the trace file. + * @returns Final status: "success", "failure", "partial", or "unknown" + */ + private _inferFinalStatusFromTrace(): string { + try { + // Read trace file to analyze events + const traceContent = fs.readFileSync(this.tempFilePath, 'utf-8'); + const lines = traceContent.split('\n').filter(line => line.trim()); + const events: any[] = []; + + for (const line of lines) { + try { + const event = JSON.parse(line); + events.push(event); + } catch { + continue; + } + } + + if (events.length === 0) { + return 'unknown'; + } + + // Check for run_end event with status + for (let i = events.length - 1; i >= 0; i--) { + const event = events[i]; + if (event.type === 'run_end') { + const status = event.data?.status; + if (['success', 'failure', 'partial', 'unknown'].includes(status)) { + return status; + } + } + } + + // Infer from error events + const hasErrors = events.some(e => e.type === 'error'); + if (hasErrors) { + // Check if there are successful steps too (partial success) + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + return 'partial'; + } + return 'failure'; + } + + // If we have step_end events and no errors, likely success + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + return 'success'; + } + + return 'unknown'; + } catch { + // If we can't read the trace, default to unknown + return 'unknown'; + } + } + + /** + * Extract execution statistics from trace file. + * @returns Dictionary with stats fields for /v1/traces/complete + */ + private _extractStatsFromTrace(): Record { + try { + // Read trace file to extract stats + const traceContent = fs.readFileSync(this.tempFilePath, 'utf-8'); + const lines = traceContent.split('\n').filter(line => line.trim()); + const events: any[] = []; + + for (const line of lines) { + try { + const event = JSON.parse(line); + events.push(event); + } catch { + continue; + } + } + + if (events.length === 0) { + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + + // Find run_start and run_end events + const runStart = events.find(e => e.type === 'run_start'); + const runEnd = events.find(e => e.type === 'run_end'); + + // Extract timestamps + const startedAt = runStart?.ts || null; + const endedAt = runEnd?.ts || null; + + // Calculate duration + let durationMs: number | null = null; + if (startedAt && endedAt) { + try { + const startDt = new Date(startedAt); + const endDt = new Date(endedAt); + durationMs = endDt.getTime() - startDt.getTime(); + } catch { + // Ignore parse errors + } + } + + // Count steps (from step_start events, only first attempt) + const stepIndices = new Set(); + for (const event of events) { + if (event.type === 'step_start') { + const stepIndex = event.data?.step_index; + if (stepIndex !== undefined) { + stepIndices.add(stepIndex); + } + } + } + let totalSteps = stepIndices.size; + + // If run_end has steps count, use that (more accurate) + if (runEnd) { + const stepsFromEnd = runEnd.data?.steps; + if (stepsFromEnd !== undefined) { + totalSteps = Math.max(totalSteps, stepsFromEnd); + } + } + + // Count total events + const totalEvents = events.length; + + // Infer final status + const finalStatus = this._inferFinalStatusFromTrace(); + + return { + total_steps: totalSteps, + total_events: totalEvents, + duration_ms: durationMs, + final_status: finalStatus, + started_at: startedAt, + ended_at: endedAt, + }; + } catch (error: any) { + this.logger?.warn(`Error extracting stats from trace: ${error.message}`); + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + } + + /** + * Call /v1/traces/complete to report file sizes and stats to gateway. * * This is a best-effort call - failures are logged but don't affect upload success. */ @@ -346,13 +504,21 @@ export class CloudTraceSink extends TraceSink { const url = new URL(`${this.apiUrl}/v1/traces/complete`); const protocol = url.protocol === 'https:' ? https : http; + // Extract stats from trace file + const stats = this._extractStatsFromTrace(); + + // Add file size fields + const completeStats = { + ...stats, + trace_file_size_bytes: this.traceFileSizeBytes, + screenshot_total_size_bytes: this.screenshotTotalSizeBytes, + screenshot_count: this.screenshotCount, + index_file_size_bytes: this.indexFileSizeBytes, + }; + const body = JSON.stringify({ run_id: this.runId, - stats: { - trace_file_size_bytes: this.traceFileSizeBytes, - screenshot_total_size_bytes: this.screenshotTotalSizeBytes, - screenshot_count: this.screenshotCount, - }, + stats: completeStats, }); const options = { @@ -447,6 +613,7 @@ export class CloudTraceSink extends TraceSink { const indexData = await fsPromises.readFile(indexPath); const compressedIndex = zlib.gzipSync(indexData); const indexSize = compressedIndex.length; + this.indexFileSizeBytes = indexSize; // Track index file size this.logger?.info(`Index file size: ${(indexSize / 1024).toFixed(2)} KB`); if (this.logger) { diff --git a/src/tracing/jsonl-sink.ts b/src/tracing/jsonl-sink.ts index 97ee62e5..942b76f9 100644 --- a/src/tracing/jsonl-sink.ts +++ b/src/tracing/jsonl-sink.ts @@ -115,17 +115,43 @@ export class JsonlTraceSink extends TraceSink { stream.removeAllListeners('error'); return new Promise((resolve) => { + // Check if stream is already closed + if (stream.destroyed || !stream.writable) { + // Stream already closed, generate index and resolve immediately + this.generateIndex(); + resolve(); + return; + } + + let resolved = false; + const doResolve = () => { + if (!resolved) { + resolved = true; + clearTimeout(timeout); + // Generate index after closing file + this.generateIndex(); + resolve(); + } + }; + + // Fallback timeout in case 'close' event doesn't fire (shouldn't happen, but safety) + const timeout = setTimeout(() => { + if (!resolved) { + doResolve(); + } + }, 500); + + // Wait for stream to fully close (Windows needs this) + // The 'close' event fires after all data is flushed and file handle is released + stream.once('close', doResolve); + stream.end((err?: Error | null) => { if (err) { // Silently ignore close errors in production // (they're logged during stream lifetime if needed) } - - // Generate index after closing file - this.generateIndex(); - - // Always resolve, don't reject on close errors - resolve(); + // Note: 'close' event will fire after end() completes + // Don't resolve here - wait for 'close' event }); }); } @@ -163,4 +189,124 @@ export class JsonlTraceSink extends TraceSink { isClosed(): boolean { return this.closed; } + + /** + * Extract execution statistics from trace file (for local traces). + * @returns Dictionary with stats fields (same format as Tracer.getStats()) + */ + getStats(): Record { + try { + // Read trace file to extract stats + const traceContent = fs.readFileSync(this.path, 'utf-8'); + const lines = traceContent.split('\n').filter(line => line.trim()); + const events: any[] = []; + + for (const line of lines) { + try { + const event = JSON.parse(line); + events.push(event); + } catch { + continue; + } + } + + if (events.length === 0) { + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + + // Find run_start and run_end events + const runStart = events.find(e => e.type === 'run_start'); + const runEnd = events.find(e => e.type === 'run_end'); + + // Extract timestamps + const startedAt = runStart?.ts || null; + const endedAt = runEnd?.ts || null; + + // Calculate duration + let durationMs: number | null = null; + if (startedAt && endedAt) { + try { + const startDt = new Date(startedAt); + const endDt = new Date(endedAt); + durationMs = endDt.getTime() - startDt.getTime(); + } catch { + // Ignore parse errors + } + } + + // Count steps (from step_start events, only first attempt) + const stepIndices = new Set(); + for (const event of events) { + if (event.type === 'step_start') { + const stepIndex = event.data?.step_index; + if (stepIndex !== undefined) { + stepIndices.add(stepIndex); + } + } + } + let totalSteps = stepIndices.size; + + // If run_end has steps count, use that (more accurate) + if (runEnd) { + const stepsFromEnd = runEnd.data?.steps; + if (stepsFromEnd !== undefined) { + totalSteps = Math.max(totalSteps, stepsFromEnd); + } + } + + // Count total events + const totalEvents = events.length; + + // Infer final status + let finalStatus = 'unknown'; + // Check for run_end event with status + if (runEnd) { + const status = runEnd.data?.status; + if (['success', 'failure', 'partial', 'unknown'].includes(status)) { + finalStatus = status; + } + } else { + // Infer from error events + const hasErrors = events.some(e => e.type === 'error'); + if (hasErrors) { + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + finalStatus = 'partial'; + } else { + finalStatus = 'failure'; + } + } else { + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + finalStatus = 'success'; + } + } + } + + return { + total_steps: totalSteps, + total_events: totalEvents, + duration_ms: durationMs, + final_status: finalStatus, + started_at: startedAt, + ended_at: endedAt, + }; + } catch { + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + } } diff --git a/src/tracing/tracer-factory.ts b/src/tracing/tracer-factory.ts index a3155e4d..12a69f2b 100644 --- a/src/tracing/tracer-factory.ts +++ b/src/tracing/tracer-factory.ts @@ -94,8 +94,17 @@ async function recoverOrphanedTraces(apiKey: string, apiUrl: string = SENTIENCE_ const sink = new CloudTraceSink(response.data.upload_url, runId); await sink.close(); // This will upload the existing file console.log(`✅ [Sentience] Uploaded orphaned trace: ${runId}`); + } else if (response.status === 409) { + // HTTP 409 means trace already exists (already uploaded) + // Treat as success and delete local file + console.log(`✅ [Sentience] Trace ${runId} already exists in cloud (skipping re-upload)`); + try { + fs.unlinkSync(filePath); + } catch { + // Ignore cleanup errors + } } - // Silently skip failures - don't log errors for orphan recovery + // Silently skip other failures - don't log errors for orphan recovery // These are expected in many scenarios (network issues, invalid API keys, etc.) } catch (error: any) { // Silently skip failures - don't log errors for orphan recovery diff --git a/src/tracing/tracer.ts b/src/tracing/tracer.ts index a20e8691..297e81f0 100644 --- a/src/tracing/tracer.ts +++ b/src/tracing/tracer.ts @@ -14,6 +14,17 @@ export class Tracer { private runId: string; private sink: TraceSink; private seq: number; + + // Stats tracking + private totalSteps: number = 0; + private totalEvents: number = 0; + private startedAt: Date | null = null; + private endedAt: Date | null = null; + private finalStatus: string = 'unknown'; + // Track step outcomes for automatic status inference + private stepSuccesses: number = 0; + private stepFailures: number = 0; + private hasErrors: boolean = false; /** * Create a new Tracer @@ -38,6 +49,7 @@ export class Tracer { stepId?: string ): void { this.seq += 1; + this.totalEvents += 1; // Generate timestamps const tsMs = Date.now(); @@ -58,6 +70,18 @@ export class Tracer { } this.sink.emit(event); + + // Track step outcomes for automatic status inference + if (eventType === 'step_end') { + const success = (data as any).success || false; + if (success) { + this.stepSuccesses += 1; + } else { + this.stepFailures += 1; + } + } else if (eventType === 'error') { + this.hasErrors = true; + } } /** @@ -71,6 +95,9 @@ export class Tracer { llmModel?: string, config?: Record ): void { + // Track start time + this.startedAt = new Date(); + const data: TraceEventData = { agent }; if (llmModel) data.llm_model = llmModel; if (config) data.config = config; @@ -93,6 +120,11 @@ export class Tracer { attempt: number = 0, preUrl?: string ): void { + // Track step count (only count first attempt of each step) + if (attempt === 0) { + this.totalSteps = Math.max(this.totalSteps, stepIndex); + } + const data: TraceEventData = { step_id: stepId, step_index: stepIndex, @@ -110,9 +142,25 @@ export class Tracer { /** * Emit run_end event * @param steps - Total number of steps executed + * @param status - Optional final status ("success", "failure", "partial", "unknown") + * If not provided, infers from tracked outcomes or uses this.finalStatus */ - emitRunEnd(steps: number): void { - this.emit('run_end', { steps }); + emitRunEnd(steps: number, status?: string): void { + // Track end time + this.endedAt = new Date(); + + // Auto-infer status if not provided and not explicitly set + if (status === undefined && this.finalStatus === 'unknown') { + this._inferFinalStatus(); + } + + // Use provided status or fallback to this.finalStatus + const finalStatus = status || this.finalStatus; + + // Ensure totalSteps is at least the provided steps value + this.totalSteps = Math.max(this.totalSteps, steps); + + this.emit('run_end', { steps, status: finalStatus }); } /** @@ -125,11 +173,44 @@ export class Tracer { this.emit('error', { step_id: stepId, error, attempt }, stepId); } + /** + * Automatically infer finalStatus from tracked step outcomes if not explicitly set. + * This is called automatically in close() if finalStatus is still "unknown". + */ + private _inferFinalStatus(): void { + if (this.finalStatus !== 'unknown') { + // Status already set explicitly, don't override + return; + } + + // Infer from tracked outcomes + if (this.hasErrors) { + // Has errors - check if there were successful steps too + if (this.stepSuccesses > 0) { + this.finalStatus = 'partial'; + } else { + this.finalStatus = 'failure'; + } + } else if (this.stepSuccesses > 0) { + // Has successful steps and no errors + this.finalStatus = 'success'; + } + // Otherwise stays "unknown" (no steps executed or no clear outcome) + } + /** * Close the underlying sink (flush buffered data) * @param blocking - If false, upload happens in background (default: true). Only applies to CloudTraceSink. */ async close(blocking: boolean = true): Promise { + // Auto-infer finalStatus if not explicitly set and we have step outcomes + if ( + this.finalStatus === 'unknown' && + (this.stepSuccesses > 0 || this.stepFailures > 0 || this.hasErrors) + ) { + this._inferFinalStatus(); + } + // Check if sink has a close method that accepts blocking parameter if (typeof (this.sink as any).close === 'function' && (this.sink as any).close.length > 0) { await (this.sink as any).close(blocking); @@ -158,4 +239,37 @@ export class Tracer { getSinkType(): string { return this.sink.getSinkType(); } + + /** + * Set the final status of the trace run + * @param status - Final status ("success", "failure", "partial", "unknown") + */ + setFinalStatus(status: string): void { + if (!['success', 'failure', 'partial', 'unknown'].includes(status)) { + throw new Error( + `Invalid status: ${status}. Must be one of: success, failure, partial, unknown` + ); + } + this.finalStatus = status; + } + + /** + * Get execution statistics for trace completion + * @returns Dictionary with stats fields for /v1/traces/complete + */ + getStats(): Record { + let durationMs: number | null = null; + if (this.startedAt && this.endedAt) { + durationMs = this.endedAt.getTime() - this.startedAt.getTime(); + } + + return { + total_steps: this.totalSteps, + total_events: this.totalEvents, + duration_ms: durationMs, + final_status: this.finalStatus, + started_at: this.startedAt ? this.startedAt.toISOString() : null, + ended_at: this.endedAt ? this.endedAt.toISOString() : null, + }; + } } diff --git a/tests/tracing/cloud-sink.test.ts b/tests/tracing/cloud-sink.test.ts index 64324b97..923a1c35 100644 --- a/tests/tracing/cloud-sink.test.ts +++ b/tests/tracing/cloud-sink.test.ts @@ -372,6 +372,8 @@ describe('CloudTraceSink', () => { res.writeHead(200); res.end('OK'); } else if (req.url === '/v1/traces/complete') { + // Store completion request body for verification + (indexServer as any).lastCompleteRequest = JSON.parse(requestBody.toString()); // Accept completion call res.writeHead(200); res.end('OK'); @@ -545,5 +547,79 @@ describe('CloudTraceSink', () => { // (This is expected behavior - we clean up after upload) // If the test runs fast enough, file might still exist briefly }); + + it('should include all required stats fields in completion request', async () => { + const runId = 'test-complete-stats-' + Date.now(); + const apiUrl = `http://localhost:${indexServerPort}`; + + const sink = new CloudTraceSink( + uploadUrl, + runId, + 'sk_test_123', + apiUrl + ); + + // Emit events with timestamps + const startTime = new Date().toISOString(); + sink.emit({ + v: 1, + type: 'run_start', + ts: startTime, + run_id: runId, + seq: 1, + data: { agent: 'TestAgent' }, + }); + + sink.emit({ + v: 1, + type: 'step_start', + ts: startTime, + run_id: runId, + seq: 2, + step_id: 'step-1', + data: { step_id: 'step-1', step_index: 1, goal: 'Test', attempt: 0 }, + }); + + const endTime = new Date().toISOString(); + sink.emit({ + v: 1, + type: 'run_end', + ts: endTime, + run_id: runId, + seq: 3, + data: { steps: 1, status: 'success' }, + }); + + await sink.close(); + + // Give async operations time to complete + await new Promise(resolve => setTimeout(resolve, 200)); + + // Verify completion request was made + const completeRequest = (indexServer as any).lastCompleteRequest; + expect(completeRequest).toBeDefined(); + expect(completeRequest.run_id).toBe(runId); + + const stats = completeRequest.stats; + expect(stats).toBeDefined(); + + // Verify all required fields are present + expect(stats.trace_file_size_bytes).toBeDefined(); + expect(stats.screenshot_total_size_bytes).toBeDefined(); + expect(stats.screenshot_count).toBeDefined(); + expect(stats.index_file_size_bytes).toBeDefined(); + expect(stats.total_steps).toBeDefined(); + expect(stats.total_steps).toBe(1); + expect(stats.total_events).toBeDefined(); + expect(stats.total_events).toBe(3); + expect(stats.duration_ms).toBeDefined(); + expect(stats.duration_ms).not.toBeNull(); + expect(stats.final_status).toBeDefined(); + expect(stats.final_status).toBe('success'); + expect(stats.started_at).toBeDefined(); + expect(stats.started_at).not.toBeNull(); + expect(stats.ended_at).toBeDefined(); + expect(stats.ended_at).not.toBeNull(); + }); }); }); diff --git a/tests/tracing/jsonl-sink.test.ts b/tests/tracing/jsonl-sink.test.ts index 46dbd4df..2ee35ebc 100644 --- a/tests/tracing/jsonl-sink.test.ts +++ b/tests/tracing/jsonl-sink.test.ts @@ -8,35 +8,72 @@ import { JsonlTraceSink } from '../../src/tracing/jsonl-sink'; describe('JsonlTraceSink', () => { const testDir = path.join(__dirname, 'test-traces'); - const testFile = path.join(testDir, 'test.jsonl'); + // Use unique filename for each test to avoid Windows file locking issues + let testFile: string; + + /** + * Helper function to read file with retry logic for Windows EPERM errors + * Windows file handles may not be released immediately after close() + */ + async function readFileWithRetry(filePath: string, maxAttempts: number = 10): Promise { + let attempts = 0; + while (attempts < maxAttempts) { + try { + return fs.readFileSync(filePath, 'utf-8'); + } catch (err: any) { + if (err.code === 'EPERM' && attempts < maxAttempts - 1) { + // File still locked, wait and retry + await new Promise(resolve => setTimeout(resolve, 50)); + attempts++; + } else { + throw err; // Re-throw if not EPERM or max attempts reached + } + } + } + throw new Error(`Failed to read file after ${maxAttempts} attempts`); + } + + beforeEach(async () => { + // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) + await new Promise(resolve => setTimeout(resolve, 150)); + + // Generate unique filename for this test to avoid Windows file locking issues + const uniqueId = Math.random().toString(36).substring(7); + testFile = path.join(testDir, `trace-${uniqueId}.jsonl`); - beforeEach(() => { - // Clean up test directory - if (fs.existsSync(testDir)) { - fs.rmSync(testDir, { recursive: true, force: true }); + // Ensure directory exists + if (!fs.existsSync(testDir)) { + fs.mkdirSync(testDir, { recursive: true }); } }); afterEach(async () => { - // Wait a bit for file handles to close (Windows needs this) - await new Promise(resolve => setTimeout(resolve, 100)); + // Wait longer for file handles to close (Windows needs more time) + await new Promise(resolve => setTimeout(resolve, 200)); - // Clean up test directory with retry logic for Windows - if (fs.existsSync(testDir)) { - // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 5; i++) { - try { - fs.rmSync(testDir, { recursive: true, force: true }); - break; // Success - } catch (err: any) { - if (i === 4) { - // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); - } else { - // Wait before retry - await new Promise(resolve => setTimeout(resolve, 50)); + // Clean up the specific file for this test + if (testFile) { + try { + if (fs.existsSync(testFile)) { + // Retry deletion on Windows (file may still be locked) + for (let i = 0; i < 5; i++) { + try { + fs.unlinkSync(testFile); + break; // Success + } catch (err: any) { + if (i === 4) { + // Last attempt failed, log but don't throw + console.warn(`Could not delete ${testFile}:`, err); + } else { + // Wait before retry + await new Promise(resolve => setTimeout(resolve, 50)); + } + } } } + } catch (err: any) { + // Ignore cleanup errors - don't fail tests + console.warn(`Could not delete ${testFile}:`, err); } } }); @@ -54,8 +91,10 @@ describe('JsonlTraceSink', () => { sink.emit({ type: 'test2', data: 'world' }); await sink.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); expect(lines.length).toBe(2); @@ -79,8 +118,10 @@ describe('JsonlTraceSink', () => { const sink2 = new JsonlTraceSink(testFile); sink2.emit({ seq: 2 }); await sink2.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); expect(lines.length).toBe(2); @@ -164,8 +205,10 @@ describe('JsonlTraceSink', () => { sink.emit(complexEvent); await sink.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const parsed = JSON.parse(content.trim()); expect(parsed).toEqual(complexEvent); diff --git a/tests/tracing/tracer.test.ts b/tests/tracing/tracer.test.ts index 953f4112..1255c4ff 100644 --- a/tests/tracing/tracer.test.ts +++ b/tests/tracing/tracer.test.ts @@ -11,36 +11,72 @@ import { TraceEvent } from '../../src/tracing/types'; describe('Tracer', () => { const testDir = path.join(__dirname, 'test-traces'); - const testFile = path.join(testDir, 'tracer-test.jsonl'); + // Use unique filename for each test to avoid Windows file locking issues + let testFile: string; + + /** + * Helper function to read file with retry logic for Windows EPERM errors + * Windows file handles may not be released immediately after close() + */ + async function readFileWithRetry(filePath: string, maxAttempts: number = 10): Promise { + let attempts = 0; + while (attempts < maxAttempts) { + try { + return fs.readFileSync(filePath, 'utf-8'); + } catch (err: any) { + if (err.code === 'EPERM' && attempts < maxAttempts - 1) { + // File still locked, wait and retry + await new Promise(resolve => setTimeout(resolve, 50)); + attempts++; + } else { + throw err; // Re-throw if not EPERM or max attempts reached + } + } + } + throw new Error(`Failed to read file after ${maxAttempts} attempts`); + } + + beforeEach(async () => { + // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) + await new Promise(resolve => setTimeout(resolve, 150)); - beforeEach(() => { - // Clean up and recreate test directory - if (fs.existsSync(testDir)) { - fs.rmSync(testDir, { recursive: true, force: true }); + // Generate unique filename for this test to avoid Windows file locking issues + const uniqueId = Math.random().toString(36).substring(7); + testFile = path.join(testDir, `trace-${uniqueId}.jsonl`); + + // Ensure directory exists + if (!fs.existsSync(testDir)) { + fs.mkdirSync(testDir, { recursive: true }); } - fs.mkdirSync(testDir, { recursive: true }); }); afterEach(async () => { - // Wait a bit for file handles to close (Windows needs this) - await new Promise(resolve => setTimeout(resolve, 100)); + // Wait longer for file handles to close (Windows needs more time) + await new Promise(resolve => setTimeout(resolve, 200)); - // Clean up test directory with retry logic for Windows - if (fs.existsSync(testDir)) { - // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 5; i++) { - try { - fs.rmSync(testDir, { recursive: true, force: true }); - break; // Success - } catch (err: any) { - if (i === 4) { - // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); - } else { - // Wait before retry - await new Promise(resolve => setTimeout(resolve, 50)); + // Clean up the specific file for this test + if (testFile) { + try { + if (fs.existsSync(testFile)) { + // Retry deletion on Windows (file may still be locked) + for (let i = 0; i < 5; i++) { + try { + fs.unlinkSync(testFile); + break; // Success + } catch (err: any) { + if (i === 4) { + // Last attempt failed, log but don't throw + console.warn(`Could not delete ${testFile}:`, err); + } else { + // Wait before retry + await new Promise(resolve => setTimeout(resolve, 50)); + } + } } } + } catch (err: any) { + // Ignore cleanup errors - don't fail tests + console.warn(`Could not delete ${testFile}:`, err); } } }); @@ -90,7 +126,7 @@ describe('Tracer', () => { throw new Error(`Trace file not created: ${testFile}`); } - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.ts).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/); @@ -105,8 +141,10 @@ describe('Tracer', () => { tracer.emit('test_event', { key: 'value' }, 'step-123'); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.v).toBe(1); @@ -126,8 +164,10 @@ describe('Tracer', () => { tracer.emit('test_event', { key: 'value' }); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.step_id).toBeUndefined(); @@ -142,8 +182,10 @@ describe('Tracer', () => { tracer.emitRunStart('SentienceAgent', 'gpt-4o', { timeout: 30000 }); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_start'); @@ -160,8 +202,10 @@ describe('Tracer', () => { tracer.emitRunStart('SentienceAgent'); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_start'); @@ -177,8 +221,10 @@ describe('Tracer', () => { tracer.emitStepStart('step-001', 1, 'Click the button', 0, 'https://example.com'); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('step_start'); @@ -197,8 +243,10 @@ describe('Tracer', () => { tracer.emitStepStart('step-002', 2, 'Type text'); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('step_start'); @@ -213,15 +261,105 @@ describe('Tracer', () => { tracer.emitRunEnd(5); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_end'); expect(event.data.steps).toBe(5); + expect(event.data.status).toBe('unknown'); // Default status expect(event.step_id).toBeUndefined(); }); + it('should emit run_end event with status parameter', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunEnd(5, 'success'); + + await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); + + const content = await readFileWithRetry(testFile); + const event = JSON.parse(content.trim()) as TraceEvent; + + expect(event.type).toBe('run_end'); + expect(event.data.steps).toBe(5); + expect(event.data.status).toBe('success'); + }); + + it('should track execution statistics', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + // Emit run_start (should track startedAt) + tracer.emitRunStart('TestAgent', 'gpt-4'); + expect(tracer.getStats().started_at).not.toBeNull(); + expect(tracer.getStats().total_events).toBe(1); + + // Emit step_start (should track totalSteps) + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + expect(tracer.getStats().total_steps).toBe(1); + expect(tracer.getStats().total_events).toBe(2); + + tracer.emitStepStart('step-2', 2, 'Goal 2', 0); + expect(tracer.getStats().total_steps).toBe(2); + expect(tracer.getStats().total_events).toBe(3); + + // Emit run_end (should track endedAt) + tracer.emitRunEnd(2); + expect(tracer.getStats().ended_at).not.toBeNull(); + expect(tracer.getStats().total_events).toBe(4); + + // Get stats + const stats = tracer.getStats(); + expect(stats.total_steps).toBe(2); + expect(stats.total_events).toBe(4); + expect(stats.final_status).toBe('unknown'); + expect(stats.started_at).not.toBeNull(); + expect(stats.ended_at).not.toBeNull(); + expect(stats.duration_ms).not.toBeNull(); + expect(stats.duration_ms).toBeGreaterThanOrEqual(0); + + await tracer.close(); + }); + + it('should set final status', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + // Default status is "unknown" + expect(tracer.getStats().final_status).toBe('unknown'); + + // Set status + tracer.setFinalStatus('success'); + expect(tracer.getStats().final_status).toBe('success'); + + // Status should be included in run_end + tracer.emitRunEnd(1); + + await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); + + const content = await readFileWithRetry(testFile); + const event = JSON.parse(content.trim()) as TraceEvent; + expect(event.data.status).toBe('success'); + }); + + it('should reject invalid final status', () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + // Invalid status should throw error + expect(() => { + tracer.setFinalStatus('invalid' as any); + }).toThrow('Invalid status'); + }); + it('should emit error event', async () => { const sink = new JsonlTraceSink(testFile); const tracer = new Tracer('test-run', sink); @@ -229,8 +367,10 @@ describe('Tracer', () => { tracer.emitError('step-003', 'Element not found', 2); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('error'); @@ -247,8 +387,10 @@ describe('Tracer', () => { tracer.emitError('step-004', 'Timeout'); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.data.attempt).toBe(0); @@ -270,8 +412,10 @@ describe('Tracer', () => { tracer.emitRunEnd(2); await tracer.close(); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); const events = lines.map(line => JSON.parse(line) as TraceEvent); @@ -359,4 +503,184 @@ describe('Tracer', () => { tracer.close(); }); }); + + describe('Automatic status inference', () => { + it('should automatically infer final status from step outcomes', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Emit another successful step + tracer.emitStepStart('step-2', 2, 'Goal 2', 0); + tracer.emit('step_end', { success: true, action: 'type' }, 'step-2'); + + // Close without explicitly setting status or calling emitRunEnd + // Status should be auto-inferred as "success" + await tracer.close(); + + // Verify status was auto-inferred + expect(tracer.getStats().final_status).toBe('success'); + + // Verify stats reflect the inferred status + const stats = tracer.getStats(); + expect(stats.final_status).toBe('success'); + expect(stats.total_steps).toBe(2); + }); + + it('should automatically infer "partial" status when there are both successes and errors', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Emit error + tracer.emitError('step-2', 'Element not found', 0); + + // Close without explicitly setting status + await tracer.close(); + + // Verify status was auto-inferred as "partial" (has both successes and errors) + expect(tracer.getStats().final_status).toBe('partial'); + }); + + it('should automatically infer "failure" status when there are only errors', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit error without any successful steps + tracer.emitError('step-1', 'Element not found', 0); + + // Close without explicitly setting status + await tracer.close(); + + // Verify status was auto-inferred as "failure" (only errors, no successes) + expect(tracer.getStats().final_status).toBe('failure'); + }); + + it('should not override explicitly set status', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Explicitly set status to "partial" (even though we have success) + tracer.setFinalStatus('partial'); + + // Close - should not override explicit status + await tracer.close(); + + // Verify explicit status was preserved + expect(tracer.getStats().final_status).toBe('partial'); + }); + + it('should set final_status automatically when close() is called', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful steps + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + tracer.emitStepStart('step-2', 2, 'Goal 2', 0); + tracer.emit('step_end', { success: true, action: 'type' }, 'step-2'); + + // Verify status is still "unknown" before close + expect(tracer.getStats().final_status).toBe('unknown'); + + // Close should auto-infer status + await tracer.close(); + + // Verify status was auto-inferred after close + expect(tracer.getStats().final_status).toBe('success'); + expect(tracer.getStats().total_steps).toBe(2); + }); + + it('should include auto-inferred final_status in run_end event when emitRunEnd is called', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Verify status is still "unknown" before emitRunEnd + expect(tracer.getStats().final_status).toBe('unknown'); + + // emitRunEnd should auto-infer status if not provided + tracer.emitRunEnd(1); + + // Verify status was auto-inferred + expect(tracer.getStats().final_status).toBe('success'); + + // Close the tracer + await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); + + // Read trace file and verify run_end event has the inferred status + const content = await readFileWithRetry(testFile); + const lines = content.trim().split('\n'); + const runEndEvents = lines + .map(line => JSON.parse(line)) + .filter((event: any) => event.type === 'run_end'); + + expect(runEndEvents.length).toBeGreaterThan(0); + // The run_end event should have the auto-inferred status + const lastRunEnd = runEndEvents[runEndEvents.length - 1]; + expect(lastRunEnd.data.status).toBe('success'); + }); + + it('should include auto-inferred final_status in stats when close() is called with CloudTraceSink', async () => { + const { CloudTraceSink } = await import('../../src/tracing/cloud-sink'); + + const uploadUrl = 'https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz'; + const runId = 'test-close-status-' + Date.now(); + const apiKey = 'sk_test_123'; + const apiUrl = 'https://api.sentience.ai'; + + const sink = new CloudTraceSink(uploadUrl, runId, apiKey, apiUrl); + const tracer = new Tracer(runId, sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Verify status is still "unknown" before close + expect(tracer.getStats().final_status).toBe('unknown'); + + // Note: We don't actually call close() here because it would try to upload + // Instead, we verify that the status inference logic works correctly + // The actual upload and completion request are tested in cloud-sink.test.ts + + // Manually trigger the inference logic (simulating what close() does) + if (tracer.getStats().final_status === 'unknown') { + // Access private method via type assertion (for testing only) + (tracer as any)._inferFinalStatus(); + } + + // Verify status was auto-inferred + expect(tracer.getStats().final_status).toBe('success'); + }); + }); });