From 37a1a48717c259186a63f57d93b611cf80c2ceea Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 18:44:36 -0800 Subject: [PATCH 1/7] set or infer final trace status --- package.json | 2 +- src/tracing/cloud-sink.ts | 179 ++++++++++++++++++++- src/tracing/jsonl-sink.ts | 120 ++++++++++++++ src/tracing/tracer.ts | 118 +++++++++++++- tests/tracing/cloud-sink.test.ts | 76 +++++++++ tests/tracing/tracer.test.ts | 262 +++++++++++++++++++++++++++++++ 6 files changed, 748 insertions(+), 9 deletions(-) diff --git a/package.json b/package.json index ed90a2a4..3c52481f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sentienceapi", - "version": "0.90.17", + "version": "0.90.19", "description": "TypeScript SDK for Sentience AI Agent Browser Automation", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/tracing/cloud-sink.ts b/src/tracing/cloud-sink.ts index bef4b4c8..7dbbfdde 100644 --- a/src/tracing/cloud-sink.ts +++ b/src/tracing/cloud-sink.ts @@ -79,6 +79,7 @@ export class CloudTraceSink extends TraceSink { private traceFileSizeBytes: number = 0; private screenshotTotalSizeBytes: number = 0; private screenshotCount: number = 0; // Track number of screenshots extracted + private indexFileSizeBytes: number = 0; // Track index file size // Upload success flag private uploadSuccessful: boolean = false; @@ -332,7 +333,164 @@ export class CloudTraceSink extends TraceSink { } /** - * Call /v1/traces/complete to report file sizes to gateway. + * Infer final status from trace events by reading the trace file. + * @returns Final status: "success", "failure", "partial", or "unknown" + */ + private _inferFinalStatusFromTrace(): string { + try { + // Read trace file to analyze events + const traceContent = fs.readFileSync(this.tempFilePath, 'utf-8'); + const lines = traceContent.split('\n').filter(line => line.trim()); + const events: any[] = []; + + for (const line of lines) { + try { + const event = JSON.parse(line); + events.push(event); + } catch { + continue; + } + } + + if (events.length === 0) { + return 'unknown'; + } + + // Check for run_end event with status + for (let i = events.length - 1; i >= 0; i--) { + const event = events[i]; + if (event.type === 'run_end') { + const status = event.data?.status; + if (['success', 'failure', 'partial', 'unknown'].includes(status)) { + return status; + } + } + } + + // Infer from error events + const hasErrors = events.some(e => e.type === 'error'); + if (hasErrors) { + // Check if there are successful steps too (partial success) + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + return 'partial'; + } + return 'failure'; + } + + // If we have step_end events and no errors, likely success + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + return 'success'; + } + + return 'unknown'; + } catch { + // If we can't read the trace, default to unknown + return 'unknown'; + } + } + + /** + * Extract execution statistics from trace file. + * @returns Dictionary with stats fields for /v1/traces/complete + */ + private _extractStatsFromTrace(): Record { + try { + // Read trace file to extract stats + const traceContent = fs.readFileSync(this.tempFilePath, 'utf-8'); + const lines = traceContent.split('\n').filter(line => line.trim()); + const events: any[] = []; + + for (const line of lines) { + try { + const event = JSON.parse(line); + events.push(event); + } catch { + continue; + } + } + + if (events.length === 0) { + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + + // Find run_start and run_end events + const runStart = events.find(e => e.type === 'run_start'); + const runEnd = events.find(e => e.type === 'run_end'); + + // Extract timestamps + const startedAt = runStart?.ts || null; + const endedAt = runEnd?.ts || null; + + // Calculate duration + let durationMs: number | null = null; + if (startedAt && endedAt) { + try { + const startDt = new Date(startedAt); + const endDt = new Date(endedAt); + durationMs = endDt.getTime() - startDt.getTime(); + } catch { + // Ignore parse errors + } + } + + // Count steps (from step_start events, only first attempt) + const stepIndices = new Set(); + for (const event of events) { + if (event.type === 'step_start') { + const stepIndex = event.data?.step_index; + if (stepIndex !== undefined) { + stepIndices.add(stepIndex); + } + } + } + let totalSteps = stepIndices.size; + + // If run_end has steps count, use that (more accurate) + if (runEnd) { + const stepsFromEnd = runEnd.data?.steps; + if (stepsFromEnd !== undefined) { + totalSteps = Math.max(totalSteps, stepsFromEnd); + } + } + + // Count total events + const totalEvents = events.length; + + // Infer final status + const finalStatus = this._inferFinalStatusFromTrace(); + + return { + total_steps: totalSteps, + total_events: totalEvents, + duration_ms: durationMs, + final_status: finalStatus, + started_at: startedAt, + ended_at: endedAt, + }; + } catch (error: any) { + this.logger?.warn(`Error extracting stats from trace: ${error.message}`); + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + } + + /** + * Call /v1/traces/complete to report file sizes and stats to gateway. * * This is a best-effort call - failures are logged but don't affect upload success. */ @@ -346,13 +504,21 @@ export class CloudTraceSink extends TraceSink { const url = new URL(`${this.apiUrl}/v1/traces/complete`); const protocol = url.protocol === 'https:' ? https : http; + // Extract stats from trace file + const stats = this._extractStatsFromTrace(); + + // Add file size fields + const completeStats = { + ...stats, + trace_file_size_bytes: this.traceFileSizeBytes, + screenshot_total_size_bytes: this.screenshotTotalSizeBytes, + screenshot_count: this.screenshotCount, + index_file_size_bytes: this.indexFileSizeBytes, + }; + const body = JSON.stringify({ run_id: this.runId, - stats: { - trace_file_size_bytes: this.traceFileSizeBytes, - screenshot_total_size_bytes: this.screenshotTotalSizeBytes, - screenshot_count: this.screenshotCount, - }, + stats: completeStats, }); const options = { @@ -447,6 +613,7 @@ export class CloudTraceSink extends TraceSink { const indexData = await fsPromises.readFile(indexPath); const compressedIndex = zlib.gzipSync(indexData); const indexSize = compressedIndex.length; + this.indexFileSizeBytes = indexSize; // Track index file size this.logger?.info(`Index file size: ${(indexSize / 1024).toFixed(2)} KB`); if (this.logger) { diff --git a/src/tracing/jsonl-sink.ts b/src/tracing/jsonl-sink.ts index 97ee62e5..f5060457 100644 --- a/src/tracing/jsonl-sink.ts +++ b/src/tracing/jsonl-sink.ts @@ -163,4 +163,124 @@ export class JsonlTraceSink extends TraceSink { isClosed(): boolean { return this.closed; } + + /** + * Extract execution statistics from trace file (for local traces). + * @returns Dictionary with stats fields (same format as Tracer.getStats()) + */ + getStats(): Record { + try { + // Read trace file to extract stats + const traceContent = fs.readFileSync(this.path, 'utf-8'); + const lines = traceContent.split('\n').filter(line => line.trim()); + const events: any[] = []; + + for (const line of lines) { + try { + const event = JSON.parse(line); + events.push(event); + } catch { + continue; + } + } + + if (events.length === 0) { + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + + // Find run_start and run_end events + const runStart = events.find(e => e.type === 'run_start'); + const runEnd = events.find(e => e.type === 'run_end'); + + // Extract timestamps + const startedAt = runStart?.ts || null; + const endedAt = runEnd?.ts || null; + + // Calculate duration + let durationMs: number | null = null; + if (startedAt && endedAt) { + try { + const startDt = new Date(startedAt); + const endDt = new Date(endedAt); + durationMs = endDt.getTime() - startDt.getTime(); + } catch { + // Ignore parse errors + } + } + + // Count steps (from step_start events, only first attempt) + const stepIndices = new Set(); + for (const event of events) { + if (event.type === 'step_start') { + const stepIndex = event.data?.step_index; + if (stepIndex !== undefined) { + stepIndices.add(stepIndex); + } + } + } + let totalSteps = stepIndices.size; + + // If run_end has steps count, use that (more accurate) + if (runEnd) { + const stepsFromEnd = runEnd.data?.steps; + if (stepsFromEnd !== undefined) { + totalSteps = Math.max(totalSteps, stepsFromEnd); + } + } + + // Count total events + const totalEvents = events.length; + + // Infer final status + let finalStatus = 'unknown'; + // Check for run_end event with status + if (runEnd) { + const status = runEnd.data?.status; + if (['success', 'failure', 'partial', 'unknown'].includes(status)) { + finalStatus = status; + } + } else { + // Infer from error events + const hasErrors = events.some(e => e.type === 'error'); + if (hasErrors) { + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + finalStatus = 'partial'; + } else { + finalStatus = 'failure'; + } + } else { + const stepEnds = events.filter(e => e.type === 'step_end'); + if (stepEnds.length > 0) { + finalStatus = 'success'; + } + } + } + + return { + total_steps: totalSteps, + total_events: totalEvents, + duration_ms: durationMs, + final_status: finalStatus, + started_at: startedAt, + ended_at: endedAt, + }; + } catch { + return { + total_steps: 0, + total_events: 0, + duration_ms: null, + final_status: 'unknown', + started_at: null, + ended_at: null, + }; + } + } } diff --git a/src/tracing/tracer.ts b/src/tracing/tracer.ts index a20e8691..297e81f0 100644 --- a/src/tracing/tracer.ts +++ b/src/tracing/tracer.ts @@ -14,6 +14,17 @@ export class Tracer { private runId: string; private sink: TraceSink; private seq: number; + + // Stats tracking + private totalSteps: number = 0; + private totalEvents: number = 0; + private startedAt: Date | null = null; + private endedAt: Date | null = null; + private finalStatus: string = 'unknown'; + // Track step outcomes for automatic status inference + private stepSuccesses: number = 0; + private stepFailures: number = 0; + private hasErrors: boolean = false; /** * Create a new Tracer @@ -38,6 +49,7 @@ export class Tracer { stepId?: string ): void { this.seq += 1; + this.totalEvents += 1; // Generate timestamps const tsMs = Date.now(); @@ -58,6 +70,18 @@ export class Tracer { } this.sink.emit(event); + + // Track step outcomes for automatic status inference + if (eventType === 'step_end') { + const success = (data as any).success || false; + if (success) { + this.stepSuccesses += 1; + } else { + this.stepFailures += 1; + } + } else if (eventType === 'error') { + this.hasErrors = true; + } } /** @@ -71,6 +95,9 @@ export class Tracer { llmModel?: string, config?: Record ): void { + // Track start time + this.startedAt = new Date(); + const data: TraceEventData = { agent }; if (llmModel) data.llm_model = llmModel; if (config) data.config = config; @@ -93,6 +120,11 @@ export class Tracer { attempt: number = 0, preUrl?: string ): void { + // Track step count (only count first attempt of each step) + if (attempt === 0) { + this.totalSteps = Math.max(this.totalSteps, stepIndex); + } + const data: TraceEventData = { step_id: stepId, step_index: stepIndex, @@ -110,9 +142,25 @@ export class Tracer { /** * Emit run_end event * @param steps - Total number of steps executed + * @param status - Optional final status ("success", "failure", "partial", "unknown") + * If not provided, infers from tracked outcomes or uses this.finalStatus */ - emitRunEnd(steps: number): void { - this.emit('run_end', { steps }); + emitRunEnd(steps: number, status?: string): void { + // Track end time + this.endedAt = new Date(); + + // Auto-infer status if not provided and not explicitly set + if (status === undefined && this.finalStatus === 'unknown') { + this._inferFinalStatus(); + } + + // Use provided status or fallback to this.finalStatus + const finalStatus = status || this.finalStatus; + + // Ensure totalSteps is at least the provided steps value + this.totalSteps = Math.max(this.totalSteps, steps); + + this.emit('run_end', { steps, status: finalStatus }); } /** @@ -125,11 +173,44 @@ export class Tracer { this.emit('error', { step_id: stepId, error, attempt }, stepId); } + /** + * Automatically infer finalStatus from tracked step outcomes if not explicitly set. + * This is called automatically in close() if finalStatus is still "unknown". + */ + private _inferFinalStatus(): void { + if (this.finalStatus !== 'unknown') { + // Status already set explicitly, don't override + return; + } + + // Infer from tracked outcomes + if (this.hasErrors) { + // Has errors - check if there were successful steps too + if (this.stepSuccesses > 0) { + this.finalStatus = 'partial'; + } else { + this.finalStatus = 'failure'; + } + } else if (this.stepSuccesses > 0) { + // Has successful steps and no errors + this.finalStatus = 'success'; + } + // Otherwise stays "unknown" (no steps executed or no clear outcome) + } + /** * Close the underlying sink (flush buffered data) * @param blocking - If false, upload happens in background (default: true). Only applies to CloudTraceSink. */ async close(blocking: boolean = true): Promise { + // Auto-infer finalStatus if not explicitly set and we have step outcomes + if ( + this.finalStatus === 'unknown' && + (this.stepSuccesses > 0 || this.stepFailures > 0 || this.hasErrors) + ) { + this._inferFinalStatus(); + } + // Check if sink has a close method that accepts blocking parameter if (typeof (this.sink as any).close === 'function' && (this.sink as any).close.length > 0) { await (this.sink as any).close(blocking); @@ -158,4 +239,37 @@ export class Tracer { getSinkType(): string { return this.sink.getSinkType(); } + + /** + * Set the final status of the trace run + * @param status - Final status ("success", "failure", "partial", "unknown") + */ + setFinalStatus(status: string): void { + if (!['success', 'failure', 'partial', 'unknown'].includes(status)) { + throw new Error( + `Invalid status: ${status}. Must be one of: success, failure, partial, unknown` + ); + } + this.finalStatus = status; + } + + /** + * Get execution statistics for trace completion + * @returns Dictionary with stats fields for /v1/traces/complete + */ + getStats(): Record { + let durationMs: number | null = null; + if (this.startedAt && this.endedAt) { + durationMs = this.endedAt.getTime() - this.startedAt.getTime(); + } + + return { + total_steps: this.totalSteps, + total_events: this.totalEvents, + duration_ms: durationMs, + final_status: this.finalStatus, + started_at: this.startedAt ? this.startedAt.toISOString() : null, + ended_at: this.endedAt ? this.endedAt.toISOString() : null, + }; + } } diff --git a/tests/tracing/cloud-sink.test.ts b/tests/tracing/cloud-sink.test.ts index 64324b97..923a1c35 100644 --- a/tests/tracing/cloud-sink.test.ts +++ b/tests/tracing/cloud-sink.test.ts @@ -372,6 +372,8 @@ describe('CloudTraceSink', () => { res.writeHead(200); res.end('OK'); } else if (req.url === '/v1/traces/complete') { + // Store completion request body for verification + (indexServer as any).lastCompleteRequest = JSON.parse(requestBody.toString()); // Accept completion call res.writeHead(200); res.end('OK'); @@ -545,5 +547,79 @@ describe('CloudTraceSink', () => { // (This is expected behavior - we clean up after upload) // If the test runs fast enough, file might still exist briefly }); + + it('should include all required stats fields in completion request', async () => { + const runId = 'test-complete-stats-' + Date.now(); + const apiUrl = `http://localhost:${indexServerPort}`; + + const sink = new CloudTraceSink( + uploadUrl, + runId, + 'sk_test_123', + apiUrl + ); + + // Emit events with timestamps + const startTime = new Date().toISOString(); + sink.emit({ + v: 1, + type: 'run_start', + ts: startTime, + run_id: runId, + seq: 1, + data: { agent: 'TestAgent' }, + }); + + sink.emit({ + v: 1, + type: 'step_start', + ts: startTime, + run_id: runId, + seq: 2, + step_id: 'step-1', + data: { step_id: 'step-1', step_index: 1, goal: 'Test', attempt: 0 }, + }); + + const endTime = new Date().toISOString(); + sink.emit({ + v: 1, + type: 'run_end', + ts: endTime, + run_id: runId, + seq: 3, + data: { steps: 1, status: 'success' }, + }); + + await sink.close(); + + // Give async operations time to complete + await new Promise(resolve => setTimeout(resolve, 200)); + + // Verify completion request was made + const completeRequest = (indexServer as any).lastCompleteRequest; + expect(completeRequest).toBeDefined(); + expect(completeRequest.run_id).toBe(runId); + + const stats = completeRequest.stats; + expect(stats).toBeDefined(); + + // Verify all required fields are present + expect(stats.trace_file_size_bytes).toBeDefined(); + expect(stats.screenshot_total_size_bytes).toBeDefined(); + expect(stats.screenshot_count).toBeDefined(); + expect(stats.index_file_size_bytes).toBeDefined(); + expect(stats.total_steps).toBeDefined(); + expect(stats.total_steps).toBe(1); + expect(stats.total_events).toBeDefined(); + expect(stats.total_events).toBe(3); + expect(stats.duration_ms).toBeDefined(); + expect(stats.duration_ms).not.toBeNull(); + expect(stats.final_status).toBeDefined(); + expect(stats.final_status).toBe('success'); + expect(stats.started_at).toBeDefined(); + expect(stats.started_at).not.toBeNull(); + expect(stats.ended_at).toBeDefined(); + expect(stats.ended_at).not.toBeNull(); + }); }); }); diff --git a/tests/tracing/tracer.test.ts b/tests/tracing/tracer.test.ts index 953f4112..5bd37432 100644 --- a/tests/tracing/tracer.test.ts +++ b/tests/tracing/tracer.test.ts @@ -219,9 +219,93 @@ describe('Tracer', () => { expect(event.type).toBe('run_end'); expect(event.data.steps).toBe(5); + expect(event.data.status).toBe('unknown'); // Default status expect(event.step_id).toBeUndefined(); }); + it('should emit run_end event with status parameter', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunEnd(5, 'success'); + + await tracer.close(); + + const content = fs.readFileSync(testFile, 'utf-8'); + const event = JSON.parse(content.trim()) as TraceEvent; + + expect(event.type).toBe('run_end'); + expect(event.data.steps).toBe(5); + expect(event.data.status).toBe('success'); + }); + + it('should track execution statistics', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + // Emit run_start (should track startedAt) + tracer.emitRunStart('TestAgent', 'gpt-4'); + expect(tracer.getStats().started_at).not.toBeNull(); + expect(tracer.getStats().total_events).toBe(1); + + // Emit step_start (should track totalSteps) + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + expect(tracer.getStats().total_steps).toBe(1); + expect(tracer.getStats().total_events).toBe(2); + + tracer.emitStepStart('step-2', 2, 'Goal 2', 0); + expect(tracer.getStats().total_steps).toBe(2); + expect(tracer.getStats().total_events).toBe(3); + + // Emit run_end (should track endedAt) + tracer.emitRunEnd(2); + expect(tracer.getStats().ended_at).not.toBeNull(); + expect(tracer.getStats().total_events).toBe(4); + + // Get stats + const stats = tracer.getStats(); + expect(stats.total_steps).toBe(2); + expect(stats.total_events).toBe(4); + expect(stats.final_status).toBe('unknown'); + expect(stats.started_at).not.toBeNull(); + expect(stats.ended_at).not.toBeNull(); + expect(stats.duration_ms).not.toBeNull(); + expect(stats.duration_ms).toBeGreaterThanOrEqual(0); + + await tracer.close(); + }); + + it('should set final status', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + // Default status is "unknown" + expect(tracer.getStats().final_status).toBe('unknown'); + + // Set status + tracer.setFinalStatus('success'); + expect(tracer.getStats().final_status).toBe('success'); + + // Status should be included in run_end + tracer.emitRunEnd(1); + + await tracer.close(); + + const content = fs.readFileSync(testFile, 'utf-8'); + const event = JSON.parse(content.trim()) as TraceEvent; + expect(event.data.status).toBe('success'); + }); + + it('should reject invalid final status', () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + // Invalid status should throw error + expect(() => { + tracer.setFinalStatus('invalid' as any); + }).toThrow('Invalid status'); + }); + it('should emit error event', async () => { const sink = new JsonlTraceSink(testFile); const tracer = new Tracer('test-run', sink); @@ -359,4 +443,182 @@ describe('Tracer', () => { tracer.close(); }); }); + + describe('Automatic status inference', () => { + it('should automatically infer final status from step outcomes', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Emit another successful step + tracer.emitStepStart('step-2', 2, 'Goal 2', 0); + tracer.emit('step_end', { success: true, action: 'type' }, 'step-2'); + + // Close without explicitly setting status or calling emitRunEnd + // Status should be auto-inferred as "success" + await tracer.close(); + + // Verify status was auto-inferred + expect(tracer.getStats().final_status).toBe('success'); + + // Verify stats reflect the inferred status + const stats = tracer.getStats(); + expect(stats.final_status).toBe('success'); + expect(stats.total_steps).toBe(2); + }); + + it('should automatically infer "partial" status when there are both successes and errors', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Emit error + tracer.emitError('step-2', 'Element not found', 0); + + // Close without explicitly setting status + await tracer.close(); + + // Verify status was auto-inferred as "partial" (has both successes and errors) + expect(tracer.getStats().final_status).toBe('partial'); + }); + + it('should automatically infer "failure" status when there are only errors', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit error without any successful steps + tracer.emitError('step-1', 'Element not found', 0); + + // Close without explicitly setting status + await tracer.close(); + + // Verify status was auto-inferred as "failure" (only errors, no successes) + expect(tracer.getStats().final_status).toBe('failure'); + }); + + it('should not override explicitly set status', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Explicitly set status to "partial" (even though we have success) + tracer.setFinalStatus('partial'); + + // Close - should not override explicit status + await tracer.close(); + + // Verify explicit status was preserved + expect(tracer.getStats().final_status).toBe('partial'); + }); + + it('should set final_status automatically when close() is called', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful steps + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + tracer.emitStepStart('step-2', 2, 'Goal 2', 0); + tracer.emit('step_end', { success: true, action: 'type' }, 'step-2'); + + // Verify status is still "unknown" before close + expect(tracer.getStats().final_status).toBe('unknown'); + + // Close should auto-infer status + await tracer.close(); + + // Verify status was auto-inferred after close + expect(tracer.getStats().final_status).toBe('success'); + expect(tracer.getStats().total_steps).toBe(2); + }); + + it('should include auto-inferred final_status in run_end event when emitRunEnd is called', async () => { + const sink = new JsonlTraceSink(testFile); + const tracer = new Tracer('test-run', sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Verify status is still "unknown" before emitRunEnd + expect(tracer.getStats().final_status).toBe('unknown'); + + // emitRunEnd should auto-infer status if not provided + tracer.emitRunEnd(1); + + // Verify status was auto-inferred + expect(tracer.getStats().final_status).toBe('success'); + + // Close the tracer + await tracer.close(); + + // Read trace file and verify run_end event has the inferred status + const content = fs.readFileSync(testFile, 'utf-8'); + const lines = content.trim().split('\n'); + const runEndEvents = lines + .map(line => JSON.parse(line)) + .filter((event: any) => event.type === 'run_end'); + + expect(runEndEvents.length).toBeGreaterThan(0); + // The run_end event should have the auto-inferred status + const lastRunEnd = runEndEvents[runEndEvents.length - 1]; + expect(lastRunEnd.data.status).toBe('success'); + }); + + it('should include auto-inferred final_status in stats when close() is called with CloudTraceSink', async () => { + const { CloudTraceSink } = await import('../../src/tracing/cloud-sink'); + + const uploadUrl = 'https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz'; + const runId = 'test-close-status-' + Date.now(); + const apiKey = 'sk_test_123'; + const apiUrl = 'https://api.sentience.ai'; + + const sink = new CloudTraceSink(uploadUrl, runId, apiKey, apiUrl); + const tracer = new Tracer(runId, sink); + + tracer.emitRunStart('TestAgent', 'gpt-4'); + + // Emit successful step + tracer.emitStepStart('step-1', 1, 'Goal 1', 0); + tracer.emit('step_end', { success: true, action: 'click' }, 'step-1'); + + // Verify status is still "unknown" before close + expect(tracer.getStats().final_status).toBe('unknown'); + + // Note: We don't actually call close() here because it would try to upload + // Instead, we verify that the status inference logic works correctly + // The actual upload and completion request are tested in cloud-sink.test.ts + + // Manually trigger the inference logic (simulating what close() does) + if (tracer.getStats().final_status === 'unknown') { + // Access private method via type assertion (for testing only) + (tracer as any)._inferFinalStatus(); + } + + // Verify status was auto-inferred + expect(tracer.getStats().final_status).toBe('success'); + }); + }); }); From 205e7b0317d58ee40b885186e7dd0fc2a0381fcc Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 18:55:02 -0800 Subject: [PATCH 2/7] fix tests --- tests/tracing/jsonl-sink.test.ts | 44 ++++++++++++++++++++++++++++---- tests/tracing/tracer.test.ts | 44 ++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/tests/tracing/jsonl-sink.test.ts b/tests/tracing/jsonl-sink.test.ts index 46dbd4df..bc8401e5 100644 --- a/tests/tracing/jsonl-sink.test.ts +++ b/tests/tracing/jsonl-sink.test.ts @@ -10,10 +10,34 @@ describe('JsonlTraceSink', () => { const testDir = path.join(__dirname, 'test-traces'); const testFile = path.join(testDir, 'test.jsonl'); - beforeEach(() => { - // Clean up test directory - if (fs.existsSync(testDir)) { - fs.rmSync(testDir, { recursive: true, force: true }); + beforeEach(async () => { + // Clean up test directory with retry logic for Windows + // Use try-catch to handle EPERM errors when checking existence + let exists = false; + try { + exists = fs.existsSync(testDir); + } catch (err: any) { + // On Windows, existsSync can throw EPERM if file is locked + // Assume it exists and try to delete it + exists = true; + } + + if (exists) { + // Retry deletion on Windows (files may still be locked) + for (let i = 0; i < 5; i++) { + try { + fs.rmSync(testDir, { recursive: true, force: true }); + break; // Success + } catch (err: any) { + if (i === 4) { + // Last attempt failed, log but don't throw + console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); + } else { + // Wait before retry + await new Promise(resolve => setTimeout(resolve, 50)); + } + } + } } }); @@ -22,7 +46,17 @@ describe('JsonlTraceSink', () => { await new Promise(resolve => setTimeout(resolve, 100)); // Clean up test directory with retry logic for Windows - if (fs.existsSync(testDir)) { + // Use try-catch to handle EPERM errors when checking existence + let exists = false; + try { + exists = fs.existsSync(testDir); + } catch (err: any) { + // On Windows, existsSync can throw EPERM if file is locked + // Assume it exists and try to delete it + exists = true; + } + + if (exists) { // Retry deletion on Windows (files may still be locked) for (let i = 0; i < 5; i++) { try { diff --git a/tests/tracing/tracer.test.ts b/tests/tracing/tracer.test.ts index 5bd37432..841f111b 100644 --- a/tests/tracing/tracer.test.ts +++ b/tests/tracing/tracer.test.ts @@ -13,10 +13,34 @@ describe('Tracer', () => { const testDir = path.join(__dirname, 'test-traces'); const testFile = path.join(testDir, 'tracer-test.jsonl'); - beforeEach(() => { - // Clean up and recreate test directory - if (fs.existsSync(testDir)) { - fs.rmSync(testDir, { recursive: true, force: true }); + beforeEach(async () => { + // Clean up and recreate test directory with retry logic for Windows + // Use try-catch to handle EPERM errors when checking existence + let exists = false; + try { + exists = fs.existsSync(testDir); + } catch (err: any) { + // On Windows, existsSync can throw EPERM if file is locked + // Assume it exists and try to delete it + exists = true; + } + + if (exists) { + // Retry deletion on Windows (files may still be locked) + for (let i = 0; i < 5; i++) { + try { + fs.rmSync(testDir, { recursive: true, force: true }); + break; // Success + } catch (err: any) { + if (i === 4) { + // Last attempt failed, log but don't throw + console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); + } else { + // Wait before retry + await new Promise(resolve => setTimeout(resolve, 50)); + } + } + } } fs.mkdirSync(testDir, { recursive: true }); }); @@ -26,7 +50,17 @@ describe('Tracer', () => { await new Promise(resolve => setTimeout(resolve, 100)); // Clean up test directory with retry logic for Windows - if (fs.existsSync(testDir)) { + // Use try-catch to handle EPERM errors when checking existence + let exists = false; + try { + exists = fs.existsSync(testDir); + } catch (err: any) { + // On Windows, existsSync can throw EPERM if file is locked + // Assume it exists and try to delete it + exists = true; + } + + if (exists) { // Retry deletion on Windows (files may still be locked) for (let i = 0; i < 5; i++) { try { From e79c85d08cc1faf950f7b02efae3661e9325821d Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 19:12:19 -0800 Subject: [PATCH 3/7] more fixes --- tests/tracing/jsonl-sink.test.ts | 49 +++++++++++++++++------ tests/tracing/tracer.test.ts | 69 ++++++++++++++++++++++++++------ 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/tests/tracing/jsonl-sink.test.ts b/tests/tracing/jsonl-sink.test.ts index bc8401e5..ad6b2196 100644 --- a/tests/tracing/jsonl-sink.test.ts +++ b/tests/tracing/jsonl-sink.test.ts @@ -11,6 +11,9 @@ describe('JsonlTraceSink', () => { const testFile = path.join(testDir, 'test.jsonl'); beforeEach(async () => { + // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) + await new Promise(resolve => setTimeout(resolve, 150)); + // Clean up test directory with retry logic for Windows // Use try-catch to handle EPERM errors when checking existence let exists = false; @@ -24,17 +27,25 @@ describe('JsonlTraceSink', () => { if (exists) { // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 5; i++) { + for (let i = 0; i < 10; i++) { try { + // Try to delete individual files first if directory deletion fails + if (fs.existsSync(testFile)) { + try { + fs.unlinkSync(testFile); + } catch (unlinkErr: any) { + // File might be locked, continue to directory deletion + } + } fs.rmSync(testDir, { recursive: true, force: true }); break; // Success } catch (err: any) { - if (i === 4) { + if (i === 9) { // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); + console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); } else { - // Wait before retry - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait before retry (longer wait for Windows) + await new Promise(resolve => setTimeout(resolve, 100)); } } } @@ -42,8 +53,8 @@ describe('JsonlTraceSink', () => { }); afterEach(async () => { - // Wait a bit for file handles to close (Windows needs this) - await new Promise(resolve => setTimeout(resolve, 100)); + // Wait longer for file handles to close (Windows needs more time) + await new Promise(resolve => setTimeout(resolve, 200)); // Clean up test directory with retry logic for Windows // Use try-catch to handle EPERM errors when checking existence @@ -58,17 +69,25 @@ describe('JsonlTraceSink', () => { if (exists) { // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 5; i++) { + for (let i = 0; i < 10; i++) { try { + // Try to delete individual files first if directory deletion fails + if (fs.existsSync(testFile)) { + try { + fs.unlinkSync(testFile); + } catch (unlinkErr: any) { + // File might be locked, continue to directory deletion + } + } fs.rmSync(testDir, { recursive: true, force: true }); break; // Success } catch (err: any) { - if (i === 4) { + if (i === 9) { // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); + console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); } else { - // Wait before retry - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait before retry (longer wait for Windows) + await new Promise(resolve => setTimeout(resolve, 100)); } } } @@ -88,6 +107,8 @@ describe('JsonlTraceSink', () => { sink.emit({ type: 'test2', data: 'world' }); await sink.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const lines = content.trim().split('\n'); @@ -113,6 +134,8 @@ describe('JsonlTraceSink', () => { const sink2 = new JsonlTraceSink(testFile); sink2.emit({ seq: 2 }); await sink2.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const lines = content.trim().split('\n'); @@ -198,6 +221,8 @@ describe('JsonlTraceSink', () => { sink.emit(complexEvent); await sink.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const parsed = JSON.parse(content.trim()); diff --git a/tests/tracing/tracer.test.ts b/tests/tracing/tracer.test.ts index 841f111b..03b7133b 100644 --- a/tests/tracing/tracer.test.ts +++ b/tests/tracing/tracer.test.ts @@ -14,6 +14,9 @@ describe('Tracer', () => { const testFile = path.join(testDir, 'tracer-test.jsonl'); beforeEach(async () => { + // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) + await new Promise(resolve => setTimeout(resolve, 150)); + // Clean up and recreate test directory with retry logic for Windows // Use try-catch to handle EPERM errors when checking existence let exists = false; @@ -27,17 +30,25 @@ describe('Tracer', () => { if (exists) { // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 5; i++) { + for (let i = 0; i < 10; i++) { try { + // Try to delete individual files first if directory deletion fails + if (fs.existsSync(testFile)) { + try { + fs.unlinkSync(testFile); + } catch (unlinkErr: any) { + // File might be locked, continue to directory deletion + } + } fs.rmSync(testDir, { recursive: true, force: true }); break; // Success } catch (err: any) { - if (i === 4) { + if (i === 9) { // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); + console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); } else { - // Wait before retry - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait before retry (longer wait for Windows) + await new Promise(resolve => setTimeout(resolve, 100)); } } } @@ -46,8 +57,8 @@ describe('Tracer', () => { }); afterEach(async () => { - // Wait a bit for file handles to close (Windows needs this) - await new Promise(resolve => setTimeout(resolve, 100)); + // Wait longer for file handles to close (Windows needs more time) + await new Promise(resolve => setTimeout(resolve, 200)); // Clean up test directory with retry logic for Windows // Use try-catch to handle EPERM errors when checking existence @@ -62,17 +73,25 @@ describe('Tracer', () => { if (exists) { // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 5; i++) { + for (let i = 0; i < 10; i++) { try { + // Try to delete individual files first if directory deletion fails + if (fs.existsSync(testFile)) { + try { + fs.unlinkSync(testFile); + } catch (unlinkErr: any) { + // File might be locked, continue to directory deletion + } + } fs.rmSync(testDir, { recursive: true, force: true }); break; // Success } catch (err: any) { - if (i === 4) { + if (i === 9) { // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 5 attempts: ${testDir}`); + console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); } else { - // Wait before retry - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait before retry (longer wait for Windows) + await new Promise(resolve => setTimeout(resolve, 100)); } } } @@ -139,6 +158,8 @@ describe('Tracer', () => { tracer.emit('test_event', { key: 'value' }, 'step-123'); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -160,6 +181,8 @@ describe('Tracer', () => { tracer.emit('test_event', { key: 'value' }); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -176,6 +199,8 @@ describe('Tracer', () => { tracer.emitRunStart('SentienceAgent', 'gpt-4o', { timeout: 30000 }); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -194,6 +219,8 @@ describe('Tracer', () => { tracer.emitRunStart('SentienceAgent'); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -211,6 +238,8 @@ describe('Tracer', () => { tracer.emitStepStart('step-001', 1, 'Click the button', 0, 'https://example.com'); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -231,6 +260,8 @@ describe('Tracer', () => { tracer.emitStepStart('step-002', 2, 'Type text'); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -247,6 +278,8 @@ describe('Tracer', () => { tracer.emitRunEnd(5); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -264,6 +297,8 @@ describe('Tracer', () => { tracer.emitRunEnd(5, 'success'); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -324,6 +359,8 @@ describe('Tracer', () => { tracer.emitRunEnd(1); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -347,6 +384,8 @@ describe('Tracer', () => { tracer.emitError('step-003', 'Element not found', 2); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -365,6 +404,8 @@ describe('Tracer', () => { tracer.emitError('step-004', 'Timeout'); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const event = JSON.parse(content.trim()) as TraceEvent; @@ -388,6 +429,8 @@ describe('Tracer', () => { tracer.emitRunEnd(2); await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); const content = fs.readFileSync(testFile, 'utf-8'); const lines = content.trim().split('\n'); @@ -607,6 +650,8 @@ describe('Tracer', () => { // Close the tracer await tracer.close(); + // Wait for file handle to be released on Windows + await new Promise(resolve => setTimeout(resolve, 50)); // Read trace file and verify run_end event has the inferred status const content = fs.readFileSync(testFile, 'utf-8'); From a4c26b0d1afb77432caa24ef7afcc77ace1d403c Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 19:19:07 -0800 Subject: [PATCH 4/7] handle 409 --- src/tracing/tracer-factory.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/tracing/tracer-factory.ts b/src/tracing/tracer-factory.ts index a3155e4d..12a69f2b 100644 --- a/src/tracing/tracer-factory.ts +++ b/src/tracing/tracer-factory.ts @@ -94,8 +94,17 @@ async function recoverOrphanedTraces(apiKey: string, apiUrl: string = SENTIENCE_ const sink = new CloudTraceSink(response.data.upload_url, runId); await sink.close(); // This will upload the existing file console.log(`✅ [Sentience] Uploaded orphaned trace: ${runId}`); + } else if (response.status === 409) { + // HTTP 409 means trace already exists (already uploaded) + // Treat as success and delete local file + console.log(`✅ [Sentience] Trace ${runId} already exists in cloud (skipping re-upload)`); + try { + fs.unlinkSync(filePath); + } catch { + // Ignore cleanup errors + } } - // Silently skip failures - don't log errors for orphan recovery + // Silently skip other failures - don't log errors for orphan recovery // These are expected in many scenarios (network issues, invalid API keys, etc.) } catch (error: any) { // Silently skip failures - don't log errors for orphan recovery From aa649c28b2b51c6262dc28344c6efad68d497c9e Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 19:36:31 -0800 Subject: [PATCH 5/7] fix tests --- src/tracing/jsonl-sink.ts | 38 +++++++++++-- tests/tracing/jsonl-sink.test.ts | 36 +++++++++--- tests/tracing/tracer.test.ts | 98 +++++++++++++++++++------------- 3 files changed, 121 insertions(+), 51 deletions(-) diff --git a/src/tracing/jsonl-sink.ts b/src/tracing/jsonl-sink.ts index f5060457..942b76f9 100644 --- a/src/tracing/jsonl-sink.ts +++ b/src/tracing/jsonl-sink.ts @@ -115,17 +115,43 @@ export class JsonlTraceSink extends TraceSink { stream.removeAllListeners('error'); return new Promise((resolve) => { + // Check if stream is already closed + if (stream.destroyed || !stream.writable) { + // Stream already closed, generate index and resolve immediately + this.generateIndex(); + resolve(); + return; + } + + let resolved = false; + const doResolve = () => { + if (!resolved) { + resolved = true; + clearTimeout(timeout); + // Generate index after closing file + this.generateIndex(); + resolve(); + } + }; + + // Fallback timeout in case 'close' event doesn't fire (shouldn't happen, but safety) + const timeout = setTimeout(() => { + if (!resolved) { + doResolve(); + } + }, 500); + + // Wait for stream to fully close (Windows needs this) + // The 'close' event fires after all data is flushed and file handle is released + stream.once('close', doResolve); + stream.end((err?: Error | null) => { if (err) { // Silently ignore close errors in production // (they're logged during stream lifetime if needed) } - - // Generate index after closing file - this.generateIndex(); - - // Always resolve, don't reject on close errors - resolve(); + // Note: 'close' event will fire after end() completes + // Don't resolve here - wait for 'close' event }); }); } diff --git a/tests/tracing/jsonl-sink.test.ts b/tests/tracing/jsonl-sink.test.ts index ad6b2196..6f31a419 100644 --- a/tests/tracing/jsonl-sink.test.ts +++ b/tests/tracing/jsonl-sink.test.ts @@ -10,6 +10,28 @@ describe('JsonlTraceSink', () => { const testDir = path.join(__dirname, 'test-traces'); const testFile = path.join(testDir, 'test.jsonl'); + /** + * Helper function to read file with retry logic for Windows EPERM errors + * Windows file handles may not be released immediately after close() + */ + async function readFileWithRetry(filePath: string, maxAttempts: number = 10): Promise { + let attempts = 0; + while (attempts < maxAttempts) { + try { + return fs.readFileSync(filePath, 'utf-8'); + } catch (err: any) { + if (err.code === 'EPERM' && attempts < maxAttempts - 1) { + // File still locked, wait and retry + await new Promise(resolve => setTimeout(resolve, 50)); + attempts++; + } else { + throw err; // Re-throw if not EPERM or max attempts reached + } + } + } + throw new Error(`Failed to read file after ${maxAttempts} attempts`); + } + beforeEach(async () => { // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) await new Promise(resolve => setTimeout(resolve, 150)); @@ -107,10 +129,10 @@ describe('JsonlTraceSink', () => { sink.emit({ type: 'test2', data: 'world' }); await sink.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); expect(lines.length).toBe(2); @@ -137,7 +159,7 @@ describe('JsonlTraceSink', () => { // Wait for file handle to be released on Windows await new Promise(resolve => setTimeout(resolve, 50)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); expect(lines.length).toBe(2); @@ -221,10 +243,10 @@ describe('JsonlTraceSink', () => { sink.emit(complexEvent); await sink.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const parsed = JSON.parse(content.trim()); expect(parsed).toEqual(complexEvent); diff --git a/tests/tracing/tracer.test.ts b/tests/tracing/tracer.test.ts index 03b7133b..651bc780 100644 --- a/tests/tracing/tracer.test.ts +++ b/tests/tracing/tracer.test.ts @@ -13,6 +13,28 @@ describe('Tracer', () => { const testDir = path.join(__dirname, 'test-traces'); const testFile = path.join(testDir, 'tracer-test.jsonl'); + /** + * Helper function to read file with retry logic for Windows EPERM errors + * Windows file handles may not be released immediately after close() + */ + async function readFileWithRetry(filePath: string, maxAttempts: number = 10): Promise { + let attempts = 0; + while (attempts < maxAttempts) { + try { + return fs.readFileSync(filePath, 'utf-8'); + } catch (err: any) { + if (err.code === 'EPERM' && attempts < maxAttempts - 1) { + // File still locked, wait and retry + await new Promise(resolve => setTimeout(resolve, 50)); + attempts++; + } else { + throw err; // Re-throw if not EPERM or max attempts reached + } + } + } + throw new Error(`Failed to read file after ${maxAttempts} attempts`); + } + beforeEach(async () => { // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) await new Promise(resolve => setTimeout(resolve, 150)); @@ -143,7 +165,7 @@ describe('Tracer', () => { throw new Error(`Trace file not created: ${testFile}`); } - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.ts).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/); @@ -158,10 +180,10 @@ describe('Tracer', () => { tracer.emit('test_event', { key: 'value' }, 'step-123'); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.v).toBe(1); @@ -181,10 +203,10 @@ describe('Tracer', () => { tracer.emit('test_event', { key: 'value' }); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.step_id).toBeUndefined(); @@ -199,10 +221,10 @@ describe('Tracer', () => { tracer.emitRunStart('SentienceAgent', 'gpt-4o', { timeout: 30000 }); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_start'); @@ -219,10 +241,10 @@ describe('Tracer', () => { tracer.emitRunStart('SentienceAgent'); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_start'); @@ -238,10 +260,10 @@ describe('Tracer', () => { tracer.emitStepStart('step-001', 1, 'Click the button', 0, 'https://example.com'); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('step_start'); @@ -260,10 +282,10 @@ describe('Tracer', () => { tracer.emitStepStart('step-002', 2, 'Type text'); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('step_start'); @@ -278,10 +300,10 @@ describe('Tracer', () => { tracer.emitRunEnd(5); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_end'); @@ -297,10 +319,10 @@ describe('Tracer', () => { tracer.emitRunEnd(5, 'success'); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('run_end'); @@ -359,10 +381,10 @@ describe('Tracer', () => { tracer.emitRunEnd(1); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.data.status).toBe('success'); }); @@ -384,10 +406,10 @@ describe('Tracer', () => { tracer.emitError('step-003', 'Element not found', 2); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.type).toBe('error'); @@ -404,10 +426,10 @@ describe('Tracer', () => { tracer.emitError('step-004', 'Timeout'); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const event = JSON.parse(content.trim()) as TraceEvent; expect(event.data.attempt).toBe(0); @@ -429,10 +451,10 @@ describe('Tracer', () => { tracer.emitRunEnd(2); await tracer.close(); - // Wait for file handle to be released on Windows - await new Promise(resolve => setTimeout(resolve, 50)); + // Wait for file handle to be released on Windows (increased wait time) + await new Promise(resolve => setTimeout(resolve, 100)); - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); const events = lines.map(line => JSON.parse(line) as TraceEvent); @@ -654,7 +676,7 @@ describe('Tracer', () => { await new Promise(resolve => setTimeout(resolve, 50)); // Read trace file and verify run_end event has the inferred status - const content = fs.readFileSync(testFile, 'utf-8'); + const content = await readFileWithRetry(testFile); const lines = content.trim().split('\n'); const runEndEvents = lines .map(line => JSON.parse(line)) From f80669d079adb6621cebb4b5ff94fb26fefb0018 Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 19:50:00 -0800 Subject: [PATCH 6/7] fix tests --- tests/tracing/jsonl-sink.test.ts | 112 +++++++++++++------------------ tests/tracing/tracer.test.ts | 91 +++++++------------------ 2 files changed, 74 insertions(+), 129 deletions(-) diff --git a/tests/tracing/jsonl-sink.test.ts b/tests/tracing/jsonl-sink.test.ts index 6f31a419..7267b136 100644 --- a/tests/tracing/jsonl-sink.test.ts +++ b/tests/tracing/jsonl-sink.test.ts @@ -8,7 +8,30 @@ import { JsonlTraceSink } from '../../src/tracing/jsonl-sink'; describe('JsonlTraceSink', () => { const testDir = path.join(__dirname, 'test-traces'); - const testFile = path.join(testDir, 'test.jsonl'); + // Use unique filename for each test to avoid Windows file locking issues + let testFile: string; + + /** + * Helper function to read file with retry logic for Windows EPERM errors + * Windows file handles may not be released immediately after close() + */ + async function readFileWithRetry(filePath: string, maxAttempts: number = 10): Promise { + let attempts = 0; + while (attempts < maxAttempts) { + try { + return fs.readFileSync(filePath, 'utf-8'); + } catch (err: any) { + if (err.code === 'EPERM' && attempts < maxAttempts - 1) { + // File still locked, wait and retry + await new Promise(resolve => setTimeout(resolve, 50)); + attempts++; + } else { + throw err; // Re-throw if not EPERM or max attempts reached + } + } + } + throw new Error(`Failed to read file after ${maxAttempts} attempts`); + } /** * Helper function to read file with retry logic for Windows EPERM errors @@ -36,41 +59,13 @@ describe('JsonlTraceSink', () => { // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) await new Promise(resolve => setTimeout(resolve, 150)); - // Clean up test directory with retry logic for Windows - // Use try-catch to handle EPERM errors when checking existence - let exists = false; - try { - exists = fs.existsSync(testDir); - } catch (err: any) { - // On Windows, existsSync can throw EPERM if file is locked - // Assume it exists and try to delete it - exists = true; - } + // Generate unique filename for this test to avoid Windows file locking issues + const uniqueId = Math.random().toString(36).substring(7); + testFile = path.join(testDir, `trace-${uniqueId}.jsonl`); - if (exists) { - // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 10; i++) { - try { - // Try to delete individual files first if directory deletion fails - if (fs.existsSync(testFile)) { - try { - fs.unlinkSync(testFile); - } catch (unlinkErr: any) { - // File might be locked, continue to directory deletion - } - } - fs.rmSync(testDir, { recursive: true, force: true }); - break; // Success - } catch (err: any) { - if (i === 9) { - // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); - } else { - // Wait before retry (longer wait for Windows) - await new Promise(resolve => setTimeout(resolve, 100)); - } - } - } + // Ensure directory exists + if (!fs.existsSync(testDir)) { + fs.mkdirSync(testDir, { recursive: true }); } }); @@ -78,40 +73,29 @@ describe('JsonlTraceSink', () => { // Wait longer for file handles to close (Windows needs more time) await new Promise(resolve => setTimeout(resolve, 200)); - // Clean up test directory with retry logic for Windows - // Use try-catch to handle EPERM errors when checking existence - let exists = false; - try { - exists = fs.existsSync(testDir); - } catch (err: any) { - // On Windows, existsSync can throw EPERM if file is locked - // Assume it exists and try to delete it - exists = true; - } - - if (exists) { - // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 10; i++) { - try { - // Try to delete individual files first if directory deletion fails - if (fs.existsSync(testFile)) { + // Clean up the specific file for this test + if (testFile) { + try { + if (fs.existsSync(testFile)) { + // Retry deletion on Windows (file may still be locked) + for (let i = 0; i < 5; i++) { try { fs.unlinkSync(testFile); - } catch (unlinkErr: any) { - // File might be locked, continue to directory deletion + break; // Success + } catch (err: any) { + if (i === 4) { + // Last attempt failed, log but don't throw + console.warn(`Could not delete ${testFile}:`, err); + } else { + // Wait before retry + await new Promise(resolve => setTimeout(resolve, 50)); + } } } - fs.rmSync(testDir, { recursive: true, force: true }); - break; // Success - } catch (err: any) { - if (i === 9) { - // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); - } else { - // Wait before retry (longer wait for Windows) - await new Promise(resolve => setTimeout(resolve, 100)); - } } + } catch (err: any) { + // Ignore cleanup errors - don't fail tests + console.warn(`Could not delete ${testFile}:`, err); } } }); diff --git a/tests/tracing/tracer.test.ts b/tests/tracing/tracer.test.ts index 651bc780..1255c4ff 100644 --- a/tests/tracing/tracer.test.ts +++ b/tests/tracing/tracer.test.ts @@ -11,7 +11,8 @@ import { TraceEvent } from '../../src/tracing/types'; describe('Tracer', () => { const testDir = path.join(__dirname, 'test-traces'); - const testFile = path.join(testDir, 'tracer-test.jsonl'); + // Use unique filename for each test to avoid Windows file locking issues + let testFile: string; /** * Helper function to read file with retry logic for Windows EPERM errors @@ -39,83 +40,43 @@ describe('Tracer', () => { // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) await new Promise(resolve => setTimeout(resolve, 150)); - // Clean up and recreate test directory with retry logic for Windows - // Use try-catch to handle EPERM errors when checking existence - let exists = false; - try { - exists = fs.existsSync(testDir); - } catch (err: any) { - // On Windows, existsSync can throw EPERM if file is locked - // Assume it exists and try to delete it - exists = true; - } + // Generate unique filename for this test to avoid Windows file locking issues + const uniqueId = Math.random().toString(36).substring(7); + testFile = path.join(testDir, `trace-${uniqueId}.jsonl`); - if (exists) { - // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 10; i++) { - try { - // Try to delete individual files first if directory deletion fails - if (fs.existsSync(testFile)) { - try { - fs.unlinkSync(testFile); - } catch (unlinkErr: any) { - // File might be locked, continue to directory deletion - } - } - fs.rmSync(testDir, { recursive: true, force: true }); - break; // Success - } catch (err: any) { - if (i === 9) { - // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); - } else { - // Wait before retry (longer wait for Windows) - await new Promise(resolve => setTimeout(resolve, 100)); - } - } - } + // Ensure directory exists + if (!fs.existsSync(testDir)) { + fs.mkdirSync(testDir, { recursive: true }); } - fs.mkdirSync(testDir, { recursive: true }); }); afterEach(async () => { // Wait longer for file handles to close (Windows needs more time) await new Promise(resolve => setTimeout(resolve, 200)); - // Clean up test directory with retry logic for Windows - // Use try-catch to handle EPERM errors when checking existence - let exists = false; - try { - exists = fs.existsSync(testDir); - } catch (err: any) { - // On Windows, existsSync can throw EPERM if file is locked - // Assume it exists and try to delete it - exists = true; - } - - if (exists) { - // Retry deletion on Windows (files may still be locked) - for (let i = 0; i < 10; i++) { - try { - // Try to delete individual files first if directory deletion fails - if (fs.existsSync(testFile)) { + // Clean up the specific file for this test + if (testFile) { + try { + if (fs.existsSync(testFile)) { + // Retry deletion on Windows (file may still be locked) + for (let i = 0; i < 5; i++) { try { fs.unlinkSync(testFile); - } catch (unlinkErr: any) { - // File might be locked, continue to directory deletion + break; // Success + } catch (err: any) { + if (i === 4) { + // Last attempt failed, log but don't throw + console.warn(`Could not delete ${testFile}:`, err); + } else { + // Wait before retry + await new Promise(resolve => setTimeout(resolve, 50)); + } } } - fs.rmSync(testDir, { recursive: true, force: true }); - break; // Success - } catch (err: any) { - if (i === 9) { - // Last attempt failed, log but don't throw - console.warn(`Failed to delete test directory after 10 attempts: ${testDir}`); - } else { - // Wait before retry (longer wait for Windows) - await new Promise(resolve => setTimeout(resolve, 100)); - } } + } catch (err: any) { + // Ignore cleanup errors - don't fail tests + console.warn(`Could not delete ${testFile}:`, err); } } }); From b2304167d450aac6e61489222656b61ce4dbcbe9 Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 19:56:44 -0800 Subject: [PATCH 7/7] fix tests --- tests/tracing/jsonl-sink.test.ts | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tests/tracing/jsonl-sink.test.ts b/tests/tracing/jsonl-sink.test.ts index 7267b136..2ee35ebc 100644 --- a/tests/tracing/jsonl-sink.test.ts +++ b/tests/tracing/jsonl-sink.test.ts @@ -33,28 +33,6 @@ describe('JsonlTraceSink', () => { throw new Error(`Failed to read file after ${maxAttempts} attempts`); } - /** - * Helper function to read file with retry logic for Windows EPERM errors - * Windows file handles may not be released immediately after close() - */ - async function readFileWithRetry(filePath: string, maxAttempts: number = 10): Promise { - let attempts = 0; - while (attempts < maxAttempts) { - try { - return fs.readFileSync(filePath, 'utf-8'); - } catch (err: any) { - if (err.code === 'EPERM' && attempts < maxAttempts - 1) { - // File still locked, wait and retry - await new Promise(resolve => setTimeout(resolve, 50)); - attempts++; - } else { - throw err; // Re-throw if not EPERM or max attempts reached - } - } - } - throw new Error(`Failed to read file after ${maxAttempts} attempts`); - } - beforeEach(async () => { // Wait a bit to ensure previous test's file handles are fully released (Windows needs this) await new Promise(resolve => setTimeout(resolve, 150));