From 4b98bf9682641176cd70eb85f2427722126bb768 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 09:53:41 -0800 Subject: [PATCH 1/4] add baseline safety net testing --- .github/workflows/test.yml | 8 ++- package-lock.json | 4 +- tests/agent-runtime-assertions.test.ts | 77 ++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 tests/agent-runtime-assertions.test.ts diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f046dc0..3e00f00 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -63,7 +63,13 @@ jobs: echo "Warning: src/extension not found, tests may fail" fi - - name: Run tests + - name: Phase 0 regression safety net + run: | + npm test -- agent-runtime-assertions.test.ts + env: + CI: true + + - name: Run full test suite run: | npm test env: diff --git a/package-lock.json b/package-lock.json index 6180bdd..f44ab75 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "sentienceapi", - "version": "0.94.0", + "version": "0.96.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "sentienceapi", - "version": "0.94.0", + "version": "0.96.1", "license": "(MIT OR Apache-2.0)", "dependencies": { "canvas": "^3.2.1", diff --git a/tests/agent-runtime-assertions.test.ts b/tests/agent-runtime-assertions.test.ts new file mode 100644 index 0000000..e1bfc08 --- /dev/null +++ b/tests/agent-runtime-assertions.test.ts @@ -0,0 +1,77 @@ +import { AgentRuntime } from '../src/agent-runtime'; +import { TraceSink } from '../src/tracing/sink'; +import { Tracer } from '../src/tracing/tracer'; +import { isDisabled, isEnabled, valueEquals } from '../src/verification'; +import { BBox, Element, Snapshot, VisualCues } from '../src/types'; +import { MockPage } from './mocks/browser-mock'; + +class MockSink extends TraceSink { + public events: any[] = []; + emit(event: Record): void { + this.events.push(event); + } + async close(): Promise { + // no-op + } + getSinkType(): string { + return 'MockSink'; + } +} + +function makeElement( + id: number, + role: string, + text: string | null, + extras: Partial = {} +): Element { + const cues: VisualCues = { + is_primary: false, + background_color_name: null, + is_clickable: true, + }; + return { + id, + role, + text: text ?? undefined, + importance: 10, + bbox: { x: 0, y: 0, width: 100, height: 40 } as BBox, + visual_cues: cues, + ...extras, + } as Element; +} + +describe('AgentRuntime.assert() with state predicates', () => { + it('uses snapshot context for enabled/disabled/value assertions', () => { + const sink = new MockSink(); + const tracer = new Tracer('test-run', sink); + const page = new MockPage('https://example.com') as any; + + const elements: Element[] = [ + makeElement(1, 'button', 'Submit', { disabled: false }), + makeElement(2, 'textbox', null, { value: 'hello', input_type: 'text' }), + makeElement(3, 'button', 'Disabled', { disabled: true }), + ]; + + const snapshot: Snapshot = { + status: 'success', + url: 'https://example.com', + elements, + timestamp: 't1', + }; + + const browserLike = { + snapshot: async () => snapshot, + }; + + const runtime = new AgentRuntime(browserLike as any, page as any, tracer); + runtime.beginStep('Test'); + runtime.lastSnapshot = snapshot; + + expect(runtime.assert(isEnabled("text~'Submit'"), 'enabled')).toBe(true); + expect(runtime.assert(isDisabled("text~'Disabled'"), 'disabled')).toBe(true); + expect(runtime.assert(valueEquals('role=textbox', 'hello'), 'value')).toBe(true); + + const stepEnd = runtime.getAssertionsForStepEnd(); + expect(stepEnd.assertions.length).toBe(3); + }); +}); From f7e7bbe4ef1431af610fee0a7a781a14a81242b3 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:04:41 -0800 Subject: [PATCH 2/4] p1 --- README.md | 11 +++ src/agent-runtime.ts | 105 ++++++++++++++++++++++++ src/failure-artifacts.ts | 136 ++++++++++++++++++++++++++++++++ tests/failure-artifacts.test.ts | 44 +++++++++++ 4 files changed, 296 insertions(+) create mode 100644 src/failure-artifacts.ts create mode 100644 tests/failure-artifacts.test.ts diff --git a/README.md b/README.md index 774744a..1b7ca83 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,17 @@ if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) { console.log(`Task done: ${runtime.isTaskDone}`); ``` +### Failure Artifact Buffer (Phase 1) + +Capture a short ring buffer of screenshots and persist them when a required assertion fails. + +```typescript +runtime.enableFailureArtifacts({ bufferSeconds: 15, captureOnAction: true, fps: 0 }); + +// After each action, record it (best-effort). +await runtime.recordAction('CLICK'); +``` + **See examples:** [`examples/asserts/`](examples/asserts/) ## 🚀 Quick Start: Choose Your Abstraction Level diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts index 1b5a5cf..94e0bea 100644 --- a/src/agent-runtime.ts +++ b/src/agent-runtime.ts @@ -44,6 +44,7 @@ import { Snapshot } from './types'; import { AssertContext, Predicate } from './verification'; import { Tracer } from './tracing/tracer'; import { LLMProvider } from './llm-provider'; +import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts'; // Define a minimal browser interface to avoid circular dependencies interface BrowserLike { @@ -185,6 +186,11 @@ export class AssertionHandle { }, true ); + if (this.required && !passed) { + (this.runtime as any).persistFailureArtifacts( + `assert_eventually_failed:${this.label}` + ); + } return passed; } catch { // fall through to snapshot_exhausted @@ -216,6 +222,9 @@ export class AssertionHandle { }, true ); + if (this.required) { + (this.runtime as any).persistFailureArtifacts(`assert_eventually_failed:${this.label}`); + } return false; } @@ -233,6 +242,11 @@ export class AssertionHandle { }, true ); + if (this.required) { + (this.runtime as any).persistFailureArtifacts( + `assert_eventually_timeout:${this.label}` + ); + } return false; } @@ -272,6 +286,9 @@ export class AssertionHandle { { eventually: true, attempt, final: true, timeout: true }, true ); + if (this.required) { + (this.runtime as any).persistFailureArtifacts(`assert_eventually_timeout:${this.label}`); + } return false; } @@ -306,6 +323,10 @@ export class AgentRuntime { /** Most recent snapshot (for assertion context) */ lastSnapshot: Snapshot | null = null; + /** Failure artifact buffer (Phase 1) */ + private artifactBuffer: FailureArtifactBuffer | null = null; + private artifactTimer: NodeJS.Timeout | null = null; + /** Assertions accumulated during current step */ private assertionsThisStep: AssertionRecord[] = []; /** Task completion tracking */ @@ -432,6 +453,85 @@ export class AgentRuntime { return this.lastSnapshot; } + /** + * Enable failure artifact buffer (Phase 1). + */ + enableFailureArtifacts(options: FailureArtifactsOptions = {}): void { + this.artifactBuffer = new FailureArtifactBuffer(this.tracer.getRunId(), options); + const fps = this.artifactBuffer.getOptions().fps; + if (fps && fps > 0) { + const intervalMs = Math.max(1, Math.floor(1000 / fps)); + this.artifactTimer = setInterval(() => { + this.captureArtifactFrame().catch(() => { + // best-effort + }); + }, intervalMs); + } + } + + /** + * Disable failure artifact buffer and stop background capture. + */ + disableFailureArtifacts(): void { + if (this.artifactTimer) { + clearInterval(this.artifactTimer); + this.artifactTimer = null; + } + } + + /** + * Record an action in the artifact timeline and capture a frame if enabled. + */ + async recordAction(action: string, url?: string): Promise { + if (!this.artifactBuffer) { + return; + } + this.artifactBuffer.recordStep(action, this.stepId, this.stepIndex, url); + if (this.artifactBuffer.getOptions().captureOnAction) { + await this.captureArtifactFrame(); + } + } + + private async captureArtifactFrame(): Promise { + if (!this.artifactBuffer) { + return; + } + try { + const image = await this.page.screenshot({ type: 'jpeg', quality: 80 }); + await this.artifactBuffer.addFrame(image, 'jpeg'); + } catch { + // best-effort + } + } + + /** + * Finalize artifact buffer at end of run. + */ + async finalizeRun(success: boolean): Promise { + if (!this.artifactBuffer) { + return; + } + if (success) { + if (this.artifactBuffer.getOptions().persistMode === 'always') { + await this.artifactBuffer.persist('success', 'success'); + } + await this.artifactBuffer.cleanup(); + } else { + await this.persistFailureArtifacts('finalize_failure'); + } + } + + private async persistFailureArtifacts(reason: string): Promise { + if (!this.artifactBuffer) { + return; + } + await this.artifactBuffer.persist(reason, 'failure'); + await this.artifactBuffer.cleanup(); + if (this.artifactBuffer.getOptions().persistMode === 'onFail') { + this.disableFailureArtifacts(); + } + } + /** * Begin a new step in the verification loop. * @@ -476,6 +576,11 @@ export class AgentRuntime { assert(predicate: Predicate, label: string, required: boolean = false): boolean { const outcome = predicate(this.ctx()); this._recordOutcome(outcome, label, required, null, true); + if (required && !outcome.passed) { + this.persistFailureArtifacts(`assert_failed:${label}`).catch(() => { + // best-effort + }); + } return outcome.passed; } diff --git a/src/failure-artifacts.ts b/src/failure-artifacts.ts new file mode 100644 index 0000000..0e9c3b8 --- /dev/null +++ b/src/failure-artifacts.ts @@ -0,0 +1,136 @@ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; + +export type PersistMode = 'onFail' | 'always'; + +export interface FailureArtifactsOptions { + bufferSeconds?: number; + captureOnAction?: boolean; + fps?: number; + persistMode?: PersistMode; + outputDir?: string; +} + +interface FrameRecord { + ts: number; + fileName: string; + filePath: string; +} + +export class FailureArtifactBuffer { + private runId: string; + private options: Required; + private frames: FrameRecord[] = []; + private steps: Record[] = []; + private persisted = false; + private timeNow: () => number; + private tempDir: string; + private framesDir: string; + + constructor( + runId: string, + options: FailureArtifactsOptions = {}, + timeNow: () => number = () => Date.now() + ) { + this.runId = runId; + this.options = { + bufferSeconds: options.bufferSeconds ?? 15, + captureOnAction: options.captureOnAction ?? true, + fps: options.fps ?? 0, + persistMode: options.persistMode ?? 'onFail', + outputDir: options.outputDir ?? '.sentience/artifacts', + }; + this.timeNow = timeNow; + this.tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-artifacts-')); + this.framesDir = path.join(this.tempDir, 'frames'); + fs.mkdirSync(this.framesDir, { recursive: true }); + } + + getOptions(): Required { + return this.options; + } + + recordStep(action: string, stepId: string | null, stepIndex: number, url?: string): void { + this.steps.push({ + ts: this.timeNow(), + action, + step_id: stepId, + step_index: stepIndex, + url, + }); + } + + async addFrame(image: Buffer, fmt: 'jpeg' | 'png' = 'jpeg'): Promise { + const ts = this.timeNow(); + const fileName = `frame_${ts}.${fmt}`; + const filePath = path.join(this.framesDir, fileName); + await fs.promises.writeFile(filePath, image); + this.frames.push({ ts, fileName, filePath }); + this.prune(); + } + + frameCount(): number { + return this.frames.length; + } + + private prune(): void { + const cutoff = this.timeNow() - this.options.bufferSeconds * 1000; + const keep: FrameRecord[] = []; + for (const frame of this.frames) { + if (frame.ts >= cutoff) { + keep.push(frame); + } else { + try { + fs.unlinkSync(frame.filePath); + } catch { + // ignore + } + } + } + this.frames = keep; + } + + async persist(reason: string | null, status: 'failure' | 'success'): Promise { + if (this.persisted) { + return null; + } + + const outDir = this.options.outputDir; + await fs.promises.mkdir(outDir, { recursive: true }); + const ts = this.timeNow(); + const runDir = path.join(outDir, `${this.runId}-${ts}`); + const framesOut = path.join(runDir, 'frames'); + await fs.promises.mkdir(framesOut, { recursive: true }); + + for (const frame of this.frames) { + await fs.promises.copyFile(frame.filePath, path.join(framesOut, frame.fileName)); + } + + await fs.promises.writeFile( + path.join(runDir, 'steps.json'), + JSON.stringify(this.steps, null, 2) + ); + + const manifest = { + run_id: this.runId, + created_at_ms: ts, + status, + reason, + buffer_seconds: this.options.bufferSeconds, + frame_count: this.frames.length, + frames: this.frames.map(frame => ({ file: frame.fileName, ts: frame.ts })), + }; + await fs.promises.writeFile( + path.join(runDir, 'manifest.json'), + JSON.stringify(manifest, null, 2) + ); + + this.persisted = true; + return runDir; + } + + async cleanup(): Promise { + await fs.promises.rm(this.tempDir, { recursive: true, force: true }); + } +} diff --git a/tests/failure-artifacts.test.ts b/tests/failure-artifacts.test.ts new file mode 100644 index 0000000..63a7241 --- /dev/null +++ b/tests/failure-artifacts.test.ts @@ -0,0 +1,44 @@ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import { FailureArtifactBuffer } from '../src/failure-artifacts'; + +function makeTempDir(prefix: string): string { + return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); +} + +describe('FailureArtifactBuffer', () => { + it('prunes frames by time window', async () => { + const tmp = makeTempDir('sentience-test-'); + const now = { t: 0 }; + const timeNow = () => now.t; + const buf = new FailureArtifactBuffer('run-1', { bufferSeconds: 1, outputDir: tmp }, timeNow); + + await buf.addFrame(Buffer.from('one'), 'png'); + expect(buf.frameCount()).toBe(1); + + now.t = 2000; + await buf.addFrame(Buffer.from('two'), 'png'); + expect(buf.frameCount()).toBe(1); + }); + + it('persists manifest and steps', async () => { + const tmp = makeTempDir('sentience-test-'); + const now = { t: 1000 }; + const timeNow = () => now.t; + const buf = new FailureArtifactBuffer('run-2', { outputDir: tmp }, timeNow); + + buf.recordStep('CLICK', 's1', 1, 'https://example.com'); + await buf.addFrame(Buffer.from('frame'), 'png'); + + const runDir = await buf.persist('assert_failed', 'failure'); + expect(runDir).toBeTruthy(); + const manifest = JSON.parse( + fs.readFileSync(path.join(runDir as string, 'manifest.json'), 'utf-8') + ); + const steps = JSON.parse(fs.readFileSync(path.join(runDir as string, 'steps.json'), 'utf-8')); + expect(manifest.run_id).toBe('run-2'); + expect(manifest.frame_count).toBe(1); + expect(steps.length).toBe(1); + }); +}); From e4d86456f4f0967d72ea4465f06e02a08dd3749e Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:07:17 -0800 Subject: [PATCH 3/4] p2 --- src/agent-runtime.ts | 24 ++++++++++++++++++-- src/failure-artifacts.ts | 39 +++++++++++++++++++++++++-------- tests/failure-artifacts.test.ts | 18 ++++++++++++++- 3 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts index 94e0bea..3c5a951 100644 --- a/src/agent-runtime.ts +++ b/src/agent-runtime.ts @@ -513,7 +513,13 @@ export class AgentRuntime { } if (success) { if (this.artifactBuffer.getOptions().persistMode === 'always') { - await this.artifactBuffer.persist('success', 'success'); + await this.artifactBuffer.persist( + 'success', + 'success', + this.lastSnapshot ?? undefined, + this.lastSnapshot?.diagnostics, + this.artifactMetadata() + ); } await this.artifactBuffer.cleanup(); } else { @@ -525,13 +531,27 @@ export class AgentRuntime { if (!this.artifactBuffer) { return; } - await this.artifactBuffer.persist(reason, 'failure'); + await this.artifactBuffer.persist( + reason, + 'failure', + this.lastSnapshot ?? undefined, + this.lastSnapshot?.diagnostics, + this.artifactMetadata() + ); await this.artifactBuffer.cleanup(); if (this.artifactBuffer.getOptions().persistMode === 'onFail') { this.disableFailureArtifacts(); } } + private artifactMetadata(): Record { + const url = this.lastSnapshot?.url ?? this.page?.url?.(); + return { + backend: 'playwright', + url, + }; + } + /** * Begin a new step in the verification loop. * diff --git a/src/failure-artifacts.ts b/src/failure-artifacts.ts index 0e9c3b8..49f44f7 100644 --- a/src/failure-artifacts.ts +++ b/src/failure-artifacts.ts @@ -18,6 +18,12 @@ interface FrameRecord { filePath: string; } +async function writeJsonAtomic(filePath: string, data: any): Promise { + const tmpPath = `${filePath}.tmp`; + await fs.promises.writeFile(tmpPath, JSON.stringify(data, null, 2)); + await fs.promises.rename(tmpPath, filePath); +} + export class FailureArtifactBuffer { private runId: string; private options: Required; @@ -91,7 +97,13 @@ export class FailureArtifactBuffer { this.frames = keep; } - async persist(reason: string | null, status: 'failure' | 'success'): Promise { + async persist( + reason: string | null, + status: 'failure' | 'success', + snapshot?: any, + diagnostics?: any, + metadata?: Record + ): Promise { if (this.persisted) { return null; } @@ -107,10 +119,19 @@ export class FailureArtifactBuffer { await fs.promises.copyFile(frame.filePath, path.join(framesOut, frame.fileName)); } - await fs.promises.writeFile( - path.join(runDir, 'steps.json'), - JSON.stringify(this.steps, null, 2) - ); + await writeJsonAtomic(path.join(runDir, 'steps.json'), this.steps); + + let snapshotWritten = false; + if (snapshot) { + await writeJsonAtomic(path.join(runDir, 'snapshot.json'), snapshot); + snapshotWritten = true; + } + + let diagnosticsWritten = false; + if (diagnostics) { + await writeJsonAtomic(path.join(runDir, 'diagnostics.json'), diagnostics); + diagnosticsWritten = true; + } const manifest = { run_id: this.runId, @@ -120,11 +141,11 @@ export class FailureArtifactBuffer { buffer_seconds: this.options.bufferSeconds, frame_count: this.frames.length, frames: this.frames.map(frame => ({ file: frame.fileName, ts: frame.ts })), + snapshot: snapshotWritten ? 'snapshot.json' : null, + diagnostics: diagnosticsWritten ? 'diagnostics.json' : null, + metadata: metadata ?? {}, }; - await fs.promises.writeFile( - path.join(runDir, 'manifest.json'), - JSON.stringify(manifest, null, 2) - ); + await writeJsonAtomic(path.join(runDir, 'manifest.json'), manifest); this.persisted = true; return runDir; diff --git a/tests/failure-artifacts.test.ts b/tests/failure-artifacts.test.ts index 63a7241..ef0c409 100644 --- a/tests/failure-artifacts.test.ts +++ b/tests/failure-artifacts.test.ts @@ -31,14 +31,30 @@ describe('FailureArtifactBuffer', () => { buf.recordStep('CLICK', 's1', 1, 'https://example.com'); await buf.addFrame(Buffer.from('frame'), 'png'); - const runDir = await buf.persist('assert_failed', 'failure'); + const snapshot = { status: 'success', url: 'https://example.com', elements: [] }; + const diagnostics = { confidence: 0.8, reasons: ['ok'], metrics: { quiet_ms: 10 } }; + const runDir = await buf.persist('assert_failed', 'failure', snapshot, diagnostics, { + backend: 'MockBackend', + url: 'https://example.com', + }); expect(runDir).toBeTruthy(); const manifest = JSON.parse( fs.readFileSync(path.join(runDir as string, 'manifest.json'), 'utf-8') ); const steps = JSON.parse(fs.readFileSync(path.join(runDir as string, 'steps.json'), 'utf-8')); + const snapJson = JSON.parse( + fs.readFileSync(path.join(runDir as string, 'snapshot.json'), 'utf-8') + ); + const diagJson = JSON.parse( + fs.readFileSync(path.join(runDir as string, 'diagnostics.json'), 'utf-8') + ); expect(manifest.run_id).toBe('run-2'); expect(manifest.frame_count).toBe(1); + expect(manifest.snapshot).toBe('snapshot.json'); + expect(manifest.diagnostics).toBe('diagnostics.json'); + expect(manifest.metadata.backend).toBe('MockBackend'); expect(steps.length).toBe(1); + expect(snapJson.url).toBe('https://example.com'); + expect(diagJson.confidence).toBe(0.8); }); }); From 581e30b9297622b07f0282adb90c456a44f708c3 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:18:22 -0800 Subject: [PATCH 4/4] P3 --- README.md | 14 +++++ src/failure-artifacts.ts | 99 +++++++++++++++++++++++++++++++-- tests/failure-artifacts.test.ts | 28 +++++++++- 3 files changed, 134 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1b7ca83..b1cbaa5 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,20 @@ runtime.enableFailureArtifacts({ bufferSeconds: 15, captureOnAction: true, fps: await runtime.recordAction('CLICK'); ``` +### Redaction callback (Phase 3) + +Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction. + +```typescript +import { RedactionContext, RedactionResult } from 'sentienceapi'; + +const redact = (_ctx: RedactionContext): RedactionResult => { + return { dropFrames: true }; +}; + +runtime.enableFailureArtifacts({ onBeforePersist: redact }); +``` + **See examples:** [`examples/asserts/`](examples/asserts/) ## 🚀 Quick Start: Choose Your Abstraction Level diff --git a/src/failure-artifacts.ts b/src/failure-artifacts.ts index 49f44f7..a2cacab 100644 --- a/src/failure-artifacts.ts +++ b/src/failure-artifacts.ts @@ -10,6 +10,8 @@ export interface FailureArtifactsOptions { fps?: number; persistMode?: PersistMode; outputDir?: string; + onBeforePersist?: ((ctx: RedactionContext) => RedactionResult) | null; + redactSnapshotValues?: boolean; } interface FrameRecord { @@ -18,12 +20,48 @@ interface FrameRecord { filePath: string; } +export interface RedactionContext { + runId: string; + reason: string | null; + status: 'failure' | 'success'; + snapshot: any; + diagnostics: any; + framePaths: string[]; + metadata: Record; +} + +export interface RedactionResult { + snapshot?: any; + diagnostics?: any; + framePaths?: string[]; + dropFrames?: boolean; +} + async function writeJsonAtomic(filePath: string, data: any): Promise { const tmpPath = `${filePath}.tmp`; await fs.promises.writeFile(tmpPath, JSON.stringify(data, null, 2)); await fs.promises.rename(tmpPath, filePath); } +function redactSnapshotDefaults(payload: any): any { + if (!payload || typeof payload !== 'object') { + return payload; + } + const elements = Array.isArray(payload.elements) ? payload.elements : null; + if (!elements) { + return payload; + } + const redactedElements = elements.map((el: any) => { + if (!el || typeof el !== 'object') return el; + const inputType = String(el.input_type || '').toLowerCase(); + if (['password', 'email', 'tel'].includes(inputType) && 'value' in el) { + return { ...el, value: null, value_redacted: true }; + } + return el; + }); + return { ...payload, elements: redactedElements }; +} + export class FailureArtifactBuffer { private runId: string; private options: Required; @@ -46,6 +84,8 @@ export class FailureArtifactBuffer { fps: options.fps ?? 0, persistMode: options.persistMode ?? 'onFail', outputDir: options.outputDir ?? '.sentience/artifacts', + onBeforePersist: options.onBeforePersist ?? null, + redactSnapshotValues: options.redactSnapshotValues ?? true, }; this.timeNow = timeNow; this.tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-artifacts-')); @@ -121,15 +161,60 @@ export class FailureArtifactBuffer { await writeJsonAtomic(path.join(runDir, 'steps.json'), this.steps); + let snapshotPayload = snapshot; + if (snapshotPayload && this.options.redactSnapshotValues) { + snapshotPayload = redactSnapshotDefaults(snapshotPayload); + } + + let diagnosticsPayload = diagnostics; + let framePaths = this.frames.map(frame => frame.filePath); + let dropFrames = false; + + if (this.options.onBeforePersist) { + try { + const result = this.options.onBeforePersist({ + runId: this.runId, + reason, + status, + snapshot: snapshotPayload, + diagnostics: diagnosticsPayload, + framePaths, + metadata: metadata ?? {}, + }); + if (result.snapshot !== undefined) { + snapshotPayload = result.snapshot; + } + if (result.diagnostics !== undefined) { + diagnosticsPayload = result.diagnostics; + } + if (result.framePaths) { + framePaths = result.framePaths; + } + dropFrames = Boolean(result.dropFrames); + } catch { + dropFrames = true; + } + } + + if (!dropFrames) { + for (const framePath of framePaths) { + if (!fs.existsSync(framePath)) { + continue; + } + const fileName = path.basename(framePath); + await fs.promises.copyFile(framePath, path.join(framesOut, fileName)); + } + } + let snapshotWritten = false; - if (snapshot) { - await writeJsonAtomic(path.join(runDir, 'snapshot.json'), snapshot); + if (snapshotPayload) { + await writeJsonAtomic(path.join(runDir, 'snapshot.json'), snapshotPayload); snapshotWritten = true; } let diagnosticsWritten = false; - if (diagnostics) { - await writeJsonAtomic(path.join(runDir, 'diagnostics.json'), diagnostics); + if (diagnosticsPayload) { + await writeJsonAtomic(path.join(runDir, 'diagnostics.json'), diagnosticsPayload); diagnosticsWritten = true; } @@ -139,11 +224,13 @@ export class FailureArtifactBuffer { status, reason, buffer_seconds: this.options.bufferSeconds, - frame_count: this.frames.length, - frames: this.frames.map(frame => ({ file: frame.fileName, ts: frame.ts })), + frame_count: dropFrames ? 0 : framePaths.length, + frames: dropFrames ? [] : framePaths.map(p => ({ file: path.basename(p), ts: null })), snapshot: snapshotWritten ? 'snapshot.json' : null, diagnostics: diagnosticsWritten ? 'diagnostics.json' : null, metadata: metadata ?? {}, + frames_redacted: !dropFrames && Boolean(this.options.onBeforePersist), + frames_dropped: dropFrames, }; await writeJsonAtomic(path.join(runDir, 'manifest.json'), manifest); diff --git a/tests/failure-artifacts.test.ts b/tests/failure-artifacts.test.ts index ef0c409..05eafe3 100644 --- a/tests/failure-artifacts.test.ts +++ b/tests/failure-artifacts.test.ts @@ -31,7 +31,14 @@ describe('FailureArtifactBuffer', () => { buf.recordStep('CLICK', 's1', 1, 'https://example.com'); await buf.addFrame(Buffer.from('frame'), 'png'); - const snapshot = { status: 'success', url: 'https://example.com', elements: [] }; + const snapshot = { + status: 'success', + url: 'https://example.com', + elements: [ + { id: 1, input_type: 'password', value: 'secret' }, + { id: 2, input_type: 'email', value: 'user@example.com' }, + ], + }; const diagnostics = { confidence: 0.8, reasons: ['ok'], metrics: { quiet_ms: 10 } }; const runDir = await buf.persist('assert_failed', 'failure', snapshot, diagnostics, { backend: 'MockBackend', @@ -56,5 +63,24 @@ describe('FailureArtifactBuffer', () => { expect(steps.length).toBe(1); expect(snapJson.url).toBe('https://example.com'); expect(diagJson.confidence).toBe(0.8); + expect(snapJson.elements[0].value).toBeNull(); + expect(snapJson.elements[0].value_redacted).toBe(true); + expect(snapJson.elements[1].value).toBeNull(); + expect(snapJson.elements[1].value_redacted).toBe(true); + }); + + it('allows redaction callback to drop frames', async () => { + const tmp = makeTempDir('sentience-test-'); + const buf = new FailureArtifactBuffer('run-3', { + outputDir: tmp, + onBeforePersist: () => ({ dropFrames: true }), + }); + await buf.addFrame(Buffer.from('frame'), 'png'); + const runDir = await buf.persist('fail', 'failure', { status: 'success' }); + const manifest = JSON.parse( + fs.readFileSync(path.join(runDir as string, 'manifest.json'), 'utf-8') + ); + expect(manifest.frame_count).toBe(0); + expect(manifest.frames_dropped).toBe(true); }); });