Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,13 @@ jobs:
echo "Warning: src/extension not found, tests may fail"
fi

- name: Run tests
- name: Phase 0 regression safety net
run: |
npm test -- agent-runtime-assertions.test.ts
env:
CI: true

- name: Run full test suite
run: |
npm test
env:
Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,31 @@ if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) {
console.log(`Task done: ${runtime.isTaskDone}`);
```

### Failure Artifact Buffer (Phase 1)

Capture a short ring buffer of screenshots and persist them when a required assertion fails.

```typescript
runtime.enableFailureArtifacts({ bufferSeconds: 15, captureOnAction: true, fps: 0 });

// After each action, record it (best-effort).
await runtime.recordAction('CLICK');
```

### Redaction callback (Phase 3)

Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction.

```typescript
import { RedactionContext, RedactionResult } from 'sentienceapi';

const redact = (_ctx: RedactionContext): RedactionResult => {
return { dropFrames: true };
};

runtime.enableFailureArtifacts({ onBeforePersist: redact });
```

**See examples:** [`examples/asserts/`](examples/asserts/)

## 🚀 Quick Start: Choose Your Abstraction Level
Expand Down
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

125 changes: 125 additions & 0 deletions src/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import { Snapshot } from './types';
import { AssertContext, Predicate } from './verification';
import { Tracer } from './tracing/tracer';
import { LLMProvider } from './llm-provider';
import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts';

// Define a minimal browser interface to avoid circular dependencies
interface BrowserLike {
Expand Down Expand Up @@ -185,6 +186,11 @@ export class AssertionHandle {
},
true
);
if (this.required && !passed) {
(this.runtime as any).persistFailureArtifacts(
`assert_eventually_failed:${this.label}`
);
}
return passed;
} catch {
// fall through to snapshot_exhausted
Expand Down Expand Up @@ -216,6 +222,9 @@ export class AssertionHandle {
},
true
);
if (this.required) {
(this.runtime as any).persistFailureArtifacts(`assert_eventually_failed:${this.label}`);
}
return false;
}

Expand All @@ -233,6 +242,11 @@ export class AssertionHandle {
},
true
);
if (this.required) {
(this.runtime as any).persistFailureArtifacts(
`assert_eventually_timeout:${this.label}`
);
}
return false;
}

Expand Down Expand Up @@ -272,6 +286,9 @@ export class AssertionHandle {
{ eventually: true, attempt, final: true, timeout: true },
true
);
if (this.required) {
(this.runtime as any).persistFailureArtifacts(`assert_eventually_timeout:${this.label}`);
}
return false;
}

Expand Down Expand Up @@ -306,6 +323,10 @@ export class AgentRuntime {
/** Most recent snapshot (for assertion context) */
lastSnapshot: Snapshot | null = null;

/** Failure artifact buffer (Phase 1) */
private artifactBuffer: FailureArtifactBuffer | null = null;
private artifactTimer: NodeJS.Timeout | null = null;

/** Assertions accumulated during current step */
private assertionsThisStep: AssertionRecord[] = [];
/** Task completion tracking */
Expand Down Expand Up @@ -432,6 +453,105 @@ export class AgentRuntime {
return this.lastSnapshot;
}

/**
 * Enable the failure artifact buffer (Phase 1).
 *
 * Builds a new FailureArtifactBuffer from the tracer's run id and the given
 * options. When the `fps` value reported by the buffer's options is positive,
 * a background interval is started that captures a frame on every tick.
 *
 * Calling this method again replaces the buffer. Any capture timer started by
 * an earlier call is stopped first so that intervals never accumulate.
 *
 * @param options - Configuration forwarded to the FailureArtifactBuffer constructor.
 */
enableFailureArtifacts(options: FailureArtifactsOptions = {}): void {
  this.artifactBuffer = new FailureArtifactBuffer(this.tracer.getRunId(), options);

  // A previous call may have left an interval running. Its handle is about to
  // be overwritten (or left dangling when fps is now 0), so stop it here while
  // it can still be reached.
  if (this.artifactTimer) {
    clearInterval(this.artifactTimer);
    this.artifactTimer = null;
  }

  const fps = this.artifactBuffer.getOptions().fps;
  if (fps && fps > 0) {
    // Clamp to at least 1 ms so that a very large fps never produces a 0 ms interval.
    const intervalMs = Math.max(1, Math.floor(1000 / fps));
    this.artifactTimer = setInterval(() => {
      this.captureArtifactFrame().catch(() => {
        // best-effort
      });
    }, intervalMs);
  }
}

/**
 * Stop background frame capture.
 *
 * Clears the capture interval if one is running and forgets its handle.
 * The artifact buffer itself is left untouched by this method.
 */
disableFailureArtifacts(): void {
  const timer = this.artifactTimer;
  if (!timer) {
    // Nothing is running.
    return;
  }
  clearInterval(timer);
  this.artifactTimer = null;
}

/**
 * Add an action to the artifact timeline and, if the buffer's
 * `captureOnAction` option is set, capture a frame right after.
 *
 * Does nothing when no artifact buffer has been enabled.
 *
 * @param action - Label of the action that was just performed.
 * @param url - Optional URL to record alongside the action.
 */
async recordAction(action: string, url?: string): Promise<void> {
  const buffer = this.artifactBuffer;
  if (buffer) {
    // The step is tagged with the runtime's current step id and index.
    buffer.recordStep(action, this.stepId, this.stepIndex, url);
    const captureNow = buffer.getOptions().captureOnAction;
    if (captureNow) {
      await this.captureArtifactFrame();
    }
  }
}

/**
 * Take a JPEG screenshot (quality 80) of the page and append it to the
 * artifact buffer.
 *
 * Does nothing when no buffer is enabled. Any error raised while taking or
 * storing the screenshot is swallowed, so this method never rejects.
 */
private async captureArtifactFrame(): Promise<void> {
  if (this.artifactBuffer) {
    try {
      const frame = await this.page.screenshot({ type: 'jpeg', quality: 80 });
      await this.artifactBuffer.addFrame(frame, 'jpeg');
    } catch {
      // best-effort
    }
  }
}

/**
 * Finalize the artifact buffer at the end of a run.
 *
 * - Failed run: delegates to the failure persistence path with reason
 *   `finalize_failure`.
 * - Successful run: persists the buffer only when `persistMode` is
 *   `'always'`, then cleans the buffer up.
 *
 * Does nothing when no artifact buffer has been enabled.
 *
 * @param success - Whether the run completed successfully.
 */
async finalizeRun(success: boolean): Promise<void> {
  if (!this.artifactBuffer) {
    return;
  }

  if (!success) {
    await this.persistFailureArtifacts('finalize_failure');
    return;
  }

  const keepOnSuccess = this.artifactBuffer.getOptions().persistMode === 'always';
  if (keepOnSuccess) {
    const snapshot = this.lastSnapshot ?? undefined;
    const diagnostics = this.lastSnapshot?.diagnostics;
    const metadata = this.artifactMetadata();
    await this.artifactBuffer.persist('success', 'success', snapshot, diagnostics, metadata);
  }
  await this.artifactBuffer.cleanup();
}

/**
 * Persist the buffered artifacts with status `failure`, then clean the
 * buffer up.
 *
 * The most recent snapshot, its diagnostics, and the artifact metadata are
 * passed along to the buffer. When `persistMode` is `'onFail'`, background
 * capture is stopped afterwards.
 *
 * Does nothing when no artifact buffer has been enabled.
 *
 * @param reason - Short label describing why the artifacts are being persisted.
 */
private async persistFailureArtifacts(reason: string): Promise<void> {
  if (this.artifactBuffer) {
    const snapshot = this.lastSnapshot ?? undefined;
    const diagnostics = this.lastSnapshot?.diagnostics;
    const metadata = this.artifactMetadata();

    await this.artifactBuffer.persist(reason, 'failure', snapshot, diagnostics, metadata);
    await this.artifactBuffer.cleanup();

    const stopAfterFailure = this.artifactBuffer.getOptions().persistMode === 'onFail';
    if (stopAfterFailure) {
      this.disableFailureArtifacts();
    }
  }
}

/**
 * Build the metadata object stored alongside persisted artifacts.
 *
 * The URL comes from the most recent snapshot when it has one; otherwise it
 * falls back to the page's current URL, if that is available.
 *
 * @returns An object with the fixed `backend` name and the resolved `url`.
 */
private artifactMetadata(): Record<string, any> {
  const snapshotUrl = this.lastSnapshot?.url;
  return {
    backend: 'playwright',
    url: snapshotUrl ?? this.page?.url?.(),
  };
}

/**
* Begin a new step in the verification loop.
*
Expand Down Expand Up @@ -476,6 +596,11 @@ export class AgentRuntime {
assert(predicate: Predicate, label: string, required: boolean = false): boolean {
  // Evaluate the predicate against the current assertion context and record the result.
  const result = predicate(this.ctx());
  this._recordOutcome(result, label, required, null, true);

  // Only a failed *required* assertion triggers artifact persistence.
  const shouldPersist = required && !result.passed;
  if (shouldPersist) {
    // Fire-and-forget: persistence must not delay or fail the assertion itself.
    this.persistFailureArtifacts(`assert_failed:${label}`).catch(() => {
      // best-effort
    });
  }

  return result.passed;
}

Expand Down
Loading
Loading