diff --git a/README.md b/README.md index b1cbaa5..488b065 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify brow import { SentienceBrowser, AgentRuntime, + HumanHandoffSolver, urlContains, exists, allOf, @@ -75,6 +76,12 @@ const ok = await runtime .eventually({ timeoutMs: 10_000, pollMs: 250, minConfidence: 0.7, maxSnapshotAttempts: 3 }); console.log('eventually() result:', ok); +// CAPTCHA handling (detection + handoff + verify) +runtime.setCaptchaOptions({ + policy: 'callback', + handler: HumanHandoffSolver(), +}); + // Check task completion if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) { console.log('✅ Task completed!'); @@ -83,6 +90,30 @@ if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) { console.log(`Task done: ${runtime.isTaskDone}`); ``` +#### CAPTCHA strategies (Batteries Included) + +```typescript +import { ExternalSolver, HumanHandoffSolver, VisionSolver } from 'sentienceapi'; + +// Human-in-loop +runtime.setCaptchaOptions({ policy: 'callback', handler: HumanHandoffSolver() }); + +// Vision verification only +runtime.setCaptchaOptions({ policy: 'callback', handler: VisionSolver() }); + +// External system/webhook +runtime.setCaptchaOptions({ + policy: 'callback', + handler: ExternalSolver(async ctx => { + await fetch(process.env.CAPTCHA_WEBHOOK_URL!, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ runId: ctx.runId, url: ctx.url }), + }); + }), +}); +``` + ### Failure Artifact Buffer (Phase 1) Capture a short ring buffer of screenshots and persist them when a required assertion fails. 
diff --git a/examples/agent-runtime-captcha-strategies.ts b/examples/agent-runtime-captcha-strategies.ts
new file mode 100644
index 0000000..91cfa0e
--- /dev/null
+++ b/examples/agent-runtime-captcha-strategies.ts
@@ -0,0 +1,56 @@
+import {
+  AgentRuntime,
+  CaptchaContext,
+  ExternalSolver,
+  HumanHandoffSolver,
+  SentienceBrowser,
+  VisionSolver,
+  createTracer,
+} from 'sentienceapi';
+
+/** Stand-in for a real webhook call: logs which page/run hit a CAPTCHA. */
+async function notifyWebhook(ctx: CaptchaContext): Promise<void> {
+  console.log(`[captcha] external resolver notified: url=${ctx.url} run_id=${ctx.runId}`);
+}
+
+async function main(): Promise<void> {
+  const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
+
+  // Close the browser even when snapshot() throws CaptchaHandlingError.
+  try {
+    const tracer = await createTracer({ runId: 'captcha-demo', uploadTrace: false });
+
+    const browserAdapter = {
+      snapshot: async (_page: any, options?: Record<string, any>) => {
+        return await browser.snapshot(options);
+      },
+    };
+    const runtime = new AgentRuntime(browserAdapter as any, browser.getPage() as any, tracer);
+
+    // Each setCaptchaOptions() call replaces the previous configuration, so only the
+    // last option below is active when snapshot() runs; pick one in real code.
+
+    // Option 1: Human-in-loop
+    runtime.setCaptchaOptions({ policy: 'callback', handler: HumanHandoffSolver() });
+
+    // Option 2: Vision-only verification (no actions)
+    runtime.setCaptchaOptions({ policy: 'callback', handler: VisionSolver() });
+
+    // Option 3: External resolver orchestration
+    runtime.setCaptchaOptions({
+      policy: 'callback',
+      handler: ExternalSolver(async ctx => notifyWebhook(ctx)),
+    });
+
+    await browser.getPage().goto(process.env.CAPTCHA_TEST_URL ?? 'https://example.com');
+    runtime.beginStep('Captcha-aware snapshot');
+    await runtime.snapshot();
+  } finally {
+    await browser.close();
+  }
+}
+
+main().catch(err => {
+  console.error(err);
+  process.exit(1);
+});
diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts
index 3c5a951..d74d363 100644
--- a/src/agent-runtime.ts
+++ b/src/agent-runtime.ts
@@ -45,12 +45,31 @@ import { AssertContext, Predicate } from './verification';
 import { Tracer } from './tracing/tracer';
 import { LLMProvider } from './llm-provider';
 import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts';
+import {
+  CaptchaContext,
+  CaptchaHandlingError,
+  CaptchaOptions,
+  CaptchaResolution,
+  CaptchaSource,
+} from './captcha/types';
 
 // Define a minimal browser interface to avoid circular dependencies
 interface BrowserLike {
   snapshot(page: Page, options?: Record<string, any>): Promise<Snapshot>;
 }
 
+/**
+ * Fallback values for the CaptchaOptions fields that have a sensible default.
+ * `handler` and `resetSession` are caller-supplied callbacks and have none.
+ */
+const DEFAULT_CAPTCHA_OPTIONS: Required<Omit<CaptchaOptions, 'handler' | 'resetSession'>> = {
+  policy: 'abort',
+  minConfidence: 0.7,
+  timeoutMs: 120_000,
+  pollMs: 1_000,
+  maxRetriesNewSession: 1,
+};
+
 /**
  * Assertion record for accumulation and step_end emission.
  */
@@ -333,6 +352,10 @@ export class AgentRuntime {
   private taskDone: boolean = false;
   private taskDoneLabel: string | null = null;
 
+  /** CAPTCHA handling (optional, disabled by default) */
+  private captchaOptions: CaptchaOptions | null = null;
+  private captchaRetryCount: number = 0;
+
   private static similarity(a: string, b: string): number {
     const s1 = a.toLowerCase();
     const s2 = b.toLowerCase();
@@ -422,6 +445,20 @@ export class AgentRuntime {
     this.tracer = tracer;
   }
 
+  /**
+   * Configure CAPTCHA handling (disabled by default unless set).
+   *
+   * Omitted fields fall back to DEFAULT_CAPTCHA_OPTIONS, and the
+   * retry_new_session counter is reset.
+   */
+  setCaptchaOptions(options: CaptchaOptions): void {
+    this.captchaOptions = {
+      ...DEFAULT_CAPTCHA_OPTIONS,
+      ...options,
+    };
+    this.captchaRetryCount = 0;
+  }
+
   /**
    * Build assertion context from current state.
    */
@@ -449,10 +486,214 @@ export class AgentRuntime {
    * @returns Snapshot of current page state
    */
   async snapshot(options?: Record<string, any>): Promise<Snapshot> {
-    this.lastSnapshot = await this.browser.snapshot(this.page, options);
+    // `_skipCaptchaHandling` is an internal flag (set by waitUntilCleared) and is
+    // stripped here so it never reaches the browser adapter.
+    const { _skipCaptchaHandling, ...snapshotOptions } = options || {};
+    this.lastSnapshot = await this.browser.snapshot(this.page, snapshotOptions);
+    if (!_skipCaptchaHandling) {
+      await this.handleCaptchaIfNeeded(this.lastSnapshot, 'gateway');
+    }
     return this.lastSnapshot;
   }
 
+  /**
+   * True when CAPTCHA handling is configured and the snapshot reports a CAPTCHA
+   * with confidence at or above the configured `minConfidence`.
+   */
+  private isCaptchaDetected(snapshot: Snapshot): boolean {
+    const options = this.captchaOptions;
+    if (!options) {
+      return false;
+    }
+    const captcha = snapshot.diagnostics?.captcha;
+    if (!captcha || !captcha.detected) {
+      return false;
+    }
+    const confidence = captcha.confidence ?? 0;
+    const minConfidence = options.minConfidence ?? DEFAULT_CAPTCHA_OPTIONS.minConfidence;
+    return confidence >= minConfidence;
+  }
+
+  /** Assemble the context object handed to a user-supplied CAPTCHA handler. */
+  private buildCaptchaContext(snapshot: Snapshot, source: CaptchaSource): CaptchaContext {
+    return {
+      runId: this.tracer.getRunId(),
+      stepIndex: this.stepIndex,
+      url: snapshot.url,
+      source,
+      captcha: snapshot.diagnostics?.captcha ?? null,
+    };
+  }
+
+  /**
+   * Emit a `verification` trace event of kind `captcha`; `reasonCode` is used both
+   * as the event label and as `details.reason_code`.
+   */
+  private emitCaptchaEvent(reasonCode: string, details: Record<string, any> = {}): void {
+    this.tracer.emit(
+      'verification',
+      {
+        kind: 'captcha',
+        passed: false,
+        label: reasonCode,
+        details: { reason_code: reasonCode, ...details },
+      },
+      this.stepId || undefined
+    );
+  }
+
+  /**
+   * Run the configured CAPTCHA policy when `snapshot` shows a CAPTCHA.
+   *
+   * No-op when handling is not configured or nothing is detected. With policy
+   * `callback` the handler chooses the resolution; any other policy aborts.
+   *
+   * @throws CaptchaHandlingError when the handler is missing, throws, or returns
+   *   an invalid resolution, or when applying the resolution fails.
+   */
+  private async handleCaptchaIfNeeded(snapshot: Snapshot, source: CaptchaSource): Promise<void> {
+    if (!this.captchaOptions) {
+      return;
+    }
+    if (!this.isCaptchaDetected(snapshot)) {
+      return;
+    }
+
+    const options = this.captchaOptions;
+    const minConfidence = options.minConfidence ?? DEFAULT_CAPTCHA_OPTIONS.minConfidence;
+    const captcha = snapshot.diagnostics?.captcha ?? null;
+
+    this.emitCaptchaEvent('captcha_detected', { captcha, min_confidence: minConfidence });
+
+    let resolution: CaptchaResolution;
+    if (options.policy === 'callback') {
+      if (!options.handler) {
+        this.emitCaptchaEvent('captcha_handler_error');
+        throw new CaptchaHandlingError(
+          'captcha_handler_error',
+          'Captcha handler is required for policy="callback".'
+        );
+      }
+      try {
+        resolution = await options.handler(this.buildCaptchaContext(snapshot, source));
+      } catch (err: unknown) {
+        const error = err instanceof Error && err.message ? err.message : String(err);
+        this.emitCaptchaEvent('captcha_handler_error', { error });
+        throw new CaptchaHandlingError('captcha_handler_error', 'Captcha handler failed.', {
+          error,
+        });
+      }
+      if (!resolution || !resolution.action) {
+        this.emitCaptchaEvent('captcha_handler_error');
+        throw new CaptchaHandlingError(
+          'captcha_handler_error',
+          'Captcha handler returned an invalid resolution.'
+        );
+      }
+    } else {
+      resolution = { action: 'abort' };
+    }
+
+    await this.applyCaptchaResolution(resolution, snapshot, source);
+  }
+
+  /**
+   * Act on a handler's (or the abort policy's) resolution.
+   *
+   * - `abort`: throw immediately.
+   * - `retry_new_session`: call `resetSession`, up to `maxRetriesNewSession` times.
+   * - `wait_until_cleared`: poll snapshots until the CAPTCHA is gone or time runs out.
+   *
+   * @throws CaptchaHandlingError on abort, exhausted retries, a missing
+   *   `resetSession` callback, a wait timeout, or an unrecognized action.
+   */
+  private async applyCaptchaResolution(
+    resolution: CaptchaResolution,
+    snapshot: Snapshot,
+    source: CaptchaSource
+  ): Promise<void> {
+    const options = this.captchaOptions || DEFAULT_CAPTCHA_OPTIONS;
+    if (resolution.action === 'abort') {
+      this.emitCaptchaEvent('captcha_policy_abort', { message: resolution.message });
+      throw new CaptchaHandlingError(
+        'captcha_policy_abort',
+        resolution.message || 'Captcha detected. Aborting per policy.'
+      );
+    }
+
+    if (resolution.action === 'retry_new_session') {
+      this.captchaRetryCount += 1;
+      this.emitCaptchaEvent('captcha_retry_new_session');
+      const maxRetries =
+        options.maxRetriesNewSession ?? DEFAULT_CAPTCHA_OPTIONS.maxRetriesNewSession;
+      if (this.captchaRetryCount > maxRetries) {
+        this.emitCaptchaEvent('captcha_retry_exhausted');
+        throw new CaptchaHandlingError(
+          'captcha_retry_exhausted',
+          'Captcha retry_new_session exhausted.'
+        );
+      }
+      const resetSession = this.captchaOptions?.resetSession;
+      if (!resetSession) {
+        // Report the misconfiguration to the tracer like every other failure path.
+        const message = 'resetSession callback is required for retry_new_session.';
+        this.emitCaptchaEvent('captcha_retry_new_session', { error: message });
+        throw new CaptchaHandlingError('captcha_retry_new_session', message);
+      }
+      await resetSession();
+      return;
+    }
+
+    if (resolution.action === 'wait_until_cleared') {
+      const timeoutMs =
+        resolution.timeoutMs ?? options.timeoutMs ?? DEFAULT_CAPTCHA_OPTIONS.timeoutMs;
+      const pollMs = resolution.pollMs ?? options.pollMs ?? DEFAULT_CAPTCHA_OPTIONS.pollMs;
+      await this.waitUntilCleared(timeoutMs, pollMs, snapshot, source);
+      this.emitCaptchaEvent('captcha_resumed');
+      return;
+    }
+
+    // Handlers written in plain JS can return an action outside CaptchaAction;
+    // fail loudly instead of silently continuing with the CAPTCHA still present.
+    const action = String(resolution.action);
+    this.emitCaptchaEvent('captcha_handler_error', { action });
+    throw new CaptchaHandlingError(
+      'captcha_handler_error',
+      `Captcha handler returned an unsupported action: ${action}.`
+    );
+  }
+
+  /**
+   * Poll fresh snapshots until no CAPTCHA is detected or `timeoutMs` elapses.
+   *
+   * Each sleep is capped at the time remaining so a `pollMs` larger than the
+   * timeout cannot overshoot the deadline by a full poll interval.
+   *
+   * @throws CaptchaHandlingError with reason `captcha_wait_timeout` on timeout.
+   */
+  private async waitUntilCleared(
+    timeoutMs: number,
+    pollMs: number,
+    _snapshot: Snapshot,
+    source: CaptchaSource
+  ): Promise<void> {
+    const deadline = Date.now() + timeoutMs;
+    let remainingMs = timeoutMs;
+    while (remainingMs >= 0) {
+      const sleepMs = Math.max(0, Math.min(pollMs, remainingMs));
+      await new Promise(res => setTimeout(res, sleepMs));
+      // Re-snapshot without CAPTCHA handling to avoid recursing into this wait loop.
+      const next = await this.snapshot({ _skipCaptchaHandling: true });
+      if (!this.isCaptchaDetected(next)) {
+        this.emitCaptchaEvent('captcha_cleared', { source });
+        return;
+      }
+      remainingMs = deadline - Date.now();
+    }
+    this.emitCaptchaEvent('captcha_wait_timeout', { timeout_ms: timeoutMs });
+    throw new CaptchaHandlingError('captcha_wait_timeout', 'Captcha wait_until_cleared timed out.');
+  }
+
   /**
    * Enable failure artifact buffer (Phase 1).
    */
diff --git a/src/captcha/strategies.ts b/src/captcha/strategies.ts
new file mode 100644
index 0000000..e872735
--- /dev/null
+++ b/src/captcha/strategies.ts
@@ -0,0 +1,68 @@
+import { CaptchaContext, CaptchaHandler, CaptchaResolution } from './types';
+
+type StrategyOptions = {
+  message?: string;
+  handledBy?: 'human' | 'customer_system' | 'unknown';
+  timeoutMs?: number;
+  pollMs?: number;
+};
+
+/**
+ * Build the `wait_until_cleared` resolution shared by every built-in strategy.
+ * Caller-supplied `message`/`handledBy` win over the strategy's own defaults.
+ */
+function waitResolution(
+  options: StrategyOptions,
+  defaults: Required<Pick<StrategyOptions, 'message' | 'handledBy'>>
+): CaptchaResolution {
+  return {
+    action: 'wait_until_cleared',
+    message: options.message ?? defaults.message,
+    handledBy: options.handledBy ?? defaults.handledBy,
+    timeoutMs: options.timeoutMs,
+    pollMs: options.pollMs,
+  };
+}
+
+/**
+ * Handler for human-in-the-loop flows: performs no action itself and tells the
+ * runtime to wait until the CAPTCHA is no longer detected.
+ */
+export function HumanHandoffSolver(options: StrategyOptions = {}): CaptchaHandler {
+  return async () =>
+    waitResolution(options, {
+      message: 'Solve CAPTCHA in the live session, then resume.',
+      handledBy: 'human',
+    });
+}
+
+/**
+ * Handler that performs no action itself and tells the runtime to wait until the
+ * CAPTCHA is no longer detected; differs from HumanHandoffSolver only in defaults.
+ */
+export function VisionSolver(options: StrategyOptions = {}): CaptchaHandler {
+  return async () =>
+    waitResolution(options, {
+      message: 'Waiting for CAPTCHA to clear (vision verification).',
+      handledBy: 'customer_system',
+    });
+}
+
+/**
+ * Handler that first awaits `resolver` (e.g. to notify an external system), then
+ * tells the runtime to wait until the CAPTCHA is no longer detected.
+ *
+ * A rejection from `resolver` propagates to whoever invoked the handler.
+ */
+export function ExternalSolver(
+  resolver: (ctx: CaptchaContext) => Promise<unknown>,
+  options: StrategyOptions = {}
+): CaptchaHandler {
+  return async ctx => {
+    await resolver(ctx);
+    return waitResolution(options, {
+      message: 'External solver invoked; waiting for clearance.',
+      handledBy: 'customer_system',
+    });
+  };
+}
diff --git a/src/captcha/types.ts b/src/captcha/types.ts
new file mode 100644
index 0000000..e57b46a
--- /dev/null
+++ b/src/captcha/types.ts
@@ -0,0 +1,67 @@
+import { CaptchaDiagnostics } from '../types';
+
+export type CaptchaPolicy = 'abort' | 'callback';
+export type CaptchaAction = 'abort' | 'retry_new_session' | 'wait_until_cleared';
+
+export type CaptchaSource = 'extension' | 'gateway' | 'runtime';
+
+export interface CaptchaContext {
+  runId: string;
+  stepIndex: number;
+  url: string;
+  source: CaptchaSource;
+  captcha: CaptchaDiagnostics | null;
+  screenshotPath?: string;
+  framesDir?: string;
+  snapshotPath?: string;
+  liveSessionUrl?: string;
+  meta?: Record<string, any>;
+}
+
+export interface CaptchaResolution {
+  action: CaptchaAction;
+  message?: string;
+  handledBy?: 'human' | 'customer_system' | 'unknown';
+  timeoutMs?: number;
+  pollMs?: number;
+}
+
+export type CaptchaHandler = (
+  ctx: CaptchaContext
+) => CaptchaResolution | Promise<CaptchaResolution>;
+
+export interface CaptchaOptions {
+  /** `abort` throws on detection; `callback` defers to `handler`. */
+  policy?: CaptchaPolicy;
+  /** Minimum detection confidence for a CAPTCHA to be acted on. */
+  minConfidence?: number;
+  /** Upper bound, in ms, for a `wait_until_cleared` resolution. */
+  timeoutMs?: number;
+  /** Delay, in ms, between snapshots while waiting for clearance. */
+  pollMs?: number;
+  /** How many `retry_new_session` resolutions are allowed before failing. */
+  maxRetriesNewSession?: number;
+  /** Required when `policy` is `callback`. */
+  handler?: CaptchaHandler;
+  /** Required for `retry_new_session`; awaited by the runtime on each retry. */
+  resetSession?: () => Promise<void>;
+}
+
+/**
+ * Error thrown by the runtime when CAPTCHA handling fails or aborts.
+ * `reasonCode` is machine-readable; `details` carries optional extra context.
+ */
+export class CaptchaHandlingError extends Error {
+  reasonCode: string;
+  details?: Record<string, any>;
+
+  constructor(reasonCode: string, message: string, details?: Record<string, any>) {
+    super(message);
+    // Restore the prototype chain so `instanceof CaptchaHandlingError` also works
+    // when the build targets ES5, and give stack traces a recognizable name.
+    Object.setPrototypeOf(this, new.target.prototype);
+    this.name = 'CaptchaHandlingError';
+    this.reasonCode = reasonCode;
+    this.details = details;
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index 3ae5fa0..7f6478f 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -70,6 +70,8 @@ export {
   isCollapsed,
 } from './verification';
 export { AgentRuntime, AssertionHandle, AssertionRecord, EventuallyOptions } from './agent-runtime';
+export * from './captcha/types';
+export * from './captcha/strategies';
 
 // Ordinal Support (Phase 3)
 export {
diff --git a/src/snapshot.ts b/src/snapshot.ts
index 821cd5d..08e38aa 100644
--- a/src/snapshot.ts
+++ b/src/snapshot.ts
@@ -239,6 +239,9 @@ async function snapshotViaApi(
   // Use raw_elements (raw data) instead of elements (processed data)
   // Server validates API key and applies proprietary ranking logic
   const clientMetrics = rawResult?.diagnostics?.metrics;
+  const clientDiagnostics = rawResult?.diagnostics?.captcha
+    ? { captcha: rawResult.diagnostics.captcha }
+    : undefined;
   const payload = {
     raw_elements: rawResult.raw_elements || [], // Raw data needed for server processing
     url: rawResult.url || '',
@@ -249,6 +252,7 @@ async function snapshotViaApi(
       filter: options.filter,
     },
     client_metrics: clientMetrics || undefined,
+    client_diagnostics: clientDiagnostics,
   };
 
   // Check payload size before sending (server has 10MB limit)
diff --git a/src/tracing/types.ts b/src/tracing/types.ts
index 3c133ea..79448cf 100644
--- a/src/tracing/types.ts
+++ b/src/tracing/types.ts
@@ -184,7 +184,7 @@ export interface TraceEventData {
   verify?: VerifyData;
 
   // Verification event fields (for assertion loop)
-  kind?: 'assert' | 'task_done';
+  kind?: 'assert' | 'task_done' | 'captcha';
   label?: string;
   passed?: boolean;
   required?: boolean;
diff --git a/src/types.ts b/src/types.ts
index 896dc40..5132443 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -157,10 +157,25 @@ export interface SnapshotDiagnosticsMetrics {
   raw_elements_count?: number | null;
 }
 
+export interface CaptchaEvidence {
+  text_hits: string[];
+  selector_hits: string[];
+  iframe_src_hits: string[];
+  url_hits: string[];
+}
+
+export interface CaptchaDiagnostics {
+  detected: boolean;
+  provider_hint?: 'recaptcha' | 'hcaptcha' | 'turnstile' | 'arkose' | 'awswaf' | 'unknown' | null;
+  confidence: number;
+  evidence: CaptchaEvidence;
+}
+
 export interface SnapshotDiagnostics {
   confidence?: number | null;
   reasons?: string[];
   metrics?: SnapshotDiagnosticsMetrics;
+  captcha?: CaptchaDiagnostics;
 }
 
 /**
diff --git a/tests/agent-runtime-regression-safety.test.ts b/tests/agent-runtime-regression-safety.test.ts
new file mode 100644
index 0000000..64e322a
--- /dev/null
+++ b/tests/agent-runtime-regression-safety.test.ts
@@ -0,0 +1,215 @@
+import { AgentRuntime } from '../src/agent-runtime';
+import { ActionExecutor } from '../src/utils/action-executor';
+import { TraceSink } from '../src/tracing/sink';
+import { Tracer } from '../src/tracing/tracer';
+import { isChecked, isDisabled, isEnabled, valueEquals } from '../src/verification';
+import { BBox, Element, Snapshot, VisualCues } from '../src/types';
+import { MockPage } from './mocks/browser-mock';
+import * as actionsModule from '../src/actions';
+
+jest.mock('../src/actions');
+
+class MockSink extends TraceSink {
+  public events: any[] = []; // every event passed to emit(), in call order
+  emit(event: Record<string, any>): void {
+    this.events.push(event);
+  }
+  async close(): Promise<void> {
+    // no-op: this sink only records events in memory
+  }
+  getSinkType(): string {
+    return 'MockSink';
+  }
+}
+
+function makeElement(
+  id: number,
+  role: string,
+  text: string | null,
+  extras: Partial<Element> = {}
+): Element {
+  const cues: VisualCues = {
+    is_primary: false,
+    background_color_name: null,
+    is_clickable: true,
+  };
+  return {
+    id,
+    role,
+    text: text ?? undefined,
+    importance: 10,
+    bbox: { x: 0, y: 0, width: 100, height: 40 } as BBox,
+    visual_cues: cues,
+    ...extras, // spread last so callers can override any default above
+  } as Element;
+}
+
+describe('AgentRuntime regression safety net', () => {
+  it('v1 state assertions: enabled/disabled/checked/value', () => {
+    const sink = new MockSink();
+    const tracer = new Tracer('test-run', sink);
+    const page = new MockPage('https://example.com') as any;
+
+    const elements: Element[] = [
+      makeElement(1, 'button', 'Submit', { disabled: false }),
+      makeElement(2, 'checkbox', null, { checked: true }),
+      makeElement(3, 'textbox', null, { value: 'hello', input_type: 'text' }),
+      makeElement(4, 'button', 'Disabled', { disabled: true }),
+    ];
+
+    const snapshot: Snapshot = {
+      status: 'success',
+      url: 'https://example.com',
+      elements,
+      timestamp: 't1',
+    };
+
+    const browserLike = {
+      snapshot: async () => snapshot,
+    };
+
+    const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
+    runtime.beginStep('Test');
+    runtime.lastSnapshot = snapshot;
+
+    expect(runtime.assert(isEnabled("text~'Submit'"), 'enabled')).toBe(true);
+    expect(runtime.assert(isDisabled("text~'Disabled'"), 'disabled')).toBe(true);
+    expect(runtime.assert(isChecked('role=checkbox'), 'checked')).toBe(true);
+    expect(runtime.assert(valueEquals('role=textbox', 'hello'), 'value')).toBe(true);
+  });
+
+  it('v2 eventually retry loop succeeds on later snapshot', async () => {
+    const sink = new MockSink();
+    const tracer = new Tracer('test-run', sink);
+    const page = new MockPage('https://example.com') as any;
+
+    const snapshots: Snapshot[] = [
+      { status: 'success', url: 'https://example.com', elements: [], timestamp: 't1' },
+      { status: 'success', url: 'https://example.com', elements: [], timestamp: 't2' },
+      { status: 'success', url: 'https://example.com/done', elements: [], timestamp: 't3' },
+    ];
+
+    const browserLike = {
+      snapshot: async () => snapshots.shift() as Snapshot,
+    };
+
+    const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
+    runtime.beginStep('Test');
+
+    const ok = await runtime
+      .check(ctx => {
+        const done = (ctx.url || '').endsWith('/done');
+        return {
+          passed: done,
+          reason: done ? '' : 'not done',
+          details: { reason_code: done ? 'ok' : 'no_match' },
+        };
+      }, 'eventually_done')
+      .eventually({ timeoutMs: 2000, pollMs: 0 });
+
+    expect(ok).toBe(true);
+  });
+
+  it('minConfidence gating yields snapshot_exhausted', async () => {
+    const sink = new MockSink();
+    const tracer = new Tracer('test-run', sink);
+    const page = new MockPage('https://example.com') as any;
+
+    const snapshots: Snapshot[] = [
+      {
+        status: 'success',
+        url: 'https://example.com',
+        elements: [],
+        timestamp: 't1',
+        diagnostics: { confidence: 0.1 } as any,
+      },
+      {
+        status: 'success',
+        url: 'https://example.com',
+        elements: [],
+        timestamp: 't2',
+        diagnostics: { confidence: 0.1 } as any,
+      },
+    ];
+
+    const browserLike = {
+      snapshot: async () => snapshots.shift() as Snapshot,
+    };
+
+    const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
+    runtime.beginStep('Test');
+
+    const ok = await runtime
+      .check(() => ({ passed: true, reason: '', details: {} }), 'min_confidence')
+      .eventually({
+        timeoutMs: 2000,
+        pollMs: 0,
+        minConfidence: 0.7,
+        maxSnapshotAttempts: 2,
+      });
+
+    expect(ok).toBe(false);
+    const stepEnd = runtime.getAssertionsForStepEnd();
+    expect((stepEnd.assertions[0] as any).details.reason_code).toBe('snapshot_exhausted');
+  });
+
+  it('golden: same snapshots/actions yield same outcome (no captcha)', async () => {
+    const sink = new MockSink();
+    const tracer = new Tracer('test-run', sink);
+    const page = new MockPage('https://example.com') as any;
+
+    const mockClick = actionsModule.click as jest.MockedFunction<typeof actionsModule.click>;
+    const mockTypeText = actionsModule.typeText as jest.MockedFunction<
+      typeof actionsModule.typeText
+    >;
+    mockClick.mockResolvedValue({
+      success: true,
+      duration_ms: 10,
+      outcome: 'dom_updated',
+      url_changed: false,
+    });
+    mockTypeText.mockResolvedValue({
+      success: true,
+      duration_ms: 10,
+      outcome: 'dom_updated',
+      url_changed: false,
+    });
+
+    const snap: Snapshot = {
+      status: 'success',
+      url: 'https://example.com',
+      elements: [makeElement(1, 'button', 'Go'), makeElement(2, 'textbox', null)],
+      timestamp: 't1',
+    };
+
+    const mockBrowser = {} as any;
+    const executor = new ActionExecutor(mockBrowser, false);
+    await executor.executeAction('CLICK(1)', snap);
+    await executor.executeAction('TYPE(2, "hello")', snap);
+
+    expect(mockClick).toHaveBeenCalledWith(mockBrowser, 1);
+    expect(mockTypeText).toHaveBeenCalledWith(mockBrowser, 2, 'hello');
+
+    const snapshots: Snapshot[] = [
+      { status: 'success', url: 'https://example.com', elements: [], timestamp: 't1' },
+      { status: 'success', url: 'https://example.com/after', elements: [], timestamp: 't2' },
+      { status: 'success', url: 'https://example.com/done', elements: [], timestamp: 't3' },
+    ];
+
+    const browserLike = {
+      snapshot: async () => snapshots.shift() as Snapshot,
+    };
+
+    const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
+    runtime.beginStep('Test');
+
+    const ok = await runtime
+      .check(ctx => {
+        const done = (ctx.url || '').endsWith('/done');
+        return { passed: done, reason: done ? '' : 'not done', details: {} };
+      }, 'golden_flow')
+      .eventually({ timeoutMs: 2000, pollMs: 0 });
+
+    expect(ok).toBe(true);
+  });
+});