diff --git a/src/agent-runtime.ts b/src/agent-runtime.ts index a2d51f4..8025e3f 100644 --- a/src/agent-runtime.ts +++ b/src/agent-runtime.ts @@ -863,9 +863,43 @@ export class AgentRuntime { const iframeHits = evidence?.iframe_src_hits ?? []; const urlHits = evidence?.url_hits ?? []; const textHits = evidence?.text_hits ?? []; + const selectorHits = evidence?.selector_hits ?? []; /* NOTE(review): selectorHits is not part of the emptiness check that follows, so evidence made up only of selector hits returns false before the heuristic below runs - confirm this is intended. */ if (iframeHits.length === 0 && urlHits.length === 0 && textHits.length === 0) { return false; } + // Heuristic: many sites include passive reCAPTCHA badges (v3) that should not block. + // Only block when there is evidence of an interactive challenge. + const hitsAll = [...iframeHits, ...urlHits, ...textHits, ...selectorHits]; + const hitsLower = hitsAll.map(hit => String(hit || '').toLowerCase()).filter(Boolean); /* Lower-cased once so the lower-case needles below match regardless of case; empty entries are dropped. */ + const joinedHits = hitsLower.join(' '); /* Space-joined form that the strongText phrases are searched in. */ + const strongText = [ + "i'm not a robot", + 'verify you are human', + 'human verification', + 'complete the security check', + 'please verify', + ].some(needle => joinedHits.includes(needle)); + const strongIframe = hitsLower.some(hit => + ['api2/bframe', 'hcaptcha', 'turnstile'].some(needle => hit.includes(needle)) + ); /* Tested against every collected hit, not only iframe_src_hits. */ + const strongSelector = hitsLower.some(hit => + [ + 'g-recaptcha-response', + 'h-captcha-response', + 'cf-turnstile-response', + 'recaptcha-checkbox', + 'hcaptcha-checkbox', + ].some(needle => hit.includes(needle)) + ); /* Likewise tested against every collected hit, not only selector_hits. */ + const onlyGeneric = + !strongText && + !strongIframe && + !strongSelector && + hitsLower.length > 0 && + hitsLower.every(hit => hit.includes('captcha') || hit.includes('recaptcha')); /* True only when no strong signal matched and every hit merely contains the substring captcha (any hit containing recaptcha already does). */ + if (onlyGeneric) { + return false; + } /* Generic-only evidence returns false here without consulting the confidence threshold. */ const confidence = captcha.confidence ?? 0; const minConfidence = options.minConfidence ?? 
DEFAULT_CAPTCHA_OPTIONS.minConfidence; return confidence >= minConfidence; diff --git a/src/backends/snapshot.ts b/src/backends/snapshot.ts index 33cf7e5..c86057c 100644 --- a/src/backends/snapshot.ts +++ b/src/backends/snapshot.ts @@ -104,6 +104,8 @@ export interface SnapshotOptions { gridId?: number | null; /** Use server-side API (Pro/Enterprise tier) */ useApi?: boolean; + /** Gateway snapshot timeout (milliseconds) */ + gatewayTimeoutMs?: number; /** API key for server-side processing */ sentienceApiKey?: string; /** Goal/task description for ordinal support and gateway reranking */ diff --git a/src/snapshot.ts b/src/snapshot.ts index 2e07ac9..fdb552b 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -58,6 +58,7 @@ export interface SnapshotOptions { min_z_index?: number; }; use_api?: boolean; // Force use of server-side API if True, local extension if False + gatewayTimeoutMs?: number; // Gateway snapshot timeout in milliseconds; the gateway fetch in snapshotViaApi falls back to 30000 when this is omitted save_trace?: boolean; // Save raw_elements to JSON for benchmarking/training trace_path?: string; // Path to save trace file (default: "trace_{timestamp}.json") goal?: string; // Optional goal/task description for the snapshot @@ -317,11 +318,20 @@ async function snapshotViaApi( }; try { - const response = await fetch(gatewayUrl, { - method: 'POST', - headers, - body: payloadJson, - }); + const timeoutMs = options.gatewayTimeoutMs ?? 
30000; /* Falls back to 30000 ms when options.gatewayTimeoutMs is null or undefined. */ + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); /* Aborts the fetch below if it has not settled within timeoutMs. */ + let response: Response; + try { + response = await fetch(gatewayUrl, { + method: 'POST', + headers, + body: payloadJson, + signal: controller.signal, + }); + } finally { + clearTimeout(timeoutId); /* Cleared as soon as fetch settles (success or failure), so the timer bounds only the wait for the response, not the handling that follows. */ + } if (!response.ok) { let errorText: string | undefined = undefined; diff --git a/tests/agent-runtime-captcha-detection.test.ts b/tests/agent-runtime-captcha-detection.test.ts new file mode 100644 index 0000000..6907802 --- /dev/null +++ b/tests/agent-runtime-captcha-detection.test.ts @@ -0,0 +1,83 @@ +import { AgentRuntime } from '../src/agent-runtime'; +import { TraceSink } from '../src/tracing/sink'; +import { Tracer } from '../src/tracing/tracer'; +import { CaptchaDiagnostics, Snapshot } from '../src/types'; +import { MockPage } from './mocks/browser-mock'; + +class MockSink extends TraceSink { /* In-memory TraceSink that records every emitted event in the events array. */ + public events: any[] = []; + emit(event: Record): void { + this.events.push(event); + } + async close(): Promise { + // no-op + } + getSinkType(): string { + return 'MockSink'; + } +} + +function makeRuntime(): AgentRuntime { /* Builds an AgentRuntime over a stub browser, a MockPage and a MockSink-backed Tracer, then sets captcha options to minConfidence 0.1 and policy abort. */ + const sink = new MockSink(); + const tracer = new Tracer('test-run', sink); + const page = new MockPage('https://example.com') as any; + const browserLike = { + snapshot: async () => ({ + status: 'success', + url: 'https://example.com', + elements: [], + timestamp: 't1', + }), + }; + const runtime = new AgentRuntime(browserLike as any, page as any, tracer); + runtime.setCaptchaOptions({ minConfidence: 0.1, policy: 'abort' }); + return runtime; +} + +function makeSnapshot(captcha: CaptchaDiagnostics): Snapshot { /* Wraps the given captcha diagnostics in an otherwise empty successful Snapshot. */ + return { + status: 'success', + url: 'https://example.com', + elements: [], + diagnostics: { captcha }, + timestamp: 't1', + }; +} + +describe('AgentRuntime captcha detection', () => { + it('ignores passive recaptcha badges', () => { + const runtime = makeRuntime(); + const captcha: CaptchaDiagnostics = { + detected: 
true, + confidence: 0.95, + provider_hint: 'recaptcha', + evidence: { + iframe_src_hits: ['https://www.google.com/recaptcha/api2/anchor?ar=1'], /* Contains captcha but none of the strong needles, so the heuristic is expected to classify it as generic-only. */ + selector_hits: [], + text_hits: [], + url_hits: [], + }, + }; + + const detected = (runtime as any).isCaptchaDetected(makeSnapshot(captcha)); + expect(detected).toBe(false); + }); + + it('detects interactive captcha challenges', () => { + const runtime = makeRuntime(); + const captcha: CaptchaDiagnostics = { + detected: true, + confidence: 0.95, + provider_hint: 'recaptcha', + evidence: { + iframe_src_hits: [], + selector_hits: [], + text_hits: ["I'm not a robot"], /* Once lower-cased this equals one of the strongText phrases. */ + url_hits: [], + }, + }; + + const detected = (runtime as any).isCaptchaDetected(makeSnapshot(captcha)); + expect(detected).toBe(true); + }); +}); diff --git a/tests/snapshot-gateway-timeout.test.ts b/tests/snapshot-gateway-timeout.test.ts new file mode 100644 index 0000000..b0abcc6 --- /dev/null +++ b/tests/snapshot-gateway-timeout.test.ts @@ -0,0 +1,86 @@ +import { snapshot } from '../src'; +import { BrowserEvaluator } from '../src/utils/browser-evaluator'; + +jest.mock('../src/utils/browser-evaluator', () => ({ + BrowserEvaluator: { + waitForCondition: jest.fn().mockResolvedValue(true), + evaluate: jest.fn(), + }, +})); /* BrowserEvaluator is replaced by jest mocks for this whole test file. */ + +const mockedBrowserEvaluator = BrowserEvaluator as jest.Mocked; + +function makeBrowser() { /* Minimal browser stub exposing only getApiKey, getApiUrl and getPage. */ + return { + getApiKey: () => 'sk_test', + getApiUrl: () => 'https://api.sentienceapi.com', + getPage: () => ({}), + } as any; +} + +describe('Snapshot gateway timeout', () => { + const rawResult = { + raw_elements: [], + url: 'https://example.com', + viewport: { width: 800, height: 600 }, + diagnostics: {}, + }; + + beforeEach(() => { + mockedBrowserEvaluator.evaluate.mockResolvedValue(rawResult as any); + }); + + afterEach(() => { + jest.restoreAllMocks(); /* NOTE(review): this undoes the jest.spyOn spies, but global.fetch is assigned directly in each test and is not reset here - confirm that leaking the mock is acceptable. */ + }); + + it('uses default gateway timeout when not provided', async () => { + const fetchMock = jest.fn().mockResolvedValue({ + ok: true, + json: async () => ({ status: 'success', elements: 
[], url: 'https://example.com' }), + headers: new Headers(), + }); + (global as any).fetch = fetchMock; + + const setTimeoutSpy = jest.spyOn(global, 'setTimeout').mockImplementation((( + fn: (...args: any[]) => void, + _ms?: number + ) => { + return 123 as any; + }) as any); /* The stub never invokes fn, so the abort callback cannot run; it only returns a sentinel id to match against clearTimeout. */ + const clearTimeoutSpy = jest.spyOn(global, 'clearTimeout').mockImplementation(() => {}); + + await snapshot(makeBrowser(), { screenshot: false, limit: 10 }); + + expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 30000); + expect(clearTimeoutSpy).toHaveBeenCalledWith(123); + expect(fetchMock).toHaveBeenCalledWith( + 'https://api.sentienceapi.com/v1/snapshot', + expect.objectContaining({ + signal: expect.any(AbortSignal), + }) + ); + }); + + it('uses custom gateway timeout when provided', async () => { + const fetchMock = jest.fn().mockResolvedValue({ + ok: true, + json: async () => ({ status: 'success', elements: [], url: 'https://example.com' }), + headers: new Headers(), + }); + (global as any).fetch = fetchMock; + + const setTimeoutSpy = jest.spyOn(global, 'setTimeout').mockImplementation((( + fn: (...args: any[]) => void, + _ms?: number + ) => { + return 456 as any; + }) as any); /* Same no-op timer stub as the previous test, with a different sentinel id. */ + const clearTimeoutSpy = jest.spyOn(global, 'clearTimeout').mockImplementation(() => {}); + + await snapshot(makeBrowser(), { screenshot: false, limit: 10, gatewayTimeoutMs: 12345 }); + + expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345); + expect(clearTimeoutSpy).toHaveBeenCalledWith(456); + }); +});