Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -863,9 +863,43 @@ export class AgentRuntime {
const iframeHits = evidence?.iframe_src_hits ?? [];
const urlHits = evidence?.url_hits ?? [];
const textHits = evidence?.text_hits ?? [];
const selectorHits = evidence?.selector_hits ?? [];
if (iframeHits.length === 0 && urlHits.length === 0 && textHits.length === 0) {
return false;
}
// Heuristic: many sites include passive reCAPTCHA badges (v3) that should not block.
// Only block when there is evidence of an interactive challenge.
const hitsAll = [...iframeHits, ...urlHits, ...textHits, ...selectorHits];
const hitsLower = hitsAll.map(hit => String(hit || '').toLowerCase()).filter(Boolean);
const joinedHits = hitsLower.join(' ');
const strongText = [
"i'm not a robot",
'verify you are human',
'human verification',
'complete the security check',
'please verify',
].some(needle => joinedHits.includes(needle));
const strongIframe = hitsLower.some(hit =>
['api2/bframe', 'hcaptcha', 'turnstile'].some(needle => hit.includes(needle))
);
const strongSelector = hitsLower.some(hit =>
[
'g-recaptcha-response',
'h-captcha-response',
'cf-turnstile-response',
'recaptcha-checkbox',
'hcaptcha-checkbox',
].some(needle => hit.includes(needle))
);
const onlyGeneric =
!strongText &&
!strongIframe &&
!strongSelector &&
hitsLower.length > 0 &&
hitsLower.every(hit => hit.includes('captcha') || hit.includes('recaptcha'));
if (onlyGeneric) {
return false;
}
const confidence = captcha.confidence ?? 0;
const minConfidence = options.minConfidence ?? DEFAULT_CAPTCHA_OPTIONS.minConfidence;
return confidence >= minConfidence;
Expand Down
2 changes: 2 additions & 0 deletions src/backends/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@
gridId?: number | null;
/** Use server-side API (Pro/Enterprise tier) */
useApi?: boolean;
/** Gateway snapshot timeout (milliseconds) */
gatewayTimeoutMs?: number;
/** API key for server-side processing */
sentienceApiKey?: string;
/** Goal/task description for ordinal support and gateway reranking */
Expand Down Expand Up @@ -284,7 +286,7 @@

while (true) {
const elapsed = Date.now() - startTime;
pollCount++;

Check warning on line 289 in src/backends/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (windows-latest, 20)

'pollCount' is assigned a value but never used. Allowed unused vars must match /^_/u

Check warning on line 289 in src/backends/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 20)

'pollCount' is assigned a value but never used. Allowed unused vars must match /^_/u

Check warning on line 289 in src/backends/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (macos-latest, 20)

'pollCount' is assigned a value but never used. Allowed unused vars must match /^_/u

if (elapsed >= timeoutMs) {
// Gather diagnostics
Expand Down
20 changes: 15 additions & 5 deletions src/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
min_z_index?: number;
};
use_api?: boolean; // Force use of server-side API if True, local extension if False
gatewayTimeoutMs?: number; // Gateway snapshot timeout in milliseconds
save_trace?: boolean; // Save raw_elements to JSON for benchmarking/training
trace_path?: string; // Path to save trace file (default: "trace_{timestamp}.json")
goal?: string; // Optional goal/task description for the snapshot
Expand Down Expand Up @@ -250,7 +251,7 @@
() => typeof (window as any).sentience !== 'undefined',
5000
);
} catch (_e) {

Check warning on line 254 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (windows-latest, 20)

'_e' is defined but never used

Check warning on line 254 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 20)

'_e' is defined but never used

Check warning on line 254 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (macos-latest, 20)

'_e' is defined but never used
throw new Error(
'Sentience extension failed to inject. Cannot collect raw data for API processing.'
);
Expand Down Expand Up @@ -317,17 +318,26 @@
};

try {
const response = await fetch(gatewayUrl, {
method: 'POST',
headers,
body: payloadJson,
});
const timeoutMs = options.gatewayTimeoutMs ?? 30000;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
let response: Response;
try {
response = await fetch(gatewayUrl, {
method: 'POST',
headers,
body: payloadJson,
signal: controller.signal,
});
} finally {
clearTimeout(timeoutId);
}

if (!response.ok) {
let errorText: string | undefined = undefined;
try {
errorText = await response.text();
} catch (_e) {

Check warning on line 340 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (windows-latest, 20)

'_e' is defined but never used

Check warning on line 340 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 20)

'_e' is defined but never used

Check warning on line 340 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (macos-latest, 20)

'_e' is defined but never used
errorText = undefined;
}
const requestId =
Expand Down
83 changes: 83 additions & 0 deletions tests/agent-runtime-captcha-detection.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { AgentRuntime } from '../src/agent-runtime';
import { TraceSink } from '../src/tracing/sink';
import { Tracer } from '../src/tracing/tracer';
import { CaptchaDiagnostics, Snapshot } from '../src/types';
import { MockPage } from './mocks/browser-mock';

/**
 * In-memory trace sink used by the tests in this file.
 *
 * Every event passed to `emit` is appended to `events`; closing does nothing.
 */
class MockSink extends TraceSink {
  /** Events received through `emit`, in the order they were emitted. */
  public events: any[] = [];

  /** Records the event instead of writing it anywhere. */
  emit(event: Record<string, any>): void {
    this.events.push(event);
  }

  /** Nothing to release: resolves immediately. */
  close(): Promise<void> {
    return Promise.resolve();
  }

  /** Identifies this sink implementation by name. */
  getSinkType(): string {
    return 'MockSink';
  }
}

/**
 * Builds an AgentRuntime for captcha-detection tests.
 *
 * The runtime is wired to a tracer backed by a fresh MockSink, a MockPage at
 * https://example.com, and a stub browser whose `snapshot()` resolves to an
 * empty successful snapshot. Captcha options are set to a 0.1 minimum
 * confidence with the 'abort' policy.
 *
 * @returns the configured runtime
 */
function makeRuntime(): AgentRuntime {
  const tracer = new Tracer('test-run', new MockSink());
  const mockPage = new MockPage('https://example.com') as any;

  // Minimal browser stand-in: only `snapshot` is provided.
  const stubBrowser = {
    async snapshot() {
      return {
        status: 'success',
        url: 'https://example.com',
        elements: [],
        timestamp: 't1',
      };
    },
  };

  const agentRuntime = new AgentRuntime(stubBrowser as any, mockPage as any, tracer);
  agentRuntime.setCaptchaOptions({ minConfidence: 0.1, policy: 'abort' });
  return agentRuntime;
}

/**
 * Wraps captcha diagnostics in an otherwise empty, successful snapshot.
 *
 * @param captcha diagnostics to expose under `diagnostics.captcha`
 * @returns a snapshot for https://example.com with no elements
 */
function makeSnapshot(captcha: CaptchaDiagnostics): Snapshot {
  const snapshotFixture: Snapshot = {
    status: 'success',
    url: 'https://example.com',
    elements: [],
    diagnostics: { captcha },
    timestamp: 't1',
  };
  return snapshotFixture;
}

describe('AgentRuntime captcha detection', () => {
  /**
   * Runs `isCaptchaDetected` on a fresh runtime against a snapshot whose
   * captcha diagnostics report a high-confidence (0.95) reCAPTCHA detection
   * carrying the given evidence.
   *
   * The method is reached through an `any` cast — presumably because it is not
   * part of the runtime's public typings (confirm against AgentRuntime).
   */
  function detectWith(evidence: CaptchaDiagnostics['evidence']): unknown {
    const agentRuntime = makeRuntime();
    const diagnostics: CaptchaDiagnostics = {
      detected: true,
      confidence: 0.95,
      provider_hint: 'recaptcha',
      evidence,
    };
    return (agentRuntime as any).isCaptchaDetected(makeSnapshot(diagnostics));
  }

  it('ignores passive recaptcha badges', () => {
    // Only an "anchor" iframe hit is present: no challenge text or selectors.
    const outcome = detectWith({
      iframe_src_hits: ['https://www.google.com/recaptcha/api2/anchor?ar=1'],
      selector_hits: [],
      text_hits: [],
      url_hits: [],
    });

    expect(outcome).toBe(false);
  });

  it('detects interactive captcha challenges', () => {
    // The challenge prompt text is the only evidence supplied.
    const outcome = detectWith({
      iframe_src_hits: [],
      selector_hits: [],
      text_hits: ["I'm not a robot"],
      url_hits: [],
    });

    expect(outcome).toBe(true);
  });
});
86 changes: 86 additions & 0 deletions tests/snapshot-gateway-timeout.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { snapshot } from '../src';
import { BrowserEvaluator } from '../src/utils/browser-evaluator';

// Replace the browser-evaluator module with a stub for this test file:
// `waitForCondition` is a mock that resolves to true, and `evaluate` is a bare
// mock function with no preset result.
jest.mock('../src/utils/browser-evaluator', () => ({
BrowserEvaluator: {
waitForCondition: jest.fn().mockResolvedValue(true),
evaluate: jest.fn(),
},
}));

// Typed view of the imported BrowserEvaluator so Jest mock helpers
// (e.g. `mockResolvedValue`) type-check on its members.
const mockedBrowserEvaluator = BrowserEvaluator as jest.Mocked<typeof BrowserEvaluator>;

/**
 * Creates a minimal browser stand-in for gateway snapshot tests.
 *
 * It exposes only three accessors: a fixed test API key, the API base URL,
 * and an empty page object.
 */
function makeBrowser() {
  const fakeBrowser: any = {
    getApiKey() {
      return 'sk_test';
    },
    getApiUrl() {
      return 'https://api.sentienceapi.com';
    },
    getPage() {
      return {};
    },
  };
  return fakeBrowser;
}

/**
 * Verifies that `snapshot()` arms a timer for the gateway request using
 * `gatewayTimeoutMs` (30000 ms when omitted), clears that timer afterwards,
 * and passes an AbortSignal to `fetch`.
 */
describe('Snapshot gateway timeout', () => {
  // Raw payload returned by the mocked BrowserEvaluator.evaluate call.
  const rawResult = {
    raw_elements: [],
    url: 'https://example.com',
    viewport: { width: 800, height: 600 },
    diagnostics: {},
  };

  // Remember the pre-existing global fetch (if any) so the per-test stub can
  // be undone; it is assigned directly and is therefore not a jest spy.
  const hadFetch = 'fetch' in (global as any);
  const originalFetch = (global as any).fetch;

  /** Installs a global fetch stub resolving to a minimal successful gateway response. */
  function installFetchMock(): jest.Mock {
    const fetchMock = jest.fn().mockResolvedValue({
      ok: true,
      json: async () => ({ status: 'success', elements: [], url: 'https://example.com' }),
      headers: new Headers(),
    });
    (global as any).fetch = fetchMock;
    return fetchMock;
  }

  /**
   * Replaces the global timer functions so no real timer is scheduled.
   * The stubbed `setTimeout` never invokes its callback and returns `timerId`.
   */
  function stubTimers(timerId: number) {
    const setTimeoutSpy = jest.spyOn(global, 'setTimeout').mockImplementation(((
      _fn: (...args: any[]) => void,
      _ms?: number
    ) => {
      return timerId as any;
    }) as any);
    const clearTimeoutSpy = jest.spyOn(global, 'clearTimeout').mockImplementation(() => {});
    return { setTimeoutSpy, clearTimeoutSpy };
  }

  beforeEach(() => {
    mockedBrowserEvaluator.evaluate.mockResolvedValue(rawResult as any);
  });

  afterEach(() => {
    jest.restoreAllMocks();
    // restoreAllMocks does not revert the direct assignment to global.fetch,
    // so put the original back (or remove the property) by hand.
    if (hadFetch) {
      (global as any).fetch = originalFetch;
    } else {
      delete (global as any).fetch;
    }
  });

  it('uses default gateway timeout when not provided', async () => {
    const fetchMock = installFetchMock();
    const { setTimeoutSpy, clearTimeoutSpy } = stubTimers(123);

    await snapshot(makeBrowser(), { screenshot: false, limit: 10 });

    expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 30000);
    expect(clearTimeoutSpy).toHaveBeenCalledWith(123);
    expect(fetchMock).toHaveBeenCalledWith(
      'https://api.sentienceapi.com/v1/snapshot',
      expect.objectContaining({
        signal: expect.any(AbortSignal),
      })
    );
  });

  it('uses custom gateway timeout when provided', async () => {
    installFetchMock();
    const { setTimeoutSpy, clearTimeoutSpy } = stubTimers(456);

    await snapshot(makeBrowser(), { screenshot: false, limit: 10, gatewayTimeoutMs: 12345 });

    expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345);
    expect(clearTimeoutSpy).toHaveBeenCalledWith(456);
  });
});
Loading