Skip to content

Commit f69a8ca

Browse files
authored
Merge pull request #174 from SentienceAPI/tweak_time_recaptcha
timeout param + passive captcha handling
2 parents 196b87e + 89744d9 commit f69a8ca

File tree

5 files changed: 220 additions and 5 deletions.

src/agent-runtime.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,9 +863,43 @@ export class AgentRuntime {
863863
const iframeHits = evidence?.iframe_src_hits ?? [];
864864
const urlHits = evidence?.url_hits ?? [];
865865
const textHits = evidence?.text_hits ?? [];
866+
const selectorHits = evidence?.selector_hits ?? [];
866867
if (iframeHits.length === 0 && urlHits.length === 0 && textHits.length === 0) {
867868
return false;
868869
}
870+
// Heuristic: many sites include passive reCAPTCHA badges (v3) that should not block.
871+
// Only block when there is evidence of an interactive challenge.
872+
const hitsAll = [...iframeHits, ...urlHits, ...textHits, ...selectorHits];
873+
const hitsLower = hitsAll.map(hit => String(hit || '').toLowerCase()).filter(Boolean);
874+
const joinedHits = hitsLower.join(' ');
875+
const strongText = [
876+
"i'm not a robot",
877+
'verify you are human',
878+
'human verification',
879+
'complete the security check',
880+
'please verify',
881+
].some(needle => joinedHits.includes(needle));
882+
const strongIframe = hitsLower.some(hit =>
883+
['api2/bframe', 'hcaptcha', 'turnstile'].some(needle => hit.includes(needle))
884+
);
885+
const strongSelector = hitsLower.some(hit =>
886+
[
887+
'g-recaptcha-response',
888+
'h-captcha-response',
889+
'cf-turnstile-response',
890+
'recaptcha-checkbox',
891+
'hcaptcha-checkbox',
892+
].some(needle => hit.includes(needle))
893+
);
894+
const onlyGeneric =
895+
!strongText &&
896+
!strongIframe &&
897+
!strongSelector &&
898+
hitsLower.length > 0 &&
899+
hitsLower.every(hit => hit.includes('captcha') || hit.includes('recaptcha'));
900+
if (onlyGeneric) {
901+
return false;
902+
}
869903
const confidence = captcha.confidence ?? 0;
870904
const minConfidence = options.minConfidence ?? DEFAULT_CAPTCHA_OPTIONS.minConfidence;
871905
return confidence >= minConfidence;

src/backends/snapshot.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ export interface SnapshotOptions {
104104
gridId?: number | null;
105105
/** Use server-side API (Pro/Enterprise tier) */
106106
useApi?: boolean;
107+
/** Gateway snapshot timeout (milliseconds) */
108+
gatewayTimeoutMs?: number;
107109
/** API key for server-side processing */
108110
sentienceApiKey?: string;
109111
/** Goal/task description for ordinal support and gateway reranking */

src/snapshot.ts

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ export interface SnapshotOptions {
5858
min_z_index?: number;
5959
};
6060
use_api?: boolean; // Force use of server-side API if True, local extension if False
61+
gatewayTimeoutMs?: number; // Gateway snapshot timeout in milliseconds
6162
save_trace?: boolean; // Save raw_elements to JSON for benchmarking/training
6263
trace_path?: string; // Path to save trace file (default: "trace_{timestamp}.json")
6364
goal?: string; // Optional goal/task description for the snapshot
@@ -317,11 +318,20 @@ async function snapshotViaApi(
317318
};
318319

319320
try {
320-
const response = await fetch(gatewayUrl, {
321-
method: 'POST',
322-
headers,
323-
body: payloadJson,
324-
});
321+
const timeoutMs = options.gatewayTimeoutMs ?? 30000;
322+
const controller = new AbortController();
323+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
324+
let response: Response;
325+
try {
326+
response = await fetch(gatewayUrl, {
327+
method: 'POST',
328+
headers,
329+
body: payloadJson,
330+
signal: controller.signal,
331+
});
332+
} finally {
333+
clearTimeout(timeoutId);
334+
}
325335

326336
if (!response.ok) {
327337
let errorText: string | undefined = undefined;
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { AgentRuntime } from '../src/agent-runtime';
2+
import { TraceSink } from '../src/tracing/sink';
3+
import { Tracer } from '../src/tracing/tracer';
4+
import { CaptchaDiagnostics, Snapshot } from '../src/types';
5+
import { MockPage } from './mocks/browser-mock';
6+
7+
/**
 * In-memory TraceSink used by these tests: every emitted event is appended
 * to `events`, and nothing else is done with it.
 */
class MockSink extends TraceSink {
8+
// Events received through emit(), in call order.
public events: any[] = [];
9+
// Record the event in memory.
emit(event: Record<string, any>): void {
10+
this.events.push(event);
11+
}
12+
// Nothing to release; resolves immediately.
async close(): Promise<void> {
13+
// no-op
14+
}
15+
// Returns the fixed identifier 'MockSink'.
getSinkType(): string {
16+
return 'MockSink';
17+
}
18+
}
19+
20+
/**
 * Builds the AgentRuntime under test from stand-ins: a Tracer writing to a
 * MockSink, a MockPage for 'https://example.com', and a browser-like object
 * whose snapshot() resolves to an empty successful snapshot.
 *
 * @returns the runtime, with captcha options set to minConfidence 0.1 and
 *          policy 'abort'.
 */
function makeRuntime(): AgentRuntime {
21+
const sink = new MockSink();
22+
const tracer = new Tracer('test-run', sink);
23+
const page = new MockPage('https://example.com') as any;
24+
// Only snapshot() is provided; the object is cast to `any` when handed to the constructor.
const browserLike = {
25+
snapshot: async () => ({
26+
status: 'success',
27+
url: 'https://example.com',
28+
elements: [],
29+
timestamp: 't1',
30+
}),
31+
};
32+
const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
33+
// NOTE(review): 0.1 is far below the 0.95 confidence used by the fixtures in this
// file, presumably so results hinge on the evidence rather than the threshold — confirm.
runtime.setCaptchaOptions({ minConfidence: 0.1, policy: 'abort' });
34+
return runtime;
35+
}
36+
37+
/**
 * Wraps captcha diagnostics in an otherwise empty successful Snapshot.
 *
 * @param captcha diagnostics placed under `diagnostics.captcha`.
 * @returns a snapshot for 'https://example.com' with no elements and the
 *          fixed timestamp 't1'.
 */
function makeSnapshot(captcha: CaptchaDiagnostics): Snapshot {
38+
return {
39+
status: 'success',
40+
url: 'https://example.com',
41+
elements: [],
42+
diagnostics: { captcha },
43+
timestamp: 't1',
44+
};
45+
}
46+
47+
// Checks how AgentRuntime.isCaptchaDetected classifies captcha evidence.
// Both fixtures report detected=true with confidence 0.95 and differ only in
// the evidence they carry.
describe('AgentRuntime captcha detection', () => {
48+
// The only evidence is an iframe src under /recaptcha/api2/anchor; the
// expected result is "not detected".
it('ignores passive recaptcha badges', () => {
49+
const runtime = makeRuntime();
50+
const captcha: CaptchaDiagnostics = {
51+
detected: true,
52+
confidence: 0.95,
53+
provider_hint: 'recaptcha',
54+
evidence: {
55+
iframe_src_hits: ['https://www.google.com/recaptcha/api2/anchor?ar=1'],
56+
selector_hits: [],
57+
text_hits: [],
58+
url_hits: [],
59+
},
60+
};
61+
62+
// NOTE(review): the cast to `any` suggests isCaptchaDetected is not publicly
// accessible on AgentRuntime — confirm.
const detected = (runtime as any).isCaptchaDetected(makeSnapshot(captcha));
63+
expect(detected).toBe(false);
64+
});
65+
66+
// The only evidence is the page text "I'm not a robot"; the expected result
// is "detected".
it('detects interactive captcha challenges', () => {
67+
const runtime = makeRuntime();
68+
const captcha: CaptchaDiagnostics = {
69+
detected: true,
70+
confidence: 0.95,
71+
provider_hint: 'recaptcha',
72+
evidence: {
73+
iframe_src_hits: [],
74+
selector_hits: [],
75+
text_hits: ["I'm not a robot"],
76+
url_hits: [],
77+
},
78+
};
79+
80+
const detected = (runtime as any).isCaptchaDetected(makeSnapshot(captcha));
81+
expect(detected).toBe(true);
82+
});
83+
});
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { snapshot } from '../src';
2+
import { BrowserEvaluator } from '../src/utils/browser-evaluator';
3+
4+
// Replace the browser-evaluator module with a stub: waitForCondition resolves
// to true, and evaluate is a bare mock whose result is configured by the suite.
jest.mock('../src/utils/browser-evaluator', () => ({
5+
BrowserEvaluator: {
6+
waitForCondition: jest.fn().mockResolvedValue(true),
7+
evaluate: jest.fn(),
8+
},
9+
}));
10+
11+
// The mocked BrowserEvaluator re-typed so jest mock helpers such as
// mockResolvedValue are available on its members.
const mockedBrowserEvaluator = BrowserEvaluator as jest.Mocked<typeof BrowserEvaluator>;
12+
13+
/**
 * Minimal stand-in for the browser argument of snapshot(): exposes a fixed
 * API key, a fixed API base URL and an empty page object.
 *
 * @returns the stub, cast to `any` because it provides only these three accessors.
 */
function makeBrowser() {
14+
return {
15+
getApiKey: () => 'sk_test',
16+
// Base of the 'https://api.sentienceapi.com/v1/snapshot' URL the tests expect fetch to receive.
getApiUrl: () => 'https://api.sentienceapi.com',
17+
getPage: () => ({}),
18+
} as any;
19+
}
20+
21+
/**
 * Verifies the timeout wiring of the gateway snapshot request: a timer is
 * armed with the default (30000 ms) or caller-supplied `gatewayTimeoutMs`,
 * the timer is cleared once the request settles, and an AbortSignal is
 * passed to fetch.
 */
describe('Snapshot gateway timeout', () => {
  // Payload returned by the mocked BrowserEvaluator.evaluate.
  const rawResult = {
    raw_elements: [],
    url: 'https://example.com',
    viewport: { width: 800, height: 600 },
    diagnostics: {},
  };

  // global.fetch is replaced by plain assignment, which jest.restoreAllMocks()
  // does not undo (it only reverts jest.spyOn replacements). Remember the real
  // implementation so it can be put back after every test.
  const originalFetch = (global as any).fetch;

  /** Installs a fetch stub that resolves with a minimal successful gateway response. */
  function installFetchMock(): jest.Mock {
    const fetchMock = jest.fn().mockResolvedValue({
      ok: true,
      json: async () => ({ status: 'success', elements: [], url: 'https://example.com' }),
      headers: new Headers(),
    });
    (global as any).fetch = fetchMock;
    return fetchMock;
  }

  /**
   * Replaces the global timer functions with spies. The setTimeout stub never
   * schedules the callback and always hands back `timerId`, so a test can
   * check that the same id is later passed to clearTimeout.
   */
  function stubTimers(timerId: number) {
    const setTimeoutSpy = jest
      .spyOn(global, 'setTimeout')
      .mockImplementation((() => timerId) as any);
    const clearTimeoutSpy = jest.spyOn(global, 'clearTimeout').mockImplementation(() => {});
    return { setTimeoutSpy, clearTimeoutSpy };
  }

  beforeEach(() => {
    mockedBrowserEvaluator.evaluate.mockResolvedValue(rawResult as any);
  });

  afterEach(() => {
    jest.restoreAllMocks();
    // Undo the manual fetch replacement so it cannot leak into other tests.
    if (originalFetch === undefined) {
      delete (global as any).fetch;
    } else {
      (global as any).fetch = originalFetch;
    }
  });

  it('uses default gateway timeout when not provided', async () => {
    const fetchMock = installFetchMock();
    const { setTimeoutSpy, clearTimeoutSpy } = stubTimers(123);

    await snapshot(makeBrowser(), { screenshot: false, limit: 10 });

    expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 30000);
    expect(clearTimeoutSpy).toHaveBeenCalledWith(123);
    expect(fetchMock).toHaveBeenCalledWith(
      'https://api.sentienceapi.com/v1/snapshot',
      expect.objectContaining({
        signal: expect.any(AbortSignal),
      })
    );
  });

  it('uses custom gateway timeout when provided', async () => {
    installFetchMock();
    const { setTimeoutSpy, clearTimeoutSpy } = stubTimers(456);

    await snapshot(makeBrowser(), { screenshot: false, limit: 10, gatewayTimeoutMs: 12345 });

    expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345);
    expect(clearTimeoutSpy).toHaveBeenCalledWith(456);
  });
});

0 commit comments

Comments
 (0)