From 4819642882d13ac66cbf80dfdd084b7c1ca91be3 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 28 Dec 2025 09:50:54 -0800 Subject: [PATCH 1/2] visual overlay on elements --- src/index.ts | 1 + src/overlay.ts | 103 ++++++++++++++++++++++++++++++++++++++++++++++++ src/snapshot.ts | 19 +++++++++ 3 files changed, 123 insertions(+) create mode 100644 src/overlay.ts diff --git a/src/index.ts b/src/index.ts index 90f6910f..f99b5d8f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,6 +13,7 @@ export { Recorder, Trace, TraceStep, record } from './recorder'; export { ScriptGenerator, generate } from './generator'; export { read, ReadOptions, ReadResult } from './read'; export { screenshot, ScreenshotOptions } from './screenshot'; +export { showOverlay, clearOverlay } from './overlay'; export * from './types'; export { saveStorageState } from './utils'; diff --git a/src/overlay.ts b/src/overlay.ts new file mode 100644 index 00000000..d78eb187 --- /dev/null +++ b/src/overlay.ts @@ -0,0 +1,103 @@ +/** + * Visual overlay utilities - show/clear element highlights in browser + */ + +import { SentienceBrowser } from './browser'; +import { Element, Snapshot } from './types'; + +/** + * Display visual overlay highlighting elements in the browser + * + * This function shows a Shadow DOM overlay with color-coded borders around + * detected elements. Useful for debugging, learning, and validating element detection. 
+ * + * @param browser - SentienceBrowser instance + * @param elements - Can be: + * - List of Element objects (from snapshot.elements) + * - List of raw element objects (from snapshot result or API response) + * - Snapshot object (will use snapshot.elements) + * @param targetElementId - Optional ID of element to highlight in red (default: null) + * + * Color Coding: + * - Red: Target element (when targetElementId is specified) + * - Blue: Primary elements (is_primary=true) + * - Green: Regular interactive elements + * + * Visual Indicators: + * - Border thickness and opacity scale with importance score + * - Semi-transparent fill for better visibility + * - Importance badges showing scores + * - Star icon for primary elements + * - Target emoji for the target element + * + * Auto-clear: Overlay automatically disappears after 5 seconds + * + * @example + * // Show overlay from snapshot + * const snap = await snapshot(browser); + * await showOverlay(browser, snap); + * + * @example + * // Show overlay with custom elements + * const elements = [{id: 1, bbox: {x: 100, y: 100, width: 200, height: 50}, ...}]; + * await showOverlay(browser, elements); + * + * @example + * // Show overlay with target element highlighted in red + * await showOverlay(browser, snap, 42); + * + * @example + * // Clear overlay manually before 5 seconds + * await clearOverlay(browser); + */ +export async function showOverlay( + browser: SentienceBrowser, + elements: Element[] | any[] | Snapshot, + targetElementId: number | null = null +): Promise<void> { + const page = browser.getPage(); + + // Handle different input types + let elementsList: any[]; + if ('elements' in elements && Array.isArray(elements.elements)) { + // It's a Snapshot object + elementsList = elements.elements; + } else if (Array.isArray(elements)) { + // It's already an array + elementsList = elements; + } else { + throw new Error('elements must be a Snapshot object or array of elements'); + } + + // Call extension API + await
page.evaluate( + (args: { elements: any[]; targetId: number | null }) => { + if ((window as any).sentience && (window as any).sentience.showOverlay) { + (window as any).sentience.showOverlay(args.elements, args.targetId); + } else { + console.warn('[Sentience SDK] showOverlay not available - is extension loaded?'); + } + }, + { elements: elementsList, targetId: targetElementId } + ); +} + +/** + * Clear the visual overlay manually (before 5-second auto-clear) + * + * @param browser - SentienceBrowser instance + * + * @example + * await showOverlay(browser, snap); + * // ... inspect overlay ... + * await clearOverlay(browser); // Remove immediately + */ +export async function clearOverlay(browser: SentienceBrowser): Promise<void> { + const page = browser.getPage(); + + await page.evaluate(() => { + if ((window as any).sentience && (window as any).sentience.clearOverlay) { + (window as any).sentience.clearOverlay(); + } + }); +} diff --git a/src/snapshot.ts b/src/snapshot.ts index 72bfe89f..2b3b12b0 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -19,6 +19,7 @@ export interface SnapshotOptions { save_trace?: boolean; // Save raw_elements to JSON for benchmarking/training trace_path?: string; // Path to save trace file (default: "trace_{timestamp}.json") goal?: string; // Optional goal/task description for the snapshot + show_overlay?: boolean; // Show visual overlay highlighting elements in browser } /** @@ -115,6 +116,15 @@ async function snapshotViaExtension( _saveTraceToFile(result.raw_elements, options.trace_path); } + // Show visual overlay if requested + if (options.show_overlay && result.raw_elements) { + await page.evaluate((elements: any[]) => { + if ((window as any).sentience && (window as any).sentience.showOverlay) { + (window as any).sentience.showOverlay(elements, null); + } + }, result.raw_elements); + } + // Basic validation if (result.status !== 'success' && result.status !== 'error') { throw new Error(`Invalid snapshot status: ${result.status}`); @@
-204,6 +214,15 @@ async function snapshotViaApi( error: apiResult.error, }; + // Show visual overlay if requested (use API-ranked elements) + if (options.show_overlay && apiResult.elements) { + await page.evaluate((elements: any[]) => { + if ((window as any).sentience && (window as any).sentience.showOverlay) { + (window as any).sentience.showOverlay(elements, null); + } + }, apiResult.elements); + } + return snapshotData; } catch (e: any) { throw new Error(`API request failed: ${e.message}`); From 41450f90c2984b4d2702fe22116c2c07d7ca5c3b Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 28 Dec 2025 10:12:12 -0800 Subject: [PATCH 2/2] bump version; reorg --- README.md | 510 ++++++++++++++++++++++++--------------------------- package.json | 2 +- 2 files changed, 237 insertions(+), 275 deletions(-) diff --git a/README.md b/README.md index fd03b147..d5fca783 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The SDK is open under ELv2; the core semantic geometry and reliability logic runs in Sentience-hosted services. -## Installation +## 📦 Installation ```bash # Install from npm @@ -18,11 +18,12 @@ npm install npm run build ``` -## Quick Start: Choose Your Abstraction Level +## 🚀 Quick Start: Choose Your Abstraction Level Sentience SDK offers **4 levels of abstraction** - choose based on your needs: -### 💬 Level 4: Conversational Agent (Highest Abstraction) - **NEW in v0.3.0** +
+💬 Level 4: Conversational Agent (Highest Abstraction) - NEW in v0.3.0 Complete automation with natural conversation. Just describe what you want, and the agent plans and executes everything: @@ -53,7 +54,10 @@ await browser.close(); **Code reduction:** 99% less code - describe goals in natural language **Requirements:** OpenAI or Anthropic API key -### 🤖 Level 3: Agent (Natural Language Commands) - **Recommended for Most Users** +
+ +
+🤖 Level 3: Agent (Natural Language Commands) - Recommended for Most Users Zero coding knowledge needed. Just write what you want in plain English: @@ -81,7 +85,10 @@ await browser.close(); **Code reduction:** 95-98% less code vs manual approach **Requirements:** OpenAI API key (or Anthropic for Claude) -### 🔧 Level 2: Direct SDK (Technical Control) +
+ +
+🔧 Level 2: Direct SDK (Technical Control) Full control with semantic selectors. For technical users who want precision: @@ -109,7 +116,10 @@ await browser.close(); **Code reduction:** Still 80% less code vs raw Playwright **Requirements:** Only Sentience API key -### ⚙️ Level 1: Raw Playwright (Maximum Control) +
+ +
+βš™οΈ Level 1: Raw Playwright (Maximum Control) For when you need complete low-level control (rare): @@ -127,9 +137,12 @@ await browser.close(); **When to use:** Very specific edge cases, custom browser configs **Tradeoffs:** No semantic intelligence, brittle selectors, more code +
+ --- -## Agent Execution Tracing (NEW in v0.3.1) +
+

📊 Agent Execution Tracing (NEW in v0.3.1)

Record complete agent execution traces for debugging, analysis, and replay. Traces capture every step, snapshot, LLM decision, and action in a structured JSONL format. @@ -187,187 +200,18 @@ Each agent action generates multiple events: 4. **action** - Executed action (type, element ID, success) 5. **error** - Any failures (error message, retry attempt) -**Example trace output:** -```jsonl -{"v":1,"type":"run_start","ts":"2025-12-26T10:00:00.000Z","run_id":"abc-123","seq":1,"data":{"agent":"SentienceAgent","llm_model":"gpt-4o"}} -{"v":1,"type":"step_start","ts":"2025-12-26T10:00:01.000Z","run_id":"abc-123","seq":2,"step_id":"step-1","data":{"step_index":1,"goal":"Click the search box","attempt":0,"url":"https://google.com"}} -{"v":1,"type":"snapshot","ts":"2025-12-26T10:00:01.500Z","run_id":"abc-123","seq":3,"step_id":"step-1","data":{"url":"https://google.com","elements":[...]}} -{"v":1,"type":"llm_response","ts":"2025-12-26T10:00:02.000Z","run_id":"abc-123","seq":4,"step_id":"step-1","data":{"model":"gpt-4o","prompt_tokens":250,"completion_tokens":10,"response_text":"CLICK(42)"}} -{"v":1,"type":"action","ts":"2025-12-26T10:00:02.500Z","run_id":"abc-123","seq":5,"step_id":"step-1","data":{"action_type":"click","element_id":42,"success":true}} -{"v":1,"type":"run_end","ts":"2025-12-26T10:00:03.000Z","run_id":"abc-123","seq":6,"data":{"steps":1}} -``` - -### Reading and Analyzing Traces - -```typescript -import * as fs from 'fs'; - -// Read trace file -const content = fs.readFileSync(`traces/${runId}.jsonl`, 'utf-8'); -const events = content.trim().split('\n').map(JSON.parse); - -console.log(`Total events: ${events.length}`); - -// Analyze events -events.forEach(event => { - console.log(`[${event.seq}] ${event.type} - ${event.ts}`); -}); - -// Filter by type -const actions = events.filter(e => e.type === 'action'); -console.log(`Actions taken: ${actions.length}`); - -// Get token usage -const llmEvents = events.filter(e => e.type === 'llm_response'); -const 
totalTokens = llmEvents.reduce((sum, e) => sum + (e.data.prompt_tokens || 0) + (e.data.completion_tokens || 0), 0); -console.log(`Total tokens: ${totalTokens}`); -``` - -### Tracing Without Agent (Manual) - -You can also use the tracer directly for custom workflows: - -```typescript -import { Tracer, JsonlTraceSink } from 'sentienceapi'; -import { randomUUID } from 'crypto'; - -const runId = randomUUID(); -const sink = new JsonlTraceSink(`traces/${runId}.jsonl`); -const tracer = new Tracer(runId, sink); - -// Emit custom events -tracer.emit('custom_event', { - message: 'Something happened', - details: { foo: 'bar' } -}); - -// Use convenience methods -tracer.emitRunStart('MyAgent', 'gpt-4o'); -tracer.emitStepStart('step-1', 1, 'Do something'); -tracer.emitError('step-1', 'Something went wrong'); -tracer.emitRunEnd(1); - -// Flush to disk -await tracer.close(); -``` - ### Schema Compatibility Traces are **100% compatible** with Python SDK traces - use the same tools to analyze traces from both TypeScript and Python agents! 
**See full example:** [examples/agent-with-tracing.ts](examples/agent-with-tracing.ts) ---- - -## Agent Layer Examples - -### Google Search (6 lines of code) - -```typescript -import { SentienceBrowser, SentienceAgent, OpenAIProvider } from 'sentienceapi'; - -const browser = await SentienceBrowser.create({ apiKey: apiKey }); -const llm = new OpenAIProvider(openaiKey, 'gpt-4o-mini'); -const agent = new SentienceAgent(browser, llm); - -await browser.getPage().goto('https://www.google.com'); -await agent.act('Click the search box'); -await agent.act("Type 'mechanical keyboards' into the search field"); -await agent.act('Press Enter key'); -await agent.act('Click the first non-ad search result'); - -await browser.close(); -``` - -**See full example:** [examples/agent-google-search.ts](examples/agent-google-search.ts) - -### Using Anthropic Claude Instead of GPT - -```typescript -import { SentienceAgent, AnthropicProvider } from 'sentienceapi'; - -// Swap OpenAI for Anthropic - same API! -const llm = new AnthropicProvider( - process.env.ANTHROPIC_API_KEY!, - 'claude-3-5-sonnet-20241022' -); - -const agent = new SentienceAgent(browser, llm); -await agent.act('Click the search button'); // Works exactly the same -``` - -**BYOB (Bring Your Own Brain):** OpenAI, Anthropic, or implement `LLMProvider` for any model. - -**See full example:** [examples/agent-with-anthropic.ts](examples/agent-with-anthropic.ts) - -### Amazon Shopping (98% code reduction) - -**Before (manual approach):** 350 lines -**After (agent layer):** 6 lines - -```typescript -await agent.act('Click the search box'); -await agent.act("Type 'wireless mouse' into the search field"); -await agent.act('Press Enter key'); -await agent.act('Click the first visible product in the search results'); -await agent.act("Click the 'Add to Cart' button"); -``` - -**See full example:** [examples/agent-amazon-shopping.ts](examples/agent-amazon-shopping.ts) +
--- -## Installation for Agent Layer - -```bash -# Install core SDK -npm install sentienceapi - -# Install LLM provider (choose one or both) -npm install openai # For GPT-4, GPT-4o, GPT-4o-mini -npm install @anthropic-ai/sdk # For Claude 3.5 Sonnet - -# Set API keys -export SENTIENCE_API_KEY="your-sentience-key" -export OPENAI_API_KEY="your-openai-key" # OR -export ANTHROPIC_API_KEY="your-anthropic-key" -``` - ---- - -## Direct SDK Quick Start - -```typescript -import { SentienceBrowser, snapshot, find, click } from './src'; - -async function main() { - const browser = new SentienceBrowser(); - - try { - await browser.start(); - - await browser.goto('https://example.com'); - await browser.getPage().waitForLoadState('networkidle'); - - // Take snapshot - captures all interactive elements - const snap = await snapshot(browser); - console.log(`Found ${snap.elements.length} elements`); - - // Find and click a link using semantic selectors - const link = find(snap, 'role=link text~"More information"'); - if (link) { - const result = await click(browser, link.id); - console.log(`Click success: ${result.success}`); - } - } finally { - await browser.close(); - } -} - -main(); -``` - -## Real-World Example: Amazon Shopping Bot +
+

💼 Real-World Example: Amazon Shopping Bot

This example demonstrates navigating Amazon, finding products, and adding items to cart: @@ -428,50 +272,38 @@ async function main() { main(); ``` -**See the complete tutorial**: [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md) - -## Running Examples - -**⚠️ Important**: You cannot use `node` directly to run TypeScript files. Use one of these methods: +**📖 See the complete tutorial:** [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md) -### Option 1: Using npm scripts (recommended) -```bash -npm run example:hello -npm run example:basic -npm run example:query -npm run example:wait -``` +
-### Option 2: Using ts-node directly -```bash -npx ts-node examples/hello.ts -# or if ts-node is installed globally: -ts-node examples/hello.ts -``` +--- -### Option 3: Compile then run -```bash -npm run build -# Then use compiled JavaScript from dist/ -``` +## 📚 Core Features -## Core Features +
+

🌐 Browser Control

-### Browser Control - **`SentienceBrowser`** - Playwright browser with Sentience extension pre-loaded - **`browser.goto(url)`** - Navigate with automatic extension readiness checks - Automatic bot evasion and stealth mode - Configurable headless/headed mode -### Snapshot - Intelligent Page Analysis -- **`snapshot(browser, options?)`** - Capture page state with AI-ranked elements +
+ +
+

📸 Snapshot - Intelligent Page Analysis

+ +**`snapshot(browser, options?)`** - Capture page state with AI-ranked elements + +Features: - Returns semantic elements with roles, text, importance scores, and bounding boxes - Optional screenshot capture (PNG/JPEG) +- Optional visual overlay to see what elements are detected - TypeScript types for type safety **Example:** ```typescript -const snap = await snapshot(browser, { screenshot: true }); +const snap = await snapshot(browser, { screenshot: true, show_overlay: true }); // Access structured data console.log(`URL: ${snap.url}`); @@ -484,7 +316,11 @@ for (const element of snap.elements) { } ``` -### Query Engine - Semantic Element Selection +
+ +
+

πŸ” Query Engine - Semantic Element Selection

+ - **`query(snapshot, selector)`** - Find all matching elements - **`find(snapshot, selector)`** - Find single best match (by importance) - Powerful query DSL with multiple operators @@ -514,7 +350,11 @@ const firstRow = query(snap, 'bbox.y<600'); **📖 [Complete Query DSL Guide](docs/QUERY_DSL.md)** - All operators, fields, and advanced patterns -### Actions - Interact with Elements +
+ +
+

👆 Actions - Interact with Elements

+ - **`click(browser, elementId)`** - Click element by ID - **`clickRect(browser, rect)`** - Click at center of rectangle (coordinate-based) - **`typeText(browser, elementId, text)`** - Type into input fields @@ -554,7 +394,11 @@ if (element) { } ``` -### Wait & Assertions +
+ +
+

⏱️ Wait & Assertions

+ - **`waitFor(browser, selector, timeout?, interval?, useApi?)`** - Wait for element to appear - **`expect(browser, selector)`** - Assertion helper with fluent API @@ -587,11 +431,55 @@ await expect(browser, 'role=button').toHaveText('Submit'); await expect(browser, 'role=link').toHaveCount(10); ``` -### Content Reading -- **`read(browser, options?)`** - Extract page content - - `format: "text"` - Plain text extraction - - `format: "markdown"` - High-quality markdown conversion (uses Turndown) - - `format: "raw"` - Cleaned HTML (default) +
+ +
+

🎨 Visual Overlay - Debug Element Detection

+ +- **`showOverlay(browser, elements, targetElementId?)`** - Display visual overlay highlighting elements +- **`clearOverlay(browser)`** - Clear overlay manually + +Show color-coded borders around detected elements to debug, validate, and understand what Sentience sees: + +```typescript +import { showOverlay, clearOverlay } from 'sentienceapi'; + +// Take snapshot once +const snap = await snapshot(browser); + +// Show overlay anytime without re-snapshotting +await showOverlay(browser, snap); // Auto-clears after 5 seconds + +// Highlight specific target element in red +const button = find(snap, 'role=button text~"Submit"'); +await showOverlay(browser, snap, button.id); + +// Clear manually before 5 seconds +await new Promise(resolve => setTimeout(resolve, 2000)); +await clearOverlay(browser); +``` + +**Color Coding:** +- 🔴 Red: Target element +- 🔵 Blue: Primary elements (`is_primary=true`) +- 🟢 Green: Regular interactive elements + +**Visual Indicators:** +- Border thickness/opacity scales with importance +- Semi-transparent fill +- Importance badges +- Star icons for primary elements +- Auto-clear after 5 seconds + +
+ +
+

📄 Content Reading

+ +**`read(browser, options?)`** - Extract page content +- `format: "text"` - Plain text extraction +- `format: "markdown"` - High-quality markdown conversion (uses Turndown) +- `format: "raw"` - Cleaned HTML (default) **Example:** ```typescript @@ -606,11 +494,15 @@ const result = await read(browser, { format: 'text' }); console.log(result.content); // Plain text ``` -### Screenshots -- **`screenshot(browser, options?)`** - Standalone screenshot capture - - Returns base64-encoded data URL - - PNG or JPEG format - - Quality control for JPEG (1-100) +
+ +
+

📷 Screenshots

+ +**`screenshot(browser, options?)`** - Standalone screenshot capture +- Returns base64-encoded data URL +- PNG or JPEG format +- Quality control for JPEG (1-100) **Example:** ```typescript @@ -629,7 +521,14 @@ writeFileSync('screenshot.png', imageData); const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 }); ``` -## Element Properties +
+ +--- + +## 📋 Reference + +
+

Element Properties

Elements returned by `snapshot()` have the following properties: @@ -645,7 +544,10 @@ element.is_occluded // Is element covered by other elements? element.z_index // CSS stacking order ``` -## Query DSL Reference +
+ +
+

Query DSL Reference

### Basic Operators @@ -668,38 +570,14 @@ element.z_index // CSS stacking order - **Position**: `bbox.x`, `bbox.y`, `bbox.width`, `bbox.height` - **Layering**: `z_index` -## Examples - -See the `examples/` directory for complete working examples: - -### Agent Layer (Level 3 - Natural Language) -- **`agent-google-search.ts`** - Google search automation with natural language commands -- **`agent-amazon-shopping.ts`** - Amazon shopping bot (6 lines vs 350 lines manual code) -- **`agent-with-anthropic.ts`** - Using Anthropic Claude instead of OpenAI GPT - -### Direct SDK (Level 2 - Technical Control) -- **`hello.ts`** - Extension bridge verification -- **`basic-agent.ts`** - Basic snapshot and element inspection -- **`query-demo.ts`** - Query engine demonstrations -- **`wait-and-click.ts`** - Waiting for elements and performing actions -- **`read-markdown.ts`** - Content extraction and markdown conversion - -## Testing - -```bash -# Run all tests -npm test - -# Run with coverage -npm run test:coverage +
-# Run specific test file -npm test -- snapshot.test.ts -``` +--- -## Configuration +## ⚙️ Configuration -### Viewport Size +
+

Viewport Size

Default viewport is **1280x800** pixels. You can customize it using Playwright's API: @@ -713,7 +591,10 @@ await browser.getPage().setViewportSize({ width: 1920, height: 1080 }); await browser.goto('https://example.com'); ``` -### Headless Mode +
+ +
+

Headless Mode

```typescript // Headed mode (shows browser window) @@ -726,7 +607,10 @@ const browser = new SentienceBrowser(undefined, undefined, true); const browser = new SentienceBrowser(); // headless=true if CI=true, else false ``` -### Residential Proxy Support +
+ +
+

🌍 Residential Proxy Support

For users running from datacenters (AWS, DigitalOcean, etc.), you can configure a residential proxy to prevent IP-based detection by Cloudflare, Akamai, and other anti-bot services. @@ -771,20 +655,12 @@ await agent.act('Navigate to example.com'); The SDK automatically adds WebRTC leak protection flags when a proxy is configured, preventing your real datacenter IP from being exposed via WebRTC even when using proxies. **HTTPS Certificate Handling:** -The SDK automatically ignores HTTPS certificate errors when a proxy is configured, as residential proxies often use self-signed certificates for SSL interception. This ensures seamless navigation to HTTPS sites through the proxy. - -**Example:** -```bash -# Run with proxy via environment variable -SENTIENCE_PROXY=http://user:pass@proxy.com:8000 npm run example:proxy - -# Or via command line argument -ts-node examples/proxy-example.ts --proxy=http://user:pass@proxy.com:8000 -``` +The SDK automatically ignores HTTPS certificate errors when a proxy is configured, as residential proxies often use self-signed certificates for SSL interception. -**Note:** The proxy is configured at the browser level, so all traffic (including the Chrome extension) routes through the proxy. No changes to the extension are required. +
-### Authentication Session Injection +
+

πŸ” Authentication Session Injection

Inject pre-recorded authentication sessions (cookies + localStorage) to start your agent already logged in, bypassing login screens, 2FA, and CAPTCHAs. This saves tokens and reduces costs by eliminating login steps. @@ -833,7 +709,14 @@ await browser3.start(); See `examples/auth-injection-agent.ts` for complete examples. -## Best Practices +
+ +--- + +## 💡 Best Practices + +
+Click to expand best practices ### 1. Wait for Dynamic Content ```typescript @@ -889,7 +772,14 @@ try { } ``` -## Troubleshooting +
+ +--- + +## 🛠️ Troubleshooting + +
+Click to expand common issues and solutions ### "Extension failed to load" **Solution:** Build the extension first: @@ -915,18 +805,91 @@ npm run example:hello ### Button not clickable **Solutions:** - Check visibility: `element.in_viewport && !element.is_occluded` -- Scroll to element: `await browser.getPage().evaluate(\`window.sentience_registry[${element.id}].scrollIntoView()\`)` +- Scroll to element: ``await browser.getPage().evaluate(`window.sentience_registry[${element.id}].scrollIntoView()`)`` + +
+ +--- + +## 💻 Examples & Testing + +
+

Agent Layer Examples (Level 3 - Natural Language)

+ +- **`agent-google-search.ts`** - Google search automation with natural language commands +- **`agent-amazon-shopping.ts`** - Amazon shopping bot (6 lines vs 350 lines manual code) +- **`agent-with-anthropic.ts`** - Using Anthropic Claude instead of OpenAI GPT +- **`agent-with-tracing.ts`** - Agent execution tracing for debugging and analysis -## Documentation +
+ +
+

Direct SDK Examples (Level 2 - Technical Control)

+ +- **`hello.ts`** - Extension bridge verification +- **`basic-agent.ts`** - Basic snapshot and element inspection +- **`query-demo.ts`** - Query engine demonstrations +- **`wait-and-click.ts`** - Waiting for elements and performing actions +- **`read-markdown.ts`** - Content extraction and markdown conversion + +
+ +
+

Running Examples

+ +**⚠️ Important**: You cannot use `node` directly to run TypeScript files. Use one of these methods: + +### Option 1: Using npm scripts (recommended) +```bash +npm run example:hello +npm run example:basic +npm run example:query +npm run example:wait +``` + +### Option 2: Using ts-node directly +```bash +npx ts-node examples/hello.ts +# or if ts-node is installed globally: +ts-node examples/hello.ts +``` + +### Option 3: Compile then run +```bash +npm run build +# Then use compiled JavaScript from dist/ +``` + +
+ +
+

Testing

+ +```bash +# Run all tests +npm test + +# Run with coverage +npm run test:coverage + +# Run specific test file +npm test -- snapshot.test.ts +``` + +
+ +--- + +## 📖 Documentation - **📖 [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md)** - Complete tutorial with real-world example - **📖 [Query DSL Guide](docs/QUERY_DSL.md)** - Advanced query patterns and operators - **📄 [API Contract](../spec/SNAPSHOT_V1.md)** - Snapshot API specification - **📄 [Type Definitions](../spec/sdk-types.md)** - TypeScript/Python type definitions -## License +--- -📜 **License** +## 📜 License This SDK is licensed under the **Elastic License 2.0 (ELv2)**. @@ -936,11 +899,10 @@ The Elastic License 2.0 allows you to use, modify, and distribute this SDK for i - This SDK is a **client-side library** that communicates with proprietary Sentience services and browser components. -- The Sentience backend services (including semantic geometry grounding, ranking, visual cues, and trace processing) are **not open source** and are governed by Sentience’s Terms of Service. +- The Sentience backend services (including semantic geometry grounding, ranking, visual cues, and trace processing) are **not open source** and are governed by Sentience's Terms of Service. -- Use of this SDK does **not** grant rights to operate, replicate, or reimplement Sentience’s hosted services. +- Use of this SDK does **not** grant rights to operate, replicate, or reimplement Sentience's hosted services. For commercial usage, hosted offerings, or enterprise deployments, please contact Sentience to obtain a commercial license. See the full license text in [`LICENSE`](./LICENSE.md). - diff --git a/package.json b/package.json index 215978b0..c06dd9cf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sentienceapi", - "version": "0.90.1", + "version": "0.90.2", "description": "TypeScript SDK for Sentience AI Agent Browser Automation", "main": "dist/index.js", "types": "dist/index.d.ts",