diff --git a/README.md b/README.md index 3d111f83..667aaca7 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,11 @@ **📜 License**: Apache License 2.0 -TypeScript SDK for Sentience AI Agent Browser Automation. +TypeScript/JavaScript SDK for Sentience AI Agent Browser Automation. Build intelligent web automation agents that can see, understand, and interact with web pages like humans do. ## Installation ```bash -cd sdk-ts npm install npm run build @@ -21,20 +20,20 @@ npx playwright install chromium import { SentienceBrowser, snapshot, find, click } from './src'; async function main() { - const browser = new SentienceBrowser(undefined, undefined, false); - + const browser = new SentienceBrowser(); + try { await browser.start(); - - await browser.getPage().goto('https://example.com'); + + await browser.goto('https://example.com'); await browser.getPage().waitForLoadState('networkidle'); - - // Take snapshot + + // Take snapshot - captures all interactive elements const snap = await snapshot(browser); console.log(`Found ${snap.elements.length} elements`); - - // Find and click a link - const link = find(snap, 'role=link'); + + // Find and click a link using semantic selectors + const link = find(snap, 'role=link text~"More information"'); if (link) { const result = await click(browser, link.id); console.log(`Click success: ${result.success}`); @@ -43,131 +42,221 @@ async function main() { await browser.close(); } } + +main(); ``` +## Real-World Example: Amazon Shopping Bot + +This example demonstrates navigating Amazon, finding products, and adding items to cart: + +```typescript +import { SentienceBrowser, snapshot, find, click } from './src'; + +async function main() { + const browser = new SentienceBrowser(undefined, undefined, false); + + try { + await browser.start(); + + // Navigate to Amazon Best Sellers + await browser.goto('https://www.amazon.com/gp/bestsellers/'); + await browser.getPage().waitForLoadState('networkidle'); + await new Promise(resolve => 
setTimeout(resolve, 2000)); + + // Take snapshot and find products + const snap = await snapshot(browser); + console.log(`Found ${snap.elements.length} elements`); + + // Find first product in viewport using spatial filtering + const products = snap.elements + .filter(el => + el.role === 'link' && + el.visual_cues.is_clickable && + el.in_viewport && + !el.is_occluded && + el.bbox.y < 600 // First row + ); + + if (products.length > 0) { + // Sort by position (left to right, top to bottom) + products.sort((a, b) => a.bbox.y - b.bbox.y || a.bbox.x - b.bbox.x); + const firstProduct = products[0]; + + console.log(`Clicking: ${firstProduct.text}`); + const result = await click(browser, firstProduct.id); + + // Wait for product page + await browser.getPage().waitForLoadState('networkidle'); + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Find and click "Add to Cart" button + const productSnap = await snapshot(browser); + const addToCart = find(productSnap, 'role=button text~"add to cart"'); + + if (addToCart) { + const cartResult = await click(browser, addToCart.id); + console.log(`Added to cart: ${cartResult.success}`); + } + } + } finally { + await browser.close(); + } +} + +main(); +``` + +**See the complete tutorial**: [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md) + ## Running Examples **⚠️ Important**: You cannot use `node` directly to run TypeScript files. 
Use one of these methods: -**Option 1: Using npm scripts (recommended)** +### Option 1: Using npm scripts (recommended) ```bash npm run example:hello npm run example:basic +npm run example:query +npm run example:wait ``` -**Option 2: Using ts-node directly** +### Option 2: Using ts-node directly ```bash npx ts-node examples/hello.ts # or if ts-node is installed globally: ts-node examples/hello.ts ``` -**Option 3: Compile then run (not recommended for examples)** +### Option 3: Compile then run ```bash npm run build -# Examples would need to be compiled separately +# Then use compiled JavaScript from dist/ ``` -## Features +## Core Features -### Day 2: Browser Harness -- `SentienceBrowser` - Launch Playwright with extension loaded -- Automatic extension loading and verification +### Browser Control +- **`SentienceBrowser`** - Playwright browser with Sentience extension pre-loaded +- **`browser.goto(url)`** - Navigate with automatic extension readiness checks +- Automatic bot evasion and stealth mode +- Configurable headless/headed mode -### Day 3: Snapshot -- `snapshot(browser, options)` - Capture page state +### Snapshot - Intelligent Page Analysis +- **`snapshot(browser, options?)`** - Capture page state with AI-ranked elements +- Returns semantic elements with roles, text, importance scores, and bounding boxes +- Optional screenshot capture (PNG/JPEG) - TypeScript types for type safety -### Content Reading & Screenshots -- `read(browser, options)` - Read page content as text or markdown - - Enhanced markdown conversion using `turndown` (better than extension's lightweight conversion) - - Supports `enhance_markdown` option to use improved conversion -- `screenshot(browser, options)` - Capture standalone screenshot - - Returns base64-encoded data URL - - Supports PNG and JPEG formats with quality control - -### Day 4: Query Engine -- `query(snapshot, selector)` - Find elements matching selector -- `find(snapshot, selector)` - Find single best match -- String DSL: 
`"role=button text~'Sign in'"` -- **📖 [Complete DSL Query Guide](docs/QUERY_DSL.md)** - Comprehensive documentation with all operators, fields, and examples - -### Day 5: Actions -- `click(browser, elementId)` - Click element -- `typeText(browser, elementId, text)` - Type into element -- `press(browser, key)` - Press keyboard key - -### Day 6: Wait & Assert -- `waitFor(browser, selector, timeout)` - Wait for element -- `expect(browser, selector)` - Assertion helper - - `.toExist()` - - `.toBeVisible()` - - `.toHaveText(text)` - - `.toHaveCount(n)` +**Example:** +```typescript +const snap = await snapshot(browser, { screenshot: true }); -### Content Reading -- `read(browser, options)` - Read page content - - **Default format: `"raw"`** - Returns HTML suitable for Turndown - - `format: "raw"` - Get cleaned HTML - - `format: "markdown"` - Get high-quality markdown (uses Turndown internally) - - `format: "text"` - Get plain text - - **Examples:** - ```typescript - import { read } from './src'; - - // Get raw HTML (default) - const result = await read(browser); - const html = result.content; - - // Get high-quality markdown (uses Turndown automatically) - const result = await read(browser, { format: 'markdown' }); - const markdown = result.content; - ``` - - See `examples/read-markdown.ts` for complete examples. 
+// Access structured data +console.log(`URL: ${snap.url}`); +console.log(`Viewport: ${snap.viewport.width}x${snap.viewport.height}`); +console.log(`Elements: ${snap.elements.length}`); -## Examples +// Iterate over elements +for (const element of snap.elements) { + console.log(`${element.role}: ${element.text} (importance: ${element.importance})`); +} +``` + +### Query Engine - Semantic Element Selection +- **`query(snapshot, selector)`** - Find all matching elements +- **`find(snapshot, selector)`** - Find single best match (by importance) +- Powerful query DSL with multiple operators + +**Query Examples:** +```typescript +// Find by role and text +const button = find(snap, 'role=button text="Sign in"'); + +// Substring match (case-insensitive) +const link = find(snap, 'role=link text~"more info"'); + +// Spatial filtering +const topLeft = find(snap, 'bbox.x<=100 bbox.y<=200'); + +// Multiple conditions (AND logic) +const primaryBtn = find(snap, 'role=button clickable=true visible=true importance>800'); + +// Prefix/suffix matching +const startsWith = find(snap, 'text^="Add"'); +const endsWith = find(snap, 'text$="Cart"'); -See `examples/` directory: -- `hello.ts` - Extension bridge verification -- `basic-agent.ts` - Basic snapshot -- `query-demo.ts` - Query engine -- `wait-and-click.ts` - Wait and actions -- `read-markdown.ts` - Reading page content and converting to markdown +// Numeric comparisons +const important = query(snap, 'importance>=700'); +const firstRow = query(snap, 'bbox.y<600'); +``` + +**📖 [Complete Query DSL Guide](docs/QUERY_DSL.md)** - All operators, fields, and advanced patterns + +### Actions - Interact with Elements +- **`click(browser, elementId)`** - Click element by ID +- **`typeText(browser, elementId, text)`** - Type into input fields +- **`press(browser, key)`** - Press keyboard keys (Enter, Escape, Tab, etc.) 
-### Content Reading Example +All actions return `ActionResult` with success status, timing, and outcome: ```typescript -import { SentienceBrowser, read } from './src'; +const result = await click(browser, element.id); -const browser = new SentienceBrowser(); -await browser.start(); +console.log(`Success: ${result.success}`); +console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error" +console.log(`Duration: ${result.duration_ms}ms`); +console.log(`URL changed: ${result.url_changed}`); +``` -await browser.getPage().goto('https://example.com'); -await browser.getPage().waitForLoadState('networkidle'); +### Wait & Assertions +- **`waitFor(browser, selector, timeout?)`** - Wait for element to appear +- **`expect(browser, selector)`** - Assertion helper with fluent API -// Read as enhanced markdown (better quality) -const result = await read(browser, { - format: 'markdown', - enhance_markdown: true -}); -console.log(result.content); // High-quality markdown +**Examples:** +```typescript +// Wait for element +const result = await waitFor(browser, 'role=button text="Submit"', 10000); +if (result.found) { + console.log(`Found after ${result.duration_ms}ms`); +} -await browser.close(); +// Assertions +await expect(browser, 'role=button text="Submit"').toExist(5000); +await expect(browser, 'role=heading').toBeVisible(); +await expect(browser, 'role=button').toHaveText('Submit'); +await expect(browser, 'role=link').toHaveCount(10); ``` -### Screenshot Example +### Content Reading +- **`read(browser, options?)`** - Extract page content + - `format: "text"` - Plain text extraction + - `format: "markdown"` - High-quality markdown conversion (uses Turndown) + - `format: "raw"` - Cleaned HTML (default) +**Example:** ```typescript -import { SentienceBrowser, screenshot } from './src'; -import { writeFileSync } from 'fs'; +import { read } from './src'; -const browser = new SentienceBrowser(); -await browser.start(); +// Get markdown content +const result = 
await read(browser, { format: 'markdown' }); +console.log(result.content); // Markdown text -await browser.getPage().goto('https://example.com'); -await browser.getPage().waitForLoadState('networkidle'); +// Get plain text +const result = await read(browser, { format: 'text' }); +console.log(result.content); // Plain text +``` + +### Screenshots +- **`screenshot(browser, options?)`** - Standalone screenshot capture + - Returns base64-encoded data URL + - PNG or JPEG format + - Quality control for JPEG (1-100) + +**Example:** +```typescript +import { screenshot } from './src'; +import { writeFileSync } from 'fs'; // Capture PNG screenshot const dataUrl = await screenshot(browser, { format: 'png' }); @@ -177,17 +266,196 @@ const base64Data = dataUrl.split(',')[1]; const imageData = Buffer.from(base64Data, 'base64'); writeFileSync('screenshot.png', imageData); -await browser.close(); +// JPEG with quality control (smaller file size) +const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 }); +``` + +## Element Properties + +Elements returned by `snapshot()` have the following properties: + +```typescript +element.id // Unique identifier for interactions +element.role // ARIA role (button, link, textbox, heading, etc.) +element.text // Visible text content +element.importance // AI importance score (0-1000) +element.bbox // Bounding box (x, y, width, height) +element.visual_cues // Visual analysis (is_primary, is_clickable, background_color) +element.in_viewport // Is element visible in current viewport? +element.is_occluded // Is element covered by other elements? 
+element.z_index // CSS stacking order ``` +## Query DSL Reference + +### Basic Operators + +| Operator | Description | Example | +|----------|-------------|---------| +| `=` | Exact match | `role=button` | +| `!=` | Exclusion | `role!=link` | +| `~` | Substring (case-insensitive) | `text~"sign in"` | +| `^=` | Prefix match | `text^="Add"` | +| `$=` | Suffix match | `text$="Cart"` | +| `>`, `>=` | Greater than / greater than or equal | `importance>500` | +| `<`, `<=` | Less than / less than or equal | `bbox.y<600` | + +### Supported Fields + +- **Role**: `role=button|link|textbox|heading|...` +- **Text**: `text`, `text~`, `text^=`, `text$=` +- **Visibility**: `clickable=true|false`, `visible=true|false` +- **Importance**: `importance`, `importance>=N`, `importance<N` + +## Best Practices + +### 1. Wait for Dynamic Content +```typescript +await browser.getPage().waitForLoadState('networkidle'); +await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer +``` + +### 2. Use Multiple Strategies for Finding Elements +```typescript +// Try exact match first +let btn = find(snap, 'role=button text="Add to Cart"'); + +// Fallback to fuzzy match +if (!btn) { + btn = find(snap, 'role=button text~"cart"'); +} +``` + +### 3. Check Element Visibility Before Clicking +```typescript +if (element.in_viewport && !element.is_occluded) { + await click(browser, element.id); +} +``` + +### 4. Handle Navigation +```typescript +const result = await click(browser, linkId); +if (result.url_changed) { + await browser.getPage().waitForLoadState('networkidle'); +} +``` + +### 5. Use Screenshots Sparingly +```typescript +// Fast - no screenshot (only element data) +const snap = await snapshot(browser); + +// Slower - with screenshot (for debugging/verification) +const snap = await snapshot(browser, { screenshot: true }); +``` + +### 6. Always Close Browser +```typescript +const browser = new SentienceBrowser(); + +try { + await browser.start(); + // ...
your automation code +} finally { + await browser.close(); // Always clean up +} +``` + +## Troubleshooting + +### "Extension failed to load" +**Solution:** Build the extension first: +```bash +cd sentience-chrome +./build.sh +``` + +### "Cannot use import statement outside a module" +**Solution:** Don't use `node` directly. Use `ts-node` or npm scripts: +```bash +npx ts-node examples/hello.ts +# or +npm run example:hello +``` + +### "Element not found" +**Solutions:** +- Ensure page is loaded: `await browser.getPage().waitForLoadState('networkidle')` +- Use `waitFor()`: `await waitFor(browser, 'role=button', 10000)` +- Debug elements: `console.log(snap.elements.map(el => el.text))` + +### Button not clickable +**Solutions:** +- Check visibility: `element.in_viewport && !element.is_occluded` +- Scroll to element: `await browser.getPage().evaluate(\`window.sentience_registry[${element.id}].scrollIntoView()\`)` + ## Documentation -- **📖 [Query DSL Guide](docs/QUERY_DSL.md)** - Complete guide to the semantic query language -- API Contract: `../spec/SNAPSHOT_V1.md` -- Type Definitions: `../spec/sdk-types.md` +- **📖 [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md)** - Complete tutorial with real-world example +- **📖 [Query DSL Guide](docs/QUERY_DSL.md)** - Advanced query patterns and operators +- **📄 [API Contract](../spec/SNAPSHOT_V1.md)** - Snapshot API specification +- **📄 [Type Definitions](../spec/sdk-types.md)** - TypeScript/Python type definitions + +## License + +**📜 License** + +This SDK is licensed under the Apache License 2.0. + +Note: The SDK communicates with proprietary Sentience services and browser components that are not open source. Access to those components is governed by Sentience's Terms of Service. 
diff --git a/examples/basic-agent.ts b/examples/basic-agent.ts index f3e9d68f..22b9f1bf 100644 --- a/examples/basic-agent.ts +++ b/examples/basic-agent.ts @@ -1,12 +1,15 @@ /** - * Day 3 Example: Basic snapshot functionality + * Example: Basic snapshot functionality */ import { SentienceBrowser, snapshot } from '../src/index'; import * as fs from 'fs'; async function main() { - const browser = new SentienceBrowser(undefined, undefined, false); + // Get API key from environment variable (optional - uses free tier if not set) + const apiKey = process.env.SENTIENCE_API_KEY as string | undefined; + + const browser = new SentienceBrowser(apiKey, undefined, false); try { await browser.start(); diff --git a/examples/hello.ts b/examples/hello.ts index 2ade4432..4b9e91b6 100644 --- a/examples/hello.ts +++ b/examples/hello.ts @@ -1,11 +1,14 @@ /** - * Day 2 Example: Verify extension bridge is loaded + * Example: Verify extension bridge is loaded */ import { SentienceBrowser } from '../src/index'; async function main() { - const browser = new SentienceBrowser(undefined, undefined, false); + // Get API key from environment variable (optional - uses free tier if not set) + const apiKey = process.env.SENTIENCE_API_KEY as string | undefined; + + const browser = new SentienceBrowser(apiKey, undefined, false); try { await browser.start(); diff --git a/examples/query-demo.ts b/examples/query-demo.ts index fe154388..46ff3d12 100644 --- a/examples/query-demo.ts +++ b/examples/query-demo.ts @@ -1,11 +1,14 @@ /** - * Day 4 Example: Query engine demonstration + * Example: Query engine demonstration */ import { SentienceBrowser, snapshot, query, find } from '../src/index'; async function main() { - const browser = new SentienceBrowser(undefined, undefined, false); + // Get API key from environment variable (optional - uses free tier if not set) + const apiKey = process.env.SENTIENCE_API_KEY as string | undefined; + + const browser = new SentienceBrowser(apiKey, undefined, false); try { 
await browser.start(); diff --git a/examples/read-markdown.ts b/examples/read-markdown.ts index 2bb8107b..25b6f500 100644 --- a/examples/read-markdown.ts +++ b/examples/read-markdown.ts @@ -9,8 +9,11 @@ import { SentienceBrowser, read } from '../src'; import TurndownService from 'turndown'; async function main() { + // Get API key from environment variable (optional - uses free tier if not set) + const apiKey = process.env.SENTIENCE_API_KEY as string | undefined; + // Initialize browser - const browser = new SentienceBrowser(); + const browser = new SentienceBrowser(apiKey); await browser.start(); try { diff --git a/examples/wait-and-click.ts b/examples/wait-and-click.ts index 4476a27c..c12a89f2 100644 --- a/examples/wait-and-click.ts +++ b/examples/wait-and-click.ts @@ -1,11 +1,14 @@ /** - * Day 5-6 Example: Wait for element and click + * Example: Wait for element and click */ import { SentienceBrowser, snapshot, find, waitFor, click, expect } from '../src/index'; async function main() { - const browser = new SentienceBrowser(undefined, undefined, false); + // Get API key from environment variable (optional - uses free tier if not set) + const apiKey = process.env.SENTIENCE_API_KEY as string | undefined; + + const browser = new SentienceBrowser(apiKey, undefined, false); try { await browser.start(); diff --git a/tsconfig.json b/tsconfig.json index 19f7f605..7ac147c1 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -19,7 +19,8 @@ "exclude": ["node_modules", "dist", "examples", "tests", "src/extension"], "ts-node": { "compilerOptions": { - "lib": ["ES2020", "DOM"] + "lib": ["ES2020", "DOM"], + "types": ["node"] } } }