From 3524618b49472296e72c96007900a82ba7231608 Mon Sep 17 00:00:00 2001 From: rcholic Date: Mon, 29 Dec 2025 07:23:46 -0800 Subject: [PATCH] get text coordinates --- README.md | 81 +++++++++++++++++++++++ examples/find-text-demo.ts | 128 +++++++++++++++++++++++++++++++++++++ src/index.ts | 1 + src/textSearch.ts | 104 ++++++++++++++++++++++++++++++ src/types.ts | 106 ++++++++++++++++++++++++++++++ 5 files changed, 420 insertions(+) create mode 100644 examples/find-text-demo.ts create mode 100644 src/textSearch.ts diff --git a/README.md b/README.md index d5fca783..909c3b23 100644 --- a/README.md +++ b/README.md @@ -523,6 +523,87 @@ const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 }); +
+

🔎 Text Search - Find Elements by Visible Text

+
+**`findTextRect(page, options)`** - Find text on page and get exact pixel coordinates
+
+Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
+
+**Example:**
+```typescript
+import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi';
+
+const browser = await SentienceBrowser.create();
+await browser.getPage().goto('https://example.com');
+
+// Find "Sign In" button (simple string syntax)
+const result = await findTextRect(browser.getPage(), "Sign In");
+if (result.status === "success" && result.results?.length) {
+  const firstMatch = result.results[0];
+  console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
+  console.log(`In viewport: ${firstMatch.in_viewport}`);
+
+  // Click on the found text
+  if (firstMatch.in_viewport) {
+    await clickRect(browser, {
+      x: firstMatch.rect.x,
+      y: firstMatch.rect.y,
+      w: firstMatch.rect.width,
+      h: firstMatch.rect.height
+    });
+  }
+}
+```
+
+**Advanced Options:**
+```typescript
+// Case-sensitive search
+const caseResult = await findTextRect(browser.getPage(), {
+  text: "LOGIN",
+  caseSensitive: true
+});
+
+// Whole word only ("log" won't match inside "login")
+const wordResult = await findTextRect(browser.getPage(), {
+  text: "log",
+  wholeWord: true
+});
+
+// Find multiple matches
+const buyResult = await findTextRect(browser.getPage(), {
+  text: "Buy",
+  maxResults: 10
+});
+for (const match of buyResult.results || []) {
+  if (match.in_viewport) {
+    console.log(`Found '${match.text}' at (${match.rect.x}, ${match.rect.y})`);
+    console.log(`Context: ...${match.context.before}[${match.text}]${match.context.after}...`);
+  }
+}
+```
+
+**Returns:** `Promise<TextRectSearchResult>` with:
+- **`status`**: "success" or "error"
+- **`results`**: Array of `TextMatch` objects with:
+  - `text` - The matched text
+  - `rect` - Absolute coordinates (with scroll offset)
+  - `viewport_rect` - Viewport-relative coordinates
+  - `context` - 
Surrounding text (before/after) + - `in_viewport` - Whether visible in current viewport + +**Use Cases:** +- Find buttons/links by visible text without CSS selectors +- Get exact pixel coordinates for click automation +- Verify text visibility and position on page +- Search dynamic content that changes frequently + +**Note:** Does not consume API credits (runs locally in browser) + +**See example:** `examples/find-text-demo.ts` + +
+ --- ## 📋 Reference
diff --git a/examples/find-text-demo.ts b/examples/find-text-demo.ts
new file mode 100644
index 00000000..19e753d6
--- /dev/null
+++ b/examples/find-text-demo.ts
@@ -0,0 +1,128 @@
+/**
+ * Text Search Demo - Using findTextRect() to locate elements by visible text
+ *
+ * This example demonstrates how to:
+ * 1. Find text on a webpage and get exact pixel coordinates
+ * 2. Use case-sensitive and whole-word matching options
+ * 3. Click on found text using clickRect()
+ * 4. Handle multiple matches and filter by viewport visibility
+ */
+
+import { SentienceBrowser, findTextRect, clickRect } from '../src';
+
+async function main() {
+  const browser = new SentienceBrowser();
+  await browser.start();
+  try {
+    const page = browser.getPage();
+    // Navigate to a search page
+    await page.goto('https://www.google.com');
+    await page.waitForLoadState('networkidle');
+
+    console.log('\n' + '='.repeat(60));
+    console.log('Text Search Demo');
+    console.log('='.repeat(60) + '\n');
+
+    // Example 1: Simple text search
+    console.log('Example 1: Finding "Google Search" button');
+    console.log('-'.repeat(60));
+    let result = await findTextRect(page, 'Google Search');
+
+    if (result.status === 'success' && result.results) {
+      console.log(`✓ Found ${result.matches} match(es) for '${result.query}'`);
+      for (let i = 0; i < Math.min(3, result.results.length); i++) {
+        const match = result.results[i];
+        console.log(`\nMatch ${i + 1}:`);
+        console.log(` Text: '${match.text}'`);
+        console.log(` Position: (${match.rect.x.toFixed(1)}, ${match.rect.y.toFixed(1)})`);
+        console.log(` Size: ${match.rect.width.toFixed(1)}x${match.rect.height.toFixed(1)} pixels`);
+        console.log(` In viewport: ${match.in_viewport}`);
+        console.log(
+          ` Context: ...${match.context.before}[${match.text}]${match.context.after}...`
+        );
+      }
+    } else {
+      console.log(`✗ Search failed: ${result.error}`);
+    }
+
+    // Example 2: Find and click search box (using the options object syntax)
+    console.log('\n\nExample 2: Finding and clicking the search box');
+    console.log('-'.repeat(60));
+    result = await findTextRect(page, {
+      text: 'Search',
+      maxResults: 5
+    });
+
+    if (result.status === 'success' && result.results) {
+      // Find the first visible match
+      for (const match of result.results) {
+        if (match.in_viewport) {
+          console.log(`✓ Found visible match: '${match.text}'`);
+          console.log(` Clicking at (${match.rect.x.toFixed(1)}, ${match.rect.y.toFixed(1)})`);
+
+          // NOTE(review): rect is page-absolute; confirm clickRect does not expect viewport coordinates
+          const clickResult = await clickRect(browser, {
+            x: match.rect.x,
+            y: match.rect.y,
+            w: match.rect.width,
+            h: match.rect.height
+          });
+
+          if (clickResult.success) {
+            console.log(` ✓ Click successful!`);
+          }
+          break;
+        }
+      }
+    }
+
+    // Example 3: Case-sensitive search
+    console.log('\n\nExample 3: Case-sensitive search for "GOOGLE"');
+    console.log('-'.repeat(60));
+    const resultInsensitive = await findTextRect(page, {
+      text: 'GOOGLE',
+      caseSensitive: false
+    });
+    const resultSensitive = await findTextRect(page, {
+      text: 'GOOGLE',
+      caseSensitive: true
+    });
+
+    console.log(`Case-insensitive search: ${resultInsensitive.matches ?? 0} matches`);
+    console.log(`Case-sensitive search: ${resultSensitive.matches ?? 0} matches`);
+
+    // Example 4: Whole word search
+    console.log('\n\nExample 4: Whole word search');
+    console.log('-'.repeat(60));
+    const resultPartial = await findTextRect(page, {
+      text: 'Search',
+      wholeWord: false
+    });
+    const resultWhole = await findTextRect(page, {
+      text: 'Search',
+      wholeWord: true
+    });
+
+    console.log(`Partial word match: ${resultPartial.matches ?? 0} matches`);
+    console.log(`Whole word only: ${resultWhole.matches ?? 0} matches`);
+
+    // Example 5: Get viewport information
+    console.log('\n\nExample 5: Viewport and scroll information');
+    console.log('-'.repeat(60));
+    result = await findTextRect(page, 'Google');
+    if (result.status === 'success' && result.viewport) {
+      console.log(`Viewport size: ${result.viewport.width}x${result.viewport.height}`);
+      if ('scroll_x' in result.viewport && 'scroll_y' in result.viewport) {
+        console.log(`Scroll position: (${result.viewport.scroll_x}, ${result.viewport.scroll_y})`);
+      }
+    }
+
+    console.log('\n' + '='.repeat(60));
+    console.log('Demo complete!');
+    console.log('='.repeat(60) + '\n');
+  } finally {
+    await browser.close(); // always release the browser, even when a step above throws
+  }
+}
+
+main().catch(console.error);
diff --git a/src/index.ts b/src/index.ts
index f99b5d8f..2ef3319b 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -14,6 +14,7 @@ export { ScriptGenerator, generate } from './generator';
 export { read, ReadOptions, ReadResult } from './read';
 export { screenshot, ScreenshotOptions } from './screenshot';
 export { showOverlay, clearOverlay } from './overlay';
+export { findTextRect } from './textSearch';
 export * from './types';
 export { saveStorageState } from './utils';

diff --git a/src/textSearch.ts b/src/textSearch.ts
new file mode 100644
index 00000000..b28ae641
--- /dev/null
+++ b/src/textSearch.ts
@@ -0,0 +1,104 @@
+/**
+ * Text search utilities - find text and get pixel coordinates
+ */
+
+import { Page } from "playwright";
+import { FindTextRectOptions, TextRectSearchResult } from "./types";
+
+/**
+ * Find all occurrences of text on the page and get their exact pixel coordinates.
+ *
+ * This function searches for text in all visible text nodes on the page and returns
+ * the bounding rectangles for each match. 
Useful for:
+ * - Finding specific UI elements by their text content
+ * - Locating buttons, links, or labels without element IDs
+ * - Getting exact coordinates for click automation
+ * - Highlighting search results visually
+ *
+ * @param page - Playwright Page instance
+ * @param options - Search text, or an options object (text, caseSensitive, wholeWord, maxResults)
+ * @returns Search result; `status` is "error" (with `error` set) when `text` is empty or whitespace
+ *
+ * @example
+ * // Find "Sign In" button
+ * const result = await findTextRect(page, { text: "Sign In" });
+ * if (result.status === "success" && result.results?.length) {
+ *   const firstMatch = result.results[0];
+ *   console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
+ *   console.log(`Size: ${firstMatch.rect.width}x${firstMatch.rect.height}`);
+ *   console.log(`In viewport: ${firstMatch.in_viewport}`);
+ * }
+ *
+ * @example
+ * // Case-sensitive search
+ * const result = await findTextRect(page, {
+ *   text: "LOGIN",
+ *   caseSensitive: true
+ * });
+ *
+ * @example
+ * // Whole word only
+ * const result = await findTextRect(page, {
+ *   text: "log",
+ *   wholeWord: true // Won't match "login"
+ * });
+ *
+ * @example
+ * // Find all matches and click the first visible one
+ * const result = await findTextRect(page, {
+ *   text: "Buy Now",
+ *   maxResults: 5
+ * });
+ * if (result.status === "success" && result.results) {
+ *   for (const match of result.results) {
+ *     if (match.in_viewport) {
+ *       // page.mouse.click takes viewport coordinates, so use viewport_rect (not rect)
+ *       await page.mouse.click(
+ *         match.viewport_rect.x + match.viewport_rect.width / 2,
+ *         match.viewport_rect.y + match.viewport_rect.height / 2
+ *       );
+ *       break;
+ *     }
+ *   }
+ * }
+ */
+export async function findTextRect(
+  page: Page,
+  options: FindTextRectOptions | string
+): Promise<TextRectSearchResult> {
+  // Support simple string input for convenience
+  const opts: FindTextRectOptions =
+    typeof options === "string" ? { text: options } : options;
+
+  const {
+    text,
+    caseSensitive = false,
+    wholeWord = false,
+    maxResults = 10,
+  } = opts;
+
+  if (!text || text.trim().length === 0) {
+    return {
+      status: "error",
+      error: "Text parameter is required and cannot be empty",
+    };
+  }
+
+  // Clamp maxResults to an integer in [1, 100]; 0, negatives and NaN fall back to 1
+  const limitedMaxResults = Math.max(1, Math.min(Math.floor(maxResults) || 1, 100));
+
+  // Call the extension's findTextRect method (opts.containerElement is not forwarded)
+  const result = await page.evaluate(
+    (evalOptions) => {
+      return (window as any).sentience.findTextRect(evalOptions);
+    },
+    {
+      text,
+      caseSensitive,
+      wholeWord,
+      maxResults: limitedMaxResults,
+    }
+  );
+
+  return result as TextRectSearchResult;
+}
diff --git a/src/types.ts b/src/types.ts
index 286e6134..dab36c80 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -120,5 +120,111 @@ export interface StorageState {
   origins: OriginStorage[];
 }

+// ========== Text Search Types (findTextRect) ==========
+
+/**
+ * Rectangle coordinates for text occurrence.
+ * All fields are absolute page coordinates; see ViewportRect for viewport-relative ones.
+ */
+export interface TextRect {
+  /** Absolute X coordinate (page coordinate with scroll offset) */
+  x: number;
+  /** Absolute Y coordinate (page coordinate with scroll offset) */
+  y: number;
+  /** Rectangle width in pixels */
+  width: number;
+  /** Rectangle height in pixels */
+  height: number;
+  /** Absolute left position (same as x) */
+  left: number;
+  /** Absolute top position (same as y) */
+  top: number;
+  /** Absolute right position (x + width) */
+  right: number;
+  /** Absolute bottom position (y + height) */
+  bottom: number;
+}
+
+/**
+ * Viewport-relative rectangle coordinates (without scroll offset)
+ */
+export interface ViewportRect {
+  /** Viewport-relative X coordinate */
+  x: number;
+  /** Viewport-relative Y coordinate */
+  y: number;
+  /** Rectangle width in pixels */
+  width: number;
+  /** Rectangle height in pixels */
+  height: number;
+}
+
+/**
+ * Context text surrounding a match
+ */
+export interface TextContext {
+  /** Text before the match (up to 20 chars) */
+  before: string;
+  /** Text after the match (up to 20 chars) */
+  after: string;
+}
+
+/**
+ * A single text match with its rectangle and context
+ */
+export interface TextMatch {
+  /** The matched text */
+  text: string;
+  /** Absolute rectangle coordinates (with scroll offset) */
+  rect: TextRect;
+  /** Viewport-relative rectangle (without scroll offset) */
+  viewport_rect: ViewportRect;
+  /** Surrounding text context */
+  context: TextContext;
+  /** Whether the match is currently visible in viewport */
+  in_viewport: boolean;
+}
+
+/**
+ * Result of findTextRect: occurrences of the text on the page with their pixel coordinates.
+ * Only `status` is required; every other field is optional, so check it before reading them.
+ */
+export interface TextRectSearchResult {
+  status: "success" | "error";
+  /** The search text that was queried */
+  query?: string;
+  /** Whether search was case-sensitive */
+  case_sensitive?: boolean;
+  /** Whether whole-word matching was used */
+  whole_word?: boolean;
+  /** Number of matches found */
+  matches?: number;
+  /** List of text matches with coordinates */
+  results?: TextMatch[];
+  /** Current viewport dimensions plus scroll offsets (scroll_x, scroll_y) */
+  viewport?: Viewport & {
+    scroll_x: number;
+    scroll_y: number;
+  };
+  /** Error message if status is 'error' */
+  error?: string;
+}
+
+/**
+ * Options for findTextRect operation (findTextRect also accepts a bare string as `text`)
+ */
+export interface FindTextRectOptions {
+  /** Text to search for (required; empty or whitespace-only text yields an error result) */
+  text: string;
+  /** Container element to search within (default: document.body). NOTE(review): findTextRect does not forward this option to the page */
+  containerElement?: Element;
+  /** Case-sensitive search (default: false) */
+  caseSensitive?: boolean;
+  /** Match whole words only (default: false) */
+  wholeWord?: boolean;
+  /** Maximum number of results to return (default: 10; findTextRect caps it at 100) */
+  maxResults?: number;
+}
+