Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,87 @@ const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 });

</details>

<details>
<summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>

**`findTextRect(page, options)`** - Find text on page and get exact pixel coordinates

Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.

**Example:**
```typescript
import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi';

const browser = await SentienceBrowser.create();
await browser.getPage().goto('https://example.com');

// Find "Sign In" button (simple string syntax)
const result = await findTextRect(browser.getPage(), "Sign In");
if (result.status === "success" && result.results) {
const firstMatch = result.results[0];
console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
console.log(`In viewport: ${firstMatch.in_viewport}`);

// Click on the found text
if (firstMatch.in_viewport) {
await clickRect(browser, {
x: firstMatch.rect.x,
y: firstMatch.rect.y,
w: firstMatch.rect.width,
h: firstMatch.rect.height
});
}
}
```

**Advanced Options:**
```typescript
// Case-sensitive search
const caseSensitiveResult = await findTextRect(browser.getPage(), {
  text: "LOGIN",
  caseSensitive: true
});

// Whole word only ("log" won't match inside "login" or "loginButton")
const wholeWordResult = await findTextRect(browser.getPage(), {
  text: "log",
  wholeWord: true
});

// Find multiple matches
const result = await findTextRect(browser.getPage(), {
text: "Buy",
maxResults: 10
});
for (const match of result.results || []) {
if (match.in_viewport) {
console.log(`Found '${match.text}' at (${match.rect.x}, ${match.rect.y})`);
console.log(`Context: ...${match.context.before}[${match.text}]${match.context.after}...`);
}
}
```

**Returns:** `Promise<TextRectSearchResult>` with:
- **`status`**: "success" or "error"
- **`results`**: Array of `TextMatch` objects with:
- `text` - The matched text
- `rect` - Absolute coordinates (with scroll offset)
- `viewport_rect` - Viewport-relative coordinates
- `context` - Surrounding text (before/after)
- `in_viewport` - Whether visible in current viewport

**Use Cases:**
- Find buttons/links by visible text without CSS selectors
- Get exact pixel coordinates for click automation
- Verify text visibility and position on page
- Search dynamic content that changes frequently

**Note:** Does not consume API credits (runs locally in browser)

**See example:** `examples/find-text-demo.ts`

</details>

---

## 📋 Reference
Expand Down
128 changes: 128 additions & 0 deletions examples/find-text-demo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
* Text Search Demo - Using findTextRect() to locate elements by visible text
*
* This example demonstrates how to:
* 1. Find text on a webpage and get exact pixel coordinates
* 2. Use case-sensitive and whole-word matching options
* 3. Click on found text using clickRect()
* 4. Handle multiple matches and filter by viewport visibility
*/

import { SentienceBrowser, findTextRect, clickRect } from '../src';

/**
 * Runs the text-search demo against https://www.google.com.
 *
 * Walks through five findTextRect() scenarios (simple search, find-and-click,
 * case sensitivity, whole-word matching, viewport info) and logs the outcome
 * of each to the console.
 *
 * The browser is closed in a `finally` block so that a failure in any step
 * (navigation, search, click) does not leave the browser running.
 */
async function main() {
  const browser = new SentienceBrowser();
  await browser.start();

  try {
    const page = browser.getPage();

    // Navigate to a search page
    await page.goto('https://www.google.com');
    await page.waitForLoadState('networkidle');

    console.log('\n' + '='.repeat(60));
    console.log('Text Search Demo');
    console.log('='.repeat(60) + '\n');

    // Example 1: Simple text search (string shorthand)
    console.log('Example 1: Finding "Google Search" button');
    console.log('-'.repeat(60));
    let result = await findTextRect(page, 'Google Search');

    if (result.status === 'success' && result.results) {
      console.log(`✓ Found ${result.matches} match(es) for '${result.query}'`);
      // Only print the first three matches to keep the output short
      for (let i = 0; i < Math.min(3, result.results.length); i++) {
        const match = result.results[i];
        console.log(`\nMatch ${i + 1}:`);
        console.log(` Text: '${match.text}'`);
        console.log(` Position: (${match.rect.x.toFixed(1)}, ${match.rect.y.toFixed(1)})`);
        console.log(` Size: ${match.rect.width.toFixed(1)}x${match.rect.height.toFixed(1)} pixels`);
        console.log(` In viewport: ${match.in_viewport}`);
        console.log(
          ` Context: ...${match.context.before}[${match.text}]${match.context.after}...`
        );
      }
    } else {
      console.log(`✗ Search failed: ${result.error}`);
    }

    // Example 2: Find and click search box (using the options-object syntax)
    console.log('\n\nExample 2: Finding and clicking the search box');
    console.log('-'.repeat(60));
    result = await findTextRect(page, {
      text: 'Search',
      maxResults: 5
    });

    if (result.status === 'success' && result.results) {
      // Find the first visible match
      for (const match of result.results) {
        if (match.in_viewport) {
          console.log(`✓ Found visible match: '${match.text}'`);
          console.log(` Clicking at (${match.rect.x.toFixed(1)}, ${match.rect.y.toFixed(1)})`);

          // Hand the match rectangle to clickRect()
          // NOTE(review): presumably clickRect clicks the centre of the rect — confirm
          const clickResult = await clickRect(browser, {
            x: match.rect.x,
            y: match.rect.y,
            w: match.rect.width,
            h: match.rect.height
          });

          if (clickResult.success) {
            console.log(` ✓ Click successful!`);
          }
          break;
        }
      }
    }

    // Example 3: Case-sensitive search
    console.log('\n\nExample 3: Case-sensitive search for "GOOGLE"');
    console.log('-'.repeat(60));
    const resultInsensitive = await findTextRect(page, {
      text: 'GOOGLE',
      caseSensitive: false
    });
    const resultSensitive = await findTextRect(page, {
      text: 'GOOGLE',
      caseSensitive: true
    });

    console.log(`Case-insensitive search: ${resultInsensitive.matches || 0} matches`);
    console.log(`Case-sensitive search: ${resultSensitive.matches || 0} matches`);

    // Example 4: Whole word search
    console.log('\n\nExample 4: Whole word search');
    console.log('-'.repeat(60));
    const resultPartial = await findTextRect(page, {
      text: 'Search',
      wholeWord: false
    });
    const resultWhole = await findTextRect(page, {
      text: 'Search',
      wholeWord: true
    });

    console.log(`Partial word match: ${resultPartial.matches || 0} matches`);
    console.log(`Whole word only: ${resultWhole.matches || 0} matches`);

    // Example 5: Get viewport information
    console.log('\n\nExample 5: Viewport and scroll information');
    console.log('-'.repeat(60));
    result = await findTextRect(page, 'Google');
    if (result.status === 'success' && result.viewport) {
      console.log(`Viewport size: ${result.viewport.width}x${result.viewport.height}`);
      // Scroll offsets are only printed when the result actually carries them
      if ('scroll_x' in result.viewport && 'scroll_y' in result.viewport) {
        console.log(`Scroll position: (${result.viewport.scroll_x}, ${result.viewport.scroll_y})`);
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('Demo complete!');
    console.log('='.repeat(60) + '\n');
  } finally {
    // Always release the browser, even when a step above throws
    await browser.close();
  }
}

// Kick off the demo; any rejection from main() is printed to stderr.
main().catch((err) => {
  console.error(err);
});
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export { ScriptGenerator, generate } from './generator';
export { read, ReadOptions, ReadResult } from './read';
export { screenshot, ScreenshotOptions } from './screenshot';
export { showOverlay, clearOverlay } from './overlay';
export { findTextRect } from './textSearch';
export * from './types';
export { saveStorageState } from './utils';

Expand Down
104 changes: 104 additions & 0 deletions src/textSearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Text search utilities - find text and get pixel coordinates
*/

import { Page } from "playwright";
import { FindTextRectOptions, TextRectSearchResult } from "./types";

/**
 * Find occurrences of a text string on the page and get their pixel rectangles.
 *
 * The search itself is performed by the extension's `window.sentience.findTextRect`,
 * which this function invokes inside the page via `page.evaluate`. This wrapper
 * only normalises and validates the arguments before forwarding them.
 *
 * @param page - Playwright Page instance
 * @param options - Either the text to search for (string shorthand), or an
 *   options object:
 *   - `text` - text to search for (required, must not be blank)
 *   - `caseSensitive` - defaults to `false`
 *   - `wholeWord` - defaults to `false`
 *   - `maxResults` - defaults to `10`; fractional values are floored and the
 *     value is clamped to the range 1..100; NaN falls back to the default
 * @returns The result returned by the in-page search, or
 *   `{ status: "error", error }` when `text` is missing, not a string, or blank
 * @throws The returned promise rejects when `window.sentience.findTextRect` is
 *   not available in the page, or when `page.evaluate` itself fails
 *
 * @example
 * // Find "Sign In" button (string shorthand)
 * const result = await findTextRect(page, "Sign In");
 * if (result.status === "success" && result.results) {
 *   const firstMatch = result.results[0];
 *   console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
 *   console.log(`Size: ${firstMatch.rect.width}x${firstMatch.rect.height}`);
 *   console.log(`In viewport: ${firstMatch.in_viewport}`);
 * }
 *
 * @example
 * // Case-sensitive search
 * const result = await findTextRect(page, {
 *   text: "LOGIN",
 *   caseSensitive: true
 * });
 *
 * @example
 * // Whole word only
 * const result = await findTextRect(page, {
 *   text: "log",
 *   wholeWord: true // Won't match "login"
 * });
 *
 * @example
 * // Find all matches and click the first visible one
 * const result = await findTextRect(page, {
 *   text: "Buy Now",
 *   maxResults: 5
 * });
 * if (result.status === "success" && result.results) {
 *   for (const match of result.results) {
 *     if (match.in_viewport) {
 *       // page.mouse.click() takes viewport-relative coordinates, so use
 *       // viewport_rect rather than the absolute (scroll-offset) rect.
 *       // NOTE(review): assumes viewport_rect has the same x/y/width/height
 *       // fields as rect — confirm against the TextMatch type.
 *       await page.mouse.click(
 *         match.viewport_rect.x + match.viewport_rect.width / 2,
 *         match.viewport_rect.y + match.viewport_rect.height / 2
 *       );
 *       break;
 *     }
 *   }
 * }
 */
export async function findTextRect(
  page: Page,
  options: FindTextRectOptions | string
): Promise<TextRectSearchResult> {
  const DEFAULT_MAX_RESULTS = 10;
  const MAX_RESULTS_CAP = 100;

  // Support simple string input for convenience. A null/undefined `options`
  // (possible from untyped callers) is treated as an empty options object so
  // it is reported through the normal "text is required" error below.
  const opts: Partial<FindTextRectOptions> =
    typeof options === "string" ? { text: options } : options ?? {};

  const {
    text,
    caseSensitive = false,
    wholeWord = false,
    maxResults = DEFAULT_MAX_RESULTS,
  } = opts;

  // The typeof guard keeps a non-string `text` from blowing up on `.trim()`
  if (typeof text !== "string" || text.trim().length === 0) {
    return {
      status: "error",
      error: "Text parameter is required and cannot be empty",
    };
  }

  // Limit maxResults to prevent performance issues, and make sure the page
  // only ever receives a positive integer: NaN / non-numbers fall back to the
  // default, fractions are floored, and the value is clamped to 1..cap.
  const requestedMaxResults =
    typeof maxResults !== "number" || Number.isNaN(maxResults)
      ? DEFAULT_MAX_RESULTS
      : Math.floor(maxResults);
  const limitedMaxResults = Math.min(
    Math.max(requestedMaxResults, 1),
    MAX_RESULTS_CAP
  );

  // Call the extension's findTextRect method inside the page
  const result = await page.evaluate(
    (evalOptions) => {
      const api = (window as any).sentience;
      // Fail with an actionable message instead of an opaque
      // "cannot read properties of undefined" when the API is not injected.
      if (!api || typeof api.findTextRect !== "function") {
        throw new Error(
          "window.sentience.findTextRect is not available on this page"
        );
      }
      return api.findTextRect(evalOptions);
    },
    {
      text,
      caseSensitive,
      wholeWord,
      maxResults: limitedMaxResults,
    }
  );

  return result as TextRectSearchResult;
}
Loading