Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ coverage/
*.min.js
package-lock.json



27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,33 @@ await browser.close();

---

## 🆕 What's New (2026-01-06)

### Human-like Typing
Add realistic delays between keystrokes to mimic human typing:
```typescript
// Type instantly (default)
await typeText(browser, elementId, 'Hello World');

// Type with human-like delay (~10ms between keystrokes)
await typeText(browser, elementId, 'Hello World', false, 10);
```

### Scroll to Element
Scroll elements into view, with smooth animation by default or instantly on request:
```typescript
const snap = await snapshot(browser);
const button = find(snap, 'role=button text~"Submit"');

// Scroll element into view with smooth animation
await scrollTo(browser, button.id);

// Scroll instantly to top of viewport
await scrollTo(browser, button.id, 'instant', 'start');
```

---

<details>
<summary><h2>📊 Agent Execution Tracing (NEW in v0.3.1)</h2></summary>

Expand Down
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

102 changes: 99 additions & 3 deletions src/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,22 +223,28 @@ export async function click(
* @param elementId - Element ID from snapshot (must be a text input element)
* @param text - Text to type
* @param takeSnapshot - Take snapshot after action (default: false)
* @param delayMs - Delay between keystrokes in milliseconds for human-like typing (default: 0)
* @returns ActionResult with success status, outcome, duration, and optional snapshot
*
* @example
* ```typescript
* const snap = await snapshot(browser);
* const searchBox = find(snap, 'role=searchbox');
* if (searchBox) {
* // Type instantly (default behavior)
* await typeText(browser, searchBox.id, 'Hello World');
*
* // Type with human-like delay (~10ms between keystrokes)
* await typeText(browser, searchBox.id, 'Hello World', false, 10);
* }
* ```
*/
export async function typeText(
browser: IBrowser,
elementId: number,
text: string,
takeSnapshot: boolean = false
takeSnapshot: boolean = false,
delayMs: number = 0
): Promise<ActionResult> {
const page = browser.getPage();
if (!page) {
Expand Down Expand Up @@ -270,8 +276,98 @@ export async function typeText(
};
}

// Type using Playwright keyboard
await page.keyboard.type(text);
// Type using Playwright keyboard with optional delay between keystrokes
await page.keyboard.type(text, { delay: delayMs });

const durationMs = Date.now() - startTime;
const urlAfter = page.url();
const urlChanged = urlBefore !== urlAfter;

const outcome = urlChanged ? 'navigated' : 'dom_updated';

let snapshotAfter: Snapshot | undefined;
if (takeSnapshot) {
snapshotAfter = await snapshot(browser);
}

return {
success: true,
duration_ms: durationMs,
outcome,
url_changed: urlChanged,
snapshot_after: snapshotAfter,
};
}

/**
* Scroll an element into view
*
* Scrolls the page so that the specified element is visible in the viewport.
* Uses the element registry to find the element and scrollIntoView() to scroll it.
*
* @param browser - SentienceBrowser instance
* @param elementId - Element ID from snapshot to scroll into view
* @param behavior - Scroll behavior: 'smooth' for animated scroll, 'instant' for immediate, 'auto' to defer to the page's CSS `scroll-behavior` (default: 'smooth')
* @param block - Vertical alignment: 'start', 'center', 'end', 'nearest' (default: 'center')
* @param takeSnapshot - Take snapshot after action (default: false)
* @returns ActionResult with success status, outcome, duration, and optional snapshot
*
* @example
* ```typescript
* const snap = await snapshot(browser);
* const button = find(snap, 'role=button text~"Submit"');
* if (button) {
* // Scroll element into view with smooth animation
* await scrollTo(browser, button.id);
*
* // Scroll instantly to top of viewport
* await scrollTo(browser, button.id, 'instant', 'start');
* }
* ```
*/
export async function scrollTo(
browser: IBrowser,
elementId: number,
behavior: 'smooth' | 'instant' | 'auto' = 'smooth',
block: 'start' | 'center' | 'end' | 'nearest' = 'center',
takeSnapshot: boolean = false
): Promise<ActionResult> {
const page = browser.getPage();
if (!page) {
throw new Error('Browser not started. Call start() first.');
}
const startTime = Date.now();
const urlBefore = page.url();

// Scroll element into view using the element registry
const scrolled = await BrowserEvaluator.evaluate(
page,
(args: { id: number; behavior: string; block: string }) => {
const el = (window as any).sentience_registry[args.id];
if (el && el.scrollIntoView) {
el.scrollIntoView({
behavior: args.behavior,
block: args.block,
inline: 'nearest',
});
return true;
}
return false;
},
{ id: elementId, behavior, block }
);

if (!scrolled) {
return {
success: false,
duration_ms: Date.now() - startTime,
outcome: 'error',
error: { code: 'scroll_failed', reason: 'Element not found or not scrollable' },
};
}

// Wait a bit for scroll to complete (especially for smooth scrolling)
await page.waitForTimeout(behavior === 'smooth' ? 500 : 100);

const durationMs = Date.now() - startTime;
const urlAfter = page.url();
Expand Down
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
export { SentienceBrowser } from './browser';
export { snapshot, SnapshotOptions } from './snapshot';
export { query, find, parseSelector } from './query';
export { click, typeText, press, clickRect, ClickRect } from './actions';
export { click, typeText, press, scrollTo, clickRect, ClickRect } from './actions';
export { waitFor } from './wait';
export { expect, Expectation } from './expect';
export { Inspector, inspect } from './inspector';
Expand Down
127 changes: 126 additions & 1 deletion tests/actions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,17 @@
* Tests for actions (click, type, press, scrollTo, clickRect)
*/

import { SentienceBrowser, click, typeText, press, clickRect, snapshot, find, BBox } from '../src';
import {
SentienceBrowser,
click,
typeText,
press,
scrollTo,
clickRect,
snapshot,
find,
BBox,
} from '../src';
import { createTestBrowser, getPageOrThrow } from './test-utils';

describe('Actions', () => {
Expand Down Expand Up @@ -119,6 +129,121 @@ describe('Actions', () => {
}, 60000);
});

describe('scrollTo', () => {
  type TestBrowser = Awaited<ReturnType<typeof createTestBrowser>>;

  /**
   * Start a fresh test browser, load the test page, wait for the network to
   * go idle, then hand the browser to `body`. The browser is always closed,
   * whether `body` resolves or throws.
   */
  async function onLoadedPage(body: (browser: TestBrowser) => Promise<void>): Promise<void> {
    const browser = await createTestBrowser();

    try {
      const page = getPageOrThrow(browser);
      await page.goto('https://example.com');
      await page.waitForLoadState('networkidle', { timeout: 10000 });
      await body(browser);
    } finally {
      await browser.close();
    }
  }

  /** Take a snapshot and keep only the elements whose role is "link". */
  async function linkElements(browser: TestBrowser) {
    const snap = await snapshot(browser);
    return snap.elements.filter(el => el.role === 'link');
  }

  it('should scroll an element into view', async () => {
    await onLoadedPage(async browser => {
      const links = await linkElements(browser);
      // Nothing to scroll to: the test ends without asserting.
      if (links.length === 0) {
        return;
      }

      // Prefer the last link, which might be outside the viewport.
      const target = links[links.length - 1];
      const result = await scrollTo(browser, target.id);
      expect(result.success).toBe(true);
      expect(result.duration_ms).toBeGreaterThan(0);
      expect(['navigated', 'dom_updated']).toContain(result.outcome);
    });
  }, 60000);

  it('should scroll with instant behavior', async () => {
    await onLoadedPage(async browser => {
      const links = await linkElements(browser);
      if (links.length === 0) {
        return;
      }

      const [target] = links;
      const result = await scrollTo(browser, target.id, 'instant', 'start');
      expect(result.success).toBe(true);
      expect(result.duration_ms).toBeGreaterThan(0);
    });
  }, 60000);

  it('should take snapshot after scroll when requested', async () => {
    await onLoadedPage(async browser => {
      const links = await linkElements(browser);
      if (links.length === 0) {
        return;
      }

      const [target] = links;
      const result = await scrollTo(browser, target.id, 'smooth', 'center', true);
      expect(result.success).toBe(true);
      expect(result.snapshot_after).toBeDefined();
      expect(result.snapshot_after?.status).toBe('success');
    });
  }, 60000);

  it('should fail for invalid element ID', async () => {
    await onLoadedPage(async browser => {
      // 99999 is used as an ID that is not expected to be registered.
      const result = await scrollTo(browser, 99999);
      expect(result.success).toBe(false);
      expect(result.error).toBeDefined();
      expect(result.error?.code).toBe('scroll_failed');
    });
  }, 60000);
});

describe('typeText with delay', () => {
  it('should type text with human-like delay', async () => {
    const text = 'hello';
    const delayMs = 10;
    const browser = await createTestBrowser();

    try {
      const page = getPageOrThrow(browser);
      await page.goto('https://example.com');
      await page.waitForLoadState('networkidle', { timeout: 10000 });

      const snap = await snapshot(browser);
      const textbox = find(snap, 'role=textbox');

      // NOTE(review): when the page exposes no textbox the test ends here
      // without asserting anything — confirm the target page has one.
      if (!textbox) {
        return;
      }

      const result = await typeText(browser, textbox.id, text, false, delayMs);
      expect(result.success).toBe(true);
      // One delay per character gives the lower bound: 5 chars * 10ms = 50ms.
      expect(result.duration_ms).toBeGreaterThanOrEqual(text.length * delayMs);
    } finally {
      await browser.close();
    }
  }, 60000);
});

describe('clickRect', () => {
it('should click at rectangle center using rect dict', async () => {
const browser = await createTestBrowser();
Expand Down
Loading