Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 228 additions & 2 deletions bun.lock

Large diffs are not rendered by default.

64 changes: 33 additions & 31 deletions example.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
import { chromium } from 'playwright';
import { z } from 'zod';
import { ComputerUseAgent } from './index';
import { chromium } from "playwright";
import { z } from "zod";
import { ComputerUseAgent } from "./src/index";

async function textResponseExample(): Promise<void> {
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (!ANTHROPIC_API_KEY) {
throw new Error('ANTHROPIC_API_KEY environment variable is required');
throw new Error("ANTHROPIC_API_KEY environment variable is required");
}

const browser = await chromium.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://news.ycombinator.com/");

try {
console.log('\n=== Text Response Examples ===');
console.log("\n=== Text Response Examples ===");
const agent = new ComputerUseAgent({
apiKey: ANTHROPIC_API_KEY,
page,
});

// Text response with action
const topStory = await agent.execute('Tell me the title of the top story on this page');
console.log('Top story:', topStory);
const topStory = await agent.execute(
"Tell me the title of the top story on this page"
);
console.log("Top story:", topStory);

// Text response with multiple pieces of information
const summary = await agent.execute('Give me a brief summary of the top 3 stories');
console.log('Summary:', summary);

const summary = await agent.execute(
"Give me a brief summary of the top 3 stories"
);
console.log("Summary:", summary);
} catch (error) {
console.error('Error in text response example:', error);
console.error("Error in text response example:", error);
} finally {
await browser.close();
}
Expand All @@ -37,20 +40,20 @@ async function textResponseExample(): Promise<void> {
async function structuredResponseExample(): Promise<void> {
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (!ANTHROPIC_API_KEY) {
throw new Error('ANTHROPIC_API_KEY environment variable is required');
throw new Error("ANTHROPIC_API_KEY environment variable is required");
}

const browser = await chromium.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://news.ycombinator.com/");

try {
console.log('\n=== Structured Response Examples ===');
console.log("\n=== Structured Response Examples ===");
const agent = new ComputerUseAgent({
apiKey: ANTHROPIC_API_KEY,
page,
});

// Define schema for a single story
const HackerNewsStory = z.object({
title: z.string(),
Expand All @@ -59,13 +62,13 @@ async function structuredResponseExample(): Promise<void> {
comments: z.number(),
url: z.string().optional(),
});

// Get multiple stories with structured data
const stories = await agent.execute(
'Get the top 5 stories with their titles, points, authors, and comment counts',
"Get the top 5 stories with their titles, points, authors, and comment counts",
z.array(HackerNewsStory).max(5)
);
console.log('Structured stories:', JSON.stringify(stories, null, 2));
console.log("Structured stories:", JSON.stringify(stories, null, 2));

// Define schema for page metadata
const PageInfo = z.object({
Expand All @@ -76,26 +79,25 @@ async function structuredResponseExample(): Promise<void> {

// Get page information with structured data
const pageInfo = await agent.execute(
'Get information about this page including its title, total number of stories visible, and current page number',
"Get information about this page including its title, total number of stories visible, and current page number",
PageInfo
);
console.log('Page info:', JSON.stringify(pageInfo, null, 2));

console.log("Page info:", JSON.stringify(pageInfo, null, 2));
} catch (error) {
console.error('Error in structured response example:', error);
console.error("Error in structured response example:", error);
} finally {
await browser.close();
}
}

// Run examples
async function runExamples(): Promise<void> {
console.log('Running Computer Use Agent Examples...');
console.log("Running Computer Use Agent Examples...");

await textResponseExample();
await structuredResponseExample();
console.log('\nAll examples completed!');

console.log("\nAll examples completed!");
}

runExamples().catch(console.error);
runExamples().catch(console.error);
4 changes: 0 additions & 4 deletions index.ts

This file was deleted.

48 changes: 20 additions & 28 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,32 +1,10 @@
{
"name": "@onkernel/cu-playwright",
"version": "0.1.2",
"description": "Computer Use x Playwright SDK",
"main": "./dist/index.js",
"module": "./dist/index.js",
"types": "./index.ts",
"type": "module",
"scripts": {
"build": "bun build ./index.ts --outdir ./dist --target node",
"format": "bun run eslint . --fix",
"lint": "bun run eslint .",
"lint:fix": "bun run eslint . --fix",
"prepublishOnly": "bun run lint && bun run build"
},
"exports": {
".": {
"import": "./dist/index.js",
"types": "./index.ts"
}
},
"files": [
"dist/",
"index.ts",
"loop.ts",
"tools/",
"types/",
"utils/"
],
"description": "Computer Use x Playwright SDK",
"author": "Kernel",
"license": "MIT",
"keywords": [
"computer-use",
"playwright",
Expand All @@ -35,15 +13,21 @@
"ai",
"typescript"
],
"author": "Kernel",
"license": "MIT",
"repository": {
"type": "git",
"url": ""
},
"scripts": {
"build": "bunchee --clean src/index.ts",
"format": "bun run eslint . --fix",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The files array was completely removed from the package.json. This array controls which files are included when the package is published to npm. Without it, npm will include all files by default (except those in .npmignore/.gitignore), which could lead to publishing unwanted files or a bloated package. Consider adding a files array that includes the appropriate files for your new structure, likely something like ["dist/"] since you're building to the dist directory.

Type: Performance | Severity: Medium

"lint": "bun run eslint .",
"lint:fix": "bun run eslint . --fix",
"prepublishOnly": "bun run lint && bun run build"
},
"peerDependencies": {
"typescript": "^5",
"playwright": "^1.52.0"
"playwright": "^1.53.0",
"playwright-core": "^1.53.0"
},
"dependencies": {
"@anthropic-ai/sdk": "0.52.0",
Expand All @@ -55,10 +39,18 @@
"@eslint/js": "^9.0.0",
"@typescript-eslint/eslint-plugin": "^8.0.0",
"@typescript-eslint/parser": "^8.0.0",
"bunchee": "^6.5.4",
"eslint": "^9.0.0",
"eslint-config-prettier": "^9.0.0"
},
"publishConfig": {
"access": "public"
},
"main": "./dist/index.js",
"exports": {
".": {
"import": "./dist/index.js",
"types": "./dist/index.d.ts"
}
}
}
58 changes: 29 additions & 29 deletions agent.ts → src/agent.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import type { Page } from 'playwright';
import { computerUseLoop } from './loop';
import { z } from "zod";
import zodToJsonSchema from "zod-to-json-schema";
import type { Page } from "playwright";
import { computerUseLoop } from "./loop";

/**
* Computer Use Agent for automating browser interactions with Claude
*
*
* This agent provides a clean interface to Anthropic's Computer Use capabilities,
* allowing Claude to interact with web pages through Playwright.
*
*
* @see https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/computer-use-tool
*/
export class ComputerUseAgent {
Expand All @@ -18,24 +18,24 @@ export class ComputerUseAgent {

/**
* Create a new ComputerUseAgent instance
*
*
* @param options - Configuration options
* @param options.apiKey - Anthropic API key (get one from https://console.anthropic.com/)
* @param options.page - Playwright page instance to control
* @param options.model - Anthropic model to use (defaults to claude-sonnet-4-20250514)
*
*
* @see https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/computer-use-tool#model-compatibility
*/
constructor({
apiKey,
page,
model = 'claude-sonnet-4-20250514',
model = "claude-sonnet-4-20250514",
}: {
/** Anthropic API key for authentication */
apiKey: string;
/** Playwright page instance to control */
page: Page;
/**
/**
* Anthropic model to use for computer use tasks
* @default 'claude-sonnet-4-20250514'
*/
Expand All @@ -48,30 +48,30 @@ export class ComputerUseAgent {

/**
* Execute a computer use task with Claude
*
*
* This method can return either text responses or structured data validated against a Zod schema.
*
*
* @template T - The expected return type (string by default, or inferred from schema)
* @param query - The task description for Claude to execute
* @param schema - Optional Zod schema for structured responses
* @param options - Additional execution options
* @param options.systemPromptSuffix - Additional instructions appended to the system prompt
* @param options.thinkingBudget - Token budget for Claude's internal reasoning (default: 1024)
*
*
* @returns Promise that resolves to either a string (when no schema) or validated data of type T
*
*
* @example
* ```typescript
* // Text response
* const result = await agent.execute('Tell me the page title');
*
*
* // Structured response
* const data = await agent.execute(
* 'Get user info',
* z.object({ name: z.string(), age: z.number() })
* );
* ```
*
*
* @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
*/
async execute<T = string>(
Expand All @@ -80,7 +80,7 @@ export class ComputerUseAgent {
options?: {
/** Additional instructions appended to the system prompt */
systemPromptSuffix?: string;
/**
/**
* Token budget for Claude's internal reasoning process
* @default 1024
* @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#working-with-thinking-budgets
Expand Down Expand Up @@ -116,7 +116,7 @@ Respond ONLY with the JSON object, no additional text.`;

const lastMessage = messages[messages.length - 1];
if (!lastMessage) {
throw new Error('No response received');
throw new Error("No response received");
}

const response = this.extractTextFromMessage(lastMessage);
Expand All @@ -131,19 +131,21 @@ Respond ONLY with the JSON object, no additional text.`;
return schema.parse(parsed);
}

private extractTextFromMessage(message: { content: string | Array<{ type: string; text?: string }> }): string {
if (typeof message.content === 'string') {
private extractTextFromMessage(message: {
content: string | Array<{ type: string; text?: string }>;
}): string {
if (typeof message.content === "string") {
return message.content;
}

if (Array.isArray(message.content)) {
return message.content
.filter((block) => block.type === 'text')
.map((block) => block.text || '')
.join('');
.filter((block) => block.type === "text")
.map((block) => block.text || "")
.join("");
}
return '';

return "";
}

private parseJsonResponse(response: string): unknown {
Expand All @@ -165,6 +167,4 @@ Respond ONLY with the JSON object, no additional text.`;
// Example: " {\"status\": \"ok\"} " (JSON with whitespace)
return JSON.parse(response.trim());
}


}
}
4 changes: 4 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
export { ComputerUseAgent } from "./agent";
export type { BetaMessageParam, BetaTextBlock } from "./types/beta";
export type { ToolVersion } from "./tools/collection";
export { Action } from "./tools/types/computer";
Loading