Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 40 additions & 37 deletions loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,18 @@ import { Anthropic } from '@anthropic-ai/sdk';
import { DateTime } from 'luxon';
import type { Page } from 'playwright';
import type { BetaMessageParam, BetaTextBlock } from './types/beta';
import { ToolCollection, DEFAULT_TOOL_VERSION, TOOL_GROUPS_BY_VERSION, type ToolVersion } from './tools/collection';
import { ComputerUseToolCollection, DEFAULT_TOOL_VERSION, TOOL_GROUPS_BY_VERSION, type ToolVersion } from './tools/collection';
import { responseToParams, maybeFilterToNMostRecentImages, injectPromptCaching, PROMPT_CACHING_BETA_FLAG } from './utils/message-processing';
import { makeApiToolResult } from './utils/tool-results';
import { ComputerTool20241022, ComputerTool20250124 } from './tools/computer';
import type { ActionParams } from './tools/types/computer';
import { Action } from './tools/types/computer';
import { PlaywrightTool } from './tools/playwright';

// System prompt optimized for the environment
const SYSTEM_PROMPT = `<SYSTEM_CAPABILITY>
* You are utilising an Ubuntu virtual machine using ${process.arch} architecture with internet access.
* When you connect to the display, CHROMIUM IS ALREADY OPEN. The url bar is not visible but it is there.
* If you need to navigate to a new page, use ctrl+l to focus the url bar and then enter the url.
* You won't be able to see the url bar from the screenshot but ctrl-l still works.
* If you need to navigate to a new page, use the "goto" tool provided.
* When viewing a page it can be helpful to zoom out so that you can see everything on the page.
* Either that, or make sure you scroll down to see everything before deciding something isn't available.
* When using your computer function calls, they take a while to run and send back to you.
Expand All @@ -27,7 +26,10 @@ const SYSTEM_PROMPT = `<SYSTEM_CAPABILITY>

<IMPORTANT>
* When using Chromium, if a startup wizard appears, IGNORE IT. Do not even click "skip this step".
* Instead, click on the search bar on the center of the screen where it says "Search or enter address", and enter the appropriate search term or URL there.
* For ALL navigation needs, use the "goto" method - never use keyboard shortcuts or URL bar interactions.
* The goto method is the reliable way to navigate to any website or URL.
* If no specific URL is provided to achieve a goal or part of a goal, use Google (https://www.google.com) as your entry point to search for and navigate to relevant websites.
* For ambiguous requests, use Google to find the most relevant site.
</IMPORTANT>`;

// Add new type definitions
Expand All @@ -41,7 +43,9 @@ interface ExtraBodyConfig {
}

interface ToolUseInput extends Record<string, unknown> {
action: Action;
action?: Action;
method?: string;
args?: string[];
}

export async function samplingLoop({
Expand Down Expand Up @@ -69,7 +73,14 @@ export async function samplingLoop({
}): Promise<BetaMessageParam[]> {
const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION;
const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion];
const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage)));

// Create computer tools
const computerTools = toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage));

// Create playwright tool
const playwrightTool = new PlaywrightTool(playwrightPage);

const toolCollection = new ComputerUseToolCollection(...computerTools, playwrightTool);

const system: BetaTextBlock = {
type: 'text',
Expand Down Expand Up @@ -153,41 +164,33 @@ export async function samplingLoop({
for (const contentBlock of responseParams) {
if (contentBlock.type === 'tool_use' && contentBlock.name && contentBlock.input && typeof contentBlock.input === 'object') {
const input = contentBlock.input as ToolUseInput;
if ('action' in input && typeof input.action === 'string') {
hasToolUse = true;
const toolInput: ActionParams = {
action: input.action as Action,
...Object.fromEntries(
Object.entries(input).filter(([key]) => key !== 'action')
)
};

try {
const result = await toolCollection.run(
contentBlock.name,
toolInput
);
hasToolUse = true;

const toolResult = makeApiToolResult(result, contentBlock.id!);
toolResultContent.push(toolResult);
} catch (error) {
console.error(error);
throw error;
}
try {
const result = await toolCollection.run(
contentBlock.name,
input
);

const toolResult = makeApiToolResult(result, contentBlock.id!);
toolResultContent.push(toolResult);
} catch (error) {
console.error(error);
throw error;
}
}
}

if (toolResultContent.length === 0 && !hasToolUse && response.stop_reason !== 'tool_use') {
console.log('No tool use or results, and not waiting for tool use, ending loop');
return messages;
}
if (toolResultContent.length === 0 && !hasToolUse && response.stop_reason !== 'tool_use') {
console.log('No tool use or results, and not waiting for tool use, ending loop');
return messages;
}

if (toolResultContent.length > 0) {
messages.push({
role: 'user',
content: toolResultContent,
});
if (toolResultContent.length > 0) {
messages.push({
role: 'user',
content: toolResultContent,
});
}
}
}
}
Expand Down
33 changes: 23 additions & 10 deletions tools/collection.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { ComputerTool20241022, ComputerTool20250124 } from './computer';
import type { PlaywrightActionParams } from './playwright';
import { Action } from './types/computer';
import type { ActionParams, ToolResult } from './types/computer';
import type { ComputerActionParams } from './types/computer';
import type { ComputerUseTool, ComputerUseToolDef, ToolResult } from './types/base';

export type ToolVersion = 'computer_use_20250124' | 'computer_use_20241022' | 'computer_use_20250429';

Expand Down Expand Up @@ -35,27 +37,38 @@ export const TOOL_GROUPS_BY_VERSION: Record<ToolVersion, ToolGroup> = Object.fro
TOOL_GROUPS.map(group => [group.version, group])
) as Record<ToolVersion, ToolGroup>;

export class ToolCollection {
private tools: Map<string, ComputerTool20241022 | ComputerTool20250124>;
export class ComputerUseToolCollection {
private tools: Map<string, ComputerUseTool>;

constructor(...tools: (ComputerTool20241022 | ComputerTool20250124)[]) {
constructor(...tools: ComputerUseTool[]) {
this.tools = new Map(tools.map(tool => [tool.name, tool]));
}

toParams(): ActionParams[] {
toParams(): ComputerUseToolDef[] {
return Array.from(this.tools.values()).map(tool => tool.toParams());
}

async run(name: string, toolInput: { action: Action } & Record<string, ActionParams>): Promise<ToolResult> {
async run(name: string, toolInput: Record<string, unknown>): Promise<ToolResult> {
const tool = this.tools.get(name);
if (!tool) {
throw new Error(`Tool ${name} not found`);
}

if (!Object.values(Action).includes(toolInput.action)) {
throw new Error(`Invalid action ${toolInput.action} for tool ${name}`);
// Handle different tool types based on their expected input structure
if (name === 'playwright') {
// Validate playwright tool input
const playwrightInput = toolInput as PlaywrightActionParams;
if (!playwrightInput.method || !Array.isArray(playwrightInput.args)) {
throw new Error(`Invalid input for playwright tool: method and args are required`);
}
return await tool.call(toolInput);
} else {
// Validate computer tool input
const computerInput = toolInput as ComputerActionParams;
if (!computerInput.action || !Object.values(Action).includes(computerInput.action)) {
throw new Error(`Invalid action ${computerInput.action} for tool ${name}`);
}
return await tool.call(toolInput);
}

return await tool.call(toolInput);
}
}
13 changes: 7 additions & 6 deletions tools/computer.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import type { Page } from 'playwright';
import { Action, ToolError } from './types/computer';
import type { ActionParams, BaseAnthropicTool, ToolResult } from './types/computer';
import { Action } from './types/computer';
import type { BaseComputerTool, ComputerActionParams } from './types/computer';
import { KeyboardUtils } from './utils/keyboard';
import { ActionValidator } from './utils/validator';
import { ToolError, type ComputerToolDef, type ToolResult } from './types/base';

const TYPING_DELAY_MS = 12;

export class ComputerTool implements BaseAnthropicTool {
export class ComputerTool implements BaseComputerTool {
name: 'computer' = 'computer';
protected page: Page;
protected _screenshotDelay = 2.0;
Expand Down Expand Up @@ -46,7 +47,7 @@ export class ComputerTool implements BaseAnthropicTool {
return this.version === '20241022' ? 'computer_20241022' : 'computer_20250124';
}

toParams(): ActionParams {
toParams(): ComputerToolDef{
const params = {
name: this.name,
type: this.apiType,
Expand Down Expand Up @@ -136,7 +137,7 @@ export class ComputerTool implements BaseAnthropicTool {
}
}

async call(params: ActionParams): Promise<ToolResult> {
async call(params: ComputerActionParams): Promise<ToolResult> {
const {
action,
text,
Expand Down Expand Up @@ -174,7 +175,7 @@ export class ComputerTool implements BaseAnthropicTool {
throw new ToolError(`${action} is only available in version 20250124`);
}

const scrollDirection = scrollDirectionParam || kwargs.scroll_direction;
const scrollDirection = scrollDirectionParam || kwargs.scroll_direction as string;
const scrollAmountValue = scrollAmount || scroll_amount;

if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection)) {
Expand Down
115 changes: 115 additions & 0 deletions tools/playwright.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import type { Page } from 'playwright';
import { ToolError, type ToolResult, type ComputerUseTool, type FunctionToolDef, type ActionParams } from './types/base';

/**
 * Allow-list of Playwright method names the tool will execute.
 * Only `goto` is supported for now.
 */
const SUPPORTED_METHODS = ['goto'] as const;

/** Union of the literal method names in `SUPPORTED_METHODS`. */
type SupportedMethod = (typeof SUPPORTED_METHODS)[number];

/**
 * Input for the Playwright tool: the method to invoke and its string
 * arguments, on top of the generic `ActionParams` bag.
 */
export type PlaywrightActionParams = ActionParams & {
  method: string;
  args: string[];
};

/**
 * Tool that lets the model invoke an allow-listed set of Playwright `Page`
 * methods (currently only `goto`) by name with string arguments.
 */
export class PlaywrightTool implements ComputerUseTool {
  name: 'playwright' = 'playwright';
  protected page: Page;

  /**
   * @param page Playwright page that all tool calls operate on.
   */
  constructor(page: Page) {
    this.page = page;
  }

  /**
   * Build the custom-tool definition advertised to the API: an object schema
   * with a required `method` (restricted to the supported methods) and a
   * required `args` array of strings.
   */
  toParams(): FunctionToolDef {
    return {
      name: this.name,
      type: 'custom',
      input_schema: {
        type: 'object',
        properties: {
          method: {
            type: 'string',
            description: 'The playwright function to call.',
            enum: SUPPORTED_METHODS,
          },
          args: {
            type: 'array',
            description: 'The required arguments',
            items: {
              type: 'string',
              description: 'The argument to pass to the function',
            },
          },
        },
        required: ['method', 'args'],
      },
    };
  }

  /** Type guard: true when `method` is one of the supported method names. */
  private validateMethod(method: string): method is SupportedMethod {
    return SUPPORTED_METHODS.includes(method as SupportedMethod);
  }

  /**
   * Turn user-supplied text into an absolute URL string.
   *
   * Accepts full URLs as-is and prefixes `https://` for bare hosts. Input of
   * the form `host:port[/path]` needs special care: the URL parser accepts it
   * with `host:` as the *scheme* (e.g. `localhost:3000` has protocol
   * `localhost:`), so it must be detected up front and treated as schemeless.
   *
   * @throws ToolError when the input cannot be parsed either way.
   */
  private normalizeUrl(raw: string): string {
    // Trim so that the `https://` prefix is not followed by stray whitespace.
    const candidate = raw.trim();

    // "<host>:<digits>" optionally followed by a path, query, or fragment.
    const looksLikeHostPort = /^[^\s/:?#]+:\d{1,5}(?:[/?#]|$)/.test(candidate);

    if (!looksLikeHostPort) {
      try {
        return new URL(candidate).href;
      } catch {
        // Not an absolute URL; fall through and retry with an https:// prefix.
      }
    }

    try {
      return new URL(`https://${candidate}`).href;
    } catch {
      throw new ToolError(`Invalid URL format: ${raw}`);
    }
  }

  /**
   * Navigate the page to the single URL in `args`.
   *
   * Waits for `networkidle` (30 s timeout) plus one extra second, then reports
   * the final URL and page title.
   *
   * @param args Exactly one element: the URL or bare hostname to open.
   * @returns A result whose `output` describes the page that was reached.
   * @throws ToolError on a wrong argument count, an empty or unparseable URL,
   *   or a navigation failure.
   */
  private async executeGoto(args: string[]): Promise<ToolResult> {
    if (args.length !== 1) {
      throw new ToolError('goto method requires exactly one argument: the URL');
    }

    const url = args[0];
    if (!url || typeof url !== 'string') {
      throw new ToolError('URL must be a non-empty string');
    }

    const normalizedURL = this.normalizeUrl(url);

    try {
      await this.page.goto(normalizedURL, {
        waitUntil: 'networkidle',
        timeout: 30000,
      });

      // Wait a bit for the page to fully load
      await this.page.waitForTimeout(1000);

      const currentURL = this.page.url();
      const title = await this.page.title();

      return {
        output: `Successfully navigated to ${currentURL}. Page title: "${title}"`,
      };
    } catch (error) {
      throw new ToolError(`Failed to navigate to ${normalizedURL}: ${error}`);
    }
  }

  /**
   * Dispatch a tool invocation to the matching Playwright method.
   *
   * @param params Tool input carrying `method` and `args`.
   * @returns The result produced by the selected method.
   * @throws ToolError when `method` is unsupported, `args` is not an array,
   *   or the underlying method fails.
   */
  async call(params: PlaywrightActionParams): Promise<ToolResult> {
    const { method, args } = params;

    if (!this.validateMethod(method)) {
      throw new ToolError(
        `Unsupported method: ${method}. Supported methods: ${SUPPORTED_METHODS.join(', ')}`
      );
    }

    // The input originates from the model, so re-check the shape at runtime.
    if (!Array.isArray(args)) {
      throw new ToolError('args must be an array');
    }

    switch (method) {
      case 'goto':
        return await this.executeGoto(args);
      default:
        throw new ToolError(`Method ${method} is not implemented`);
    }
  }
}
50 changes: 50 additions & 0 deletions tools/types/base.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/** Generic bag of tool input parameters keyed by name; values are `unknown` and must be narrowed before use. */
export type ActionParams = Record<string, unknown>;

/**
 * Outcome of a tool invocation. Every field is optional; a tool fills in
 * whichever ones apply.
 */
export interface ToolResult {
  /** Text output produced by the tool. */
  output?: string;
  /** Error text, when the tool reports a failure in-band. */
  error?: string;
  /** Image payload as a base64 string (presumably a screenshot — confirm against producers). */
  base64Image?: string;
  /** System-level text attached to the result (exact use is not visible here — confirm). */
  system?: string;
}

/**
 * Error type thrown by tools. Identical to `Error` except that `name` is set
 * to `'ToolError'`, so it can be told apart in logs and `instanceof` checks.
 */
export class ToolError extends Error {
  /**
   * @param message Human-readable description of what went wrong.
   */
  constructor(message: string) {
    super(message);
    // Replace the inherited "Error" label with this class's own.
    this.name = 'ToolError';
  }
}

/**
 * Definition of a custom function-style tool (such as the Playwright tool):
 * a name, the fixed type tag `'custom'`, and an object schema for its input.
 */
export interface FunctionToolDef {
  /** Name the tool is registered and invoked under. */
  name: string;
  /** Tag marking this as a custom tool definition. */
  type: 'custom';
  /** Schema describing the tool's input object. */
  input_schema: {
    type: 'object';
    /** Per-property schema, keyed by property name. */
    properties: Record<
      string,
      {
        type: string;
        description: string;
        /** Allowed values, when the property is restricted to a fixed set. */
        enum?: readonly string[];
        /** Element schema, when the property is an array. */
        items?: { type: string; description: string };
      }
    >;
    /** Names of the properties that must be present. */
    required?: string[];
  };
}

/**
 * Definition of the computer tool, matching Anthropic's built-in computer
 * tool format.
 */
export interface ComputerToolDef {
  /** Name the tool is registered and invoked under. */
  name: string;
  /** Versioned type tag of the built-in computer tool. */
  type: 'computer_20241022' | 'computer_20250124';
  /** Display width in pixels. */
  display_width_px: number;
  /** Display height in pixels. */
  display_height_px: number;
  /** Display number; always `null` in this definition. */
  display_number: null;
}

/**
 * Any tool definition a tool may return: either the built-in computer tool
 * format or a custom function tool. The `type` field tells the two apart.
 */
export type ComputerUseToolDef =
  | ComputerToolDef
  | FunctionToolDef;

/**
 * Minimal contract every tool implements: a name, a way to produce its
 * definition, and an async entry point that executes it.
 */
export interface ComputerUseTool {
  /** Name the tool is registered and invoked under. */
  name: string;

  /** Produce the definition object describing this tool. */
  toParams(): ComputerUseToolDef;

  /**
   * Execute the tool.
   *
   * @param params Untyped input; implementations validate or narrow it.
   * @returns A promise of the tool's result.
   */
  call(params: Record<string, unknown>): Promise<ToolResult>;
}
Loading