From 47d6a0f70762b3ee786f46180ba1431e5f73d755 Mon Sep 17 00:00:00 2001 From: bigboateng Date: Fri, 20 Jun 2025 03:08:06 +0100 Subject: [PATCH] feat: update computer tool implementation and add base types - Update computer tool with latest functionality - Add new base types for better type safety - Add Playwright tool integration - Update validator and collection utilities - Enhance loop functionality --- loop.ts | 77 +++++++++++++------------- tools/collection.ts | 33 +++++++---- tools/computer.ts | 13 +++-- tools/playwright.ts | 115 +++++++++++++++++++++++++++++++++++++++ tools/types/base.ts | 50 +++++++++++++++++ tools/types/computer.ts | 26 +++------ tools/utils/validator.ts | 5 +- types/base.ts | 50 +++++++++++++++++ 8 files changed, 295 insertions(+), 74 deletions(-) create mode 100644 tools/playwright.ts create mode 100644 tools/types/base.ts create mode 100644 types/base.ts diff --git a/loop.ts b/loop.ts index 2cb5fe5..a079501 100644 --- a/loop.ts +++ b/loop.ts @@ -2,19 +2,18 @@ import { Anthropic } from '@anthropic-ai/sdk'; import { DateTime } from 'luxon'; import type { Page } from 'playwright'; import type { BetaMessageParam, BetaTextBlock } from './types/beta'; -import { ToolCollection, DEFAULT_TOOL_VERSION, TOOL_GROUPS_BY_VERSION, type ToolVersion } from './tools/collection'; +import { ComputerUseToolCollection, DEFAULT_TOOL_VERSION, TOOL_GROUPS_BY_VERSION, type ToolVersion } from './tools/collection'; import { responseToParams, maybeFilterToNMostRecentImages, injectPromptCaching, PROMPT_CACHING_BETA_FLAG } from './utils/message-processing'; import { makeApiToolResult } from './utils/tool-results'; import { ComputerTool20241022, ComputerTool20250124 } from './tools/computer'; -import type { ActionParams } from './tools/types/computer'; import { Action } from './tools/types/computer'; +import { PlaywrightTool } from './tools/playwright'; // System prompt optimized for the environment const SYSTEM_PROMPT = ` * You are utilising an Ubuntu virtual machine using ${process.arch} architecture with internet access. * When you connect to the display, CHROMIUM IS ALREADY OPEN. The url bar is not visible but it is there. -* If you need to navigate to a new page, use ctrl+l to focus the url bar and then enter the url. -* You won't be able to see the url bar from the screenshot but ctrl-l still works. +* If you need to navigate to a new page, use the "goto" tool provided. * When viewing a page it can be helpful to zoom out so that you can see everything on the page. * Either that, or make sure you scroll down to see everything before deciding something isn't available. * When using your computer function calls, they take a while to run and send back to you. @@ -27,7 +26,10 @@ const SYSTEM_PROMPT = ` * When using Chromium, if a startup wizard appears, IGNORE IT. Do not even click "skip this step". -* Instead, click on the search bar on the center of the screen where it says "Search or enter address", and enter the appropriate search term or URL there. +* For ALL navigation needs, use the "goto" method - never use keyboard shortcuts or URL bar interactions. +* The goto method is the reliable way to navigate to any website or URL. +* If no specific URL is provided to achieve a goal or part of a goal, use Google (https://www.google.com) as your entry point to search for and navigate to relevant websites. +* For ambiguous requests, use Google to find the most relevant site. `; // Add new type definitions @@ -41,7 +43,9 @@ interface ExtraBodyConfig { } interface ToolUseInput extends Record { - action: Action; + action?: Action; + method?: string; + args?: string[]; } export async function samplingLoop({ @@ -69,7 +73,14 @@ export async function samplingLoop({ }): Promise { const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION; const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion]; - const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage))); + + // Create computer tools + const computerTools = toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage)); + + // Create playwright tool + const playwrightTool = new PlaywrightTool(playwrightPage); + + const toolCollection = new ComputerUseToolCollection(...computerTools, playwrightTool); const system: BetaTextBlock = { type: 'text', @@ -153,41 +164,33 @@ export async function samplingLoop({ for (const contentBlock of responseParams) { if (contentBlock.type === 'tool_use' && contentBlock.name && contentBlock.input && typeof contentBlock.input === 'object') { const input = contentBlock.input as ToolUseInput; - if ('action' in input && typeof input.action === 'string') { - hasToolUse = true; - const toolInput: ActionParams = { - action: input.action as Action, - ...Object.fromEntries( - Object.entries(input).filter(([key]) => key !== 'action') - ) - }; - - try { - const result = await toolCollection.run( - contentBlock.name, - toolInput - ); + hasToolUse = true; - const toolResult = makeApiToolResult(result, contentBlock.id!); - toolResultContent.push(toolResult); - } catch (error) { - console.error(error); - throw error; - } + try { + const result = await toolCollection.run( + contentBlock.name, + input + ); + + const toolResult = makeApiToolResult(result, contentBlock.id!); + toolResultContent.push(toolResult); + } catch (error) { + console.error(error); + throw error; } } - } - if (toolResultContent.length === 0 && !hasToolUse && response.stop_reason !== 'tool_use') { - console.log('No tool use or results, and not waiting for tool use, ending loop'); - return messages; - } + if (toolResultContent.length === 0 && !hasToolUse && response.stop_reason !== 'tool_use') { + console.log('No tool use or results, and not waiting for tool use, ending loop'); + return messages; + } - if (toolResultContent.length > 0) { - messages.push({ - role: 'user', - content: toolResultContent, - }); + if (toolResultContent.length > 0) { + messages.push({ + role: 'user', + content: toolResultContent, + }); + } } } } diff --git a/tools/collection.ts b/tools/collection.ts index 45f3afe..56bf734 100644 --- a/tools/collection.ts +++ b/tools/collection.ts @@ -1,6 +1,8 @@ import { ComputerTool20241022, ComputerTool20250124 } from './computer'; +import type { PlaywrightActionParams } from './playwright'; import { Action } from './types/computer'; -import type { ActionParams, ToolResult } from './types/computer'; +import type { ComputerActionParams } from './types/computer'; +import type { ComputerUseTool, ComputerUseToolDef, ToolResult } from './types/base'; export type ToolVersion = 'computer_use_20250124' | 'computer_use_20241022' | 'computer_use_20250429'; @@ -35,27 +37,38 @@ export const TOOL_GROUPS_BY_VERSION: Record = Object.fro TOOL_GROUPS.map(group => [group.version, group]) ) as Record; -export class ToolCollection { - private tools: Map; +export class ComputerUseToolCollection { + private tools: Map; - constructor(...tools: (ComputerTool20241022 | ComputerTool20250124)[]) { + constructor(...tools: ComputerUseTool[]) { this.tools = new Map(tools.map(tool => [tool.name, tool])); } - toParams(): ActionParams[] { + toParams(): ComputerUseToolDef[] { return Array.from(this.tools.values()).map(tool => tool.toParams()); } - async run(name: string, toolInput: { action: Action } & Record): Promise { + async run(name: string, toolInput: Record): Promise { const tool = this.tools.get(name); if (!tool) { throw new Error(`Tool ${name} not found`); } - if (!Object.values(Action).includes(toolInput.action)) { - throw new Error(`Invalid action ${toolInput.action} for tool ${name}`); + // Handle different tool types based on their expected input structure + if (name === 'playwright') { + // Validate playwright tool input + const playwrightInput = toolInput as PlaywrightActionParams; + if (!playwrightInput.method || !Array.isArray(playwrightInput.args)) { + throw new Error(`Invalid input for playwright tool: method and args are required`); + } + return await tool.call(toolInput); + } else { + // Validate computer tool input + const computerInput = toolInput as ComputerActionParams; + if (!computerInput.action || !Object.values(Action).includes(computerInput.action)) { + throw new Error(`Invalid action ${computerInput.action} for tool ${name}`); + } + return await tool.call(toolInput); } - - return await tool.call(toolInput); } } \ No newline at end of file diff --git a/tools/computer.ts b/tools/computer.ts index 61197e7..4402227 100644 --- a/tools/computer.ts +++ b/tools/computer.ts @@ -1,12 +1,13 @@ import type { Page } from 'playwright'; -import { Action, ToolError } from './types/computer'; -import type { ActionParams, BaseAnthropicTool, ToolResult } from './types/computer'; +import { Action } from './types/computer'; +import type { BaseComputerTool, ComputerActionParams } from './types/computer'; import { KeyboardUtils } from './utils/keyboard'; import { ActionValidator } from './utils/validator'; +import { ToolError, type ComputerToolDef, type ToolResult } from './types/base'; const TYPING_DELAY_MS = 12; -export class ComputerTool implements BaseAnthropicTool { +export class ComputerTool implements BaseComputerTool { name: 'computer' = 'computer'; protected page: Page; protected _screenshotDelay = 2.0; @@ -46,7 +47,7 @@ export class ComputerTool implements BaseAnthropicTool { return this.version === '20241022' ? 'computer_20241022' : 'computer_20250124'; } - toParams(): ActionParams { + toParams(): ComputerToolDef{ const params = { name: this.name, type: this.apiType, @@ -136,7 +137,7 @@ export class ComputerTool implements BaseAnthropicTool { } } - async call(params: ActionParams): Promise { + async call(params: ComputerActionParams): Promise { const { action, text, @@ -174,7 +175,7 @@ export class ComputerTool implements BaseAnthropicTool { throw new ToolError(`${action} is only available in version 20250124`); } - const scrollDirection = scrollDirectionParam || kwargs.scroll_direction; + const scrollDirection = scrollDirectionParam || kwargs.scroll_direction as string; const scrollAmountValue = scrollAmount || scroll_amount; if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection)) { diff --git a/tools/playwright.ts b/tools/playwright.ts new file mode 100644 index 0000000..8adad25 --- /dev/null +++ b/tools/playwright.ts @@ -0,0 +1,115 @@ +import type { Page } from 'playwright'; +import { ToolError, type ToolResult, type ComputerUseTool, type FunctionToolDef, type ActionParams } from './types/base'; + +// Supported Playwright methods - initially only goto +const SUPPORTED_METHODS = ['goto'] as const; +type SupportedMethod = typeof SUPPORTED_METHODS[number]; + +export type PlaywrightActionParams = ActionParams & { + method: string; + args: string[]; +} + +export class PlaywrightTool implements ComputerUseTool { + name: 'playwright' = 'playwright'; + protected page: Page; + + constructor(page: Page) { + this.page = page; + } + + toParams(): FunctionToolDef { + return { + name: this.name, + type: 'custom', + input_schema: { + type: 'object', + properties: { + method: { + type: 'string', + description: 'The playwright function to call.', + enum: SUPPORTED_METHODS, + }, + args: { + type: 'array', + description: 'The required arguments', + items: { + type: 'string', + description: 'The argument to pass to the function', + }, + }, + }, + required: ['method', 'args'], + }, + }; + } + + private validateMethod(method: string): method is SupportedMethod { + return SUPPORTED_METHODS.includes(method as SupportedMethod); + } + + private async executeGoto(args: string[]): Promise { + if (args.length !== 1) { + throw new ToolError('goto method requires exactly one argument: the URL'); + } + + const url = args[0]; + if (!url || typeof url !== 'string') { + throw new ToolError('URL must be a non-empty string'); + } + + // Normalize URL - handles both full URLs and bare hostnames + let normalizedURL: string; + try { + const urlObj = new URL(url); + normalizedURL = urlObj.href; + } catch { + try { + const urlObj = new URL(`https://${url}`); + normalizedURL = urlObj.href; + } catch { + throw new ToolError(`Invalid URL format: ${url}`); + } + } + + try { + await this.page.goto(normalizedURL, { + waitUntil: 'networkidle', + timeout: 30000 + }); + + // Wait a bit for the page to fully load + await this.page.waitForTimeout(1000); + + const currentURL = this.page.url(); + const title = await this.page.title(); + + return { + output: `Successfully navigated to ${currentURL}. Page title: "${title}"`, + }; + } catch (error) { + throw new ToolError(`Failed to navigate to ${normalizedURL}: ${error}`); + } + } + + async call(params: PlaywrightActionParams): Promise { + const { method, args } = params as PlaywrightActionParams; + + if (!this.validateMethod(method)) { + throw new ToolError( + `Unsupported method: ${method}. Supported methods: ${SUPPORTED_METHODS.join(', ')}` + ); + } + + if (!Array.isArray(args)) { + throw new ToolError('args must be an array'); + } + + switch (method) { + case 'goto': + return await this.executeGoto(args); + default: + throw new ToolError(`Method ${method} is not implemented`); + } + } +} \ No newline at end of file diff --git a/tools/types/base.ts b/tools/types/base.ts new file mode 100644 index 0000000..fdef4b9 --- /dev/null +++ b/tools/types/base.ts @@ -0,0 +1,50 @@ +export type ActionParams = Record; + +export interface ToolResult { + output?: string; + error?: string; + base64Image?: string; + system?: string; +} + +export class ToolError extends Error { + constructor(message: string) { + super(message); + this.name = 'ToolError'; + } +} + +// Standard function tool definition for custom tools like Playwright +export interface FunctionToolDef { + name: string; + type: 'custom'; + input_schema: { + type: 'object'; + properties: Record; + required?: string[]; + }; +} + +// Computer tool definition (matches Anthropic's built-in computer tool format) +export interface ComputerToolDef { + name: string; + type: 'computer_20241022' | 'computer_20250124'; + display_width_px: number; + display_height_px: number; + display_number: null; +} + +// Union type for all possible tool definitions +export type ComputerUseToolDef = ComputerToolDef | FunctionToolDef; + +// Simple base interface for all tools +export interface ComputerUseTool { + name: string; + toParams(): ComputerUseToolDef; + call(params: Record): Promise; +} \ No newline at end of file diff --git a/tools/types/computer.ts b/tools/types/computer.ts index f4b061f..cf42f02 100644 --- a/tools/types/computer.ts +++ b/tools/types/computer.ts @@ -1,3 +1,5 @@ +import type { ActionParams, ComputerToolDef, ComputerUseTool, ToolResult } from "./base"; + export enum Action { // Mouse actions MOUSE_MOVE = 'mouse_move', @@ -31,8 +33,7 @@ export type ScrollDirection = 'up' | 'down' | 'left' | 'right'; export type Coordinate = [number, number]; export type Duration = number; -export interface ActionParams { - action: Action; +export type ComputerActionParams = ActionParams & { action: Action; text?: string; coordinate?: Coordinate; scrollDirection?: ScrollDirection; @@ -43,22 +44,9 @@ export interface ActionParams { [key: string]: Action | string | Coordinate | ScrollDirection | number | Duration | undefined; } -export interface ToolResult { - output?: string; - error?: string; - base64Image?: string; - system?: string; -} - -export interface BaseAnthropicTool { +export interface BaseComputerTool extends ComputerUseTool { name: string; apiType: string; - toParams(): ActionParams; -} - -export class ToolError extends Error { - constructor(message: string) { - super(message); - this.name = 'ToolError'; - } -} \ No newline at end of file + toParams(): ComputerToolDef; + call(params: ComputerActionParams): Promise; +} \ No newline at end of file diff --git a/tools/utils/validator.ts b/tools/utils/validator.ts index b8522c8..c00cc9f 100644 --- a/tools/utils/validator.ts +++ b/tools/utils/validator.ts @@ -1,5 +1,6 @@ -import { Action, ToolError } from '../types/computer'; -import type { ActionParams, Coordinate, Duration } from '../types/computer'; +import { ToolError, type ActionParams } from '../types/base'; +import { Action } from '../types/computer'; +import type { Coordinate, Duration } from '../types/computer'; export class ActionValidator { static validateText(text: string | undefined, required: boolean, action: string): void { diff --git a/types/base.ts b/types/base.ts new file mode 100644 index 0000000..fdef4b9 --- /dev/null +++ b/types/base.ts @@ -0,0 +1,50 @@ +export type ActionParams = Record; + +export interface ToolResult { + output?: string; + error?: string; + base64Image?: string; + system?: string; +} + +export class ToolError extends Error { + constructor(message: string) { + super(message); + this.name = 'ToolError'; + } +} + +// Standard function tool definition for custom tools like Playwright +export interface FunctionToolDef { + name: string; + type: 'custom'; + input_schema: { + type: 'object'; + properties: Record; + required?: string[]; + }; +} + +// Computer tool definition (matches Anthropic's built-in computer tool format) +export interface ComputerToolDef { + name: string; + type: 'computer_20241022' | 'computer_20250124'; + display_width_px: number; + display_height_px: number; + display_number: null; +} + +// Union type for all possible tool definitions +export type ComputerUseToolDef = ComputerToolDef | FunctionToolDef; + +// Simple base interface for all tools +export interface ComputerUseTool { + name: string; + toParams(): ComputerUseToolDef; + call(params: Record): Promise; +} \ No newline at end of file