diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..b6da653 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,166 @@ +name: Release Package + +on: + workflow_dispatch: + inputs: + version_type: + description: 'Version bump type' + required: true + default: 'patch' + type: choice + options: + - patch + - minor + - major + prerelease: + description: 'Create as prerelease' + required: false + default: false + type: boolean + +jobs: + release: + runs-on: ubuntu-latest + permissions: + contents: write + packages: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Bun + uses: oven-sh/setup-bun@v1 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Run linter + run: bun run lint + + - name: Build package + run: bun run build + + - name: Verify build output + run: | + echo "Checking dist folder contents:" + ls -la dist/ || echo "dist folder not found" + echo "Checking if main entry point exists:" + ls -la dist/index.js || echo "dist/index.js not found" + + - name: Configure Git + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + + - name: Get current version + id: current_version + run: | + CURRENT_VERSION=$(node -p "require('./package.json').version") + echo "current=$CURRENT_VERSION" >> $GITHUB_OUTPUT + + - name: Bump version + id: version + run: | + # Install semver for version bumping + npm install -g semver + + CURRENT_VERSION="${{ steps.current_version.outputs.current }}" + NEW_VERSION=$(semver -i ${{ github.event.inputs.version_type }} $CURRENT_VERSION) + + echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT + echo "tag=v$NEW_VERSION" >> $GITHUB_OUTPUT + + # Update package.json + npm version $NEW_VERSION --no-git-tag-version + + - name: Update package.json and commit + run: | + git add package.json + git 
add -f dist/ + git commit -m "chore: bump version to ${{ steps.version.outputs.new_version }} with built files" + git push origin main + + - name: Create Git tag + run: | + git tag ${{ steps.version.outputs.tag }} + git push origin ${{ steps.version.outputs.tag }} + + - name: Generate changelog + id: changelog + run: | + # Get commits since last tag + LAST_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "") + + if [ -z "$LAST_TAG" ]; then + COMMITS=$(git log --pretty=format:"- %s (%h)" --no-merges) + else + COMMITS=$(git log ${LAST_TAG}..HEAD --pretty=format:"- %s (%h)" --no-merges) + fi + + # Create changelog + CHANGELOG="## What's Changed"$'\n\n'"$COMMITS" + + # Handle multiline output for GitHub Actions + { + echo 'changelog<> $GITHUB_OUTPUT + + - name: Create distribution archive + run: | + if [ -d "dist" ]; then + echo "Creating distribution archive from dist folder" + cd dist + zip -r ../dist.zip . + cd .. + else + echo "Warning: dist folder not found, creating empty archive" + touch empty.txt + zip dist.zip empty.txt + rm empty.txt + fi + + - name: Create GitHub Release + id: create_release + uses: softprops/action-gh-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.version.outputs.tag }} + name: Release ${{ steps.version.outputs.tag }} + body: | + # Release ${{ steps.version.outputs.tag }} + + ${{ steps.changelog.outputs.changelog }} + + ## Installation + + ### NPM/Yarn/Bun + ```bash + npm install @onkernel/cu-playwright-ts@${{ steps.version.outputs.new_version }} + # or + yarn add @onkernel/cu-playwright-ts@${{ steps.version.outputs.new_version }} + # or + bun add @onkernel/cu-playwright-ts@${{ steps.version.outputs.new_version }} + ``` + + ### GitHub Dependency + ```json + { + "dependencies": { + "@onkernel/cu-playwright-ts": "github:bigboateng/cu-playwright-ts#${{ steps.version.outputs.tag }}" + } + } + ``` + draft: false + prerelease: ${{ github.event.inputs.prerelease }} + files: | + dist.zip + 
package.json \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9325515..bd4b8cc 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,7 @@ coverage/ # Misc .cache/ .temp/ -.tmp/ \ No newline at end of file +.tmp/ + +# Examples with sensitive data +example-beam-benefits.ts \ No newline at end of file diff --git a/README.md b/README.md index ecc719c..7620597 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,67 @@ # Computer Use Playwright SDK +> **Fork Notice**: This is a fork of [@onkernel/cu-playwright-ts](https://github.com/onkernel/cu-playwright-ts) with additional custom features. + A TypeScript SDK that combines Anthropic's Computer Use capabilities with Playwright for browser automation tasks. This SDK provides a clean, type-safe interface for automating browser interactions using Claude's computer use abilities. +## 🆕 Additional Features in This Fork + +This fork extends the original SDK with powerful new capabilities: + +### 🔗 URL Extraction Tool + +This fork includes a powerful custom URL extraction tool that intelligently finds and extracts URLs from elements on the page using visible text. This feature is **unique to this fork** and not available in the original SDK. + +#### How It Works + +The agent automatically uses the URL extraction tool when you ask for URLs by visible text: + +```typescript +// Simple URL extraction - just ask naturally! +const url = await agent.execute( + 'Extract the URL from the "Learn More" link' +); + +// Extract from article titles +const articleUrl = await agent.execute( + 'Get the URL from the article titled "Introduction to AI"' +); + +// Extract multiple URLs with structured output +const urls = await agent.execute( + 'Extract URLs from the top 3 navigation links', + z.array(z.object({ + linkText: z.string(), + url: z.string(), + })) +); +``` + +#### Advanced Capabilities + +**Smart Search Strategies** (prioritized in order): +1. 
**Exact text matching** - Finds elements containing the exact visible text +2. **Partial text matching** - Matches text within larger content blocks +3. **Anchor tag detection** - Locates `<a>` tags containing the text +4. **CSS selector fallback** - Direct element selection if text is a valid selector +5. **Clickable element search** - Finds interactive elements with the text +6. **URL pattern extraction** - Detects URLs directly within text content + +**Technical Features**: +- **Computer Use optimized** - Works seamlessly with Claude's visual perception +- **Multiple HTML structures** - Handles complex nested elements and dynamic content +- **Automatic URL normalization** - Converts relative to absolute URLs +- **Smart error handling** - Provides helpful feedback when elements aren't found +- **Logging and debugging** - Built-in console logging for troubleshooting + +**Best Practices**: +- Use the exact visible text you can see on the page +- For buttons or links, use their label text (e.g., "Download", "Read More", "View Details") +- For articles or stories, use their title text +- The tool will automatically handle finding the associated URL + +--- + ## Features - 🤖 **Simple API**: Single `ComputerUseAgent` class for all computer use tasks @@ -166,6 +226,8 @@ const result = await agent.execute( ); ``` + + ## Environment Setup 1. 
**Anthropic API Key**: Set your API key as an environment variable: diff --git a/examples/example-url-extraction.ts b/examples/example-url-extraction.ts new file mode 100644 index 0000000..3725670 --- /dev/null +++ b/examples/example-url-extraction.ts @@ -0,0 +1,52 @@ +import { chromium } from 'playwright'; +import { ComputerUseAgent } from './index'; +import { z } from 'zod'; +async function urlExtractionExample(): Promise { + const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; + if (!ANTHROPIC_API_KEY) { + throw new Error('ANTHROPIC_API_KEY environment variable is required'); + } + + const browser = await chromium.launch({ headless: false }); + const page = await browser.newPage(); + + try { + console.log('=== URL Extraction Examples ===\n'); + + // Example 1: Extract URL from a specific story on Hacker News + await page.goto("https://news.ycombinator.com/"); + + const agent = new ComputerUseAgent({ + apiKey: ANTHROPIC_API_KEY, + page, + }); + + console.log('1. Extracting URL from the top story...'); + const result = await agent.execute( + 'Extract the URL from the top story on this page' + , + z.object({ + url: z.string(), + }) + ); + console.log('Top story URL:', JSON.stringify(result, null, 2)); + + + } catch (error) { + console.error('Error in URL extraction example:', error); + } finally { + await browser.close(); + } +} + +// Run examples +async function runExamples(): Promise { + console.log('Running URL Extraction Examples...\n'); + + await urlExtractionExample(); + // await structuredUrlExtractionExample(); + + console.log('\nAll examples completed!'); +} + +runExamples().catch(console.error); \ No newline at end of file diff --git a/example.ts b/examples/example.ts similarity index 100% rename from example.ts rename to examples/example.ts diff --git a/loop.ts b/loop.ts index 2cb5fe5..d3761d4 100644 --- a/loop.ts +++ b/loop.ts @@ -6,7 +6,7 @@ import { ToolCollection, DEFAULT_TOOL_VERSION, TOOL_GROUPS_BY_VERSION, type Tool import { responseToParams, 
maybeFilterToNMostRecentImages, injectPromptCaching, PROMPT_CACHING_BETA_FLAG } from './utils/message-processing'; import { makeApiToolResult } from './utils/tool-results'; import { ComputerTool20241022, ComputerTool20250124 } from './tools/computer'; -import type { ActionParams } from './tools/types/computer'; +import { PlaywrightTool } from './tools/playwright'; import { Action } from './tools/types/computer'; // System prompt optimized for the environment @@ -19,10 +19,31 @@ const SYSTEM_PROMPT = ` * Either that, or make sure you scroll down to see everything before deciding something isn't available. * When using your computer function calls, they take a while to run and send back to you. * Where possible/feasible, try to chain multiple of these calls all into one function calls request. -* The current date is ${DateTime.now().toFormat('EEEE, MMMM d, yyyy')}. -* After each step, take a screenshot and carefully evaluate if you have achieved the right outcome. -* Explicitly show your thinking: "I have evaluated step X..." If not correct, try again. -* Only when you confirm a step was executed correctly should you move on to the next one. +* The current date is ${DateTime.now().toFormat('EEEE, MMMM d, yyyy')} + +PLAYWRIGHT TOOL: +* You have access to a 'playwright' tool that provides browser automation capabilities +* Currently supports the 'extract_url' method for extracting URLs from page elements + +HOW TO USE EXTRACT_URL: +1. First, take a screenshot to see what's on the page +2. Identify the visible text of the link/button you want to extract the URL from +3. Call the playwright tool with this exact format: + { + "name": "playwright", + "input": { + "method": "extract_url", + "args": ["exact visible text here"] + } + } +4. 
The tool will find the element containing that text and extract its URL + +EXAMPLES: +- To get URL from a link that says "Read more": args: ["Read more"] +- To get URL from a story titled "New AI breakthrough": args: ["New AI breakthrough"] +- To get URL from a button labeled "Download PDF": args: ["Download PDF"] + +IMPORTANT: Always use the EXACT text you can see on the page as the argument @@ -41,7 +62,9 @@ interface ExtraBodyConfig { } interface ToolUseInput extends Record { - action: Action; + action?: Action; + method?: string; + args?: string[]; } export async function samplingLoop({ @@ -69,8 +92,16 @@ export async function samplingLoop({ }): Promise { const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION; const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion]; - const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage))); + // Create computer tools + const computerTools = toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage)); + + // Create playwright tool + const playwrightTool = new PlaywrightTool(playwrightPage); + + // Combine all tools + const toolCollection = new ToolCollection(...computerTools, playwrightTool); + const system: BetaTextBlock = { type: 'text', text: `${SYSTEM_PROMPT}${systemPromptSuffix ? 
' ' + systemPromptSuffix : ''}`, @@ -124,6 +155,9 @@ export async function samplingLoop({ const loggableContent = responseParams.map(block => { if (block.type === 'tool_use') { + // Deep log the full input including arrays + console.log(`\n=== TOOL USE: ${block.name} ===`); + console.log('Full input:', JSON.stringify(block.input, null, 2)); return { type: 'tool_use', name: block.name, @@ -153,27 +187,19 @@ export async function samplingLoop({ for (const contentBlock of responseParams) { if (contentBlock.type === 'tool_use' && contentBlock.name && contentBlock.input && typeof contentBlock.input === 'object') { const input = contentBlock.input as ToolUseInput; - if ('action' in input && typeof input.action === 'string') { - hasToolUse = true; - const toolInput: ActionParams = { - action: input.action as Action, - ...Object.fromEntries( - Object.entries(input).filter(([key]) => key !== 'action') - ) - }; - - try { - const result = await toolCollection.run( - contentBlock.name, - toolInput - ); + hasToolUse = true; + + try { + const result = await toolCollection.run( + contentBlock.name, + input + ); - const toolResult = makeApiToolResult(result, contentBlock.id!); - toolResultContent.push(toolResult); - } catch (error) { + const toolResult = makeApiToolResult(result, contentBlock.id!); + toolResultContent.push(toolResult); + } catch (error) { console.error(error); throw error; - } } } } diff --git a/package.json b/package.json index 7bd604e..66cf9d6 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,7 @@ "license": "MIT", "repository": { "type": "git", - "url": "" + "url": "https://github.com/bigboateng/cu-playwright-ts.git" }, "peerDependencies": { "typescript": "^5", diff --git a/tools/collection.ts b/tools/collection.ts index 45f3afe..d352424 100644 --- a/tools/collection.ts +++ b/tools/collection.ts @@ -1,6 +1,8 @@ import { ComputerTool20241022, ComputerTool20250124 } from './computer'; +import type { PlaywrightActionParams } from './playwright'; import { 
Action } from './types/computer'; -import type { ActionParams, ToolResult } from './types/computer'; +import type { ActionParams } from './types/computer'; +import type { ComputerUseTool, ComputerUseToolDef, ToolResult } from './types/base'; export type ToolVersion = 'computer_use_20250124' | 'computer_use_20241022' | 'computer_use_20250429'; @@ -36,26 +38,40 @@ export const TOOL_GROUPS_BY_VERSION: Record = Object.fro ) as Record; export class ToolCollection { - private tools: Map; + private tools: Map; - constructor(...tools: (ComputerTool20241022 | ComputerTool20250124)[]) { + constructor(...tools: ComputerUseTool[]) { this.tools = new Map(tools.map(tool => [tool.name, tool])); } - toParams(): ActionParams[] { + toParams(): ComputerUseToolDef[] { return Array.from(this.tools.values()).map(tool => tool.toParams()); } - async run(name: string, toolInput: { action: Action } & Record): Promise { + async run(name: string, toolInput: Record): Promise { const tool = this.tools.get(name); if (!tool) { throw new Error(`Tool ${name} not found`); } - if (!Object.values(Action).includes(toolInput.action)) { - throw new Error(`Invalid action ${toolInput.action} for tool ${name}`); - } + console.log(`\n=== Running tool: ${name} ===`); + console.log('Input:', JSON.stringify(toolInput, null, 2)); - return await tool.call(toolInput); + // Handle different tool types based on their expected input structure + if (name === 'playwright') { + // Validate playwright tool input + const playwrightInput = toolInput as PlaywrightActionParams; + if (!playwrightInput.method || !Array.isArray(playwrightInput.args)) { + throw new Error(`Invalid input for playwright tool: method and args are required`); + } + return await tool.call(toolInput); + } else { + // Validate computer tool input + const computerInput = toolInput as ActionParams; + if (!computerInput.action || !Object.values(Action).includes(computerInput.action)) { + throw new Error(`Invalid action ${computerInput.action} for tool 
${name}`); + } + return await tool.call(toolInput); + } } } \ No newline at end of file diff --git a/tools/computer.ts b/tools/computer.ts index 61197e7..da87fe5 100644 --- a/tools/computer.ts +++ b/tools/computer.ts @@ -1,12 +1,13 @@ import type { Page } from 'playwright'; -import { Action, ToolError } from './types/computer'; -import type { ActionParams, BaseAnthropicTool, ToolResult } from './types/computer'; +import { Action } from './types/computer'; +import type { ActionParams } from './types/computer'; import { KeyboardUtils } from './utils/keyboard'; import { ActionValidator } from './utils/validator'; +import { ToolError, type ToolResult, type ComputerUseTool, type ComputerToolDef } from './types/base'; const TYPING_DELAY_MS = 12; -export class ComputerTool implements BaseAnthropicTool { +export class ComputerTool implements ComputerUseTool { name: 'computer' = 'computer'; protected page: Page; protected _screenshotDelay = 2.0; @@ -35,6 +36,7 @@ export class ComputerTool implements BaseAnthropicTool { Action.CURSOR_POSITION, Action.SCROLL, Action.WAIT, + Action.EXTRACT_URL, ]); constructor(page: Page, version: '20241022' | '20250124' = '20250124') { @@ -46,7 +48,7 @@ export class ComputerTool implements BaseAnthropicTool { return this.version === '20241022' ? 
'computer_20241022' : 'computer_20250124'; } - toParams(): ActionParams { + toParams(): ComputerToolDef { const params = { name: this.name, type: this.apiType, @@ -177,7 +179,7 @@ export class ComputerTool implements BaseAnthropicTool { const scrollDirection = scrollDirectionParam || kwargs.scroll_direction; const scrollAmountValue = scrollAmount || scroll_amount; - if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection)) { + if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection as string)) { throw new ToolError(`Scroll direction "${scrollDirection}" must be 'up', 'down', 'left', or 'right'`); } if (typeof scrollAmountValue !== 'number' || scrollAmountValue < 0) { diff --git a/tools/playwright.ts b/tools/playwright.ts new file mode 100644 index 0000000..7bcbb28 --- /dev/null +++ b/tools/playwright.ts @@ -0,0 +1,220 @@ +import type { Page } from 'playwright'; +import { ToolError, type ToolResult, type ComputerUseTool, type FunctionToolDef, type ActionParams } from './types/base'; + +// Supported Playwright methods +const SUPPORTED_METHODS = ['extract_url'] as const; +type SupportedMethod = typeof SUPPORTED_METHODS[number]; + +export type PlaywrightActionParams = ActionParams & { + method: string; + args: string[]; +} + +export class PlaywrightTool implements ComputerUseTool { + name: 'playwright' = 'playwright'; + protected page: Page; + + constructor(page: Page) { + this.page = page; + } + + toParams(): FunctionToolDef { + return { + name: this.name, + type: 'custom', + input_schema: { + type: 'object', + properties: { + method: { + type: 'string', + description: 'The playwright function to call.', + enum: SUPPORTED_METHODS, + }, + args: { + type: 'array', + description: 'The required arguments', + items: { + type: 'string', + description: 'The argument to pass to the function', + }, + }, + }, + required: ['method', 'args'], + }, + }; + } + + private validateMethod(method: string): method is SupportedMethod { + 
return SUPPORTED_METHODS.includes(method as SupportedMethod); + } + + private async executeExtractUrl(args: string[]): Promise { + if (args.length !== 1) { + throw new ToolError('extract_url method requires exactly one argument: the selector or text to find the element'); + } + + const selector = args[0]; + if (!selector || typeof selector !== 'string') { + throw new ToolError('Selector must be a non-empty string'); + } + + console.log(`\n=== Extract URL: Looking for text: "${selector}" ===`); + + try { + // Try multiple strategies to find the element and extract URL + let url: string | null = null; + let elementInfo: string = ''; + + // Strategy 1: Find element by exact or partial text match (prioritized since Computer Use sees text) + const textElement = await this.page.locator(`text="${selector}"`).first(); + const partialTextElement = await this.page.locator(`text=/.*${selector}.*/i`).first(); + + if (await textElement.count() > 0) { + // Check if the element itself has an href + url = await textElement.getAttribute('href'); + if (url) { + elementInfo = 'element with exact matching text'; + } else { + // Check if it's wrapped in an anchor + const parentAnchor = await textElement.locator('xpath=ancestor::a[1]').first(); + if (await parentAnchor.count() > 0) { + url = await parentAnchor.getAttribute('href'); + elementInfo = 'parent anchor of element with exact text'; + } else { + // Check if there's a sibling or nearby anchor + const nearbyAnchor = await textElement.locator('xpath=following-sibling::a[1] | preceding-sibling::a[1]').first(); + if (await nearbyAnchor.count() > 0) { + url = await nearbyAnchor.getAttribute('href'); + elementInfo = 'anchor near element with exact text'; + } + } + } + } else if (await partialTextElement.count() > 0) { + // Try with partial match + url = await partialTextElement.getAttribute('href'); + if (url) { + elementInfo = 'element with partial matching text'; + } else { + const parentAnchor = await 
partialTextElement.locator('xpath=ancestor::a[1]').first(); + if (await parentAnchor.count() > 0) { + url = await parentAnchor.getAttribute('href'); + elementInfo = 'parent anchor of element with partial text match'; + } + } + } + + // Strategy 2: Look for anchor tags containing the text + if (!url) { + const anchorWithText = await this.page.locator(`a:has-text("${selector}")`).first(); + if (await anchorWithText.count() > 0) { + url = await anchorWithText.getAttribute('href'); + const text = await anchorWithText.textContent(); + elementInfo = `anchor tag with text: "${text?.trim() || 'N/A'}"`; + } + } + + // Strategy 3: Direct selector match (if user provides CSS selector) + if (!url && (selector.includes('.') || selector.includes('#') || selector.includes('[') || selector.includes('>'))) { + try { + const element = await this.page.locator(selector).first(); + if (await element.count() > 0) { + url = await element.getAttribute('href'); + if (!url) { + // Check for other URL attributes + url = await element.getAttribute('data-url') || + await element.getAttribute('data-href') || + await element.getAttribute('data-link'); + } + if (url) { + elementInfo = 'element matching CSS selector'; + } + } + } catch { + // Selector might be invalid, continue with other strategies + } + } + + // Strategy 4: Look for clickable elements with the text that might have onclick handlers + if (!url) { + const clickableElements = await this.page.locator(`button:has-text("${selector}"), [role="button"]:has-text("${selector}"), [role="link"]:has-text("${selector}")`).first(); + if (await clickableElements.count() > 0) { + // Check for data attributes that might contain URLs + url = await clickableElements.getAttribute('data-url') || + await clickableElements.getAttribute('data-href') || + await clickableElements.getAttribute('data-link') || + await clickableElements.getAttribute('data-target'); + if (url) { + elementInfo = 'clickable element with URL in data attribute'; + } + } + } + + // 
Strategy 5: Check if the text itself contains a URL + if (!url) { + const elements = await this.page.locator(`*:has-text("${selector}")`).all(); + for (const el of elements) { + const textContent = await el.textContent(); + if (textContent) { + // Look for URL patterns in text + const urlMatch = textContent.match(/https?:\/\/[^\s<>"{}|\\^`[\]]+/); + if (urlMatch) { + url = urlMatch[0]; + elementInfo = 'URL found in text content'; + break; + } + } + } + } + + if (!url) { + throw new ToolError(`Could not find any URL associated with text or selector: "${selector}". Try being more specific about the link text you can see on the page.`); + } + + // Normalize relative URLs to absolute + if (url.startsWith('/')) { + const baseUrl = new URL(this.page.url()); + url = `${baseUrl.origin}${url}`; + } else if (!url.startsWith('http')) { + // Handle protocol-relative URLs + if (url.startsWith('//')) { + const baseUrl = new URL(this.page.url()); + url = `${baseUrl.protocol}${url}`; + } else { + // Relative URL without leading slash + const baseUrl = new URL(this.page.url()); + url = new URL(url, baseUrl.href).href; + } + } + + return { + output: `Successfully extracted URL: ${url} (from ${elementInfo})`, + }; + } catch (error) { + if (error instanceof ToolError) { + throw error; + } + throw new ToolError(`Failed to extract URL for selector "${selector}": ${error}`); + } + } + + async call(params: PlaywrightActionParams): Promise { + const { method, args } = params as PlaywrightActionParams; + + if (!this.validateMethod(method)) { + throw new ToolError( + `Unsupported method: ${method}. 
Supported methods: ${SUPPORTED_METHODS.join(', ')}` + ); + } + + if (!Array.isArray(args)) { + throw new ToolError('args must be an array'); + } + + switch (method) { + case 'extract_url': + return await this.executeExtractUrl(args); + default: + throw new ToolError(`Method ${method} is not implemented`); + } + } +} \ No newline at end of file diff --git a/tools/types/base.ts b/tools/types/base.ts new file mode 100644 index 0000000..e0fba65 --- /dev/null +++ b/tools/types/base.ts @@ -0,0 +1,50 @@ +export type ActionParams = Record; + +export interface ToolResult { + output?: string; + error?: string; + base64Image?: string; + system?: string; +} + +export class ToolError extends Error { + constructor(message: string) { + super(message); + this.name = 'ToolError'; + } +} + +// Standard function tool definition for custom tools like Playwright +export interface FunctionToolDef { + name: string; + type: 'custom'; + input_schema: { + type: 'object'; + properties: Record; + required?: string[]; + }; +} + +// Computer tool definition (matches Anthropic's built-in computer tool format) +export interface ComputerToolDef { + name: string; + type: 'computer_20241022' | 'computer_20250124'; + display_width_px: number; + display_height_px: number; + display_number: null; +} + +// Union type for all possible tool definitions +export type ComputerUseToolDef = ComputerToolDef | FunctionToolDef; + +// Simple base interface for all tools +export interface ComputerUseTool { + name: string; + toParams(): ComputerUseToolDef; + call(params: Record): Promise; +} \ No newline at end of file diff --git a/tools/types/computer.ts b/tools/types/computer.ts index f4b061f..5b5c49d 100644 --- a/tools/types/computer.ts +++ b/tools/types/computer.ts @@ -20,13 +20,9 @@ export enum Action { CURSOR_POSITION = 'cursor_position', SCROLL = 'scroll', WAIT = 'wait', + EXTRACT_URL = 'extract_url', } -// For backward compatibility -export type Action_20241022 = Action; -export type Action_20250124 = 
Action; - -export type MouseButton = 'left' | 'right' | 'middle'; export type ScrollDirection = 'up' | 'down' | 'left' | 'right'; export type Coordinate = [number, number]; export type Duration = number; @@ -36,29 +32,15 @@ export interface ActionParams { text?: string; coordinate?: Coordinate; scrollDirection?: ScrollDirection; - scroll_amount?: number; scrollAmount?: number; + scroll_amount?: number; duration?: Duration; - key?: string; + // Allow additional properties for compatibility [key: string]: Action | string | Coordinate | ScrollDirection | number | Duration | undefined; } -export interface ToolResult { - output?: string; - error?: string; - base64Image?: string; - system?: string; -} - export interface BaseAnthropicTool { name: string; apiType: string; - toParams(): ActionParams; -} - -export class ToolError extends Error { - constructor(message: string) { - super(message); - this.name = 'ToolError'; - } + toParams(): Record; } \ No newline at end of file diff --git a/tools/utils/validator.ts b/tools/utils/validator.ts index b8522c8..dd09571 100644 --- a/tools/utils/validator.ts +++ b/tools/utils/validator.ts @@ -1,5 +1,6 @@ -import { Action, ToolError } from '../types/computer'; +import { Action } from '../types/computer'; import type { ActionParams, Coordinate, Duration } from '../types/computer'; +import { ToolError } from '../types/base'; export class ActionValidator { static validateText(text: string | undefined, required: boolean, action: string): void {