From faf0047fbb07e04448ee95228a15a4d337b55154 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Wed, 11 Feb 2026 13:20:07 +0000 Subject: [PATCH] docs(ui-automation): prefer label/id targets for tap guidance Recommend accessibility targeting (id/label) before coordinates for ui-automation tap interactions. Update tap tool schema and manifest descriptions, validation guidance, and XcodeBuildMCP skill docs to keep guidance consistent across MCP and CLI usage. Co-Authored-By: Claude --- docs/TOOLS-CLI.md | 4 ++-- docs/TOOLS.md | 4 ++-- manifests/tools/tap.yaml | 2 +- skills/xcodebuildmcp-cli/SKILL.md | 3 +++ skills/xcodebuildmcp/SKILL.md | 4 ++-- src/mcp/tools/ui-automation/tap.ts | 30 +++++++++++++++++++++++++----- 6 files changed, 35 insertions(+), 12 deletions(-) diff --git a/docs/TOOLS-CLI.md b/docs/TOOLS-CLI.md index a1eb08c8..e3b74209 100644 --- a/docs/TOOLS-CLI.md +++ b/docs/TOOLS-CLI.md @@ -164,7 +164,7 @@ XcodeBuildMCP provides 71 canonical tools organized into 13 workflow groups. - `screenshot` - Defined in iOS Simulator Development workflow. - `snapshot-ui` - Defined in iOS Simulator Development workflow. - `swipe` - Swipe between points. -- `tap` - Tap coordinate or element. +- `tap` - Tap UI element by accessibility id/label (recommended) or coordinates as fallback. - `touch` - Touch down/up at coords. - `type-text` - Type text. @@ -187,4 +187,4 @@ XcodeBuildMCP provides 71 canonical tools organized into 13 workflow groups. --- -*This documentation is automatically generated by `scripts/update-tools-docs.ts` from the tools manifest. Last updated: 2026-02-08T12:09:33.648Z UTC* +*This documentation is automatically generated by `scripts/update-tools-docs.ts` from the tools manifest. Last updated: 2026-02-11T13:12:19.881Z UTC* diff --git a/docs/TOOLS.md b/docs/TOOLS.md index 71950ad7..be04267f 100644 --- a/docs/TOOLS.md +++ b/docs/TOOLS.md @@ -172,7 +172,7 @@ This document lists MCP tool names as exposed to MCP clients. 
XcodeBuildMCP prov - `screenshot` - Defined in iOS Simulator Development workflow. - `snapshot_ui` - Defined in iOS Simulator Development workflow. - `swipe` - Swipe between points. -- `tap` - Tap coordinate or element. +- `tap` - Tap UI element by accessibility id/label (recommended) or coordinates as fallback. - `touch` - Touch down/up at coords. - `type_text` - Type text. @@ -202,4 +202,4 @@ This document lists MCP tool names as exposed to MCP clients. XcodeBuildMCP prov --- -*This documentation is automatically generated by `scripts/update-tools-docs.ts` from the tools manifest. Last updated: 2026-02-08T12:09:33.648Z UTC* +*This documentation is automatically generated by `scripts/update-tools-docs.ts` from the tools manifest. Last updated: 2026-02-11T13:12:19.881Z UTC* diff --git a/manifests/tools/tap.yaml b/manifests/tools/tap.yaml index f5ccd719..eda27341 100644 --- a/manifests/tools/tap.yaml +++ b/manifests/tools/tap.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/tap names: mcp: tap cli: tap -description: Tap coordinate or element. +description: Tap UI element by accessibility id/label (recommended) or coordinates as fallback. 
annotations: title: Tap destructiveHint: true diff --git a/skills/xcodebuildmcp-cli/SKILL.md b/skills/xcodebuildmcp-cli/SKILL.md index d37d3e20..9170d6ea 100644 --- a/skills/xcodebuildmcp-cli/SKILL.md +++ b/skills/xcodebuildmcp-cli/SKILL.md @@ -112,6 +112,9 @@ Snapshot UI accessibility tree, tap/swipe/type, and capture screenshots: ```bash xcodebuildmcp ui-automation snapshot-ui --simulator-id SIMULATOR_UDID +xcodebuildmcp ui-automation tap --simulator-id SIMULATOR_UDID --label "Submit" +xcodebuildmcp ui-automation tap --simulator-id SIMULATOR_UDID --id "SubmitButton" +# Coordinate fallback when label/id is unavailable xcodebuildmcp ui-automation tap --simulator-id SIMULATOR_UDID --x 200 --y 400 xcodebuildmcp ui-automation type-text --simulator-id SIMULATOR_UDID --text "hello" xcodebuildmcp ui-automation screenshot --simulator-id SIMULATOR_UDID --return-format path diff --git a/skills/xcodebuildmcp/SKILL.md b/skills/xcodebuildmcp/SKILL.md index 5fbf388e..54895136 100644 --- a/skills/xcodebuildmcp/SKILL.md +++ b/skills/xcodebuildmcp/SKILL.md @@ -152,11 +152,11 @@ Before you call any other tools, you **must** call `session_show_defaults` to sh - `screenshot` - Capture screenshot. - `snapshot_ui` - - Print view hierarchy with precise view coordinates (x, y, width, height) for visible elements. + - Print view hierarchy with element ids/labels and precise coordinates (x, y, width, height) for visible elements. - `swipe` - Swipe between points. - `tap` - - Tap coordinate or element. + - Tap UI element by accessibility id/label (recommended) or coordinates as fallback. - `touch` - Touch down/up at coords. 
- `type_text` diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 5ba776e5..f033b592 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -28,10 +28,30 @@ export interface AxeHelpers { // Define schema as ZodObject const baseTapSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate must be an integer' }).optional(), - y: z.number().int({ message: 'Y coordinate must be an integer' }).optional(), - id: z.string().min(1, { message: 'Id must be non-empty' }).optional(), - label: z.string().min(1, { message: 'Label must be non-empty' }).optional(), + x: z + .number() + .int({ message: 'X coordinate must be an integer' }) + .optional() + .describe( + 'Fallback tap X coordinate. Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', + ), + y: z + .number() + .int({ message: 'Y coordinate must be an integer' }) + .optional() + .describe( + 'Fallback tap Y coordinate. Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', + ), + id: z + .string() + .min(1, { message: 'Id must be non-empty' }) + .optional() + .describe('Recommended tap target: accessibility element id (AXUniqueId).'), + label: z + .string() + .min(1, { message: 'Label must be non-empty' }) + .optional() + .describe('Recommended when unique: accessibility label (AXLabel).'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) @@ -79,7 +99,7 @@ const tapSchema = baseTapSchema.superRefine((values, ctx) => { ctx.addIssue({ code: z.ZodIssueCode.custom, path: ['x'], - message: 'Provide x/y coordinates or an element id/label.', + message: 'Provide an element id/label (recommended) or x/y coordinates as fallback.', }); } });