From fd0af2a1e9899d028f205eec0fff50790d1faf88 Mon Sep 17 00:00:00 2001 From: Alex Makeev Date: Thu, 19 Feb 2026 04:24:41 +0000 Subject: [PATCH] fix: sanitize lone Unicode surrogates in YAML serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lone surrogates (U+D800-U+DFFF) in DOM textContent are valid in JavaScript strings but produce invalid JSON when serialized. yamlEscapeValueIfNeeded() escapes control characters but does not handle lone surrogates, causing "no low surrogate in string" errors in MCP clients. Apply the same toWellFormed() pattern already used in cli/driver.ts for non-JavaScript language bindings. When toWellFormed() is unavailable, the string passes through unchanged — consistent with driver.ts behavior. Note: a regex fallback for environments without toWellFormed() is possible: str.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])/g, '\uFFFD') .replace(/(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '\uFFFD') { + // Sanitize lone surrogates (valid in JS, invalid in JSON per RFC 8259 §8.2). + // Same approach as cli/driver.ts for non-JS language bindings. + const sanitized = (str as any).toWellFormed ? (str as any).toWellFormed() : str; + if (!yamlStringNeedsQuotes(sanitized)) + return sanitized; + return '"' + sanitized.replace(/[\\"\x00-\x1f\x7f-\x9f]/g, c => { switch (c) { case '\\': return '\\\\'; diff --git a/packages/playwright-core/src/utils/isomorphic/yaml.ts b/packages/playwright-core/src/utils/isomorphic/yaml.ts index e79cf58474069..5e4db22937ab6 100644 --- a/packages/playwright-core/src/utils/isomorphic/yaml.ts +++ b/packages/playwright-core/src/utils/isomorphic/yaml.ts @@ -21,9 +21,12 @@ export function yamlEscapeKeyIfNeeded(str: string): string { } export function yamlEscapeValueIfNeeded(str: string): string { - if (!yamlStringNeedsQuotes(str)) - return str; - return '"' + str.replace(/[\\"\x00-\x1f\x7f-\x9f]/g, c => { + // Sanitize lone surrogates (valid in JS, invalid in JSON per RFC 8259 §8.2). 
+ // Same approach as cli/driver.ts for non-JS language bindings. + const sanitized = (str as any).toWellFormed ? (str as any).toWellFormed() : str; + if (!yamlStringNeedsQuotes(sanitized)) + return sanitized; + return '"' + sanitized.replace(/[\\"\x00-\x1f\x7f-\x9f]/g, c => { switch (c) { case '\\': return '\\\\'; diff --git a/tests/mcp/snapshot-unicode.spec.ts b/tests/mcp/snapshot-unicode.spec.ts new file mode 100644 index 0000000000000..fdb738576d7f4 --- /dev/null +++ b/tests/mcp/snapshot-unicode.spec.ts @@ -0,0 +1,67 @@ +/** + * Copyright (c) Microsoft Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { test, expect } from './fixtures'; + +test('should handle lone high surrogate in snapshot', async ({ client, server }) => { + server.setContent('/', ` +
+ + `, 'text/html'); + + const response = await client.callTool({ + name: 'browser_navigate', + arguments: { + url: server.PREFIX, + }, + }); + + expect(response).toHaveResponse({ + snapshot: expect.any(String), + }); + + // Lone surrogates should be replaced with U+FFFD (replacement character) + expect(response.content[0].text).toContain('\uFFFD'); +}); + +test('should handle lone low surrogate in snapshot', async ({ client, server }) => { + server.setContent('/', ` +
+ + `, 'text/html'); + + const response = await client.callTool({ + name: 'browser_navigate', + arguments: { + url: server.PREFIX, + }, + }); + + expect(response).toHaveResponse({ + snapshot: expect.any(String), + }); + + // Lone surrogates should be replaced with U+FFFD + expect(response.content[0].text).toContain('\uFFFD'); +});