diff --git a/packages/playwright-core/src/tools/backend/response.ts b/packages/playwright-core/src/tools/backend/response.ts
index 6d6593303bdc3..f0b9266869ba8 100644
--- a/packages/playwright-core/src/tools/backend/response.ts
+++ b/packages/playwright-core/src/tools/backend/response.ts
@@ -151,7 +151,7 @@ export class Response {
     const content: (TextContent | ImageContent)[] = [
       {
         type: 'text',
-        text: redactText(text.join('\n')),
+        text: sanitizeUnicode(redactText(text.join('\n'))),
       }
     ];
 
@@ -265,6 +265,20 @@ function trimMiddle(text: string, maxLength: number) {
   return text.slice(0, Math.floor(maxLength / 2)) + '...' + text.slice(- 3 - Math.floor(maxLength / 2));
 }
 
+/**
+ * Returns `text` with every lone (unpaired) UTF-16 surrogate replaced by
+ * U+FFFD, so the result is always well-formed Unicode. Valid surrogate
+ * pairs (for example emoji) are returned unchanged.
+ *
+ * Uses a regex rather than String.prototype.toWellFormed(), which is an
+ * ES2024 addition and is not available on Node.js versions before 20.
+ */
+function sanitizeUnicode(text: string): string {
+  // With the `u` flag the pattern is matched per code point, so this class
+  // can only match surrogates that are not part of a valid pair.
+  return text.replace(/[\uD800-\uDFFF]/gu, '\uFFFD');
+}
+
 function parseSections(text: string): Map {
   const sections = new Map();
   const sectionHeaders = text.split(/^### /m).slice(1); // Remove empty first element
diff --git a/tests/mcp/unicode-serialization.spec.ts b/tests/mcp/unicode-serialization.spec.ts
new file mode 100644
index 0000000000000..f5bf825a47294
--- /dev/null
+++ b/tests/mcp/unicode-serialization.spec.ts
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { test, expect } from './fixtures';
+
+test.describe('unicode serialization', () => {
+  test.use({ mcpArgs: ['--no-sandbox'] });
+
+  test('handles lone surrogates in page content', async ({ client, server }) => {
+    server.setContent('/', `<body><script>document.body.append('Text with \\uD800 lone surrogate');</script></body>`, 'text/html'); // built in-page: UTF-8 on the wire cannot carry a lone surrogate
+
+    const result = await client.callTool({
+      name: 'browser_navigate',
+      arguments: { url: server.PREFIX },
+    });
+    expect(result.content[0].text).toContain('Page URL:');
+    expect(result.content[0].text).not.toMatch(/[\uD800-\uDFFF]/u); // with the `u` flag this only matches unpaired surrogates
+  });
+
+  test('preserves valid emoji and surrogate pairs', async ({ client, server }) => {
+    server.setContent('/', '<meta charset="utf-8">Valid emoji: 💀 skull and text', 'text/html');
+
+    const result = await client.callTool({
+      name: 'browser_navigate',
+      arguments: { url: server.PREFIX },
+    });
+
+    expect(result.content[0].text).toContain('💀');
+  });
+
+  test('handles console messages with lone surrogates', async ({ startClient, server }) => {
+    server.setContent('/', `<script>console.log('Lone surrogate: \\uD800');</script>`, 'text/html');
+
+    const { client } = await startClient({ args: ['--console-level=debug'] });
+
+    const result = await client.callTool({
+      name: 'browser_navigate',
+      arguments: { url: server.PREFIX },
+    });
+
+    expect(result.content[0].text).not.toMatch(/[\uD800-\uDFFF]/u); // fails on a missing text as well as on an unpaired surrogate
+  });
+});