From f620d5c7820397896e2214a0f3055614a676a57a Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Thu, 4 Jun 2026 22:57:31 +1000 Subject: [PATCH 1/2] Feed malformed client-side tool call args back to the model --- ui/src/pages/chat/useChat.ts | 28 +++- .../utils/__tests__/toolCallParser.test.ts | 154 ++++++++++++++++++ ui/src/pages/chat/utils/toolCallParser.ts | 86 ++++++---- ui/src/pages/chat/utils/toolExecutors.ts | 15 ++ 4 files changed, 248 insertions(+), 35 deletions(-) create mode 100644 ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index a3cc5a52..4c2e3373 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -1698,13 +1698,27 @@ export function useChat({ (item: { type: string }) => item.type === "function_call" ) as Array<{ type: string; call_id: string; name: string; arguments: string }>; if (functionCalls.length > 0) { - completedToolCalls = functionCalls.map((fc) => ({ - id: fc.call_id, // Use call_id as id since that's what we have - callId: fc.call_id, - name: fc.name, - status: "completed" as const, - arguments: JSON.parse(fc.arguments || "{}"), - })); + completedToolCalls = functionCalls.map((fc) => { + // Parse the model's arguments. An unparseable payload is + // marked invalid (not dropped, and without throwing — a + // throw here would abort the whole handler and strand every + // sibling call) so the tool loop feeds the error back. + let parsedArguments: Record = {}; + let invalid: string | undefined; + try { + parsedArguments = JSON.parse(fc.arguments || "{}") as Record; + } catch (err) { + invalid = err instanceof Error ? err.message : String(err); + } + return { + id: fc.call_id, // Use call_id as id since that's what we have + callId: fc.call_id, + name: fc.name, + status: "completed" as const, + arguments: parsedArguments, + ...(invalid ? { invalid } : {}), + }; + }); } } diff --git a/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts b/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts new file mode 100644 index 00000000..deca4559 --- /dev/null +++ b/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts @@ -0,0 +1,154 @@ +import { describe, it, expect } from "vitest"; + +import { + createToolCallTracker, + parseToolCallFromEvent, + invalidArgumentsText, +} from "../toolCallParser"; +import type { BaseSSEEvent } from "../toolCallParser"; + +/** + * Anthropic (and the gateway's other providers) emit `function_call` items + * whose item `id` (`fc_xxx`) differs from the provider `call_id` (`toolu_xxx`). + * The streaming argument events only carry `item_id` (== the item `id`), so the + * tracker must be keyed on the item id, not the call id, or every delta misses. + */ +const ITEM_ID = "fc_abc"; +const CALL_ID = "toolu_abc"; + +const added: BaseSSEEvent = { + type: "response.output_item.added", + output_index: 0, + item: { type: "function_call", id: ITEM_ID, call_id: CALL_ID, name: "web_search" }, +}; + +describe("parseToolCallFromEvent", () => { + it("matches streaming argument deltas keyed by item_id, not call_id", () => { + const tracker = createToolCallTracker(); + parseToolCallFromEvent(added, tracker); + + // Deltas carry `item_id` (the item id), never the provider call_id. + const delta = parseToolCallFromEvent( + { + type: "response.function_call_arguments.delta", + item_id: ITEM_ID, + output_index: 0, + delta: '{"query":', + }, + tracker + ); + expect(delta.type).toBe("tool_call_arguments_delta"); + + const more = parseToolCallFromEvent( + { + type: "response.function_call_arguments.delta", + item_id: ITEM_ID, + output_index: 0, + delta: '"rust"}', + }, + tracker + ); + expect(more.type).toBe("tool_call_arguments_delta"); + + // The buffer accumulated from the deltas (it would be empty if the lookup + // had keyed on call_id and missed). + expect(tracker.toolCalls.get(ITEM_ID)?.argumentsBuffer).toBe('{"query":"rust"}'); + }); + + it("preserves the provider call_id for building the continuation", () => { + const tracker = createToolCallTracker(); + const result = parseToolCallFromEvent(added, tracker); + expect(result.type).toBe("tool_call_added"); + const state = tracker.toolCalls.get(ITEM_ID); + expect(state?.id).toBe(ITEM_ID); + expect(state?.callId).toBe(CALL_ID); + }); + + it("marks a call invalid (not dropped) when its arguments fail to parse", () => { + const tracker = createToolCallTracker(); + parseToolCallFromEvent(added, tracker); + + // Malformed JSON arriving on the done event. + parseToolCallFromEvent( + { + type: "response.function_call_arguments.done", + item_id: ITEM_ID, + output_index: 0, + arguments: '{"query": ', + }, + tracker + ); + + const done = parseToolCallFromEvent( + { + type: "response.output_item.done", + output_index: 0, + item: { + type: "function_call", + id: ITEM_ID, + call_id: CALL_ID, + name: "web_search", + arguments: '{"query": ', + status: "completed", + }, + }, + tracker + ); + + // The call surfaces as complete (so the loop runs) and is flagged invalid + // rather than producing an `error` result that drops it silently. + expect(done.type).toBe("tool_call_complete"); + if (done.type === "tool_call_complete") { + expect(done.toolCall.invalid).toBeTruthy(); + expect(done.toolCall.callId).toBe(CALL_ID); + } + + // It is included in the completed set so the tool loop can feed back the + // error instead of ending the turn as a false "completed". + const completed = tracker.getCompletedToolCalls(); + expect(completed).toHaveLength(1); + expect(completed[0].invalid).toBeTruthy(); + }); + + it("parses well-formed arguments into a clean completed call", () => { + const tracker = createToolCallTracker(); + parseToolCallFromEvent(added, tracker); + parseToolCallFromEvent( + { + type: "response.function_call_arguments.done", + item_id: ITEM_ID, + output_index: 0, + arguments: '{"query": "rust"}', + }, + tracker + ); + const done = parseToolCallFromEvent( + { + type: "response.output_item.done", + output_index: 0, + item: { + type: "function_call", + id: ITEM_ID, + call_id: CALL_ID, + name: "web_search", + arguments: '{"query": "rust"}', + status: "completed", + }, + }, + tracker + ); + expect(done.type).toBe("tool_call_complete"); + if (done.type === "tool_call_complete") { + expect(done.toolCall.invalid).toBeUndefined(); + expect(done.toolCall.arguments).toEqual({ query: "rust" }); + } + }); +}); + +describe("invalidArgumentsText", () => { + it("mirrors the backend's invalid_arguments_text format", () => { + expect(invalidArgumentsText("web_search", "Unexpected end of JSON input")).toBe( + "Invalid arguments for tool `web_search`: Unexpected end of JSON input" + ); + }); +}); diff --git a/ui/src/pages/chat/utils/toolCallParser.ts b/ui/src/pages/chat/utils/toolCallParser.ts index 76e3a789..628f3686 100644 --- a/ui/src/pages/chat/utils/toolCallParser.ts +++ b/ui/src/pages/chat/utils/toolCallParser.ts @@ -143,6 +143,12 @@ export interface FileSearchToolCall { name: "file_search"; status: ToolCallStatus; arguments: FileSearchArguments; + /** + * Set when the model emitted this call but its `arguments` could not be + * parsed as JSON. The call is still surfaced (never dropped) so the tool + * loop can feed the error back to the model. See {@link invalidArgumentsText}. + */ + invalid?: string; } /** @@ -164,6 +170,12 @@ export interface GenericToolCall { name: string; status: ToolCallStatus; arguments: Record; + /** + * Set when the model emitted this call but its `arguments` could not be + * parsed as JSON. The call is still surfaced (never dropped) so the tool + * loop can feed the error back to the model. See {@link invalidArgumentsText}. + */ + invalid?: string; } /** @@ -191,6 +203,11 @@ export interface ToolCallState { parsedArguments?: Record; /** Error message if status is "failed" */ error?: string; + /** + * Set when `arguments` could not be parsed as JSON. The call still counts + * as completed so the tool loop feeds the error back instead of dropping it. + */ + invalid?: string; } /** @@ -246,7 +263,7 @@ export function createToolCallTracker(): ToolCallTracker { getCompletedToolCalls(): ParsedToolCall[] { return Array.from(toolCalls.values()) - .filter((tc) => tc.status === "completed" && tc.parsedArguments) + .filter((tc) => tc.status === "completed" && (tc.parsedArguments || tc.invalid)) .map((tc) => createParsedToolCall(tc)); }, @@ -274,6 +291,15 @@ function mapToolNameToType(name: string): ToolCallType { } } +/** + * Standard human-readable message for an unparseable tool call, fed back to + * the model in the `function_call_output` so it can correct the call. Mirrors + * the backend's `invalid_arguments_text` (src/services/server_tools/mod.rs). + */ +export function invalidArgumentsText(toolName: string, error: string): string { + return `Invalid arguments for tool \`${toolName}\`: ${error}`; +} + /** * Create a ParsedToolCall from ToolCallState */ @@ -283,6 +309,7 @@ function createParsedToolCall(state: ToolCallState): ParsedToolCall { callId: state.callId, name: state.name, status: state.status, + ...(state.invalid ? { invalid: state.invalid } : {}), }; if (state.name === "file_search") { @@ -335,11 +362,16 @@ export function parseToolCallFromEvent(event: BaseSSEEvent, tracker: ToolCallTra return { type: "ignored" }; } - // Use call_id as the canonical identifier - it's required for matching - // function_call_output back to the original call, so it's always present + // Key the tracker by the item id (`fc_xxx`): that's what the streaming + // `function_call_arguments.delta`/`.done` events carry as `item_id`. + // The provider's `call_id` (`toolu_xxx`/`call_xxx`) differs from the item + // id, so keying on it here would make every argument delta/done miss the + // entry. `callId` is kept as a field for matching `function_call_output` + // back to the original call when building the continuation. + const itemId = addedEvent.item.id; const callId = addedEvent.item.call_id ?? addedEvent.item.id; const state: ToolCallState = { - id: callId, + id: itemId, callId: callId, name: addedEvent.item.name ?? "unknown", outputIndex: addedEvent.output_index, @@ -347,7 +379,7 @@ export function parseToolCallFromEvent(event: BaseSSEEvent, tracker: ToolCallTra status: "pending", }; - tracker.toolCalls.set(callId, state); + tracker.toolCalls.set(itemId, state); return { type: "tool_call_added", toolCall: state }; } @@ -386,14 +418,13 @@ export function parseToolCallFromEvent(event: BaseSSEEvent, tracker: ToolCallTra // Use the final arguments from the event (more reliable than buffer) state.argumentsBuffer = doneEvent.arguments; - // Parse the arguments JSON + // Parse the arguments JSON. An unparseable payload is recorded as + // invalid (not dropped) so the tool loop can feed the error back to the + // model on `output_item.done`; the call is surfaced either way. try { state.parsedArguments = JSON.parse(doneEvent.arguments) as Record; - } catch { - return { - type: "error", - message: `Failed to parse arguments for tool call ${state.id}: ${doneEvent.arguments}`, - }; + } catch (err) { + state.invalid = err instanceof Error ? err.message : String(err); } return { type: "tool_call_arguments_done", id: state.id, arguments: doneEvent.arguments }; @@ -407,15 +438,17 @@ export function parseToolCallFromEvent(event: BaseSSEEvent, tracker: ToolCallTra return { type: "ignored" }; } - // Use call_id as the canonical identifier + // Key by the item id (`fc_xxx`) to match the entry created by + // `output_item.added` and the streaming argument events. + const itemId = itemDoneEvent.item.id; const callId = itemDoneEvent.item.call_id ?? itemDoneEvent.item.id; - const state = tracker.toolCalls.get(callId); + const state = tracker.toolCalls.get(itemId); if (!state) { // Item might have been created without output_item.added (edge case) // Create it now from the done event const newState: ToolCallState = { - id: callId, + id: itemId, callId: callId, name: itemDoneEvent.item.name ?? "unknown", outputIndex: itemDoneEvent.output_index, @@ -423,40 +456,37 @@ export function parseToolCallFromEvent(event: BaseSSEEvent, tracker: ToolCallTra status: "completed", }; - // Parse arguments + // Parse arguments. An unparseable payload is marked invalid (not + // dropped) so the tool loop feeds the error back to the model. if (itemDoneEvent.item.arguments) { try { newState.parsedArguments = JSON.parse(itemDoneEvent.item.arguments) as Record< string, unknown >; - } catch { - return { - type: "error", - message: `Failed to parse arguments from output_item.done: ${itemDoneEvent.item.arguments}`, - }; + } catch (err) { + newState.invalid = err instanceof Error ? err.message : String(err); } } - tracker.toolCalls.set(callId, newState); + tracker.toolCalls.set(itemId, newState); return { type: "tool_call_complete", toolCall: createParsedToolCall(newState) }; } // Update existing state state.status = "completed"; - // If we don't have parsed arguments yet, try from the done event - if (!state.parsedArguments && itemDoneEvent.item.arguments) { + // If we don't have parsed arguments yet, try from the done event. An + // unparseable payload is marked invalid (not dropped) so the tool loop + // feeds the error back to the model instead of ending the turn silently. + if (!state.parsedArguments && !state.invalid && itemDoneEvent.item.arguments) { try { state.parsedArguments = JSON.parse(itemDoneEvent.item.arguments) as Record< string, unknown >; - } catch { - return { - type: "error", - message: `Failed to parse arguments from output_item.done: ${itemDoneEvent.item.arguments}`, - }; + } catch (err) { + state.invalid = err instanceof Error ? err.message : String(err); } } diff --git a/ui/src/pages/chat/utils/toolExecutors.ts b/ui/src/pages/chat/utils/toolExecutors.ts index 3ad5a144..6391349a 100644 --- a/ui/src/pages/chat/utils/toolExecutors.ts +++ b/ui/src/pages/chat/utils/toolExecutors.ts @@ -35,6 +35,7 @@ import type { Citation, ChunkCitation } from "@/components/chat-types"; import type { ParsedToolCall, FileSearchToolCall } from "./toolCallParser"; +import { invalidArgumentsText } from "./toolCallParser"; import { executeFileSearch, type ExecuteFileSearchOptions } from "./executeFileSearch"; import { pyodideService } from "@/services/pyodide"; import { quickjsService } from "@/services/quickjs"; @@ -2900,6 +2901,20 @@ export async function executeToolCalls( // Execute all tool calls in parallel const execPromises = toolCalls.map(async (toolCall) => { + // The model emitted this call but its arguments couldn't be parsed. Mirror + // the backend's invalid-argument contract: don't run the underlying tool, + // feed the parse error back as a function_call_output so the model can + // self-correct on the next round. (See src/services/server_tools/mod.rs.) + if (toolCall.invalid) { + const message = invalidArgumentsText(toolCall.name, toolCall.invalid); + results.set(toolCall.id, { + success: false, + error: message, + output: JSON.stringify({ error: message }), + }); + return; + } + // Check for MCP tools (dynamically named with "mcp:" prefix) let executor = executors[toolCall.name]; From 8420cbc44a9e80075dd81e418a9dfb7eb784d629 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 6 Jun 2026 20:30:40 +1000 Subject: [PATCH 2/2] Review fixes --- .../utils/__tests__/toolCallParser.test.ts | 40 +++++++++++++++++++ ui/src/pages/chat/utils/toolCallParser.ts | 13 ++++-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts b/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts index deca4559..aa7e05bf 100644 --- a/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts +++ b/ui/src/pages/chat/utils/__tests__/toolCallParser.test.ts @@ -110,6 +110,46 @@ describe("parseToolCallFromEvent", () => { expect(completed[0].invalid).toBeTruthy(); }); + it("recovers a valid parse from output_item.done after a truncated arguments.done", () => { + const tracker = createToolCallTracker(); + parseToolCallFromEvent(added, tracker); + + // arguments.done arrives truncated and flags the call invalid. + parseToolCallFromEvent( + { + type: "response.function_call_arguments.done", + item_id: ITEM_ID, + output_index: 0, + arguments: '{"query": ', + }, + tracker + ); + expect(tracker.toolCalls.get(ITEM_ID)?.invalid).toBeTruthy(); + + // output_item.done carries the complete, valid payload — the call must + // recover rather than stay permanently flagged invalid. + const done = parseToolCallFromEvent( + { + type: "response.output_item.done", + output_index: 0, + item: { + type: "function_call", + id: ITEM_ID, + call_id: CALL_ID, + name: "web_search", + arguments: '{"query": "rust"}', + status: "completed", + }, + }, + tracker + ); + expect(done.type).toBe("tool_call_complete"); + if (done.type === "tool_call_complete") { + expect(done.toolCall.invalid).toBeUndefined(); + expect(done.toolCall.arguments).toEqual({ query: "rust" }); + } + }); + it("parses well-formed arguments into a clean completed call", () => { const tracker = createToolCallTracker(); parseToolCallFromEvent(added, tracker); diff --git a/ui/src/pages/chat/utils/toolCallParser.ts b/ui/src/pages/chat/utils/toolCallParser.ts index 628f3686..786eec44 100644 --- a/ui/src/pages/chat/utils/toolCallParser.ts +++ b/ui/src/pages/chat/utils/toolCallParser.ts @@ -476,15 +476,20 @@ export function parseToolCallFromEvent(event: BaseSSEEvent, tracker: ToolCallTra // Update existing state state.status = "completed"; - // If we don't have parsed arguments yet, try from the done event. An - // unparseable payload is marked invalid (not dropped) so the tool loop - // feeds the error back to the model instead of ending the turn silently. - if (!state.parsedArguments && !state.invalid && itemDoneEvent.item.arguments) { + // If we don't have parsed arguments yet, try from the done event. + // `output_item.done` carries the complete `item.arguments`, so it can + // still recover a valid parse even if an earlier `arguments.done` was + // truncated and set `invalid` — so retry whenever args are unset and + // clear the stale `invalid` marker on success. An unparseable payload is + // (re)marked invalid (not dropped) so the tool loop feeds the error back + // to the model instead of ending the turn silently. + if (!state.parsedArguments && itemDoneEvent.item.arguments) { try { state.parsedArguments = JSON.parse(itemDoneEvent.item.arguments) as Record< string, unknown >; + state.invalid = undefined; } catch (err) { state.invalid = err instanceof Error ? err.message : String(err); }