diff --git a/scripts/openapi-conformance.py b/scripts/openapi-conformance.py index f706c5e..81799d9 100755 --- a/scripts/openapi-conformance.py +++ b/scripts/openapi-conformance.py @@ -62,6 +62,7 @@ ("/chat/completions", "POST", "request", "function_call"): "Deprecated - use tool_choice instead", ("/chat/completions", "POST", "request", "functions"): "Deprecated - use tools instead", ("/chat/completions", "POST", "request", "include_obfuscation"): "OpenAI internal obfuscation feature", + ("/chat/completions", "POST", "request", "moderation"): "OpenAI hosted moderation pass (omni-moderation) - Hadrian has separate guardrails feature", # /completions - Legacy endpoint, minimal support ("/completions", "POST", "request", "include_usage"): "Legacy completions - use chat/completions instead", ("/completions", "POST", "request", "include_obfuscation"): "OpenAI internal obfuscation feature", @@ -72,6 +73,7 @@ # /models - Object field missing (schema issue) ("/models", "GET", "response", "object"): "List response object type - TODO: add to schema", # /responses - OpenAI-specific features + ("/responses", "POST", "request", "moderation"): "OpenAI hosted moderation pass (omni-moderation) - Hadrian has separate guardrails feature", ("/responses", "POST", "request", "top_logprobs"): "Log probabilities not implemented", ("/responses", "POST", "request", "prompt_cache_retention"): "OpenAI-specific cache retention", ("/responses", "POST", "request", "max_tool_calls"): "Tool call limits not implemented", diff --git a/src/api_types/responses.rs b/src/api_types/responses.rs index c3ee462..e1f3a29 100644 --- a/src/api_types/responses.rs +++ b/src/api_types/responses.rs @@ -233,6 +233,8 @@ pub enum ResponsesIncludable { ReasoningEncryptedContent, #[serde(rename = "code_interpreter_call.outputs")] CodeInterpreterCallOutputs, + #[serde(rename = "web_search_call.action.sources")] + WebSearchCallActionSources, } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] @@ -668,12 +670,74 @@ pub enum WebSearchCallOutputType { WebSearchCall, } +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchActionType { + #[default] + Search, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchSourceType { + Url, +} + +/// A single source the model consulted during a web search — an entry in +/// `web_search_call.action.sources`. Per OpenAI's spec the only source kind is +/// a URL. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WebSearchSource { + #[serde(rename = "type")] + pub type_: WebSearchSourceType, + pub url: String, +} + +/// The action a `web_search_call` performed. Mirrors OpenAI's Responses API +/// `search` action: it carries the issued query and, only when the request +/// opts in via `include: ["web_search_call.action.sources"]`, the list of +/// consulted source URLs. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct WebSearchAction { + #[serde(rename = "type")] + pub type_: WebSearchActionType, + /// The issued search query. OpenAI marks this `[DEPRECATED]` in favour of + /// `queries`, but it is still `required` on the `search` action, so it is + /// always serialized (empty string when the query is unknown, e.g. a call + /// whose arguments failed to parse). `#[serde(default)]` keeps + /// deserialization tolerant of native items that omit the deprecated field. + #[serde(default)] + pub query: String, + /// The issued search queries — OpenAI's modern array form, mirroring + /// `file_search_call.queries`. Hadrian's `web_search` function takes a + /// single query, so this carries at most one entry. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub queries: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sources: Option>, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WebSearchCallOutput { #[serde(rename = "type")] pub type_: WebSearchCallOutputType, pub id: String, pub status: WebSearchStatus, + /// The action taken (query/queries, and optional source URLs). `action` is + /// `required` in OpenAI's `web_search_call`, so it is always serialized; + /// `#[serde(default)]` only relaxes deserialization for items persisted + /// before the field existed. + #[serde(default)] + pub action: WebSearchAction, + /// **Hadrian Extension:** the full formatted search-result text that was + /// fed to the model when this search ran. Hadrian executes web search + /// itself (Tavily/Exa) against upstreams that keep no server-side search + /// state, so it retains the result text here to replay the search as a + /// `function_call` + `function_call_output` pair on a later turn (see + /// `services/web_search_tool.rs::rewrite_web_search_history`). OpenAI's + /// native item has no equivalent field — it relies on OpenAI-side state. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub replay_content: Option, } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] @@ -682,20 +746,11 @@ pub enum FileSearchCallOutputType { FileSearchCall, } -/// Content item within a file search result. -/// -/// Matches OpenAI's format where content is an array of typed items. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum FileSearchResultContent { - /// Text content from the search result. - Text { text: String }, -} - /// A single result item from a file search operation. /// -/// This matches OpenAI's file search result schema when `include=["file_search_call.results"]` -/// is specified in the request. +/// Matches OpenAI's `file_search_call.results[]` schema, surfaced when +/// `include=["file_search_call.results"]` is set. Each field is optional in the +/// spec; Hadrian always populates `file_id`, `filename`, `text`, and `score`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileSearchResultItem { /// The ID of the file this result came from. @@ -704,12 +759,14 @@ pub struct FileSearchResultItem { pub filename: String, /// Relevance score between 0 and 1. pub score: f64, - /// Optional attributes/metadata associated with the file. + /// Optional attributes/metadata associated with the file. OpenAI's + /// `VectorStoreFileAttributes`: a map of string keys to string/number/bool + /// values. #[serde(skip_serializing_if = "Option::is_none")] pub attributes: Option>, - /// The content retrieved from the file. - /// OpenAI uses an array format with typed content items. - pub content: Vec, + /// The text retrieved from the file. OpenAI's Responses API uses a flat + /// string here (unlike the Assistants API's typed `content` array). + pub text: String, } /// Output item for a file_search tool call. @@ -733,6 +790,18 @@ pub struct FileSearchCallOutput { /// When not included, this field is omitted from the response. #[serde(skip_serializing_if = "Option::is_none")] pub results: Option>, + /// **Hadrian Extension:** the full formatted retrieval text that was fed to + /// the model when this search ran. Like + /// [`WebSearchCallOutput::replay_content`], Hadrian executes file search + /// itself against upstreams that keep no server-side search state, so it + /// retains the result text here to replay the search as a `function_call` + + /// `function_call_output` pair on a later turn (see + /// `services/file_search_tool.rs::rewrite_file_search_history`). Unlike + /// `results`, this is retained regardless of the `include` parameter — it is + /// the model-facing text, not the client-facing chunk list. OpenAI's native + /// item has no equivalent field — it relies on OpenAI-side state. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub replay_content: Option, } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] diff --git a/src/services/file_search_tool.rs b/src/services/file_search_tool.rs index 1efdb60..96a6aef 100644 --- a/src/services/file_search_tool.rs +++ b/src/services/file_search_tool.rs @@ -45,10 +45,10 @@ use crate::{ api_types::responses::{ CreateResponsesPayload, FileSearchCallOutput, FileSearchCallOutputType, FileSearchComparisonFilter, FileSearchCompoundFilter, FileSearchFilter, - FileSearchFilterComparison, FileSearchFilterLogicalType, FileSearchResultContent, - FileSearchResultItem, FileSearchTool, FunctionCallOutput, FunctionCallOutputType, - FunctionTool, ResponsesAnnotation, ResponsesIncludable, ResponsesInput, ResponsesInputItem, - ResponsesToolDefinition, WebSearchStatus, + FileSearchFilterComparison, FileSearchFilterLogicalType, FileSearchResultItem, + FileSearchTool, FunctionCallOutput, FunctionCallOutputType, FunctionTool, FunctionToolCall, + FunctionToolCallType, ResponsesAnnotation, ResponsesIncludable, ResponsesInput, + ResponsesInputItem, ResponsesToolDefinition, WebSearchStatus, }, auth::AuthenticatedRequest, config::FileSearchConfig, @@ -57,7 +57,10 @@ use crate::{ LogicalOperator, }, observability::{metrics::record_file_search, otel_span_error, otel_span_ok}, - services::{FileSearchRequest, FileSearchResponse, FileSearchService}, + services::{ + FileSearchRequest, FileSearchResponse, FileSearchService, + server_tool_history::rewrite_hosted_calls_to_function_pairs, + }, }; // ───────────────────────────────────────────────────────────────────────────── @@ -238,6 +241,12 @@ impl FileSearchToolArguments { /// // payload.tools now contains function tools instead of file_search tools /// ``` pub fn preprocess_file_search_tools(payload: &mut CreateResponsesPayload) { + // Rewrite any hosted `file_search_call` items echoed back in the input + // before the tools early-return, so a continuation that no longer + // re-declares file_search still gets its history rewritten. See + // [`rewrite_file_search_history`]. + rewrite_file_search_history(payload); + let Some(tools) = payload.tools.as_mut() else { return; }; @@ -258,6 +267,59 @@ pub fn preprocess_file_search_tools(payload: &mut CreateResponsesPayload) { } } +/// Rewrite hosted `file_search_call` items echoed back in `payload.input` into +/// the `function_call` + `function_call_output` pair every provider understands. +/// +/// File search is server-executed exactly like `web_search`: +/// [`preprocess_file_search_tools`] rewrites the `file_search` tool to a function +/// tool, so the provider never produces a native `file_search_call`. The shared +/// driver [`rewrite_hosted_calls_to_function_pairs`] does the expansion, replaying +/// the retained [`FileSearchCallOutput::replay_content`] as the tool output so the +/// model keeps the retrieved chunks in later-turn context. See its docs and +/// `web_search_tool::rewrite_web_search_history` for the sibling rewrite. +/// +/// RAG chunks are larger than web snippets, so re-injecting them every turn costs +/// more tokens than web search does — the tradeoff for keeping multi-turn file +/// search coherent rather than dropping the evidence the model already cited. +fn rewrite_file_search_history(payload: &mut CreateResponsesPayload) { + rewrite_hosted_calls_to_function_pairs(payload, |item| match item { + ResponsesInputItem::FileSearchCall(call) => Some(file_search_call_to_function_pair(call)), + _ => None, + }); +} + +/// Reconstruct the `(function_call, function_call_output)` pair for one echoed +/// `file_search_call`. The two share a `call_id` derived from the item id so the +/// provider conversion pairs them. The function arguments mirror what the model +/// originally emitted (`{"query": …}`, taken from the first of `queries`) and the +/// output is the retained [`FileSearchCallOutput::replay_content`] — the same +/// retrieval text the model saw when the search first ran. A missing query/content +/// (e.g. a failed search or a row from before content retention) degrades to an +/// empty string rather than dropping the pair, so the transcript stays well-formed. +fn file_search_call_to_function_pair( + call: &FileSearchCallOutput, +) -> (FunctionToolCall, FunctionCallOutput) { + let query = call.queries.first().cloned().unwrap_or_default(); + let arguments = serde_json::json!({ "query": query }).to_string(); + let output_text = call.replay_content.clone().unwrap_or_default(); + let function_call = FunctionToolCall { + type_: FunctionToolCallType::FunctionCall, + id: call.id.clone(), + call_id: call.id.clone(), + name: FileSearchToolArguments::FUNCTION_NAME.to_string(), + arguments, + status: None, + }; + let output = FunctionCallOutput { + type_: FunctionCallOutputType::FunctionCallOutput, + id: None, + call_id: call.id.clone(), + output: output_text, + status: None, + }; + (function_call, output) +} + /// Check if a payload contains any file_search tools. #[allow(dead_code)] // Utility for future use pub fn has_file_search_tools(payload: &CreateResponsesPayload) -> bool { @@ -955,12 +1017,16 @@ fn should_include_results(payload: &CreateResponsesPayload) -> bool { /// /// This creates the output item that OpenAI returns when the model invokes /// the file_search tool. When `include_results` is true, the detailed search -/// results are included in the response. +/// results are included in the response. `replay_content` — the formatted +/// retrieval text fed to the model — is always retained (independent of +/// `include_results`) so the call can be replayed on a later turn (see +/// [`rewrite_file_search_history`]). fn build_file_search_call_output( tool_call_id: &str, query: &str, response: &FileSearchResponse, include_results: bool, + replay_content: &str, ) -> FileSearchCallOutput { let results = if include_results { Some( @@ -979,9 +1045,7 @@ fn build_file_search_call_output( filename: r.filename.clone().unwrap_or_else(|| "unknown".to_string()), score: r.score, attributes, - content: vec![FileSearchResultContent::Text { - text: r.content.clone(), - }], + text: r.content.clone(), } }) .collect(), @@ -996,6 +1060,7 @@ fn build_file_search_call_output( queries: vec![query.to_string()], status: WebSearchStatus::Completed, results, + replay_content: Some(replay_content.to_string()), } } @@ -1030,12 +1095,17 @@ fn synthesize_file_search_invalid_handle( error: &str, ) -> crate::services::server_tools::ToolExecutionHandle { let id = call_id.to_string(); + let error_text = crate::services::server_tools::invalid_arguments_text("file_search", error); + // The arguments couldn't be parsed, so there's no query to record; keep the + // error as `replay_content` so a later-turn replay surfaces the same failure + // rather than an empty retrieval. let failed_item = FileSearchCallOutput { type_: FileSearchCallOutputType::FileSearchCall, id: id.clone(), queries: Vec::new(), status: WebSearchStatus::Failed, results: None, + replay_content: Some(error_text.clone()), }; let events = vec![ format_file_search_in_progress_event(&id, 0), @@ -1046,7 +1116,7 @@ fn synthesize_file_search_invalid_handle( type_: FunctionCallOutputType::FunctionCallOutput, id: Some(id.clone()), call_id: id.clone(), - output: crate::services::server_tools::invalid_arguments_text("file_search", error), + output: error_text, status: None, }); let result = crate::services::server_tools::ToolCallResult { @@ -1621,12 +1691,15 @@ impl crate::services::server_tools::ServerExecutedTool for FileSearchExecutor { ); } - // Emit the file_search_call output_item.done event. + // Emit the file_search_call output_item.done event. `search_result.content` + // is the formatted retrieval text fed to the model below; retain it as + // `replay_content` so the call replays on a later turn. let call_output = build_file_search_call_output( &tool_call.id, &tool_call.query, raw, include_results, + &search_result.content, ); let _ = event_tx .send(format_file_search_call_sse_event(&call_output)) @@ -2236,12 +2309,15 @@ mod tests { vector_stores_searched: 1, }; - let output = build_file_search_call_output("call_123", "test query", &response, false); + let output = + build_file_search_call_output("call_123", "test query", &response, false, "formatted"); assert_eq!(output.id, "call_123"); assert_eq!(output.queries, vec!["test query"]); assert_eq!(output.status, WebSearchStatus::Completed); assert!(output.results.is_none()); // Results not included + // replay_content is retained even when results are not included. + assert_eq!(output.replay_content.as_deref(), Some("formatted")); } #[test] @@ -2264,7 +2340,8 @@ mod tests { vector_stores_searched: 1, }; - let output = build_file_search_call_output("call_456", "test query", &response, true); + let output = + build_file_search_call_output("call_456", "test query", &response, true, "formatted"); assert_eq!(output.id, "call_456"); assert_eq!(output.queries, vec!["test query"]); @@ -2277,7 +2354,75 @@ mod tests { assert_eq!(results[0].filename, "test.pdf"); assert_eq!(results[0].score, 0.85); assert!(results[0].attributes.is_some()); - assert_eq!(results[0].content.len(), 1); + // Flat `text` per OpenAI's Responses API schema (not a `content` array). + assert_eq!(results[0].text, "Test content"); + + // Lock the spec-shaped wire form: `text` is a flat string and there is + // no `content` array. + let wire = serde_json::to_value(&results[0]).unwrap(); + assert_eq!(wire["text"], "Test content"); + assert!( + wire.get("content").is_none(), + "no `content` array on results" + ); + } + + #[test] + fn test_rewrite_file_search_history_expands_to_function_pair() { + // Continuation turn: a synthesized file_search_call comes back between two + // user messages and must expand to a function_call + function_call_output + // pair, replaying the retained chunk text — even with no tools re-declared + // (the rewrite must run before the tools early-return). + let mut payload: CreateResponsesPayload = serde_json::from_value(serde_json::json!({ + "input": [ + {"role": "user", "content": "find the policy"}, + {"role": "user", "content": "and the appendix?"}, + ], + "stream": false, + })) + .unwrap(); + let file_search_call = ResponsesInputItem::FileSearchCall(FileSearchCallOutput { + type_: FileSearchCallOutputType::FileSearchCall, + id: "fs_1".to_string(), + queries: vec!["policy".to_string()], + status: WebSearchStatus::Completed, + results: None, + replay_content: Some("Retrieved: the policy says...".to_string()), + }); + let Some(ResponsesInput::Items(items)) = payload.input.as_mut() else { + panic!("expected items input"); + }; + items.insert(1, file_search_call); + assert_eq!(items.len(), 3); + + assert!(payload.tools.is_none()); + preprocess_file_search_tools(&mut payload); + + let Some(ResponsesInput::Items(items)) = payload.input else { + panic!("expected items input"); + }; + // The file_search_call expands to a pair, so 2 user messages + 2 = 4. + assert_eq!(items.len(), 4); + assert!( + !items + .iter() + .any(|i| matches!(i, ResponsesInputItem::FileSearchCall(_))), + "no file_search_call items should remain" + ); + let ResponsesInputItem::FunctionCall(ref fc) = items[1] else { + panic!("expected a function_call at index 1, got {:?}", items[1]); + }; + assert_eq!(fc.name, "file_search"); + assert_eq!(fc.call_id, "fs_1"); + assert!(fc.arguments.contains("policy")); + let ResponsesInputItem::FunctionCallOutput(ref out) = items[2] else { + panic!( + "expected a function_call_output at index 2, got {:?}", + items[2] + ); + }; + assert_eq!(out.call_id, "fs_1"); + assert_eq!(out.output, "Retrieved: the policy says..."); } #[test] @@ -2292,6 +2437,7 @@ mod tests { queries: vec!["test query".to_string()], status: WebSearchStatus::Completed, results: None, + replay_content: Some("formatted".to_string()), }; let sse_event = format_file_search_call_sse_event(&output); diff --git a/src/services/mcp/preprocess.rs b/src/services/mcp/preprocess.rs index 6d3b0f5..2cd77d9 100644 --- a/src/services/mcp/preprocess.rs +++ b/src/services/mcp/preprocess.rs @@ -33,7 +33,10 @@ use crate::{ ResponsesInputItem, ResponsesMcpToolChoice, ResponsesNamedToolChoice, ResponsesNamedToolChoiceType, ResponsesToolChoice, ResponsesToolDefinition, }, - services::{mcp::tool_search::TOOL_SEARCH_FUNCTION_NAME, mcp_tool::McpProviderKind}, + services::{ + mcp::tool_search::TOOL_SEARCH_FUNCTION_NAME, mcp_tool::McpProviderKind, + server_tool_history::rewrite_hosted_calls_to_function_pairs, + }, }; /// Failures surfaced by [`rewrite_mcp_tools`]. `ListToolsFailed` maps @@ -401,27 +404,10 @@ fn collect_inlined_catalogs( /// content. `mcp_approval_request` / `mcp_approval_response` items are /// also left alone — pending approvals are resolved by [`super::resume`]. fn rewrite_mcp_history(payload: &mut CreateResponsesPayload) { - let Some(ResponsesInput::Items(items)) = payload.input.as_mut() else { - return; - }; - if !items - .iter() - .any(|i| matches!(i, ResponsesInputItem::McpCall(_))) - { - return; - } - let mut rewritten = Vec::with_capacity(items.len() + 1); - for item in std::mem::take(items) { - match item { - ResponsesInputItem::McpCall(call) => { - let (function_call, output) = mcp_call_to_function_pair(call); - rewritten.push(ResponsesInputItem::FunctionCall(function_call)); - rewritten.push(ResponsesInputItem::FunctionCallOutput(output)); - } - other => rewritten.push(other), - } - } - *items = rewritten; + rewrite_hosted_calls_to_function_pairs(payload, |item| match item { + ResponsesInputItem::McpCall(call) => Some(mcp_call_to_function_pair(call)), + _ => None, + }); } /// Reconstruct the `(function_call, function_call_output)` pair for one @@ -431,12 +417,11 @@ fn rewrite_mcp_history(payload: &mut CreateResponsesPayload) { /// executor's live-loop continuation: `output` verbatim on success, /// `{"error": …}` on failure (the executor stores at most one of /// `output` / `error`). -fn mcp_call_to_function_pair(call: McpCallItem) -> (FunctionToolCall, FunctionCallOutput) { +fn mcp_call_to_function_pair(call: &McpCallItem) -> (FunctionToolCall, FunctionCallOutput) { let function_name = synthesize_function_name(&call.server_label, &call.name); // Reuse the item id as the pairing token — it's already unique per // response and never collides with a live `function_call.call_id` // (those are suppressed before the client ever sees them). - let call_id = call.id.clone(); let output_text = match (&call.output, &call.error) { (_, Some(err)) => serde_json::json!({ "error": err }).to_string(), (Some(out), None) => out.clone(), @@ -444,16 +429,16 @@ fn mcp_call_to_function_pair(call: McpCallItem) -> (FunctionToolCall, FunctionCa }; let function_call = FunctionToolCall { type_: FunctionToolCallType::FunctionCall, - id: call.id, - call_id: call_id.clone(), + id: call.id.clone(), + call_id: call.id.clone(), name: function_name, - arguments: call.arguments, + arguments: call.arguments.clone(), status: None, }; let output = FunctionCallOutput { type_: FunctionCallOutputType::FunctionCallOutput, id: None, - call_id, + call_id: call.id.clone(), output: output_text, status: None, }; diff --git a/src/services/mod.rs b/src/services/mod.rs index b89819b..3469f8c 100644 --- a/src/services/mod.rs +++ b/src/services/mod.rs @@ -59,6 +59,7 @@ pub mod responses_webhook; mod scim_configs; #[cfg(feature = "sso")] mod scim_provisioning; +pub mod server_tool_history; #[cfg(not(target_arch = "wasm32"))] pub mod server_tools; mod service_accounts; diff --git a/src/services/responses_chain.rs b/src/services/responses_chain.rs index 4269e81..6fcf5bd 100644 --- a/src/services/responses_chain.rs +++ b/src/services/responses_chain.rs @@ -77,13 +77,15 @@ fn input_to_items(input: ResponsesInput) -> Vec { /// replayed as conversation history. The inner payloads are identical between /// the two enums, so this is a total, lossless 1:1 mapping. /// -/// Note the hosted-shell items (`ShellCall` / `ShellCallOutput`) are replayed -/// verbatim here — the array-`output` shape they carry is only valid for -/// native OpenAI passthrough. In function mode `preprocess_shell_tools` -/// (`services/shell_tool.rs`, run per provider in `routes/execution.rs`) -/// rewrites them to `function_call` / `function_call_output` before dispatch, -/// since that's the mode-aware layer that knows whether the shell tool stayed -/// native or was rewritten to a function. +/// Note the hosted server-tool items (`ShellCall` / `ShellCallOutput`, +/// `WebSearchCall`, `FileSearchCall`, `McpCall`, …) are replayed verbatim here — +/// the per-provider preprocess layer in `routes/execution.rs` is what normalizes +/// them before dispatch, since that's the mode-aware layer that knows whether +/// each tool stayed native or was rewritten to a function. `preprocess_shell_tools` +/// (`services/shell_tool.rs`) rewrites the two-item shell history in place, while +/// `web_search`, `file_search`, and MCP share +/// `server_tool_history::rewrite_hosted_calls_to_function_pairs` to expand their +/// single hosted item into a `function_call` / `function_call_output` pair there. fn output_item_to_input(item: ResponsesOutputItem) -> ResponsesInputItem { match item { ResponsesOutputItem::Message(m) => ResponsesInputItem::OutputMessage(m), @@ -285,6 +287,40 @@ mod tests { )); } + #[test] + fn web_search_call_output_replays_with_action_and_content() { + // A stored `web_search_call` output item must round-trip through + // reconstruction as a `WebSearchCall` *input* item carrying its + // `action` (query + sources) and the Hadrian `replay_content`, so the + // per-provider preprocess can rebuild the function-call pair. Guards the + // untagged-enum deserialization against the added optional fields. + let output = json!([{ + "type": "web_search_call", + "id": "ws_1", + "status": "completed", + "action": { + "type": "search", + "query": "rust 2024", + "sources": [{"type": "url", "url": "https://example.com"}] + }, + "replay_content": "Web search results for \"rust 2024\"" + }]); + let r = record("resp_1", None, json!("hi"), output); + let mut items = Vec::new(); + record_to_items(&r, &mut items).expect("valid record"); + // user "hi" then the web_search_call + assert_eq!(items.len(), 2); + assert!( + matches!(items[1], ResponsesInputItem::WebSearchCall(_)), + "must deserialize as the WebSearchCall variant" + ); + let ws = serde_json::to_value(&items[1]).unwrap(); + assert_eq!(ws["type"], "web_search_call"); + assert_eq!(ws["action"]["query"], "rust 2024"); + assert_eq!(ws["action"]["sources"][0]["url"], "https://example.com"); + assert_eq!(ws["replay_content"], "Web search results for \"rust 2024\""); + } + #[test] fn parent_link_is_followed() { let root = record("resp_1", None, json!("Hi?"), assistant_output("Hello!")); diff --git a/src/services/server_tool_history.rs b/src/services/server_tool_history.rs new file mode 100644 index 0000000..1f1d884 --- /dev/null +++ b/src/services/server_tool_history.rs @@ -0,0 +1,152 @@ +//! Shared history-rewriting for server-executed tools. +//! +//! Hadrian self-executes its server tools by rewriting each to a function tool +//! (`web_search` / `file_search` / `mcp_