From 37c1dd18786da53e22df56e63681968c7ebe3532 Mon Sep 17 00:00:00 2001 From: Roy Han Date: Sun, 14 Jun 2026 17:42:15 -0700 Subject: [PATCH 1/4] Add internal auto-compaction opt-out --- codex-rs/core/config.schema.json | 8 +- codex-rs/core/src/session/turn.rs | 9 +- codex-rs/core/tests/suite/compact.rs | 141 +++++++++++++++++++++++++++ codex-rs/features/src/lib.rs | 8 ++ 4 files changed, 164 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 31daf2feb428..4a0ddc9efdd9 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -425,6 +425,9 @@ "auth_elicitation": { "type": "boolean" }, + "auto_compaction": { + "type": "boolean" + }, "browser_use": { "type": "boolean" }, @@ -4580,6 +4583,9 @@ "auth_elicitation": { "type": "boolean" }, + "auto_compaction": { + "type": "boolean" + }, "browser_use": { "type": "boolean" }, @@ -5274,4 +5280,4 @@ }, "title": "ConfigToml", "type": "object" -} +} \ No newline at end of file diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 5857d8f32034..9429455a1b16 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -299,7 +299,10 @@ pub(crate) async fn run_turn( } // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop. - if token_limit_reached && needs_follow_up { + if turn_context.features.enabled(Feature::AutoCompaction) + && token_limit_reached + && needs_follow_up + { if let Err(err) = run_auto_compact( &sess, &turn_context, @@ -785,6 +788,10 @@ async fn run_pre_sampling_compact( turn_context: &Arc, client_session: &mut ModelClientSession, ) -> CodexResult<()> { + if !turn_context.features.enabled(Feature::AutoCompaction) { + return Ok(()); + } + maybe_run_previous_model_inline_compact(sess, turn_context, client_session).await?; let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await; // Compact if the configured auto-compaction budget or usable context window is exhausted. diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 1d1d387f2c88..1735dbe98f1a 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -3671,6 +3671,76 @@ async fn snapshot_request_shape_mid_turn_continuation_compaction() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compaction_feature_disabled_skips_mid_turn_compaction() { + skip_if_no_network!(); + + let server = start_mock_server().await; + let context_window = 100; + let over_limit_tokens = context_window * 95 / 100 + 1; + let first_turn = sse(vec![ + ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"), + ev_completed_with_tokens("r1", over_limit_tokens), + ]); + let context_window_error = sse_failed( + "response-failed", + "context_length_exceeded", + CONTEXT_LIMIT_MESSAGE, + ); + let request_log = mount_sse_sequence(&server, vec![first_turn, context_window_error]).await; + + let mut model_provider = non_openai_model_provider(&server); + model_provider.stream_max_retries = Some(0); + let codex = test_codex() + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + config.model_context_window = Some(context_window); + let _ = config.features.disable(Feature::AutoCompaction); + }) + .build(&server) + .await + .expect("build codex") + .codex; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: FUNCTION_CALL_LIMIT_MSG.into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + responsesapi_client_metadata: None, + additional_context: Default::default(), + thread_settings: Default::default(), + }) + .await + .expect("submit user input"); + + let error_message = wait_for_event_match(&codex, |event| match event { + EventMsg::Error(err) => Some(err.message.clone()), + _ => None, + }) + .await; + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + + let requests = request_log.requests(); + assert_eq!(requests.len(), 2); + let continuation_request = &requests[1]; + continuation_request.function_call_output(DUMMY_CALL_ID); + assert!( + !body_contains_text( + &continuation_request.body_json().to_string(), + SUMMARIZATION_PROMPT + ), + "disabled auto-compaction should continue without a compaction request" + ); + assert!( + error_message.contains("ran out of room in the model's context window"), + "expected context window exceeded message, got {error_message}" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn auto_compact_clamps_config_limit_to_context_window() { skip_if_no_network!(); @@ -4498,6 +4568,77 @@ async fn snapshot_request_shape_pre_turn_compaction_context_window_exceeded() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compaction_feature_disabled_skips_pre_turn_compaction() { + skip_if_no_network!(); + + let server = start_mock_server().await; + let first_turn = sse(vec![ + ev_assistant_message("m1", FIRST_REPLY), + ev_completed_with_tokens("r1", /*total_tokens*/ 500), + ]); + let context_window_error = sse_failed( + "response-failed", + "context_length_exceeded", + CONTEXT_LIMIT_MESSAGE, + ); + let request_log = mount_sse_sequence(&server, vec![first_turn, context_window_error]).await; + + let mut model_provider = non_openai_model_provider(&server); + model_provider.stream_max_retries = Some(0); + let codex = test_codex() + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + config.model_auto_compact_token_limit = Some(200); + let _ = config.features.disable(Feature::AutoCompaction); + }) + .build(&server) + .await + .expect("build codex") + .codex; + + for user in ["USER_ONE", "USER_TWO"] { + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: user.to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + responsesapi_client_metadata: None, + additional_context: Default::default(), + thread_settings: Default::default(), + }) + .await + .expect("submit user input"); + + if user == "USER_ONE" { + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + } + } + + let error_message = wait_for_event_match(&codex, |event| match event { + EventMsg::Error(err) => Some(err.message.clone()), + _ => None, + }) + .await; + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + + let requests = request_log.requests(); + assert_eq!(requests.len(), 2); + let second_request_body = requests[1].body_json().to_string(); + assert!(second_request_body.contains("USER_TWO")); + assert!( + !body_contains_text(&second_request_body, SUMMARIZATION_PROMPT), + "disabled auto-compaction should sample without a pre-turn compaction request" + ); + assert!( + error_message.contains("ran out of room in the model's context window"), + "expected context window exceeded message, got {error_message}" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn snapshot_request_shape_manual_compact_without_previous_user_messages() { skip_if_no_network!(); diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index c907849cb243..c24867d176f9 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -215,6 +215,8 @@ pub enum Feature { RealtimeConversation, /// Prevent idle system sleep while a turn is actively running. PreventIdleSleep, + /// Enable automatic context compaction before or during a turn. + AutoCompaction, /// Enable remote compaction v2 over the normal Responses API. RemoteCompactionV2, /// Enable workspace dependency support. @@ -1254,6 +1256,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Removed, default_enabled: false, }, + FeatureSpec { + id: Feature::AutoCompaction, + key: "auto_compaction", + stage: Stage::Stable, + default_enabled: true, + }, FeatureSpec { id: Feature::RemoteCompactionV2, key: "remote_compaction_v2", From 3138ea21a7cd14826539c8d3d4d6090b01f16ba5 Mon Sep 17 00:00:00 2001 From: rhan-oai Date: Mon, 15 Jun 2026 09:13:07 -0700 Subject: [PATCH 2/4] lint --- codex-rs/core/config.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 4a0ddc9efdd9..25e405b0a2c0 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -5280,4 +5280,4 @@ }, "title": "ConfigToml", "type": "object" -} \ No newline at end of file +} From df364c3f83035cacb027447b45cc481d0b4ca55e Mon Sep 17 00:00:00 2001 From: Roy Han Date: Mon, 15 Jun 2026 09:22:03 -0700 Subject: [PATCH 3/4] Condense auto-compaction tests --- codex-rs/core/tests/suite/compact.rs | 170 ++++++++++++--------------- 1 file changed, 75 insertions(+), 95 deletions(-) diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 1735dbe98f1a..75906af303bc 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -1,6 +1,7 @@ #![allow(clippy::expect_used)] use anyhow::Result; use anyhow::anyhow; +use codex_core::CodexThread; use codex_core::compact::SUMMARIZATION_PROMPT; use codex_core::compact::SUMMARY_PREFIX; use codex_core::config::Config; @@ -35,6 +36,7 @@ use core_test_support::responses; use core_test_support::responses::ev_reasoning_item; use core_test_support::responses::mount_models_once; use core_test_support::skip_if_no_network; +use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::local_selections; use core_test_support::test_codex::test_codex; use core_test_support::test_codex::turn_permission_fields; @@ -92,6 +94,48 @@ const REMOTE_V2_SUMMARY: &str = "global-instructions-remote-v2-summary"; pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long threads and multiple compactions can cause the model to be less accurate. Start a new thread when possible to keep threads small and targeted."; +async fn build_auto_compaction_disabled_codex(server: &MockServer) -> TestCodex { + let mut model_provider = non_openai_model_provider(server); + model_provider.stream_max_retries = Some(0); + test_codex() + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + config.model_context_window = Some(100); + config.model_auto_compact_token_limit = Some(90); + let _ = config.features.disable(Feature::AutoCompaction); + }) + .build(server) + .await + .expect("build codex") +} + +async fn submit_context_window_exceeded_turn(codex: &Arc, text: &str) { + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: text.to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + responsesapi_client_metadata: None, + additional_context: Default::default(), + thread_settings: Default::default(), + }) + .await + .expect("submit context window exceeded turn"); + let error_message = wait_for_event_match(codex, |event| match event { + EventMsg::Error(err) => Some(err.message.clone()), + _ => None, + }) + .await; + wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + assert!( + error_message.contains("ran out of room in the model's context window"), + "expected context window exceeded message, got {error_message}" + ); +} + fn ev_shell_command_call(call_id: &str, command: &str) -> serde_json::Value { ev_function_call( call_id, @@ -3676,53 +3720,26 @@ async fn auto_compaction_feature_disabled_skips_mid_turn_compaction() { skip_if_no_network!(); let server = start_mock_server().await; - let context_window = 100; - let over_limit_tokens = context_window * 95 / 100 + 1; + let over_limit_tokens = 100 * 95 / 100 + 1; let first_turn = sse(vec![ ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"), ev_completed_with_tokens("r1", over_limit_tokens), ]); - let context_window_error = sse_failed( - "response-failed", - "context_length_exceeded", - CONTEXT_LIMIT_MESSAGE, - ); - let request_log = mount_sse_sequence(&server, vec![first_turn, context_window_error]).await; - - let mut model_provider = non_openai_model_provider(&server); - model_provider.stream_max_retries = Some(0); - let codex = test_codex() - .with_config(move |config| { - config.model_provider = model_provider; - set_test_compact_prompt(config); - config.model_context_window = Some(context_window); - let _ = config.features.disable(Feature::AutoCompaction); - }) - .build(&server) - .await - .expect("build codex") - .codex; - - codex - .submit(Op::UserInput { - items: vec![UserInput::Text { - text: FUNCTION_CALL_LIMIT_MSG.into(), - text_elements: Vec::new(), - }], - final_output_json_schema: None, - responsesapi_client_metadata: None, - additional_context: Default::default(), - thread_settings: Default::default(), - }) - .await - .expect("submit user input"); - - let error_message = wait_for_event_match(&codex, |event| match event { - EventMsg::Error(err) => Some(err.message.clone()), - _ => None, - }) + let request_log = mount_sse_sequence( + &server, + vec![ + first_turn, + sse_failed( + "response-failed", + "context_length_exceeded", + CONTEXT_LIMIT_MESSAGE, + ), + ], + ) .await; - wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + let test = build_auto_compaction_disabled_codex(&server).await; + + submit_context_window_exceeded_turn(&test.codex, FUNCTION_CALL_LIMIT_MSG).await; let requests = request_log.requests(); assert_eq!(requests.len(), 2); @@ -3735,10 +3752,6 @@ async fn auto_compaction_feature_disabled_skips_mid_turn_compaction() { ), "disabled auto-compaction should continue without a compaction request" ); - assert!( - error_message.contains("ran out of room in the model's context window"), - "expected context window exceeded message, got {error_message}" - ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -4577,53 +4590,24 @@ async fn auto_compaction_feature_disabled_skips_pre_turn_compaction() { ev_assistant_message("m1", FIRST_REPLY), ev_completed_with_tokens("r1", /*total_tokens*/ 500), ]); - let context_window_error = sse_failed( - "response-failed", - "context_length_exceeded", - CONTEXT_LIMIT_MESSAGE, - ); - let request_log = mount_sse_sequence(&server, vec![first_turn, context_window_error]).await; + let request_log = mount_sse_sequence( + &server, + vec![ + first_turn, + sse_failed( + "response-failed", + "context_length_exceeded", + CONTEXT_LIMIT_MESSAGE, + ), + ], + ) + .await; + let test = build_auto_compaction_disabled_codex(&server).await; - let mut model_provider = non_openai_model_provider(&server); - model_provider.stream_max_retries = Some(0); - let codex = test_codex() - .with_config(move |config| { - config.model_provider = model_provider; - set_test_compact_prompt(config); - config.model_auto_compact_token_limit = Some(200); - let _ = config.features.disable(Feature::AutoCompaction); - }) - .build(&server) + test.submit_turn("USER_ONE") .await - .expect("build codex") - .codex; - - for user in ["USER_ONE", "USER_TWO"] { - codex - .submit(Op::UserInput { - items: vec![UserInput::Text { - text: user.to_string(), - text_elements: Vec::new(), - }], - final_output_json_schema: None, - responsesapi_client_metadata: None, - additional_context: Default::default(), - thread_settings: Default::default(), - }) - .await - .expect("submit user input"); - - if user == "USER_ONE" { - wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; - } - } - - let error_message = wait_for_event_match(&codex, |event| match event { - EventMsg::Error(err) => Some(err.message.clone()), - _ => None, - }) - .await; - wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + .expect("submit first turn"); + submit_context_window_exceeded_turn(&test.codex, "USER_TWO").await; let requests = request_log.requests(); assert_eq!(requests.len(), 2); @@ -4633,10 +4617,6 @@ async fn auto_compaction_feature_disabled_skips_pre_turn_compaction() { !body_contains_text(&second_request_body, SUMMARIZATION_PROMPT), "disabled auto-compaction should sample without a pre-turn compaction request" ); - assert!( - error_message.contains("ran out of room in the model's context window"), - "expected context window exceeded message, got {error_message}" - ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] From ef97981028b5f8264a0007e7a7c821b7638a668d Mon Sep 17 00:00:00 2001 From: Roy Han Date: Sun, 21 Jun 2026 19:36:58 -0700 Subject: [PATCH 4/4] Honor auto-compaction opt-out for new context --- codex-rs/core/src/tools/spec_plan.rs | 4 +- codex-rs/core/tests/suite/compact.rs | 87 +++++++++++++++++++++++ codex-rs/core/tests/suite/token_budget.rs | 48 +++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) diff --git a/codex-rs/core/src/tools/spec_plan.rs b/codex-rs/core/src/tools/spec_plan.rs index 76c1e4389c84..adca9018cf21 100644 --- a/codex-rs/core/src/tools/spec_plan.rs +++ b/codex-rs/core/src/tools/spec_plan.rs @@ -656,7 +656,9 @@ fn add_core_utility_tools(context: &CoreToolPlanContext<'_>, planned_tools: &mut } if features.enabled(Feature::TokenBudget) { - planned_tools.add_with_exposure(NewContextWindowHandler, ToolExposure::DirectModelOnly); + if features.enabled(Feature::AutoCompaction) { + planned_tools.add_with_exposure(NewContextWindowHandler, ToolExposure::DirectModelOnly); + } planned_tools.add(GetContextRemainingHandler); } diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 75906af303bc..ad000fa8b11d 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -2327,6 +2327,93 @@ async fn pre_sampling_compact_runs_when_comp_hash_changes() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compaction_feature_disabled_skips_comp_hash_model_switch_compaction() { + skip_if_no_network!(); + + let server = MockServer::start().await; + let previous_model = "gpt-5.3-codex"; + let next_model = "gpt-5.2"; + + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: vec![ + model_info_with_optional_comp_hash(previous_model, Some("hash-a")), + model_info_with_optional_comp_hash(next_model, Some("hash-b")), + ], + }, + ) + .await; + let request_log = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_assistant_message("m1", "before switch"), + ev_completed_with_tokens("r1", /*total_tokens*/ 100), + ]), + sse(vec![ + ev_assistant_message("m2", "after switch"), + ev_completed_with_tokens("r2", /*total_tokens*/ 100), + ]), + ], + ) + .await; + let model_provider = non_openai_model_provider(&server); + let mut builder = test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_model(previous_model) + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + let _ = config.features.disable(Feature::AutoCompaction); + }); + let test = builder.build(&server).await.expect("build test codex"); + + test.codex + .submit(disabled_permission_user_turn( + "before switch", + test.cwd.path().to_path_buf(), + previous_model.to_string(), + )) + .await + .expect("submit first user turn"); + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + test.codex + .submit(disabled_permission_user_turn( + "after switch", + test.cwd.path().to_path_buf(), + next_model.to_string(), + )) + .await + .expect("submit second user turn"); + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + let requests = request_log.requests(); + assert_eq!(models_mock.requests().len(), 1); + assert_eq!( + requests.len(), + 2, + "disabled auto-compaction should skip compaction on a comp-hash model switch" + ); + let first = requests[0].body_json(); + let second = requests[1].body_json(); + assert_eq!(first["model"].as_str(), Some(previous_model)); + assert_eq!(second["model"].as_str(), Some(next_model)); + assert!(second.to_string().contains("before switch")); + assert!(second.to_string().contains("after switch")); + assert!( + !body_contains_text(&second.to_string(), SUMMARIZATION_PROMPT), + "disabled auto-compaction should preserve history instead of requesting a summary" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn pre_sampling_compact_skips_when_either_comp_hash_is_missing() { skip_if_no_network!(); diff --git a/codex-rs/core/tests/suite/token_budget.rs b/codex-rs/core/tests/suite/token_budget.rs index c64251e24aa2..28ba9f260df7 100644 --- a/codex-rs/core/tests/suite/token_budget.rs +++ b/codex-rs/core/tests/suite/token_budget.rs @@ -456,3 +456,51 @@ async fn new_context_tool_starts_new_window_before_follow_up() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compaction_feature_disabled_hides_new_context_tool() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let responses = mount_sse_sequence( + &server, + vec![sse(vec![ + ev_response_created("resp-1"), + ev_completed("resp-1"), + ])], + ) + .await; + let test = test_codex() + .with_config(|config| { + config.model_context_window = Some(CONFIGURED_CONTEXT_WINDOW); + config + .features + .enable(Feature::TokenBudget) + .expect("test config should allow token budget"); + config + .features + .disable(Feature::AutoCompaction) + .expect("test config should allow disabling auto-compaction"); + }) + .build(&server) + .await?; + + test.submit_turn("preserve the current context window") + .await?; + + let requests = responses.requests(); + assert_eq!(requests.len(), 1); + let tool_names = tool_names(&requests[0]); + assert!( + tool_names + .iter() + .any(|name| name == "get_context_remaining"), + "token budget should continue to expose get_context_remaining" + ); + assert!( + !tool_names.iter().any(|name| name == "new_context"), + "disabled auto-compaction should hide new_context" + ); + + Ok(()) +}