diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 65dad461fac..a32ff0231db 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2366,10 +2366,6 @@ mod tests { use crate::state::TaskKind; use crate::tasks::SessionTask; use crate::tasks::SessionTaskContext; - use crate::tools::MODEL_FORMAT_HEAD_LINES; - use crate::tools::MODEL_FORMAT_MAX_BYTES; - use crate::tools::MODEL_FORMAT_MAX_LINES; - use crate::tools::MODEL_FORMAT_TAIL_LINES; use crate::tools::ToolRouter; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; @@ -2456,95 +2452,6 @@ mod tests { assert_eq!(expected, got); } - #[test] - fn model_truncation_head_tail_by_lines() { - // Build 400 short lines so line-count limit, not byte budget, triggers truncation - let lines: Vec = (1..=400).map(|i| format!("line{i}")).collect(); - let full = lines.join("\n"); - - let exec = ExecToolCallOutput { - exit_code: 0, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new(String::new()), - aggregated_output: StreamOutput::new(full), - duration: StdDuration::from_secs(1), - timed_out: false, - }; - - let out = format_exec_output_str(&exec); - - // Strip truncation header if present for subsequent assertions - let body = out - .strip_prefix("Total output lines: ") - .and_then(|rest| rest.split_once("\n\n").map(|x| x.1)) - .unwrap_or(out.as_str()); - - // Expect elision marker with correct counts - let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144 - let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n"); - assert!(out.contains(&marker), "missing marker: {out}"); - - // Validate head and tail - let parts: Vec<&str> = body.split(&marker).collect(); - assert_eq!(parts.len(), 2, "expected one marker split"); - let head = parts[0]; - let tail = parts[1]; - - let expected_head: String = (1..=MODEL_FORMAT_HEAD_LINES) - .map(|i| format!("line{i}")) - .collect::>() - .join("\n"); - assert!(head.starts_with(&expected_head), "head mismatch"); - - let expected_tail: String = ((400 - MODEL_FORMAT_TAIL_LINES + 1)..=400) - .map(|i| format!("line{i}")) - .collect::>() - .join("\n"); - assert!(tail.ends_with(&expected_tail), "tail mismatch"); - } - - #[test] - fn model_truncation_respects_byte_budget() { - // Construct a large output (about 100kB) so byte budget dominates - let big_line = "x".repeat(100); - let full = std::iter::repeat_n(big_line, 1000) - .collect::>() - .join("\n"); - - let exec = ExecToolCallOutput { - exit_code: 0, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new(String::new()), - aggregated_output: StreamOutput::new(full.clone()), - duration: StdDuration::from_secs(1), - timed_out: false, - }; - - let out = format_exec_output_str(&exec); - // Keep strict budget on the truncated body (excluding header) - let body = out - .strip_prefix("Total output lines: ") - .and_then(|rest| rest.split_once("\n\n").map(|x| x.1)) - .unwrap_or(out.as_str()); - assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget"); - assert!(out.contains("omitted"), "should contain elision marker"); - - // Ensure head and tail are drawn from the original - assert!(full.starts_with(body.chars().take(8).collect::().as_str())); - assert!( - full.ends_with( - body.chars() - .rev() - .take(8) - .collect::() - .chars() - .rev() - .collect::() - .as_str() - ) - ); - } - #[test] fn includes_timed_out_message() { let exec = ExecToolCallOutput { diff --git a/codex-rs/core/src/conversation_history.rs b/codex-rs/core/src/conversation_history.rs index 08fa8cebede..f230a7979bc 100644 --- a/codex-rs/core/src/conversation_history.rs +++ b/codex-rs/core/src/conversation_history.rs @@ -2,9 +2,18 @@ use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::TokenUsage; use codex_protocol::protocol::TokenUsageInfo; +use codex_utils_string::take_bytes_at_char_boundary; +use codex_utils_string::take_last_bytes_at_char_boundary; use std::ops::Deref; use tracing::error; +// Model-formatting limits: clients get full streams; only content sent to the model is truncated. +pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB +pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines +pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2; +pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128 +pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2; + /// Transcript of conversation history #[derive(Debug, Clone, Default)] pub(crate) struct ConversationHistory { @@ -47,7 +56,8 @@ impl ConversationHistory { continue; } - self.items.push(item.clone()); + let processed = Self::process_item(&item); + self.items.push(processed); } } @@ -68,6 +78,22 @@ impl ConversationHistory { } } + pub(crate) fn replace(&mut self, items: Vec) { + self.items = items; + } + + pub(crate) fn update_token_info( + &mut self, + usage: &TokenUsage, + model_context_window: Option, + ) { + self.token_info = TokenUsageInfo::new_or_append( + &self.token_info, + &Some(usage.clone()), + model_context_window, + ); + } + /// This function enforces a couple of invariants on the in-memory history: /// 1. every call (function/custom) has a corresponding output entry /// 2. every output has a corresponding call entry @@ -253,10 +279,6 @@ impl ConversationHistory { } } - pub(crate) fn replace(&mut self, items: Vec) { - self.items = items; - } - /// Removes the corresponding paired item for the provided `item`, if any. /// /// Pairs: @@ -326,17 +348,106 @@ impl ConversationHistory { } } - pub(crate) fn update_token_info( - &mut self, - usage: &TokenUsage, - model_context_window: Option, - ) { - self.token_info = TokenUsageInfo::new_or_append( - &self.token_info, - &Some(usage.clone()), - model_context_window, - ); + fn process_item(item: &ResponseItem) -> ResponseItem { + match item { + ResponseItem::FunctionCallOutput { call_id, output } => { + let truncated = format_output_for_model_body(output.content.as_str()); + ResponseItem::FunctionCallOutput { + call_id: call_id.clone(), + output: FunctionCallOutputPayload { + content: truncated, + success: output.success, + }, + } + } + ResponseItem::CustomToolCallOutput { call_id, output } => { + let truncated = format_output_for_model_body(output); + ResponseItem::CustomToolCallOutput { + call_id: call_id.clone(), + output: truncated, + } + } + ResponseItem::Message { .. } + | ResponseItem::Reasoning { .. } + | ResponseItem::LocalShellCall { .. } + | ResponseItem::FunctionCall { .. } + | ResponseItem::WebSearchCall { .. } + | ResponseItem::CustomToolCall { .. } + | ResponseItem::GhostSnapshot { .. } + | ResponseItem::Other => item.clone(), + } + } +} + +pub(crate) fn format_output_for_model_body(content: &str) -> String { + // Head+tail truncation for the model: show the beginning and end with an elision. + // Clients still receive full streams; only this formatted summary is capped. + let total_lines = content.lines().count(); + if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { + return content.to_string(); + } + let output = truncate_formatted_exec_output(content, total_lines); + format!("Total output lines: {total_lines}\n\n{output}") +} + +fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String { + let segments: Vec<&str> = content.split_inclusive('\n').collect(); + let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len()); + let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take)); + let omitted = segments.len().saturating_sub(head_take + tail_take); + + let head_slice_end: usize = segments + .iter() + .take(head_take) + .map(|segment| segment.len()) + .sum(); + let tail_slice_start: usize = if tail_take == 0 { + content.len() + } else { + content.len() + - segments + .iter() + .rev() + .take(tail_take) + .map(|segment| segment.len()) + .sum::() + }; + let head_slice = &content[..head_slice_end]; + let tail_slice = &content[tail_slice_start..]; + let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES; + // this is a bit wrong. We are counting metadata lines and not just shell output lines. + let marker = if omitted > 0 { + Some(format!( + "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" + )) + } else if truncated_by_bytes { + Some(format!( + "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n" + )) + } else { + None + }; + + let marker_len = marker.as_ref().map_or(0, String::len); + let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES); + let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len)); + let head_part = take_bytes_at_char_boundary(head_slice, head_budget); + let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len())); + + result.push_str(head_part); + if let Some(marker_text) = marker.as_ref() { + result.push_str(marker_text); + } + + let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len()); + if remaining == 0 { + return result; } + + let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); + result.push_str(tail_part); + + result } #[inline] @@ -533,6 +644,183 @@ mod tests { assert_eq!(h.contents(), vec![]); } + #[test] + fn record_items_truncates_function_call_output_content() { + let mut history = ConversationHistory::new(); + let long_line = "a very long line to trigger truncation\n"; + let long_output = long_line.repeat(2_500); + let item = ResponseItem::FunctionCallOutput { + call_id: "call-100".to_string(), + output: FunctionCallOutputPayload { + content: long_output.clone(), + success: Some(true), + }, + }; + + history.record_items([&item]); + + assert_eq!(history.items.len(), 1); + match &history.items[0] { + ResponseItem::FunctionCallOutput { output, .. } => { + assert_ne!(output.content, long_output); + assert!( + output.content.starts_with("Total output lines:"), + "expected truncated summary, got {}", + output.content + ); + } + other => panic!("unexpected history item: {other:?}"), + } + } + + #[test] + fn record_items_truncates_custom_tool_call_output_content() { + let mut history = ConversationHistory::new(); + let line = "custom output that is very long\n"; + let long_output = line.repeat(2_500); + let item = ResponseItem::CustomToolCallOutput { + call_id: "tool-200".to_string(), + output: long_output.clone(), + }; + + history.record_items([&item]); + + assert_eq!(history.items.len(), 1); + match &history.items[0] { + ResponseItem::CustomToolCallOutput { output, .. } => { + assert_ne!(output, &long_output); + assert!( + output.starts_with("Total output lines:"), + "expected truncated summary, got {output}" + ); + } + other => panic!("unexpected history item: {other:?}"), + } + } + + // The following tests were adapted from tools::mod truncation tests to + // target the new truncation functions in conversation_history. + + use regex_lite::Regex; + + fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { + let pattern = truncated_message_pattern(line, total_lines); + let regex = Regex::new(&pattern).unwrap_or_else(|err| { + panic!("failed to compile regex {pattern}: {err}"); + }); + let captures = regex + .captures(message) + .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}")); + let body = captures + .name("body") + .expect("missing body capture") + .as_str(); + assert!( + body.len() <= MODEL_FORMAT_MAX_BYTES, + "body exceeds byte limit: {} bytes", + body.len() + ); + } + + fn truncated_message_pattern(line: &str, total_lines: usize) -> String { + let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines); + let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take)); + let omitted = total_lines.saturating_sub(head_take + tail_take); + let escaped_line = regex_lite::escape(line); + if omitted == 0 { + return format!( + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$", + ); + } + format!( + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$", + ) + } + + #[test] + fn format_exec_output_truncates_large_error() { + let line = "very long execution error line that should trigger truncation\n"; + let large_error = line.repeat(2_500); // way beyond both byte and line limits + + let truncated = format_output_for_model_body(&large_error); + + let total_lines = large_error.lines().count(); + assert_truncated_message_matches(&truncated, line, total_lines); + assert_ne!(truncated, large_error); + } + + #[test] + fn format_exec_output_marks_byte_truncation_without_omitted_lines() { + let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50); + let truncated = format_output_for_model_body(&long_line); + + assert_ne!(truncated, long_line); + let marker_line = + format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]"); + assert!( + truncated.contains(&marker_line), + "missing byte truncation marker: {truncated}" + ); + assert!( + !truncated.contains("omitted"), + "line omission marker should not appear when no lines were dropped: {truncated}" + ); + } + + #[test] + fn format_exec_output_returns_original_when_within_limits() { + let content = "example output\n".repeat(10); + + assert_eq!(format_output_for_model_body(&content), content); + } + + #[test] + fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { + let total_lines = MODEL_FORMAT_MAX_LINES + 100; + let content: String = (0..total_lines) + .map(|idx| format!("line-{idx}\n")) + .collect(); + + let truncated = format_output_for_model_body(&content); + let omitted = total_lines - MODEL_FORMAT_MAX_LINES; + let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); + + assert!( + truncated.contains(&expected_marker), + "missing omitted marker: {truncated}" + ); + assert!( + truncated.contains("line-0\n"), + "expected head line to remain: {truncated}" + ); + + let last_line = format!("line-{}\n", total_lines - 1); + assert!( + truncated.contains(&last_line), + "expected tail line to remain: {truncated}" + ); + } + + #[test] + fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { + let total_lines = MODEL_FORMAT_MAX_LINES + 42; + let long_line = "x".repeat(256); + let content: String = (0..total_lines) + .map(|idx| format!("line-{idx}-{long_line}\n")) + .collect(); + + let truncated = format_output_for_model_body(&content); + + assert!( + truncated.contains("[... omitted 42 of 298 lines ...]"), + "expected omitted marker when line count exceeds limit: {truncated}" + ); + assert!( + !truncated.contains("output truncated to fit"), + "line omission marker should take precedence over byte marker: {truncated}" + ); + } + //TODO(aibrahim): run CI in release mode. #[cfg(not(debug_assertions))] #[test] diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs index 55782955479..cb267c8917f 100644 --- a/codex-rs/core/src/tools/events.rs +++ b/codex-rs/core/src/tools/events.rs @@ -19,7 +19,6 @@ use std::path::Path; use std::path::PathBuf; use std::time::Duration; -use super::format_exec_output; use super::format_exec_output_str; #[derive(Clone, Copy)] @@ -146,7 +145,7 @@ impl ToolEmitter { (*message).to_string(), -1, Duration::ZERO, - format_exec_output(&message), + message.clone(), ) .await; } @@ -241,7 +240,7 @@ impl ToolEmitter { (*message).to_string(), -1, Duration::ZERO, - format_exec_output(&message), + message.clone(), ) .await; } @@ -277,7 +276,7 @@ impl ToolEmitter { } Err(ToolError::Codex(err)) => { let message = format!("execution error: {err:?}"); - let response = super::format_exec_output(&message); + let response = message.clone(); event = ToolEventStage::Failure(ToolEventFailure::Message(message)); Err(FunctionCallError::RespondToModel(response)) } @@ -289,9 +288,9 @@ impl ToolEmitter { } else { msg }; - let response = super::format_exec_output(&normalized); - event = ToolEventStage::Failure(ToolEventFailure::Message(normalized)); - Err(FunctionCallError::RespondToModel(response)) + let response = &normalized; + event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone())); + Err(FunctionCallError::RespondToModel(response.clone())) } }; self.emit(ctx, event).await; diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index f22d064b51c..f5ae4a12e1d 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -9,19 +9,11 @@ pub mod runtimes; pub mod sandboxing; pub mod spec; +use crate::conversation_history::format_output_for_model_body; use crate::exec::ExecToolCallOutput; -use codex_utils_string::take_bytes_at_char_boundary; -use codex_utils_string::take_last_bytes_at_char_boundary; pub use router::ToolRouter; use serde::Serialize; -// Model-formatting limits: clients get full streams; only content sent to the model is truncated. -pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB -pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines -pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2; -pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128 -pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2; - // Telemetry preview limits: keep log events smaller than model budgets. pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines @@ -73,249 +65,15 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { let content = aggregated_output.text.as_str(); - if exec_output.timed_out { - let prefixed = format!( + let body = if exec_output.timed_out { + format!( "command timed out after {} milliseconds\n{content}", exec_output.duration.as_millis() - ); - return format_exec_output(&prefixed); - } - - format_exec_output(content) -} - -pub(super) fn format_exec_output(content: &str) -> String { - // Head+tail truncation for the model: show the beginning and end with an elision. - // Clients still receive full streams; only this formatted summary is capped. - let total_lines = content.lines().count(); - if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { - return content.to_string(); - } - let output = truncate_formatted_exec_output(content, total_lines); - format!("Total output lines: {total_lines}\n\n{output}") -} - -fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String { - let segments: Vec<&str> = content.split_inclusive('\n').collect(); - let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len()); - let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take)); - let omitted = segments.len().saturating_sub(head_take + tail_take); - - let head_slice_end: usize = segments - .iter() - .take(head_take) - .map(|segment| segment.len()) - .sum(); - let tail_slice_start: usize = if tail_take == 0 { - content.len() - } else { - content.len() - - segments - .iter() - .rev() - .take(tail_take) - .map(|segment| segment.len()) - .sum::() - }; - let head_slice = &content[..head_slice_end]; - let tail_slice = &content[tail_slice_start..]; - let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES; - let marker = if omitted > 0 { - Some(format!( - "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" - )) - } else if truncated_by_bytes { - Some(format!( - "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n" - )) + ) } else { - None + content.to_string() }; - let marker_len = marker.as_ref().map_or(0, String::len); - let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES); - let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len)); - let head_part = take_bytes_at_char_boundary(head_slice, head_budget); - let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len())); - - result.push_str(head_part); - if let Some(marker_text) = marker.as_ref() { - result.push_str(marker_text); - } - - let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len()); - if remaining == 0 { - return result; - } - - let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); - result.push_str(tail_part); - - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::function_tool::FunctionCallError; - use regex_lite::Regex; - - fn truncate_function_error(err: FunctionCallError) -> FunctionCallError { - match err { - FunctionCallError::RespondToModel(msg) => { - FunctionCallError::RespondToModel(format_exec_output(&msg)) - } - FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)), - FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)), - other => other, - } - } - - fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { - let pattern = truncated_message_pattern(line, total_lines); - let regex = Regex::new(&pattern).unwrap_or_else(|err| { - panic!("failed to compile regex {pattern}: {err}"); - }); - let captures = regex - .captures(message) - .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}")); - let body = captures - .name("body") - .expect("missing body capture") - .as_str(); - assert!( - body.len() <= MODEL_FORMAT_MAX_BYTES, - "body exceeds byte limit: {} bytes", - body.len() - ); - } - - fn truncated_message_pattern(line: &str, total_lines: usize) -> String { - let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines); - let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take)); - let omitted = total_lines.saturating_sub(head_take + tail_take); - let escaped_line = regex_lite::escape(line); - if omitted == 0 { - return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$", - ); - } - format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$", - ) - } - - #[test] - fn truncate_formatted_exec_output_truncates_large_error() { - let line = "very long execution error line that should trigger truncation\n"; - let large_error = line.repeat(2_500); // way beyond both byte and line limits - - let truncated = format_exec_output(&large_error); - - let total_lines = large_error.lines().count(); - assert_truncated_message_matches(&truncated, line, total_lines); - assert_ne!(truncated, large_error); - } - - #[test] - fn truncate_function_error_trims_respond_to_model() { - let line = "respond-to-model error that should be truncated\n"; - let huge = line.repeat(3_000); - let total_lines = huge.lines().count(); - - let err = truncate_function_error(FunctionCallError::RespondToModel(huge)); - match err { - FunctionCallError::RespondToModel(message) => { - assert_truncated_message_matches(&message, line, total_lines); - } - other => panic!("unexpected error variant: {other:?}"), - } - } - - #[test] - fn truncate_function_error_trims_fatal() { - let line = "fatal error output that should be truncated\n"; - let huge = line.repeat(3_000); - let total_lines = huge.lines().count(); - - let err = truncate_function_error(FunctionCallError::Fatal(huge)); - match err { - FunctionCallError::Fatal(message) => { - assert_truncated_message_matches(&message, line, total_lines); - } - other => panic!("unexpected error variant: {other:?}"), - } - } - - #[test] - fn truncate_formatted_exec_output_marks_byte_truncation_without_omitted_lines() { - let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50); - let truncated = format_exec_output(&long_line); - - assert_ne!(truncated, long_line); - let marker_line = - format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]"); - assert!( - truncated.contains(&marker_line), - "missing byte truncation marker: {truncated}" - ); - assert!( - !truncated.contains("omitted"), - "line omission marker should not appear when no lines were dropped: {truncated}" - ); - } - - #[test] - fn truncate_formatted_exec_output_returns_original_when_within_limits() { - let content = "example output\n".repeat(10); - - assert_eq!(format_exec_output(&content), content); - } - - #[test] - fn truncate_formatted_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { - let total_lines = MODEL_FORMAT_MAX_LINES + 100; - let content: String = (0..total_lines) - .map(|idx| format!("line-{idx}\n")) - .collect(); - - let truncated = format_exec_output(&content); - let omitted = total_lines - MODEL_FORMAT_MAX_LINES; - let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); - - assert!( - truncated.contains(&expected_marker), - "missing omitted marker: {truncated}" - ); - assert!( - truncated.contains("line-0\n"), - "expected head line to remain: {truncated}" - ); - - let last_line = format!("line-{}\n", total_lines - 1); - assert!( - truncated.contains(&last_line), - "expected tail line to remain: {truncated}" - ); - } - - #[test] - fn truncate_formatted_exec_output_prefers_line_marker_when_both_limits_exceeded() { - let total_lines = MODEL_FORMAT_MAX_LINES + 42; - let long_line = "x".repeat(256); - let content: String = (0..total_lines) - .map(|idx| format!("line-{idx}-{long_line}\n")) - .collect(); - - let truncated = format_exec_output(&content); - - assert!( - truncated.contains("[... omitted 42 of 298 lines ...]"), - "expected omitted marker when line count exceeds limit: {truncated}" - ); - assert!( - !truncated.contains("output truncated to fit"), - "line omission marker should take precedence over byte marker: {truncated}" - ); - } + // Truncate for model consumption before serialization. + format_output_for_model_body(&body) } diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index bfaef15a924..300ca146ecb 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -33,6 +33,7 @@ mod stream_no_completed; mod tool_harness; mod tool_parallelism; mod tools; +mod truncation; mod unified_exec; mod user_notification; mod view_image; diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs new file mode 100644 index 00000000000..74d90f1de7e --- /dev/null +++ b/codex-rs/core/tests/suite/truncation.rs @@ -0,0 +1,270 @@ +#![cfg(not(target_os = "windows"))] +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use anyhow::Context; +use anyhow::Result; +use codex_core::features::Feature; +use codex_core::model_family::find_family_for_model; +use codex_core::protocol::SandboxPolicy; +use core_test_support::assert_regex_match; +use core_test_support::responses; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_sse_once_match; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use escargot::CargoBuild; +use regex_lite::Regex; +use serde_json::Value; +use serde_json::json; +use wiremock::matchers::any; + +// Verifies byte-truncation formatting for function error output (RespondToModel errors) +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn truncate_function_error_trims_respond_to_model() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + // Use the test model that wires function tools like grep_files + config.model = "test-gpt-5-codex".to_string(); + config.model_family = + find_family_for_model("test-gpt-5-codex").expect("model family for test model"); + }); + let test = builder.build(&server).await?; + + // Construct a very long, non-existent path to force a RespondToModel error with a large message + let long_path = "a".repeat(20_000); + let call_id = "grep-huge-error"; + let args = json!({ + "pattern": "alpha", + "path": long_path, + "limit": 10 + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + test.submit_turn_with_policy( + "trigger grep_files with long path to test truncation", + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let output = mock + .function_call_output_text(call_id) + .context("function error output present")?; + + tracing::debug!(output = %output, "truncated function error output"); + + // Expect plaintext with byte-truncation marker and no omitted-lines marker + assert!( + serde_json::from_str::(&output).is_err(), + "expected error output to be plain text", + ); + let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#; + assert_regex_match(truncated_pattern, &output); + assert!( + !output.contains("omitted"), + "line omission marker should not appear when no lines were dropped: {output}" + ); + + Ok(()) +} + +// Verifies that a standard tool call (shell) exceeding the model formatting +// limits is truncated before being sent back to the model. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + // Use a model that exposes the generic shell tool. + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5-codex".to_string(); + config.model_family = + find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family"); + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-too-large"; + let args = serde_json::json!({ + "command": ["/bin/sh", "-c", "seq 1 400"], + "timeout_ms": 5_000, + }); + + // First response: model tells us to run the tool; second: complete the turn. + mount_sse_once_match( + &server, + any(), + sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let mock2 = mount_sse_once_match( + &server, + any(), + sse(vec![ + responses::ev_assistant_message("msg-1", "done"), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess) + .await?; + + // Inspect what we sent back to the model; it should contain a truncated + // function_call_output for the shell call. + let output = mock2 + .single_request() + .function_call_output_text(call_id) + .context("function_call_output present for shell call")?; + + // Expect plain text (not JSON) with truncation markers and line elision. + assert!( + serde_json::from_str::(&output).is_err(), + "expected truncated shell output to be plain text" + ); + let truncated_pattern = r#"(?s)^Exit code: 0 +Wall time: .* seconds +Total output lines: 400 +Output: +1 +2 +3 +4 +5 +6 +.* +\[\.{3} omitted 144 of 400 lines \.{3}\] + +.* +396 +397 +398 +399 +400 +$"#; + assert_regex_match(truncated_pattern, &output); + + Ok(()) +} + +// Verifies that an MCP tool call result exceeding the model formatting limits +// is truncated before being sent back to the model. +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + let call_id = "rmcp-truncated"; + let server_name = "rmcp"; + let tool_name = format!("mcp__{server_name}__echo"); + + // Build a very large message to exceed 10KiB once serialized. + let large_msg = "long-message-with-newlines-".repeat(600); + let args_json = serde_json::json!({ "message": large_msg }); + + mount_sse_once_match( + &server, + any(), + sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, &tool_name, &args_json.to_string()), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let mock2 = mount_sse_once_match( + &server, + any(), + sse(vec![ + responses::ev_assistant_message("msg-1", "rmcp echo tool completed."), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + // Compile the rmcp stdio test server and configure it. + let rmcp_test_server_bin = CargoBuild::new() + .package("codex-rmcp-client") + .bin("test_stdio_server") + .run()? + .path() + .to_string_lossy() + .into_owned(); + + let mut builder = test_codex().with_config(move |config| { + config.features.enable(Feature::RmcpClient); + config.mcp_servers.insert( + server_name.to_string(), + codex_core::config_types::McpServerConfig { + transport: codex_core::config_types::McpServerTransportConfig::Stdio { + command: rmcp_test_server_bin, + args: Vec::new(), + env: None, + env_vars: Vec::new(), + cwd: None, + }, + enabled: true, + startup_timeout_sec: Some(std::time::Duration::from_secs(10)), + tool_timeout_sec: None, + enabled_tools: None, + disabled_tools: None, + }, + ); + }); + let fixture = builder.build(&server).await?; + + fixture + .submit_turn_with_policy( + "call the rmcp echo tool with a very large message", + SandboxPolicy::ReadOnly, + ) + .await?; + + // The MCP tool call output is converted to a function_call_output for the model. + let output = mock2 + .single_request() + .function_call_output_text(call_id) + .context("function_call_output present for rmcp call")?; + + // Expect plain text with byte-based truncation marker. + assert!( + serde_json::from_str::(&output).is_err(), + "expected truncated MCP output to be plain text" + ); + assert!( + output.starts_with("Total output lines: 1\n\n{"), + "expected total line header and JSON head, got: {output}" + ); + let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]") + .expect("compile regex"); + assert!( + byte_marker.is_match(&output), + "expected byte truncation marker, got: {output}" + ); + + Ok(()) +}