diff --git a/src/services/container_session.rs b/src/services/container_session.rs index 4f3945e..108b906 100644 --- a/src/services/container_session.rs +++ b/src/services/container_session.rs @@ -713,7 +713,13 @@ impl ContainerSession { } // Enforce the cumulative session cap. If adding this file - // would put us over, skip with a warning. + // would put us over, skip it — but still advance the + // snapshot to this version's hash so we treat it as + // accounted-for. Without this, the file stays absent from + // (or stale in) `snapshot`, so every later command re-flags + // it as changed, re-reads its bytes out of the VM, and + // re-emits the same warning. Advancing the snapshot means we + // only warn again if the file's content actually changes. let prior_bytes = state.captured.get(path).map(|f| f.bytes).unwrap_or(0); let projected = state.bytes_total.saturating_sub(prior_bytes) + bytes.len() as u64; if projected > self.config.max_bytes_per_session { @@ -725,6 +731,7 @@ impl ContainerSession { limit = self.config.max_bytes_per_session, "Captured file would exceed max_bytes_per_session; skipping" ); + state.snapshot.insert(path.clone(), *hash); continue; } @@ -1171,4 +1178,148 @@ abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd /mnt/data/sub/ assert_eq!(c.len(), 5 + 32); assert_eq!(f.len(), 6 + 32); } + + // ---- capture_changes byte-budget regression coverage ---- + + use async_trait::async_trait; + use futures_util::stream; + + use crate::runtimes::{ExecEvent, ExecHandle, ExecRequest, RuntimeResult, ShellSession}; + + /// In-memory `/mnt/data` backing a fake session. `exec` answers the + /// `find … sha256sum` listing command; `read_file` serves stored + /// bytes and counts reads per path so a test can assert a file is + /// not re-read out of the VM on a subsequent capture. + struct FakeSession { + files: std::sync::Mutex>, + reads: std::sync::Mutex>, + } + + impl FakeSession { + fn new(files: HashMap) -> Self { + Self { + files: std::sync::Mutex::new(files), + reads: std::sync::Mutex::new(HashMap::new()), + } + } + + fn read_count(&self, path: &str) -> usize { + self.reads.lock().unwrap().get(path).copied().unwrap_or(0) + } + } + + #[async_trait] + impl ShellSession for FakeSession { + async fn exec(&self, _cmd: ExecRequest) -> RuntimeResult { + // Emulate the `find … sha256sum` listing the session uses to + // snapshot /mnt/data. + let mut out = String::new(); + for (path, bytes) in self.files.lock().unwrap().iter() { + out.push_str(&hex::encode(sha256_bytes(bytes))); + out.push_str(" "); + out.push_str(path); + out.push('\n'); + } + let events = vec![ + ExecEvent::Stdout(Bytes::from(out)), + ExecEvent::Exit { + code: 0, + signal: None, + }, + ]; + Ok(ExecHandle { + output: Box::pin(stream::iter(events)), + }) + } + + async fn read_file(&self, path: &str) -> RuntimeResult { + *self + .reads + .lock() + .unwrap() + .entry(path.to_string()) + .or_insert(0) += 1; + self.files + .lock() + .unwrap() + .get(path) + .cloned() + .ok_or_else(|| RuntimeError::Backend(format!("no such file: {path}"))) + } + + async fn terminate(&self) -> RuntimeResult<()> { + Ok(()) + } + } + + fn test_session(handle: SessionHandle, config: ContainersConfig) -> ContainerSession { + ContainerSession { + container_id: new_container_id(), + runtime_label: "test", + created_at: Instant::now(), + idle_ttl: Duration::from_secs(60), + file_io: true, + session: Some(handle), + config, + state: Mutex::new(SessionState::default()), + persistence: None, + } + } + + /// A file skipped by the session byte budget must advance the + /// snapshot so the next `capture_changes` doesn't re-detect it as + /// changed, re-read its bytes out of the VM, and re-warn. Without the + /// fix, the second capture re-reads the over-budget file. + #[tokio::test] + async fn over_budget_file_advances_snapshot_and_is_not_reread() { + let big = Bytes::from(vec![7u8; 4096]); + let path = format!("{MNT_DATA}/big.bin"); + let mut files = HashMap::new(); + files.insert(path.clone(), big); + let fake = Arc::new(FakeSession::new(files)); + let handle = SessionHandle::new("sess".into(), Box::new(FakeSessionProxy(fake.clone()))); + + // Cap below the file size so it's always over budget. + let config = ContainersConfig { + max_bytes_per_session: 1024, + ..ContainersConfig::default() + }; + let session = test_session(handle, config); + + // First capture: detects the change, reads it once, finds it over + // budget, skips it (no ref returned) but records the snapshot. + let refs = session.capture_changes().await.unwrap(); + assert!(refs.is_empty(), "over-budget file must not be captured"); + assert_eq!(fake.read_count(&path), 1, "expected exactly one read"); + + // Second capture with the file unchanged: must NOT re-read it. + let refs = session.capture_changes().await.unwrap(); + assert!(refs.is_empty()); + assert_eq!( + fake.read_count(&path), + 1, + "snapshot was not advanced: over-budget file re-read on the next command" + ); + } + + /// Thin wrapper so the test can keep an `Arc` to the fake for + /// assertions while still handing an owned `Box` to + /// `SessionHandle`. + struct FakeSessionProxy(Arc); + + #[async_trait] + impl ShellSession for FakeSessionProxy { + async fn exec(&self, cmd: ExecRequest) -> RuntimeResult { + self.0.exec(cmd).await + } + async fn read_file(&self, path: &str) -> RuntimeResult { + self.0.read_file(path).await + } + async fn write_file(&self, path: &str, bytes: Bytes) -> RuntimeResult<()> { + self.0.write_file(path, bytes).await + } + async fn terminate(&self) -> RuntimeResult<()> { + self.0.terminate().await + } + } }