-
Notifications
You must be signed in to change notification settings - Fork 1
Advance snapshot for budget-skipped container files #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -713,7 +713,13 @@ impl ContainerSession { | |
| } | ||
|
|
||
| // Enforce the cumulative session cap. If adding this file | ||
| // would put us over, skip with a warning. | ||
| // would put us over, skip it — but still advance the | ||
| // snapshot to this version's hash so we treat it as | ||
| // accounted-for. Without this, the file stays absent from | ||
| // (or stale in) `snapshot`, so every later command re-flags | ||
| // it as changed, re-reads its bytes out of the VM, and | ||
| // re-emits the same warning. Advancing the snapshot means we | ||
| // only warn again if the file's content actually changes. | ||
| let prior_bytes = state.captured.get(path).map(|f| f.bytes).unwrap_or(0); | ||
| let projected = state.bytes_total.saturating_sub(prior_bytes) + bytes.len() as u64; | ||
| if projected > self.config.max_bytes_per_session { | ||
|
|
@@ -725,6 +731,7 @@ impl ContainerSession { | |
| limit = self.config.max_bytes_per_session, | ||
| "Captured file would exceed max_bytes_per_session; skipping" | ||
| ); | ||
| state.snapshot.insert(path.clone(), *hash); | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -1171,4 +1178,148 @@ abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd /mnt/data/sub/ | |
| assert_eq!(c.len(), 5 + 32); | ||
| assert_eq!(f.len(), 6 + 32); | ||
| } | ||
|
|
||
| // ---- capture_changes byte-budget regression coverage ---- | ||
|
|
||
| use async_trait::async_trait; | ||
| use futures_util::stream; | ||
|
|
||
| use crate::runtimes::{ExecEvent, ExecHandle, ExecRequest, RuntimeResult, ShellSession}; | ||
|
|
||
| /// In-memory `/mnt/data` backing a fake session. `exec` answers the | ||
| /// `find … sha256sum` listing command; `read_file` serves stored | ||
| /// bytes and counts reads per path so a test can assert a file is | ||
| /// not re-read out of the VM on a subsequent capture. | ||
| struct FakeSession { | ||
| files: std::sync::Mutex<HashMap<String, Bytes>>, | ||
| reads: std::sync::Mutex<HashMap<String, usize>>, | ||
| } | ||
|
|
||
| impl FakeSession { | ||
| fn new(files: HashMap<String, Bytes>) -> Self { | ||
| Self { | ||
| files: std::sync::Mutex::new(files), | ||
| reads: std::sync::Mutex::new(HashMap::new()), | ||
| } | ||
| } | ||
|
|
||
| fn read_count(&self, path: &str) -> usize { | ||
| self.reads.lock().unwrap().get(path).copied().unwrap_or(0) | ||
| } | ||
| } | ||
|
|
||
| #[async_trait] | ||
| impl ShellSession for FakeSession { | ||
| async fn exec(&self, _cmd: ExecRequest) -> RuntimeResult<ExecHandle> { | ||
| // Emulate the `find … sha256sum` listing the session uses to | ||
| // snapshot /mnt/data. | ||
| let mut out = String::new(); | ||
| for (path, bytes) in self.files.lock().unwrap().iter() { | ||
| out.push_str(&hex::encode(sha256_bytes(bytes))); | ||
| out.push_str(" "); | ||
| out.push_str(path); | ||
| out.push('\n'); | ||
| } | ||
| let events = vec![ | ||
| ExecEvent::Stdout(Bytes::from(out)), | ||
| ExecEvent::Exit { | ||
| code: 0, | ||
| signal: None, | ||
| }, | ||
| ]; | ||
| Ok(ExecHandle { | ||
| output: Box::pin(stream::iter(events)), | ||
| }) | ||
| } | ||
|
|
||
| async fn read_file(&self, path: &str) -> RuntimeResult<Bytes> { | ||
| *self | ||
| .reads | ||
| .lock() | ||
| .unwrap() | ||
| .entry(path.to_string()) | ||
| .or_insert(0) += 1; | ||
| self.files | ||
| .lock() | ||
| .unwrap() | ||
| .get(path) | ||
| .cloned() | ||
| .ok_or_else(|| RuntimeError::Backend(format!("no such file: {path}"))) | ||
| } | ||
|
|
||
| async fn terminate(&self) -> RuntimeResult<()> { | ||
| Ok(()) | ||
| } | ||
| } | ||
|
|
||
| fn test_session(handle: SessionHandle, config: ContainersConfig) -> ContainerSession { | ||
| ContainerSession { | ||
| container_id: new_container_id(), | ||
| runtime_label: "test", | ||
| created_at: Instant::now(), | ||
| idle_ttl: Duration::from_secs(60), | ||
| file_io: true, | ||
| session: Some(handle), | ||
| config, | ||
| state: Mutex::new(SessionState::default()), | ||
| persistence: None, | ||
| } | ||
| } | ||
|
|
||
| /// A file skipped by the session byte budget must advance the | ||
| /// snapshot so the next `capture_changes` doesn't re-detect it as | ||
| /// changed, re-read its bytes out of the VM, and re-warn. Without the | ||
| /// fix, the second capture re-reads the over-budget file. | ||
| #[tokio::test] | ||
| async fn over_budget_file_advances_snapshot_and_is_not_reread() { | ||
| let big = Bytes::from(vec![7u8; 4096]); | ||
| let path = format!("{MNT_DATA}/big.bin"); | ||
| let mut files = HashMap::new(); | ||
| files.insert(path.clone(), big); | ||
| let fake = Arc::new(FakeSession::new(files)); | ||
| let handle = SessionHandle::new("sess".into(), Box::new(FakeSessionProxy(fake.clone()))); | ||
|
|
||
| // Cap below the file size so it's always over budget. | ||
| let config = ContainersConfig { | ||
| max_bytes_per_session: 1024, | ||
| ..ContainersConfig::default() | ||
| }; | ||
| let session = test_session(handle, config); | ||
|
|
||
| // First capture: detects the change, reads it once, finds it over | ||
| // budget, skips it (no ref returned) but records the snapshot. | ||
| let refs = session.capture_changes().await.unwrap(); | ||
| assert!(refs.is_empty(), "over-budget file must not be captured"); | ||
| assert_eq!(fake.read_count(&path), 1, "expected exactly one read"); | ||
|
|
||
| // Second capture with the file unchanged: must NOT re-read it. | ||
| let refs = session.capture_changes().await.unwrap(); | ||
| assert!(refs.is_empty()); | ||
| assert_eq!( | ||
| fake.read_count(&path), | ||
| 1, | ||
| "snapshot was not advanced: over-budget file re-read on the next command" | ||
| ); | ||
| } | ||
|
|
||
| /// Thin wrapper so the test can keep an `Arc` to the fake for | ||
| /// assertions while still handing an owned `Box<dyn ShellSession>` to | ||
| /// `SessionHandle`. | ||
|
Comment on lines
+1292
to
+1307
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Prompt To Fix With AIThis is a comment left during a code review.
Path: src/services/container_session.rs
Line: 1292-1307
Comment:
**`FakeSession.exec` ignores the command string**
`FakeSession::exec` always returns the full file listing regardless of the command text. `capture_changes` currently issues exactly one `exec` (for `list_mnt_data`), so the test passes today. If `capture_changes` ever gains a second `exec` call (e.g. a cleanup command, a size check), the fake will return listing-shaped stdout for every call, silently corrupting the parse and making the test unreliable. Checking `_cmd.command.contains("sha256sum")` and returning an empty/error stream otherwise would make the stub more defensive at negligible cost.
How can I resolve this? If you propose a fix, please make it concise. |
||
| struct FakeSessionProxy(Arc<FakeSession>); | ||
|
|
||
| #[async_trait] | ||
| impl ShellSession for FakeSessionProxy { | ||
| async fn exec(&self, cmd: ExecRequest) -> RuntimeResult<ExecHandle> { | ||
| self.0.exec(cmd).await | ||
| } | ||
| async fn read_file(&self, path: &str) -> RuntimeResult<Bytes> { | ||
| self.0.read_file(path).await | ||
| } | ||
| async fn write_file(&self, path: &str, bytes: Bytes) -> RuntimeResult<()> { | ||
| self.0.write_file(path, bytes).await | ||
| } | ||
| async fn terminate(&self) -> RuntimeResult<()> { | ||
| self.0.terminate().await | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new test proves the happy-path fix (no re-read when content is unchanged), but there is no assertion that changing the file's content after a budget-skip causes the next capture to re-detect it and warn again. Without that test, a future regression that silently advances the snapshot on content changes (e.g. always using the old hash instead of the new one) would go unnoticed. A short extension of the existing test that mutates the
filesmap and assertsread_count == 2on the third capture would cover this path.Prompt To Fix With AI
Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!