Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 152 additions & 1 deletion src/services/container_session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,13 @@ impl ContainerSession {
}

// Enforce the cumulative session cap. If adding this file
// would put us over, skip with a warning.
// would put us over, skip it — but still advance the
// snapshot to this version's hash so we treat it as
// accounted-for. Without this, the file stays absent from
// (or stale in) `snapshot`, so every later command re-flags
// it as changed, re-reads its bytes out of the VM, and
// re-emits the same warning. Advancing the snapshot means we
// only warn again if the file's content actually changes.
let prior_bytes = state.captured.get(path).map(|f| f.bytes).unwrap_or(0);
let projected = state.bytes_total.saturating_sub(prior_bytes) + bytes.len() as u64;
if projected > self.config.max_bytes_per_session {
Expand All @@ -725,6 +731,7 @@ impl ContainerSession {
limit = self.config.max_bytes_per_session,
"Captured file would exceed max_bytes_per_session; skipping"
);
state.snapshot.insert(path.clone(), *hash);
continue;
}

Expand Down Expand Up @@ -1171,4 +1178,148 @@ abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd /mnt/data/sub/
assert_eq!(c.len(), 5 + 32);
assert_eq!(f.len(), 6 + 32);
}

// ---- capture_changes byte-budget regression coverage ----

use async_trait::async_trait;
use futures_util::stream;

use crate::runtimes::{ExecEvent, ExecHandle, ExecRequest, RuntimeResult, ShellSession};

/// In-memory `/mnt/data` backing a fake session. `exec` answers the
/// `find … sha256sum` listing command; `read_file` serves stored
/// bytes and counts reads per path so a test can assert a file is
/// not re-read out of the VM on a subsequent capture.
struct FakeSession {
files: std::sync::Mutex<HashMap<String, Bytes>>,
reads: std::sync::Mutex<HashMap<String, usize>>,
}

impl FakeSession {
fn new(files: HashMap<String, Bytes>) -> Self {
Self {
files: std::sync::Mutex::new(files),
reads: std::sync::Mutex::new(HashMap::new()),
}
}

fn read_count(&self, path: &str) -> usize {
self.reads.lock().unwrap().get(path).copied().unwrap_or(0)
}
}

#[async_trait]
impl ShellSession for FakeSession {
async fn exec(&self, _cmd: ExecRequest) -> RuntimeResult<ExecHandle> {
// Emulate the `find … sha256sum` listing the session uses to
// snapshot /mnt/data.
let mut out = String::new();
for (path, bytes) in self.files.lock().unwrap().iter() {
out.push_str(&hex::encode(sha256_bytes(bytes)));
out.push_str(" ");
out.push_str(path);
out.push('\n');
}
let events = vec![
ExecEvent::Stdout(Bytes::from(out)),
ExecEvent::Exit {
code: 0,
signal: None,
},
];
Ok(ExecHandle {
output: Box::pin(stream::iter(events)),
})
}

async fn read_file(&self, path: &str) -> RuntimeResult<Bytes> {
*self
.reads
.lock()
.unwrap()
.entry(path.to_string())
.or_insert(0) += 1;
self.files
.lock()
.unwrap()
.get(path)
.cloned()
.ok_or_else(|| RuntimeError::Backend(format!("no such file: {path}")))
}

async fn terminate(&self) -> RuntimeResult<()> {
Ok(())
}
}

fn test_session(handle: SessionHandle, config: ContainersConfig) -> ContainerSession {
ContainerSession {
container_id: new_container_id(),
runtime_label: "test",
created_at: Instant::now(),
idle_ttl: Duration::from_secs(60),
file_io: true,
session: Some(handle),
config,
state: Mutex::new(SessionState::default()),
persistence: None,
}
}

/// A file skipped by the session byte budget must advance the
/// snapshot so the next `capture_changes` doesn't re-detect it as
Comment on lines +1257 to +1270
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Missing companion test: content-change re-detection after budget-skip

The new test proves the happy-path fix (no re-read when content is unchanged), but there is no assertion that changing the file's content after a budget-skip causes the next capture to re-detect it and warn again. Without that test, a future regression that silently advances the snapshot on content changes (e.g. always using the old hash instead of the new one) would go unnoticed. A short extension of the existing test that mutates the files map and asserts read_count == 2 on the third capture would cover this path.

Prompt To Fix With AI
This is a comment left during a code review.
Path: src/services/container_session.rs
Line: 1257-1270

Comment:
**Missing companion test: content-change re-detection after budget-skip**

The new test proves the happy-path fix (no re-read when content is unchanged), but there is no assertion that changing the file's content after a budget-skip causes the next capture to re-detect it and warn again. Without that test, a future regression that silently advances the snapshot on content changes (e.g. always using the old hash instead of the new one) would go unnoticed. A short extension of the existing test that mutates the `files` map and asserts `read_count == 2` on the third capture would cover this path.

How can I resolve this? If you propose a fix, please make it concise.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

/// changed, re-read its bytes out of the VM, and re-warn. Without the
/// fix, the second capture re-reads the over-budget file.
#[tokio::test]
async fn over_budget_file_advances_snapshot_and_is_not_reread() {
let big = Bytes::from(vec![7u8; 4096]);
let path = format!("{MNT_DATA}/big.bin");
let mut files = HashMap::new();
files.insert(path.clone(), big);
let fake = Arc::new(FakeSession::new(files));
let handle = SessionHandle::new("sess".into(), Box::new(FakeSessionProxy(fake.clone())));

// Cap below the file size so it's always over budget.
let config = ContainersConfig {
max_bytes_per_session: 1024,
..ContainersConfig::default()
};
let session = test_session(handle, config);

// First capture: detects the change, reads it once, finds it over
// budget, skips it (no ref returned) but records the snapshot.
let refs = session.capture_changes().await.unwrap();
assert!(refs.is_empty(), "over-budget file must not be captured");
assert_eq!(fake.read_count(&path), 1, "expected exactly one read");

// Second capture with the file unchanged: must NOT re-read it.
let refs = session.capture_changes().await.unwrap();
assert!(refs.is_empty());
assert_eq!(
fake.read_count(&path),
1,
"snapshot was not advanced: over-budget file re-read on the next command"
);
}

/// Thin wrapper so the test can keep an `Arc` to the fake for
/// assertions while still handing an owned `Box<dyn ShellSession>` to
/// `SessionHandle`.
Comment on lines +1292 to +1307
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 FakeSession.exec ignores the command string

FakeSession::exec always returns the full file listing regardless of the command text. capture_changes currently issues exactly one exec (for list_mnt_data), so the test passes today. If capture_changes ever gains a second exec call (e.g. a cleanup command, a size check), the fake will return listing-shaped stdout for every call, silently corrupting the parse and making the test unreliable. Checking _cmd.command.contains("sha256sum") and returning an empty/error stream otherwise would make the stub more defensive at negligible cost.

Prompt To Fix With AI
This is a comment left during a code review.
Path: src/services/container_session.rs
Line: 1292-1307

Comment:
**`FakeSession.exec` ignores the command string**

`FakeSession::exec` always returns the full file listing regardless of the command text. `capture_changes` currently issues exactly one `exec` (for `list_mnt_data`), so the test passes today. If `capture_changes` ever gains a second `exec` call (e.g. a cleanup command, a size check), the fake will return listing-shaped stdout for every call, silently corrupting the parse and making the test unreliable. Checking `_cmd.command.contains("sha256sum")` and returning an empty/error stream otherwise would make the stub more defensive at negligible cost.

How can I resolve this? If you propose a fix, please make it concise.

struct FakeSessionProxy(Arc<FakeSession>);

#[async_trait]
impl ShellSession for FakeSessionProxy {
async fn exec(&self, cmd: ExecRequest) -> RuntimeResult<ExecHandle> {
self.0.exec(cmd).await
}
async fn read_file(&self, path: &str) -> RuntimeResult<Bytes> {
self.0.read_file(path).await
}
async fn write_file(&self, path: &str, bytes: Bytes) -> RuntimeResult<()> {
self.0.write_file(path, bytes).await
}
async fn terminate(&self) -> RuntimeResult<()> {
self.0.terminate().await
}
}
}
Loading