Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions lib/diskutilization/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Disk Utilization

This package measures the actual on-disk footprint that Hypeman is consuming, so operators can answer a question the existing allocation metrics cannot: how much disk space is really in use, as opposed to how much has been allocated or provisioned.

`hypeman_resources_disk_breakdown_bytes` remains the allocation and provisioned view.
`hypeman_disk_utilization_bytes` is the actual filesystem utilization view.

The utilization metric reports bytes allocated on disk for these components:

- `images`
- `oci_cache`
- `volumes`
- `rootfs_overlays`
- `volume_overlays`
- `snapshot_uncompressed`
- `snapshot_compressed`
- `snapshot_other`

## How Measurement Works

The measurement is done by walking only the known Hypeman storage roots instead of scanning the whole data filesystem.

- `images` is measured from exported image disk files named `rootfs.erofs` or `rootfs.ext4`
- `oci_cache` is measured from the OCI cache tree
- `volumes` is measured from volume `data.raw` files
- `rootfs_overlays` is measured from each guest `overlay.raw`
- `volume_overlays` is measured from each guest `vol-overlays/*.raw`
- snapshots are measured from each guest `snapshots/snapshot-latest` directory

The measurement is based on filesystem allocated blocks rather than logical file size. That means sparse disks and overlays report the bytes they really occupy on disk, not the size they were provisioned with.

Concretely, the collector reads filesystem metadata with `lstat` and uses the allocated block count for each file or directory entry (the block count multiplied by 512 bytes), so the result reflects actual blocks consumed on disk. Directory walks are limited to Hypeman-managed paths that are already known from the data layout.

Snapshots are classified by the memory artifact present in `snapshot-latest`:

- `snapshot_compressed` for compressed memory files such as `memory-ranges.zst` or `memory-ranges.lz4`
- `snapshot_uncompressed` for raw `memory-ranges`
- `snapshot_other` when a snapshot directory exists but does not match a recognized memory artifact shape

The full `snapshot-latest` directory is counted once under its classified snapshot component so the metric includes related config and state files, not just the memory artifact itself.

## How It Is Stored For Metrics

This package returns a per-component breakdown to the resource monitoring refresh loop. The refresh loop stores the latest measured values in the in-memory monitoring snapshot, and the Prometheus callback only reads that cached snapshot.

That means:

- the expensive filesystem work happens on the refresh interval
- the `/metrics` scrape path does not walk the filesystem
- each scrape simply emits the latest cached component values

## Efficiency Expectations

This should be efficient enough for the current design because it avoids whole-filesystem scans and avoids any disk walking during Prometheus scrapes.

The main cost is the periodic walk over Hypeman-managed storage roots. That is still proportional to the number of tracked files and snapshot directories, so it is not free, but it is bounded and predictable. For v1, this is a good tradeoff: correct sparse-file accounting, accurate snapshot classification, cheap scrapes, and much lower complexity than a change-tracking cache.
229 changes: 229 additions & 0 deletions lib/diskutilization/diskutilization.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
package diskutilization

import (
"io/fs"
"os"
"path/filepath"
"syscall"

"github.com/kernel/hypeman/lib/paths"
)

// Component names used as keys in the map returned by Breakdown.Components.
const (
	// ComponentImages is the key for the Images total.
	ComponentImages = "images"
	// ComponentOCICache is the key for the OCICache total.
	ComponentOCICache = "oci_cache"
	// ComponentVolumes is the key for the Volumes total.
	ComponentVolumes = "volumes"
	// ComponentRootfsOverlays is the key for the RootfsOverlays total.
	ComponentRootfsOverlays = "rootfs_overlays"
	// ComponentVolumeOverlays is the key for the VolumeOverlays total.
	ComponentVolumeOverlays = "volume_overlays"
	// ComponentSnapshotUncompressed is the key for the SnapshotUncompressed total.
	ComponentSnapshotUncompressed = "snapshot_uncompressed"
	// ComponentSnapshotCompressed is the key for the SnapshotCompressed total.
	ComponentSnapshotCompressed = "snapshot_compressed"
	// ComponentSnapshotOther is the key for the SnapshotOther total.
	ComponentSnapshotOther = "snapshot_other"
)

// Breakdown holds one byte total per storage component.
type Breakdown struct {
	// Images is the byte total for the "images" component.
	Images int64
	// OCICache is the byte total for the "oci_cache" component.
	OCICache int64
	// Volumes is the byte total for the "volumes" component.
	Volumes int64
	// RootfsOverlays is the byte total for the "rootfs_overlays" component.
	RootfsOverlays int64
	// VolumeOverlays is the byte total for the "volume_overlays" component.
	VolumeOverlays int64
	// SnapshotUncompressed is the byte total for the "snapshot_uncompressed" component.
	SnapshotUncompressed int64
	// SnapshotCompressed is the byte total for the "snapshot_compressed" component.
	SnapshotCompressed int64
	// SnapshotOther is the byte total for the "snapshot_other" component.
	SnapshotOther int64
}

// Components returns the breakdown as a freshly allocated map keyed by the
// Component* names. Every component is present, including those whose total
// is zero.
func (b Breakdown) Components() map[string]int64 {
	byName := make(map[string]int64, 8)
	byName[ComponentImages] = b.Images
	byName[ComponentOCICache] = b.OCICache
	byName[ComponentVolumes] = b.Volumes
	byName[ComponentRootfsOverlays] = b.RootfsOverlays
	byName[ComponentVolumeOverlays] = b.VolumeOverlays
	byName[ComponentSnapshotUncompressed] = b.SnapshotUncompressed
	byName[ComponentSnapshotCompressed] = b.SnapshotCompressed
	byName[ComponentSnapshotOther] = b.SnapshotOther
	return byName
}

// Collect measures the allocated on-disk bytes of every storage component
// reachable through p and returns them as a Breakdown.
//
// Images, the OCI cache and volumes are measured first. Then each directory
// under the guests directory is treated as one instance and contributes its
// rootfs overlay, its ".raw" volume overlays and its latest snapshot.
//
// If the guests directory does not exist, the totals gathered so far are
// returned with a nil error. On any other failure the zero Breakdown is
// returned together with the error.
func Collect(p *paths.Paths) (Breakdown, error) {
	var (
		result Breakdown
		err    error
	)

	// Only the exported root filesystem disk files count towards images.
	isImageDisk := func(_ string, d fs.DirEntry) bool {
		if d.IsDir() {
			return false
		}
		switch d.Name() {
		case "rootfs.erofs", "rootfs.ext4":
			return true
		}
		return false
	}
	// Volume overlays are the non-directory entries with a ".raw" extension.
	isRawOverlay := func(_ string, d fs.DirEntry) bool {
		return !d.IsDir() && filepath.Ext(d.Name()) == ".raw"
	}

	if result.Images, err = sumMatchingFilesAllocatedBytes(p.ImagesDir(), isImageDisk); err != nil {
		return Breakdown{}, err
	}
	if result.OCICache, err = sumTreeAllocatedBytes(p.SystemOCICache()); err != nil {
		return Breakdown{}, err
	}
	if result.Volumes, err = sumDirectChildFileAllocatedBytes(p.VolumesDir(), "data.raw"); err != nil {
		return Breakdown{}, err
	}

	guests, err := os.ReadDir(p.GuestsDir())
	if err != nil {
		// No guests directory simply means there is nothing per-instance to add.
		if os.IsNotExist(err) {
			return result, nil
		}
		return Breakdown{}, err
	}

	for _, g := range guests {
		if !g.IsDir() {
			continue
		}
		id := g.Name()

		result.RootfsOverlays += allocatedBytesForPath(p.InstanceOverlay(id))

		overlayBytes, err := sumMatchingFilesAllocatedBytes(p.InstanceVolumeOverlaysDir(id), isRawOverlay)
		if err != nil {
			return Breakdown{}, err
		}
		result.VolumeOverlays += overlayBytes

		latest := p.InstanceSnapshotLatest(id)
		kind, found, err := classifySnapshotDir(latest)
		if err != nil {
			return Breakdown{}, err
		}
		if !found {
			continue
		}

		size, err := sumTreeAllocatedBytes(latest)
		if err != nil {
			return Breakdown{}, err
		}

		// The whole snapshot tree is charged to exactly one bucket; anything
		// that is neither compressed nor uncompressed lands in SnapshotOther.
		bucket := &result.SnapshotOther
		switch kind {
		case ComponentSnapshotCompressed:
			bucket = &result.SnapshotCompressed
		case ComponentSnapshotUncompressed:
			bucket = &result.SnapshotUncompressed
		}
		*bucket += size
	}

	return result, nil
}

// classifySnapshotDir decides which snapshot component snapshotDir belongs to.
//
// It returns exists=false (with an empty component and nil error) when the
// path does not exist, and a non-nil error when the path cannot be stat'ed
// for any other reason. A path that exists but is not a directory is
// classified as ComponentSnapshotOther. For a directory, the first memory
// artifact found decides the component, probed in this order:
// "memory-ranges.zst", "memory-ranges.lz4" (both compressed), then
// "memory-ranges" (uncompressed). With none present the result is
// ComponentSnapshotOther.
func classifySnapshotDir(snapshotDir string) (component string, exists bool, err error) {
	st, statErr := os.Stat(snapshotDir)
	if statErr != nil {
		if os.IsNotExist(statErr) {
			return "", false, nil
		}
		return "", false, statErr
	}
	if !st.IsDir() {
		return ComponentSnapshotOther, true, nil
	}

	// Probe order matters: compressed artifacts win over the raw one.
	artifacts := []struct {
		file string
		kind string
	}{
		{"memory-ranges.zst", ComponentSnapshotCompressed},
		{"memory-ranges.lz4", ComponentSnapshotCompressed},
		{"memory-ranges", ComponentSnapshotUncompressed},
	}
	for _, a := range artifacts {
		if pathExists(filepath.Join(snapshotDir, a.file)) {
			return a.kind, true, nil
		}
	}
	return ComponentSnapshotOther, true, nil
}

// sumDirectChildFileAllocatedBytes adds up the allocated bytes of
// <root>/<subdir>/<childFile> for every immediate subdirectory of root.
//
// Non-directory entries in root are skipped. A missing root yields (0, nil);
// any other error reading root is returned. Subdirectories that lack
// childFile contribute zero, because allocatedBytesForPath reports 0 for
// paths it cannot stat.
func sumDirectChildFileAllocatedBytes(root string, childFile string) (int64, error) {
	children, readErr := os.ReadDir(root)
	if readErr != nil {
		if os.IsNotExist(readErr) {
			return 0, nil
		}
		return 0, readErr
	}

	var sum int64
	for _, child := range children {
		if child.IsDir() {
			target := filepath.Join(root, child.Name(), childFile)
			sum += allocatedBytesForPath(target)
		}
	}
	return sum, nil
}

// sumMatchingFilesAllocatedBytes walks the tree rooted at root and adds up
// the allocated bytes of every entry for which match returns true.
//
// Not-exist errors reported during the walk are ignored, so a missing root or
// an entry that disappears mid-walk does not fail the measurement. Any other
// walk error aborts the walk and is returned with a total of 0.
func sumMatchingFilesAllocatedBytes(root string, match func(path string, entry fs.DirEntry) bool) (int64, error) {
	var sum int64

	visit := func(p string, d fs.DirEntry, walkErr error) error {
		switch {
		case walkErr == nil:
			if match(p, d) {
				sum += allocatedBytesForPath(p)
			}
			return nil
		case os.IsNotExist(walkErr):
			// Entry vanished or never existed; nothing to count.
			return nil
		default:
			return walkErr
		}
	}

	walkErr := filepath.WalkDir(root, visit)
	if walkErr == nil {
		return sum, nil
	}
	if os.IsNotExist(walkErr) {
		return 0, nil
	}
	return 0, walkErr
}

// sumTreeAllocatedBytes walks the tree rooted at root and adds up the
// allocated bytes of every path the walk visits, root included.
//
// Not-exist errors reported during the walk are ignored, so a missing root or
// an entry that disappears mid-walk does not fail the measurement. Any other
// walk error aborts the walk and is returned with a total of 0.
func sumTreeAllocatedBytes(root string) (int64, error) {
	var sum int64

	visit := func(p string, _ fs.DirEntry, walkErr error) error {
		switch {
		case walkErr == nil:
			sum += allocatedBytesForPath(p)
			return nil
		case os.IsNotExist(walkErr):
			// Entry vanished or never existed; nothing to count.
			return nil
		default:
			return walkErr
		}
	}

	walkErr := filepath.WalkDir(root, visit)
	if walkErr == nil {
		return sum, nil
	}
	if os.IsNotExist(walkErr) {
		return 0, nil
	}
	return 0, walkErr
}

// allocatedBytesForPath returns the number of bytes allocated on disk for
// path, computed as the lstat block count multiplied by 512. Symbolic links
// are not followed.
//
// This is a best-effort helper: it returns 0 when the path cannot be stat'ed
// or when the platform does not expose a *syscall.Stat_t.
func allocatedBytesForPath(path string) int64 {
	// The stat block count is expressed in 512-byte units.
	const statBlockBytes = 512

	fi, err := os.Lstat(path)
	if err == nil {
		if st, ok := fi.Sys().(*syscall.Stat_t); ok {
			return st.Blocks * statBlockBytes
		}
	}
	return 0
}

// pathExists reports whether path can be stat'ed successfully. Any stat
// error, not only "does not exist", yields false.
func pathExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}
Loading
Loading