flyingrobots · flyingrobots · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026
@@ -220,12 +220,17 @@ jobs:
       - name: Run benchmarks
         run: |
+          # Unless this workflow sets a default shell, `run` steps execute under
+          # `bash -e` without pipefail, so a failing `cargo bench` would be
+          # masked by the exit status of `tee`. Harmless if already enabled.
+          set -o pipefail
           cargo bench -p warp-benches --bench materialization_hotpath -- --output-format bencher | tee perf.log
+      - name: Check regression against baseline
+        run: |
+          node scripts/check_perf_regression.cjs perf-baseline.json perf.log --threshold 15
       - name: Upload perf artifacts
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: perf-artifacts
-          path: perf.log
+          path: |
+            perf.log
+            perf-report.json
 
   build-repro:
     name: G4 build reproducibility (wasm)

@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: Apache-2.0
+# © James Ross Ω FLYING•ROBOTS <https://github.com/flyingrobots>
+#
+# Auto-update perf-baseline.json on main after pushes that touch Rust sources
+# (`*.rs`) or `Cargo.toml` manifests under `crates/`. Creates a PR with the new
+# baseline so it's reviewed (never force-pushes or commits directly to main).
+name: Update perf baseline
+
+on:
+  push:
+    branches: [main]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: perf-baseline-update
+  cancel-in-progress: true
+
+jobs:
+  update-baseline:
+    name: Update perf baseline
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Check if perf-relevant files changed
+        id: changed
+        env:
+          # Tip of main before this push. Diffing from here covers every commit
+          # in a multi-commit push instead of only the last one.
+          BEFORE_SHA: ${{ github.event.before }}
+        run: |
+          # Fall back to the parent commit when the pre-push SHA is unusable
+          # (all zeros on branch creation, or not present in the clone).
+          BASE="HEAD~1"
+          if [ -n "$BEFORE_SHA" ] && git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
+            BASE="$BEFORE_SHA"
+          fi
+          # Best-effort: a failing diff yields an empty list and skips the update.
+          CHANGED=$(git diff --name-only "$BASE..HEAD" -- 'crates/**/*.rs' 'crates/**/Cargo.toml' || true)
+          if [ -z "$CHANGED" ]; then
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+            echo "No Rust source changes — skipping baseline update"
+          else
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Setup Rust
+        if: steps.changed.outputs.skip != 'true'
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Run benchmarks
+        if: steps.changed.outputs.skip != 'true'
+        run: |
+          # No `shell:` is set, so this runs under `bash -e` without pipefail.
+          # Enable it so a failing `cargo bench` is not masked by `tee`, which
+          # would let a partial perf.log be turned into the new baseline.
+          set -o pipefail
+          cargo bench -p warp-benches --bench materialization_hotpath -- --output-format bencher | tee perf.log
+
+      - name: Generate baseline JSON
+        id: generate
+        if: steps.changed.outputs.skip != 'true'
+        run: |
+          # Render the fresh benchmark log into the baseline JSON format.
+          node scripts/generate_perf_baseline.cjs perf.log > perf-baseline-new.json
+          # Any difference from the tracked baseline (including the tracked file
+          # being absent) promotes the new file; identical content skips the PR.
+          if ! diff -q perf-baseline.json perf-baseline-new.json >/dev/null 2>&1; then
+            mv perf-baseline-new.json perf-baseline.json
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "Baseline unchanged — no PR needed"
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Create baseline PR
+        if: steps.changed.outputs.skip != 'true' && steps.generate.outputs.skip != 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Resolve the triggering main commit once, before committing. After
+          # `git commit`, HEAD moves to the baseline commit, so HEAD-relative
+          # lookups name different commits depending on where they are evaluated
+          # (the commit message previously pointed at the parent of the push).
+          SOURCE_SHA="$(git rev-parse HEAD)"
+          SOURCE_SHORT="$(git rev-parse --short HEAD)"
+          BRANCH="chore/perf-baseline-$(date +%Y%m%d)-${SOURCE_SHORT}"
+          git checkout -b "$BRANCH"
+          git add perf-baseline.json
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git commit -m "chore(perf): update perf-baseline.json from ${SOURCE_SHORT}"
+          git push origin "$BRANCH"
+          # NOTE(review): PRs opened with GITHUB_TOKEN do not trigger other
+          # workflows, so the gate will not run on this PR automatically.
+          gh pr create \
+            --title "chore(perf): update perf baseline" \
+            --body "Auto-generated baseline update from main push ${SOURCE_SHA}." \
+            --base main \
+            --head "$BRANCH"
@@ -5,6 +5,25 @@
 
 ## Unreleased
 
+### CI: G3 perf regression gate (#280)
+
+- **CI:** G3 perf regression gate now compares criterion benchmark output
+  against a git-tracked `perf-baseline.json` and fails if any benchmark
+  regresses beyond 15% (configurable via `--threshold`). Structured
+  `perf-report.json` artifact uploaded alongside raw `perf.log`.
+- **CI:** New `perf-baseline-update.yml` workflow auto-generates baseline
+  update PRs on main pushes that touch Rust sources.
+- **Scripts:** Added `check_perf_regression.cjs` (gate comparison) and
+  `generate_perf_baseline.cjs` (baseline generation from bencher output).
+
+### Docs: Allowlist governance (#287)
+
+- **Policy:** Added "Determinism Allowlist Governance" section to
+  `docs/RELEASE_POLICY.md` documenting acceptable exemption criteria,
+  approval requirements, and audit cadence for `.ban-nondeterminism-allowlist`.
+- **Scripts:** Added cross-reference from `ban-nondeterminism.sh` header to
+  the governance policy.
+
 ### Docs Polish (#41)
 
 - **License:** Renamed SPDX identifier `MIND-UCAL-1.0` →

@@ -69,6 +69,55 @@ A gate may be marked VERIFIED only with immutable pointers:
 
 No immutable evidence => gate must be INFERRED or UNVERIFIED.
 
+## Determinism Allowlist Governance
+
+The static inspection gate (G1 / DET-001) uses `scripts/ban-nondeterminism.sh`
+to scan DET-critical crate sources for nondeterministic API patterns. A
+file-level allowlist (`.ban-nondeterminism-allowlist`) may exempt specific paths
+from this scan.
+
+### Allowlist Location and Format
+
+- **File:** `.ban-nondeterminism-allowlist` (project root)
+- **Format:** One file path per line. Blank lines and `#`-prefixed comments are
+  ignored. Inline justifications may follow the path, separated by whitespace.
+- **Env override:** `DETERMINISM_ALLOWLIST` (defaults to
+  `.ban-nondeterminism-allowlist`).
+
+### When an Exemption Is Acceptable
+
+An allowlist entry is appropriate **only** when all of the following hold:
+
+1. The nondeterministic API is **not reachable from the WASM deterministic
+   execution path** (e.g., native-only tooling, test-only I/O, build-time
+   configuration read once at startup).
+2. The call site is **guarded** by a feature gate, `#[cfg(test)]`, or an
+   explicit runtime assertion that prevents it from executing in the
+   deterministic engine loop.
+3. Refactoring to remove the API usage would introduce **worse** architectural
+   trade-offs (e.g., duplicating an entire module to avoid a single `std::fs`
+   call in a CLI-only code path).
+
+If the API is reachable from the deterministic engine loop under any
+configuration, **do not allowlist it. Refactor instead.**
+
+### Approval Requirements
+
+- Every new allowlist entry **must** include an inline justification explaining
+  why the exemption is safe.
+- The entry must be approved by the **Architect** or **crate owner** as defined
+  in `det-policy.yaml` for the affected crate.
+- PRs adding allowlist entries must tag the determinism label and reference this
+  policy section.
+
+### Audit
+
+- Existing entries are reviewed during each milestone closeout.
+- Entries whose justification no longer holds (e.g., the guarding feature gate
+  was removed) must be deleted and the underlying code refactored.
+- The `check_task_lists.sh` pre-commit hook does **not** cover allowlist
+  auditing; this is a manual review gate.
+
 ## Escalation
 
 If staging/prod blocker state conflicts with recommendation:

@@ -0,0 +1,2 @@
+{
+}
@@ -11,6 +11,9 @@ set -euo pipefail
 # Optional env:
 #   DETERMINISM_PATHS="crates/warp-core crates/warp-wasm crates/echo-wasm-abi"
 #   DETERMINISM_ALLOWLIST=".ban-nondeterminism-allowlist"
+#
+# Allowlist governance: see docs/RELEASE_POLICY.md § "Determinism Allowlist Governance"
+# for approval requirements, acceptable exemption criteria, and audit cadence.
 
 ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
 cd "$ROOT"

@@ -0,0 +1,169 @@
+#!/usr/bin/env node
+// SPDX-License-Identifier: Apache-2.0
+// © James Ross Ω FLYING•ROBOTS <https://github.com/flyingrobots>
+//
+// G3 perf regression gate: compare current criterion bencher output against a
+// git-tracked baseline and fail if any benchmark regresses beyond the allowed
+// threshold.
+//
+// Usage:
+//   node scripts/check_perf_regression.cjs <baseline.json> <current.log> [--threshold 15]
+//
+// Baseline format (perf-baseline.json):
+//   { "<bench_name>": <median_ns>, ... }
+//
+// Current format (criterion --output-format bencher):
+//   test <bench_name> ... bench:   <N> ns/iter (+/- <M>)
+//
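+// Exit codes:
+//   0 — no regressions above threshold (or no baseline file to compare against)
+//   1 — one or more benchmarks regressed above threshold, or a baseline
+//       benchmark is missing from the current run
+//   2 — usage or input error (e.g. no benchmark results found in the current log)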
+
+"use strict";
+
+const fs = require("fs");
+
+const USAGE = "Usage: node scripts/check_perf_regression.cjs <baseline.json> <current.log> [--threshold <percent>]";
+
+/**
+ * Parse the command line into the two required paths and the threshold.
+ *
+ * Exits the process with code 2 on any usage error: unknown flag, missing or
+ * invalid `--threshold` value, or a positional count other than two.
+ *
+ * @param {string[]} argv Full argv array; the first two entries are skipped.
+ * @returns {{baselinePath: string, currentPath: string, threshold: number}}
+ *   `threshold` is a percentage and defaults to 15.
+ */
+function parseArgs(argv) {
+  const args = argv.slice(2);
+  let threshold = 15;
+  const positional = [];
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (arg === "--threshold") {
+      // A trailing `--threshold` used to fall through to the "unknown flag"
+      // branch; report the real problem instead.
+      if (i + 1 >= args.length) {
+        console.error("ERROR: --threshold requires a value");
+        console.error(USAGE);
+        process.exit(2);
+      }
+      threshold = Number(args[++i]);
+      // isFinite rejects NaN and also Infinity, which would disable the gate.
+      if (!Number.isFinite(threshold) || threshold <= 0) {
+        console.error("ERROR: --threshold must be a positive number");
+        process.exit(2);
+      }
+    } else if (arg.startsWith("-")) {
+      console.error(`ERROR: unknown flag: ${arg}`);
+      console.error(USAGE);
+      process.exit(2);
+    } else {
+      positional.push(arg);
+    }
+  }
+
+  if (positional.length !== 2) {
+    console.error(USAGE);
+    process.exit(2);
+  }
+
+  return { baselinePath: positional[0], currentPath: positional[1], threshold };
+}
+
+/**
+ * Extract per-benchmark timings from criterion's bencher-format output.
+ *
+ * @param {string} text Raw log text; lines that do not match are ignored.
+ * @returns {Object<string, number>} Benchmark name mapped to its ns/iter
+ *   figure. A name that appears more than once keeps the last value seen.
+ */
+function parseBencherOutput(text) {
+  // Line shape: "test <name> ... bench:       <N> ns/iter (+/- <M>)"
+  const linePattern = /^test\s+(\S+)\s+\.\.\.\s+bench:\s+([\d,]+)\s+ns\/iter/gm;
+  const timings = {};
+  for (const [, benchName, rawNs] of text.matchAll(linePattern)) {
+    // Drop thousands separators before converting to a number.
+    timings[benchName] = Number(rawNs.split(",").join(""));
+  }
+  return timings;
+}
+
+/**
+ * Run the G3 gate: load the baseline and the current bencher log, compare
+ * every benchmark, print a results table, and write perf-report.json.
+ *
+ * Process exit status:
+ *   0 — all compared benchmarks are within the threshold, or no baseline file
+ *       exists (nothing is written in that case);
+ *   1 — at least one benchmark regressed beyond the threshold, or a baseline
+ *       benchmark is absent from the current run;
+ *   2 — inputs could not be read, parsed, or validated, or the current log
+ *       contains no benchmark results.
+ */
+function main() {
+  const { baselinePath, currentPath, threshold } = parseArgs(process.argv);
+  // Own-property lookups only: a benchmark named like an Object.prototype
+  // member (e.g. "toString") must not be mistaken for a baseline entry.
+  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
+
+  if (!fs.existsSync(baselinePath)) {
+    // This script never writes a baseline, so do not claim that it does.
+    console.log(`No baseline found at ${baselinePath} — nothing to compare against.`);
+    console.log("G3: SKIP (no baseline to compare against)");
+    process.exit(0);
+  }
+
+  // Unreadable or malformed inputs are input errors (exit 2). Letting them
+  // throw would exit 1 and be indistinguishable from a real regression.
+  let baseline;
+  let currentText;
+  try {
+    baseline = JSON.parse(fs.readFileSync(baselinePath, "utf-8"));
+    currentText = fs.readFileSync(currentPath, "utf-8");
+  } catch (err) {
+    console.error(`ERROR: failed to load inputs: ${err.message}`);
+    process.exit(2);
+  }
+
+  if (baseline === null || typeof baseline !== "object" || Array.isArray(baseline)) {
+    console.error(`ERROR: ${baselinePath} must contain a JSON object of { "<bench_name>": <median_ns> }`);
+    process.exit(2);
+  }
+  // A non-numeric entry would yield a NaN delta, which compares false against
+  // the threshold and would silently pass the gate.
+  for (const [name, value] of Object.entries(baseline)) {
+    if (typeof value !== "number" || value < 0) {
+      console.error(`ERROR: baseline entry "${name}" must be a non-negative number (got ${JSON.stringify(value)})`);
+      process.exit(2);
+    }
+  }
+
+  const current = parseBencherOutput(currentText);
+
+  const benchNames = Object.keys(current);
+  if (benchNames.length === 0) {
+    console.error("ERROR: no benchmark results found in current output");
+    process.exit(2);
+  }
+
+  console.log(`G3 perf regression gate (threshold: ${threshold}%)`);
+  console.log("─".repeat(72));
+
+  const report = [];
+  let regressed = 0;
+  let missing = 0;
+
+  for (const name of benchNames) {
+    const cur = current[name];
+
+    if (!hasOwn(baseline, name)) {
+      report.push({ name, cur, base: null, delta: null, status: "NEW" });
+      continue;
+    }
+
+    const base = baseline[name];
+    // A zero baseline cannot be divided by: an equal (zero) reading counts as
+    // no change, anything slower as an unbounded regression.
+    const deltaPct = base === 0 ? (cur === 0 ? 0 : Infinity) : ((cur - base) / base) * 100;
+    const isRegressed = deltaPct > threshold;
+    if (isRegressed) regressed++;
+
+    report.push({
+      name,
+      cur,
+      base,
+      delta: deltaPct,
+      status: isRegressed ? "REGRESSED" : "OK",
+    });
+  }
+
+  // Fail when baseline benchmarks disappear from the current run.
+  // This prevents silent bypass of regression enforcement via benchmark
+  // renames/removals. To resolve: update perf-baseline.json to remove
+  // the stale entry (via the baseline update workflow or manually).
+  for (const name of Object.keys(baseline)) {
+    if (!hasOwn(current, name)) {
+      missing++;
+      report.push({ name, cur: null, base: baseline[name], delta: null, status: "MISSING" });
+    }
+  }
+
+  // Total failure count; kept under the `regressions` report key so existing
+  // consumers of perf-report.json see the same meaning as before.
+  const regressions = regressed + missing;
+
+  // Print table
+  const nameWidth = Math.max(12, ...report.map((r) => r.name.length));
+  const header = `${"Benchmark".padEnd(nameWidth)}  ${"Baseline".padStart(12)}  ${"Current".padStart(12)}  ${"Delta".padStart(8)}  Status`;
+  console.log(header);
+  console.log("─".repeat(header.length));
+
+  for (const r of report) {
+    const baseStr = r.base != null ? `${r.base} ns` : "—";
+    const curStr = r.cur != null ? `${r.cur} ns` : "—";
+    const deltaStr = r.delta != null ? `${r.delta > 0 ? "+" : ""}${r.delta.toFixed(1)}%` : "—";
+    const statusStr =
+      r.status === "REGRESSED" ? `FAIL (>${threshold}%)` :
+      r.status === "MISSING" ? "FAIL (missing)" :
+      r.status;
+    console.log(
+      `${r.name.padEnd(nameWidth)}  ${baseStr.padStart(12)}  ${curStr.padStart(12)}  ${deltaStr.padStart(8)}  ${statusStr}`
+    );
+  }
+
+  console.log("─".repeat(header.length));
+
+  // Write structured report
+  const reportObj = {
+    threshold_pct: threshold,
+    benchmarks: report,
+    regressions,
+    regressed,
+    missing,
+    passed: regressions === 0,
+  };
+  fs.writeFileSync("perf-report.json", JSON.stringify(reportObj, null, 2) + "\n");
+  console.log("\nWrote perf-report.json");
+
+  if (regressions > 0) {
+    // Name both failure modes; missing benchmarks are not regressions.
+    console.error(
+      `\nG3: FAILED — ${regressed} benchmark(s) regressed beyond ${threshold}% threshold, ` +
+      `${missing} baseline benchmark(s) missing from current run`
+    );
+    process.exit(1);
+  }
+
+  console.log(`\nG3: PASSED — all benchmarks within ${threshold}% of baseline`);
+}
+
+main();