From 38cf9bde799ec7e8c5b47b261e0ff986a14dd67f Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Mon, 25 May 2026 17:39:08 +1000
Subject: [PATCH 01/11] feat: port devloop to bun opentui

---
 bun.lock              |  61 ++++++
 bunfig.toml           |   5 +
 devloop.sh            | 454 --------------------------------------
 package.json          |  19 ++
 src/cli.ts            |  46 ++++
 src/devloop.ts        | 452 ++++++++++++++++++++++++++++++++++++++
 src/tui.ts            |  68 ++++++
 tests/devloop.test.ts | 329 ++++++++++++++++++++++++++++
 tests/devloop_test.sh | 495 +-----------------------------------------
 tsconfig.json         |  14 ++
 10 files changed, 995 insertions(+), 948 deletions(-)
 create mode 100644 bun.lock
 create mode 100644 bunfig.toml
 delete mode 100755 devloop.sh
 create mode 100644 package.json
 create mode 100755 src/cli.ts
 create mode 100644 src/devloop.ts
 create mode 100644 src/tui.ts
 create mode 100644 tests/devloop.test.ts
 create mode 100644 tsconfig.json

diff --git a/bun.lock b/bun.lock
new file mode 100644
index 0000000..9c82dff
--- /dev/null
+++ b/bun.lock
@@ -0,0 +1,61 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "devloop",
+      "dependencies": {
+        "@opentui/core": "^0.2.15",
+      },
+      "devDependencies": {
+        "@types/bun": "^1.3.1",
+        "typescript": "^5.9.3",
+      },
+    },
+  },
+  "packages": {
+    "@opentui/core": ["@opentui/core@0.2.15", "", { "dependencies": { "bun-ffi-structs": "0.2.2", "diff": "9.0.0", "marked": "17.0.1", "string-width": "7.2.0", "strip-ansi": "7.1.2", "yoga-layout": "3.2.1" }, "optionalDependencies": { "@opentui/core-darwin-arm64": "0.2.15", "@opentui/core-darwin-x64": "0.2.15", "@opentui/core-linux-arm64": "0.2.15", "@opentui/core-linux-x64": "0.2.15", "@opentui/core-win32-arm64": "0.2.15", "@opentui/core-win32-x64": "0.2.15" }, "peerDependencies": { "web-tree-sitter": "0.25.10" } }, "sha512-YGHttdZWScMcSvtYgZkLR6VhUO1OoUiQzwYjZgIusf5eCkPLD8PapH+PTMVqAiX16CHO6JxfMlkHv5qDiHAccQ=="],
+
+    "@opentui/core-darwin-arm64": ["@opentui/core-darwin-arm64@0.2.15", "", { "os": "darwin", "cpu": "arm64" }, "sha512-s25f9GmZd6wxNM5ExRmwwnLT+NLCKxnTWuO9aObOlqsXfLMGHQZrb6YwgAn/PSTua98KmH7GJCVWdPgZ/P+0RQ=="],
+
+    "@opentui/core-darwin-x64": ["@opentui/core-darwin-x64@0.2.15", "", { "os": "darwin", "cpu": "x64" }, "sha512-GyaipN+nOcEr8rcTO2mqKTGmOBk0C300I69fLtubD3BadHcMI1DVNlQrcf/J1mkQEuMYbmBTi/1hT1ybWGr2Mw=="],
+
+    "@opentui/core-linux-arm64": ["@opentui/core-linux-arm64@0.2.15", "", { "os": "linux", "cpu": "arm64" }, "sha512-h+uyufselGT4afKMP8Lg4yUl5Kp+DJBlhu3XpWXhphE5Pnq5+f0uGBr4P+34CNcWxMsDnvagSQLFRCS4rGrOWA=="],
+
+    "@opentui/core-linux-x64": ["@opentui/core-linux-x64@0.2.15", "", { "os": "linux", "cpu": "x64" }, "sha512-jx+NImPq4wSp3Apfe7tlixiEJNnRyECTRJRWhGF6ZJz4PwFfgK2UHZKYR0DZHbV8nYawoDNQPJDXEWcoZShnMg=="],
+
+    "@opentui/core-win32-arm64": ["@opentui/core-win32-arm64@0.2.15", "", { "os": "win32", "cpu": "arm64" }, "sha512-2SQQLvf3sgmToxrNika9AdcccKrjPJEn5jW6sSv0oEixNBzUzW41vSZZG4LM/V3lL8eg0LoYDnRZeKLB4gwSqQ=="],
+
+    "@opentui/core-win32-x64": ["@opentui/core-win32-x64@0.2.15", "", { "os": "win32", "cpu": "x64" }, "sha512-SVMVgnC7LVEm+yVZKdmmhRBj/xAT94PanT+UCcHxaCWK+OLmv/AX+ohHq2m0odup6iXcEqj+7mAltO9fgJLFIg=="],
+
+    "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="],
+
+    "@types/node": ["@types/node@25.9.1", "", { "dependencies": { "undici-types": ">=7.24.0 <7.24.7" } }, "sha512-xfrlY7UD5rMJk3ZVJP8BNzS28J36YJg+xp+LPXV1TdWxr8uMH5A860QNxYDGQe/ylDSgjxE52Q9VnO7p75tJxg=="],
+
+    "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="],
+
+    "bun-ffi-structs": ["bun-ffi-structs@0.2.2", "", { "peerDependencies": { "typescript": "^5" } }, "sha512-N/ZWtyN0piZlrXQT7TO0V+q952orYqkfhXRXM1Hcbb+R3QSiBH4vLnib187Mrs1H7pWIYECAmPeapGYDOMCl+w=="],
+
+    "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="],
+
+    "diff": ["diff@9.0.0", "", {}, "sha512-svtcdpS8CgJyqAjEQIXdb3OjhFVVYjzGAPO8WGCmRbrml64SPw/jJD4GoE98aR7r25A0XcgrK3F02yw9R/vhQw=="],
+
+    "emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="],
+
+    "get-east-asian-width": ["get-east-asian-width@1.6.0", "", {}, "sha512-QRbvDIbx6YklUe6RxeTeleMR0yv3cYH6PsPZHcnVn7xv7zO1BHN8r0XETu8n6Ye3Q+ahtSarc3WgtNWmehIBfA=="],
+
+    "marked": ["marked@17.0.1", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-boeBdiS0ghpWcSwoNm/jJBwdpFaMnZWRzjA6SkUMYb40SVaN1x7mmfGKp0jvexGcx+7y2La5zRZsYFZI6Qpypg=="],
+
+    "string-width": ["string-width@7.2.0", "", { "dependencies": { "emoji-regex": "^10.3.0", "get-east-asian-width": "^1.0.0", "strip-ansi": "^7.1.0" } }, "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ=="],
+
+    "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="],
+
+    "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
+
+    "undici-types": ["undici-types@7.24.6", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="],
+
+    "web-tree-sitter": ["web-tree-sitter@0.25.10", "", { "peerDependencies": { "@types/emscripten": "^1.40.0" }, "optionalPeers": ["@types/emscripten"] }, "sha512-Y09sF44/13XvgVKgO2cNDw5rGk6s26MgoZPXLESvMXeefBf7i6/73eFurre0IsTW6E14Y0ArIzhUMmjoc7xyzA=="],
+
+    "yoga-layout": ["yoga-layout@3.2.1", "", {}, "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ=="],
+  }
+}
diff --git a/bunfig.toml b/bunfig.toml
new file mode 100644
index 0000000..72f3051
--- /dev/null
+++ b/bunfig.toml
@@ -0,0 +1,5 @@
+[test]
+coverage = true
+coverageThreshold = { lines = 1.0, functions = 1.0, statements = 1.0 }
+coverageReporter = ["text", "lcov"]
+coverageSkipTestFiles = true
diff --git a/devloop.sh b/devloop.sh
deleted file mode 100755
index 77463bb..0000000
--- a/devloop.sh
+++ /dev/null
@@ -1,454 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-# devloop.sh — codex implements, claude reviews, loop till ACCEPT/max/stall.
-# Usage: devloop.sh [--report-format html|markdown] <spec.md> [max]
-
-usage() {
-  echo "usage: devloop.sh [--report-format html|markdown] <spec.md> [max=5]" >&2
-}
-
-REPORT_FORMAT="html"
-SPEC=""
-MAX_RAW="5"
-MAX_SET=0
-
-while (($#)); do
-  case "$1" in
-    --report-format)
-      shift
-      [[ $# -gt 0 ]] || { usage; exit 2; }
-      REPORT_FORMAT="$1"
-      ;;
-    --html)
-      REPORT_FORMAT="html"
-      ;;
-    --markdown|--md)
-      REPORT_FORMAT="markdown"
-      ;;
-    -h|--help)
-      usage
-      exit 0
-      ;;
-    --*)
-      echo "unknown option: $1" >&2
-      usage
-      exit 2
-      ;;
-    *)
-      if [[ -z "$SPEC" ]]; then
-        SPEC="$1"
-      elif (( MAX_SET == 0 )); then
-        MAX_RAW="$1"
-        MAX_SET=1
-      else
-        usage
-        exit 2
-      fi
-      ;;
-  esac
-  shift
-done
-
-case "$REPORT_FORMAT" in
-  html|markdown) ;;
-  md) REPORT_FORMAT="markdown" ;;
-  *) echo "report format must be html or markdown" >&2; exit 2 ;;
-esac
-
-[[ -z "$SPEC" || ! -f "$SPEC" ]] && { usage; exit 2; }
-
-[[ "$MAX_RAW" =~ ^[+-]?[0-9]+$ ]] || { echo "max must be an integer between 1 and 10" >&2; exit 2; }
-MAX_SIGN=1
-MAX_DIGITS="$MAX_RAW"
-case "$MAX_DIGITS" in
-  -*) MAX_SIGN=-1; MAX_DIGITS="${MAX_DIGITS#-}" ;;
-  +*) MAX_DIGITS="${MAX_DIGITS#+}" ;;
-esac
-MAX=$(( MAX_SIGN * 10#$MAX_DIGITS ))
-(( MAX < 1 )) && MAX=1; (( MAX > 10 )) && MAX=10
-
-command -v claude >/dev/null || { echo "claude not on PATH" >&2; exit 2; }
-command -v codex  >/dev/null || { echo "codex not on PATH" >&2; exit 2; }
-
-RUN_DIR=$(pwd -P)
-SPEC=$(cd "$(dirname "$SPEC")" && pwd)/$(basename "$SPEC")
-REPO=$(git -C "$RUN_DIR" rev-parse --show-toplevel 2>/dev/null) \
-  || { echo "current directory is not inside a git repo" >&2; exit 2; }
-cd "$REPO"
-
-SLUG=$(basename "$SPEC" .md)
-BRANCH=$(git rev-parse --abbrev-ref HEAD)
-BASE=$(git symbolic-ref --short refs/remotes/origin/HEAD 2>/dev/null | sed 's|^origin/||' \
-       || (git show-ref --verify -q refs/heads/main && echo main) \
-       || (git show-ref --verify -q refs/heads/master && echo master) \
-       || echo main)
-
-mkdir -p .codex/tracks .codex/reviews .codex/reports .codex/logs .codex/sessions
-TRACK=".codex/tracks/$SLUG.md"
-if [[ "$REPORT_FORMAT" == "html" ]]; then
-  REPORT=".codex/reports/$SLUG.html"
-else
-  REPORT=".codex/reports/$SLUG.md"
-fi
-CODEX_SESSION_FILE=".codex/sessions/$SLUG-codex.id"
-CLAUDE_SESSION_FILE=".codex/sessions/$SLUG-claude.id"
-
-[[ -f "$TRACK" ]] || cat > "$TRACK" <<EOF
-# Track: $SLUG
-
-- spec: $SPEC
-- cwd: $RUN_DIR
-- base: $BASE
-- branch: $BRANCH
-- max: $MAX
-- report-format: $REPORT_FORMAT
-- started: $(date -u +%Y-%m-%dT%H:%M:%SZ)
-
-EOF
-
-log() { printf '\033[36m[devloop]\033[0m %s\n' "$*" >&2; }
-
-read_one_line() {
-  local path="$1" value=""
-  [[ -f "$path" ]] || return 0
-  IFS= read -r value < "$path" || true
-  printf '%s' "$value"
-}
-
-write_one_line() {
-  local path="$1" value="$2"
-  printf '%s\n' "$value" > "$path"
-}
-
-new_uuid() {
-  if command -v uuidgen >/dev/null; then
-    uuidgen | tr '[:upper:]' '[:lower:]'
-    return
-  fi
-  if command -v python3 >/dev/null; then
-    python3 -c 'import uuid; print(uuid.uuid4())'
-    return
-  fi
-  echo "uuidgen or python3 not on PATH" >&2
-  return 127
-}
-
-extract_session_id() {
-  local log_file="$1"
-  # Codex currently prints the resumable UUID in human-readable session/thread
-  # banners, commonly "To continue this session..." or "session id/thread_id".
-  # If those banners change, fail loudly instead of starting a fresh fix session.
-  grep -Ei '(session.?id|thread_id|codex exec resume|codex resume|To continue this session)' "$log_file" \
-    | grep -Eio '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' \
-    | tail -n 1
-}
-
-assert_repo_cwd() {
-  local cwd
-  cwd=$(pwd -P)
-  [[ "$cwd" == "$REPO" ]] || { echo "internal error: expected cwd $REPO, got $cwd" >&2; return 1; }
-}
-
-run_codex() {
-  local log_file="$1"; shift
-  local session_id
-  assert_repo_cwd || return
-  session_id=$(read_one_line "$CODEX_SESSION_FILE")
-
-  if [[ -n "$session_id" ]]; then
-    codex exec resume --dangerously-bypass-approvals-and-sandbox "$session_id" - 2>&1 | tee "$log_file"
-    return
-  fi
-
-  codex exec --dangerously-bypass-approvals-and-sandbox -C "$REPO" - 2>&1 | tee "$log_file"
-  session_id=$(extract_session_id "$log_file" || true)
-  [[ -n "$session_id" ]] || { echo "could not determine codex session id from $log_file" >&2; return 1; }
-  write_one_line "$CODEX_SESSION_FILE" "$session_id"
-  log "codex session: $session_id"
-}
-
-run_claude() {
-  local log_file="$1"; shift
-  local session_id
-  session_id=$(read_one_line "$CLAUDE_SESSION_FILE")
-
-  if [[ -n "$session_id" ]]; then
-    claude -p --resume "$session_id" --dangerously-skip-permissions --add-dir "$REPO" 2>&1 | tee "$log_file" >/dev/null
-    return
-  fi
-
-  session_id=$(new_uuid) || return
-  claude -p --session-id "$session_id" --dangerously-skip-permissions --add-dir "$REPO" 2>&1 | tee "$log_file" >/dev/null
-  write_one_line "$CLAUDE_SESSION_FILE" "$session_id"
-  log "claude session: $session_id"
-}
-
-list_reviews() {
-  local i file
-  for ((i=1; i<=MAX; i++)); do
-    file=".codex/reviews/$SLUG-r$i.md"
-    [[ -f "$file" ]] && printf -- '- %s\n' "$file"
-  done
-  return 0
-}
-
-hash_stdin() {
-  if command -v sha256sum >/dev/null; then
-    sha256sum | awk '{print $1}'
-    return
-  fi
-  if command -v shasum >/dev/null; then
-    shasum -a 256 | awk '{print $1}'
-    return
-  fi
-  echo "sha256sum or shasum not on PATH" >&2
-  return 127
-}
-
-findings_hash() {
-  awk '/^## Findings/{f=1;next} /^## /{f=0} f' "$1" \
-    | sed -E 's/[0-9]+//g; s/[[:space:]]+/ /g' | sort -u | hash_stdin
-}
-
-status="unknown"; prior=""; N=0
-
-for ((N=1; N<=MAX; N++)); do
-  log "pass $N/$MAX — codex"
-  CODEX_LOG=".codex/logs/$SLUG-r$N-codex.log"
-
-  if (( N == 1 )); then
-    PROMPT=$(cat <<EOF
-You are implementing against an approved spec.
-
-Spec: $SPEC
-Track: $TRACK
-Pass: $N
-
-Tasks:
-1. Read the spec.
-2. Implement the smallest working change that satisfies the acceptance criteria.
-3. Run relevant tests/linters/type checks for the languages touched.
-4. Append a markdown section to $TRACK titled "## Pass $N — implement" with:
-   - changed files
-   - key design decisions AND the tradeoff you weighed for each (one line each)
-   - verification commands run and outcomes
-   - residual risk or blockers
-
-Constraints:
-- Do not commit.
-- Do not edit the spec.
-- Do not revert unrelated dirty files.
-EOF
-)
-  else
-    PREV=".codex/reviews/$SLUG-r$((N-1)).md"
-    PROMPT=$(cat <<EOF
-Fix only the findings in the review. Do not refactor unrelated code.
-
-Spec: $SPEC
-Track: $TRACK
-Review: $PREV
-Pass: $N
-
-Tasks:
-1. Read the review file.
-2. Fix each finding. If a finding is wrong, explain why in the track instead of silently ignoring.
-3. Re-run relevant tests/linters.
-4. Append "## Pass $N — fix" to $TRACK with per-finding outcomes and the principle behind each fix.
-EOF
-)
-  fi
-
-  printf '%s' "$PROMPT" | run_codex "$CODEX_LOG" || { status="codex-error"; break; }
-
-  log "pass $N — claude review"
-  REVIEW=".codex/reviews/$SLUG-r$N.md"
-  CLAUDE_LOG=".codex/logs/$SLUG-r$N-claude.log"
-  PRIORS=$(list_reviews)
-
-  PROMPT=$(cat <<EOF
-You are reviewing a Codex implementation. Be a senior reviewer, not a linter.
-
-Spec: $SPEC
-Track: $TRACK
-Base: $BASE
-Pass: $N
-Prior reviews:
-$PRIORS
-Output path: $REVIEW
-
-Steps:
-1. Read the spec and the track.
-2. Run: git diff $BASE...HEAD
-3. Read all prior review files (if any) so you do not repeat resolved findings or contradict yourself.
-4. Write the review to $REVIEW using this exact format:
-
-# Claude review $N
-
-Verdict: <ACCEPT | REJECT | UNCLEAR>
-
-## Findings
-
-1. [severity] <file:line> — <symptom>. Root cause: <why this happened>. Principle: <what design/correctness principle it violates>.
-
-## Missing tests
-
-- <gap, or None>
-
-## Fix instructions
-
-1. <standalone instruction Codex can act on without your context>
-
-## Notes
-
-- <scope, disputes, lessons surfaced, or None>
-
-Rules:
-- The line "Verdict: ACCEPT" or "Verdict: REJECT" or "Verdict: UNCLEAR" must appear verbatim.
-- For ACCEPT: "## Findings" body is "None" and "## Fix instructions" body is "None".
-- Findings must explain WHY, not just WHAT. If you cannot articulate the principle, the finding is too shallow — drop it or sharpen it.
-- Rubric: acceptance criteria, bugs, edge cases, missing tests, scope creep, security/perf/compat/migration risk.
-EOF
-)
-
-  printf '%s' "$PROMPT" | run_claude "$CLAUDE_LOG" || { status="claude-error"; break; }
-  [[ -f "$REVIEW" ]] || { status="review-missing"; break; }
-
-  V=$(grep -m1 -oE '^Verdict:[[:space:]]+(ACCEPT|REJECT|UNCLEAR)' "$REVIEW" | awk '{print $2}' || true)
-  log "pass $N verdict: ${V:-MISSING}"
-
-  case "$V" in
-    ACCEPT)  status="accepted"; break ;;
-    UNCLEAR) status="unclear"; break ;;
-    REJECT)
-      h=$(findings_hash "$REVIEW")
-      [[ -n "$prior" && "$h" == "$prior" ]] && { status="stalled"; break; }
-      prior="$h"
-      ;;
-    *) status="no-verdict"; break ;;
-  esac
-done
-
-[[ "$status" == "unknown" ]] && status="max-turns"
-
-log "synthesizing report"
-PRIORS=$(list_reviews)
-
-if [[ "$REPORT_FORMAT" == "html" ]]; then
-  REPORT_INSTRUCTIONS=$(cat <<EOF
-Write the report to $REPORT as valid standalone HTML. Include a concise <title>, semantic sections, and minimal embedded CSS for readable typography. Do not wrap the HTML in a markdown code fence. Be concrete, no filler, no recap of what the reader can see in the diff.
-
-Use this content structure, with these visible section headings and no others:
-
-<h1>$SLUG — devloop report</h1>
-
-Opening result line:
-<strong>Result:</strong> $status in $N pass(es). <one-sentence headline of what shipped and the single most important thing learned.>
-
-<section>
-<h2>The shape of the problem</h2>
-<p>2-4 sentences: what the spec actually asked for, the real constraint behind it, and which alternative designs were ruled out and why. If the track or reviews surfaced a hidden assumption, name it.</p>
-</section>
-
-<section>
-<h2>What was built</h2>
-<ul><li>3-6 bullets describing the implementation at the level of design choices, not file lists. For each non-trivial choice, name the tradeoff that was weighed. The reader should be able to defend each choice in code review.</li></ul>
-</section>
-
-<section>
-<h2>What the review caught (and why it mattered)</h2>
-<p>For each unique finding across all review passes — even resolved ones — write one paragraph: the symptom, the root cause, and the principle. Group recurring themes. If a class of bug appeared twice, call that out as a pattern to internalize. If nothing was caught, say so and speculate on why: was the spec tight, was the change small, did the reviewer miss something.</p>
-</section>
-
-<section>
-<h2>What to remember next time</h2>
-<ul><li>3-5 sharp, transferable lessons. Each lesson must be actionable in a future task, not specific to this slug. Frame as "When X, prefer Y because Z." If there is nothing transferable, write a single honest line saying so.</li></ul>
-</section>
-
-<section>
-<h2>Residual risk</h2>
-<p>Concrete remaining risks, or "None known". Be specific: "untested on empty input" beats "edge cases".</p>
-</section>
-
-<section>
-<h2>Pointers</h2>
-<ul>
-<li>Spec: $SPEC</li>
-<li>Track: $TRACK</li>
-<li>Reviews: include the review files listed in the Inputs block above.</li>
-</ul>
-</section>
-EOF
-)
-else
-  REPORT_INSTRUCTIONS=$(cat <<EOF
-Write the report to $REPORT in this markdown structure. Be concrete, no filler, no recap of what the reader can see in the diff:
-
-# $SLUG — devloop report
-
-**Result:** $status in $N pass(es). <one-sentence headline of what shipped and the single most important thing learned.>
-
-## The shape of the problem
-<2-4 sentences: what the spec actually asked for, the real constraint behind it, and which alternative designs were ruled out and why. If the track or reviews surfaced a hidden assumption, name it.>
-
-## What was built
-<3-6 bullets describing the implementation at the level of design choices, not file lists. For each non-trivial choice, name the tradeoff that was weighed. The reader should be able to defend each choice in code review.>
-
-## What the review caught (and why it mattered)
-<For each unique finding across all review passes — even resolved ones — write one paragraph: the symptom, the root cause, and the principle. Group recurring themes. If a class of bug appeared twice, call that out as a pattern to internalize. If nothing was caught, say so and speculate on why (was the spec tight, was the change small, did the reviewer miss something).>
-
-## What to remember next time
-<3-5 sharp, transferable lessons. Each lesson must be actionable in a future task, not specific to this slug. Frame as "When X, prefer Y because Z." If there is nothing transferable, write a single line saying so honestly.>
-
-## Residual risk
-<Concrete remaining risks, or "None known". Be specific — "untested on empty input" beats "edge cases".>
-
-## Pointers
-- Spec: $SPEC
-- Track: $TRACK
-- Reviews: $PRIORS
-EOF
-)
-fi
-
-SYNTH_PROMPT=$(cat <<EOF
-You are writing a learning-oriented post-mortem for a developer who just ran a Codex/Claude devloop.
-This is NOT an audit log. It is a teaching artifact. The reader should come away understanding WHY
-each decision was made and what to internalize for next time.
-
-Inputs:
-- spec: $SPEC
-- track: $TRACK
-Review files:
-$PRIORS
-- final status: $status
-- passes used: $N / $MAX
-- base: $BASE, branch: $BRANCH
-
-Run: git diff --stat $BASE...HEAD   (for context only; do not paste the full diff)
-
-$REPORT_INSTRUCTIONS
-
-Style:
-- Terse, dense, no hedging.
-- No headers beyond the ones above.
-- No emoji.
-- Optimize for a developer who will read this once, six weeks from now, and needs to extract the lesson in 90 seconds.
-EOF
-)
-
-printf '%s' "$SYNTH_PROMPT" | run_claude ".codex/logs/$SLUG-report.log" \
-  || log "report synthesis failed; see .codex/logs/$SLUG-report.log"
-
-echo
-echo "result:  $status"
-echo "passes:  $N / $MAX"
-echo "report:  $REPORT"
-echo "track:   $TRACK"
-
-case "$status" in
-  accepted) exit 0 ;;
-  stalled|max-turns|unclear) exit 1 ;;
-  *) exit 2 ;;
-esac
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..e2061ac
--- /dev/null
+++ b/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "devloop",
+  "version": "0.1.0",
+  "type": "module",
+  "bin": {
+    "devloop": "./src/cli.ts"
+  },
+  "scripts": {
+    "test": "bun test",
+    "typecheck": "tsc --noEmit"
+  },
+  "dependencies": {
+    "@opentui/core": "^0.2.15"
+  },
+  "devDependencies": {
+    "@types/bun": "^1.3.1",
+    "typescript": "^5.9.3"
+  }
+}
diff --git a/src/cli.ts b/src/cli.ts
new file mode 100755
index 0000000..4dfe73b
--- /dev/null
+++ b/src/cli.ts
@@ -0,0 +1,46 @@
+#!/usr/bin/env bun
+import { parseArgs, runDevloop, type Event, type Sink } from "./devloop.ts";
+import { createTuiSink } from "./tui.ts";
+
+const argv = process.argv.slice(2); // CLI arguments only, without the runtime and script path
+const parsed = parseArgs(argv);
+
+if (typeof parsed === "string") { // parseArgs hands back usage/error text instead of options when it cannot proceed
+  console.error(parsed);
+  process.exit(argv.includes("-h") || argv.includes("--help") ? 0 : 2); // asking for help is a success; every other parse failure exits 2
+}
+
+const useTui = argv.includes("--tui") || (!argv.includes("--plain") && Boolean(process.stdout.isTTY)); // --tui forces the TUI; otherwise it is used only on a TTY and when --plain is absent
+const sink = useTui ? await createTuiSink() : plainSink();
+
+try {
+  const result = await runDevloop(parsed, sink);
+  await sink.close?.(); // close is optional on a Sink
+  if (useTui) printResult(result); // the plain sink already prints the summary when it receives the "result" event
+  process.exit(result.status === "accepted" ? 0 : result.status === "stalled" || result.status === "max-turns" || result.status === "unclear" ? 1 : 2); // 0 accepted; 1 stalled / max-turns / unclear; 2 for every other status
+} catch (error) {
+  await sink.close?.(); // close the sink before reporting the failure
+  console.error(error instanceof Error ? error.message : String(error));
+  process.exit(2);
+}
+
+function plainSink(): Sink {
+  // Line-oriented sink: step/done/gate events become "[devloop] ..." lines on stderr, "log" events are ignored, "result" is printed.
+  const event = (incoming: Event): void => {
+    if (incoming.type === "result") return printResult(incoming.result);
+    if (incoming.type === "gate") return console.error(`[devloop] ${incoming.name}: ${incoming.detail}`);
+    if (incoming.type === "step") return console.error(`[devloop] ${incoming.title}`);
+    if (incoming.type === "done") console.error(`[devloop] ${incoming.detail}`);
+  };
+  return { event };
+}
+
+function printResult(result: { status: string; passes: number; max: number; report: string; track: string }) {
+  // Writes the run summary to stdout: a blank line, then one "label:  value" row per field.
+  // The branch and commit rows are emitted only when the object actually carries those keys.
+  const rows = ["", `result:  ${result.status}`, `passes:  ${result.passes} / ${result.max}`];
+  if ("branch" in result) rows.push(`branch:  ${result.branch}`);
+  if ("commit" in result) rows.push(`commit:  ${result.commit || "none"}`);
+  rows.push(`report:  ${result.report}`, `track:   ${result.track}`);
+  for (const row of rows) console.log(row);
+}
diff --git a/src/devloop.ts b/src/devloop.ts
new file mode 100644
index 0000000..f5b952f
--- /dev/null
+++ b/src/devloop.ts
@@ -0,0 +1,452 @@
+import { createHash, randomUUID } from "node:crypto";
+import { mkdir, readFile, realpath, stat, writeFile } from "node:fs/promises";
+import path from "node:path";
+
+export type ReportFormat = "html" | "markdown"; // report flavour; runDevloop maps it to a .html or .md report file
+export type Verdict = "ACCEPT" | "REJECT" | "UNCLEAR"; // tokens parseVerdict recognises after "Verdict:"
+export type Status = // terminal outcome of a run
+  | "accepted" // reviewer ACCEPT (plus a passing acceptance matrix in strict mode)
+  | "stalled" // two consecutive REJECT reviews produced the same findings hash
+  | "max-turns" // the pass limit was used up without a terminal verdict
+  | "unclear" // reviewer UNCLEAR, or a strict-mode ACCEPT without a passing matrix
+  | "no-verdict" // the review had no recognisable Verdict line
+  | "codex-error" // runCodex reported failure
+  | "claude-error" // runClaude reported failure
+  | "review-missing" // the review file could not be read
+  | "commit-error"; // commitAccepted failed after an accepted run
+
+export type Options = { // parsed command-line options, as produced by parseArgs
+  spec: string; // spec path as given; runDevloop resolves it against cwd
+  max: number; // pass limit; parseArgs clamps it to 1..10
+  reportFormat: ReportFormat;
+  strict: boolean; // when true, the spec must contain "## Acceptance criteria"
+  cwd: string; // directory used to resolve spec and to locate the git repository
+};
+
+export type Result = { // what runDevloop resolves with and emits in the "result" event
+  status: Status;
+  passes: number; // passes used, never more than max
+  max: number;
+  report: string; // ".codex/reports/<slug>.<ext>" path, joined onto the repo root by runDevloop
+  track: string; // ".codex/tracks/<slug>.md" path, joined onto the repo root by runDevloop
+  branch: string; // starting branch, or the branch returned by commitAccepted
+  commit: string; // empty when nothing was committed
+  commitMessage: string;
+  codexSessionId: string;
+  claudeSessionId: string;
+};
+
+export type Event = // progress notifications delivered to a Sink
+  | { type: "gate"; name: string; ok: boolean; detail: string } // a pass/fail checkpoint, e.g. criteria found or a pass verdict
+  | { type: "step"; id: string; title: string } // a step has started
+  | { type: "log"; id: string; line: string } // NOTE(review): presumably one output line of step `id`; the emitter is outside this view
+  | { type: "done"; id: string; ok: boolean; detail: string } // the step with this id has finished
+  | { type: "result"; result: Result }; // final summary, sent once at the end of runDevloop
+
+export type Sink = { event(event: Event): void | Promise<void>; close?(): void | Promise<void> }; // event consumer; close is optional and may be async
+
+type RunResult = { code: number; output: string }; // NOTE(review): looks like exit code plus captured output — confirm against makeRunner
+type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise<RunResult>; // NOTE(review): input/log/id look like stdin text, log file path and event id — confirm
+
+export function parseArgs(argv: string[], cwd = process.cwd()): Options | string {
+  // One pass over argv: flags set options, the first positional is the spec, the second is max. Returns Options, or text to print.
+  let format: ReportFormat = "html";
+  let strictMode = true;
+  let specPath = "";
+  let rawMax: string | undefined;
+  for (let at = 0; at < argv.length; at += 1) {
+    const token = argv[at]!;
+    if (token === "-h" || token === "--help") return usage();
+    if (token === "--plain" || token === "--tui") continue; // display flags are read by the CLI entry point, not here
+    if (token === "--strict" || token === "--no-strict") {
+      strictMode = token === "--strict";
+    } else if (token === "--html" || token === "--markdown" || token === "--md") {
+      format = token === "--html" ? "html" : "markdown";
+    } else if (token === "--report-format") {
+      const requested = argv[(at += 1)]; // the option consumes the following argument as its value
+      if (requested !== "html" && requested !== "markdown" && requested !== "md") return usage();
+      format = requested === "md" ? "markdown" : requested;
+    } else if (token.startsWith("--")) {
+      return `unknown option: ${token}\n${usage()}`;
+    } else if (!specPath) {
+      specPath = token;
+    } else if (rawMax === undefined) {
+      rawMax = token;
+    } else return usage(); // a third positional argument is an error
+  }
+
+  if (!specPath) return usage();
+  if (!/^[+-]?\d+$/.test(rawMax ?? "5")) return "max must be an integer between 1 and 10";
+  return { spec: specPath, max: clamp(Number.parseInt(rawMax ?? "5", 10), 1, 10), reportFormat: format, strict: strictMode, cwd };
+}
+
+export function usage(): string {
+  return "usage: devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] <spec.md> [max=5]"; // one-line synopsis of the CLI
+}
+
+export function parseCriteria(markdown: string): string[] { // Non-empty lines under "## Acceptance criteria", with list markers stripped.
+  const rows = markdown.split(/\r?\n/);
+  const headingAt = rows.findIndex((row) => /^##\s+acceptance criteria\s*$/i.test(row.trim()));
+  const criteria: string[] = [];
+  for (let at = headingAt + 1; headingAt >= 0 && at < rows.length; at += 1) {
+    if (/^##\s+/.test(rows[at]!)) break; // the next "## " heading closes the section
+    const text = rows[at]!.trim().replace(/^([-*]|\d+[.)])\s+/, "");
+    if (text) criteria.push(text);
+  }
+  return criteria;
+}
+
+export function parseVerdict(review: string): Verdict | "" { // First "Verdict: <ACCEPT|REJECT|UNCLEAR>" found at the start of a line.
+  return (review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] ?? "") as Verdict | ""; // "" rather than undefined when no verdict line exists, as the return type promises
+}
+
+export function hasPassingMatrix(review: string, count: number) {
+  if (!/^## Acceptance matrix\s*$/m.test(review)) return false;
+  const labels = Array.from({ length: count }, (_, index) => `AC${index + 1}`);
+  // Each of AC1..AC<count> needs its own "- ACn: PASS" row (matched case-insensitively) somewhere in the review.
+  return labels.every((label) => new RegExp(`^-\\s*${label}:\\s*PASS\\b`, "mi").test(review));
+}
+
+export function findingsHash(review: string) { // Order-insensitive SHA-256 fingerprint of the whole "## Findings" section; equal hashes on consecutive REJECTs mean the loop has stalled.
+  const body = review.match(/^## Findings\s*\n([\s\S]*?)(?:\n##\s+|(?![\s\S]))/m)?.[1] ?? ""; // (?![\s\S]) is the real end of input; `$` under the m flag matched at the first line end and truncated the section to one line
+  const normalized = body
+    .replace(/\d+/g, "") // drop digits so renumbered findings and shifted line numbers hash the same
+    .replace(/[ \t\r\n]+/g, " ") // collapse every whitespace run, newlines included
+    .split(".") // fragments between periods are the unit that gets sorted
+    .map((line) => line.trim())
+    .filter(Boolean)
+    .sort()
+    .join("\n");
+  return createHash("sha256").update(normalized).digest("hex");
+}
+
+export async function runDevloop(options: Options, sink: Sink = { event: () => {} }): Promise<Result> { // Drives the codex-implement / claude-review passes, then the commit and report steps; resolves with the final Result.
+  const spec = await absoluteFile(options.spec, options.cwd);
+  const specText = await readFile(spec, "utf8");
+  const criteria = parseCriteria(specText);
+  if (options.strict && criteria.length === 0) throw new Error("strict mode requires ## Acceptance criteria"); // strict runs refuse a spec without criteria
+  await sink.event({ type: "gate", name: "acceptance criteria", ok: criteria.length > 0, detail: `${criteria.length} found` });
+
+  const repo = (await command("git", ["-C", options.cwd, "rev-parse", "--show-toplevel"])).trim(); // top level of the repository containing options.cwd
+  const branch = (await command("git", ["-C", repo, "rev-parse", "--abbrev-ref", "HEAD"])).trim();
+  const base = await baseBranch(repo);
+  const initialDirty = await statusPaths(repo); // captured before any pass runs; handed to commitAccepted later
+  const slug = path.basename(spec, ".md"); // every artifact below is named after the spec file
+  const dirs = [".codex/tracks", ".codex/reviews", ".codex/reports", ".codex/logs", ".codex/sessions"];
+  await Promise.all(dirs.map((dir) => mkdir(path.join(repo, dir), { recursive: true })));
+
+  const track = `.codex/tracks/${slug}.md`;
+  const report = `.codex/reports/${slug}.${options.reportFormat === "html" ? "html" : "md"}`;
+  const codexSession = `.codex/sessions/${slug}-codex.id`;
+  const claudeSession = `.codex/sessions/${slug}-claude.id`;
+  const runner = makeRunner(repo, sink);
+  await initTrack(path.join(repo, track), { spec, cwd: options.cwd, base, branch, max: options.max, reportFormat: options.reportFormat, strict: options.strict });
+
+  let status: Status = "max-turns"; // stays "max-turns" when every pass ends in a REJECT with new findings
+  let prior = ""; // findings hash of the previous REJECT review
+  let pass = 0;
+  let commit = "";
+  let commitMessage = "";
+  let finalBranch = branch; // replaced by the branch commitAccepted reports, if a commit happens
+
+  for (pass = 1; pass <= options.max; pass++) {
+    const codexLog = `.codex/logs/${slug}-r${pass}-codex.log`;
+    const codexId = `codex-${pass}`;
+    await sink.event({ type: "step", id: codexId, title: `pass ${pass}/${options.max} codex` });
+    const codex = await runCodex(runner, repo, path.join(repo, codexSession), path.join(repo, codexLog), codexPrompt({ spec, track, pass, strict: options.strict, previous: `.codex/reviews/${slug}-r${pass - 1}.md`, criteria })); // on pass 1 `previous` names an r0 review that is never written; NOTE(review): presumably codexPrompt ignores it then — confirm
+    await sink.event({ type: "done", id: codexId, ok: codex, detail: codex ? "completed" : "failed" });
+    if (!codex) {
+      status = "codex-error";
+      break;
+    }
+
+    const review = `.codex/reviews/${slug}-r${pass}.md`;
+    const claudeLog = `.codex/logs/${slug}-r${pass}-claude.log`;
+    const claudeId = `claude-${pass}`;
+    await sink.event({ type: "step", id: claudeId, title: `pass ${pass}/${options.max} claude review` });
+    const ok = await runClaude(runner, repo, path.join(repo, claudeSession), path.join(repo, claudeLog), reviewPrompt({ spec, track, base, pass, output: review, priors: listReviews(slug, pass, options.max), criteria, strict: options.strict }));
+    await sink.event({ type: "done", id: claudeId, ok, detail: ok ? "completed" : "failed" });
+    if (!ok) {
+      status = "claude-error";
+      break;
+    }
+
+    let reviewText = "";
+    try {
+      reviewText = await readFile(path.join(repo, review), "utf8");
+    } catch { // any read failure is treated as the reviewer not having written its file
+      status = "review-missing";
+      break;
+    }
+    const verdict = parseVerdict(reviewText);
+    await sink.event({ type: "gate", name: `pass ${pass} verdict`, ok: verdict === "ACCEPT", detail: verdict || "MISSING" });
+    if (verdict === "ACCEPT") {
+      status = options.strict && !hasPassingMatrix(reviewText, criteria.length) ? "unclear" : "accepted"; // strict mode downgrades an ACCEPT that lacks a fully passing acceptance matrix
+      break;
+    }
+    if (verdict === "UNCLEAR") {
+      status = "unclear";
+      break;
+    }
+    if (verdict === "REJECT") {
+      const hash = findingsHash(reviewText);
+      if (prior && hash === prior) { // same findings as the previous REJECT: no progress, stop looping
+        status = "stalled";
+        break;
+      }
+      prior = hash;
+    } else { // no recognisable verdict line in the review
+      status = "no-verdict";
+      break;
+    }
+  }
+
+  if (pass > options.max) pass = options.max; // a loop that runs to completion leaves pass at max + 1
+  if (status === "accepted") {
+    const commitId = "commit";
+    await sink.event({ type: "step", id: commitId, title: "local branch and commit" });
+    const committed = await commitAccepted(repo, slug, initialDirty).catch(() => undefined); // a rejected commit step becomes undefined and is reported as "commit-error" below
+    if (committed) {
+      finalBranch = committed.branch;
+      commit = committed.commit;
+      commitMessage = committed.message;
+      await sink.event({ type: "done", id: commitId, ok: true, detail: commit ? `${finalBranch} ${commit}` : `${finalBranch} no changes` });
+    } else {
+      status = "commit-error";
+      await sink.event({ type: "done", id: commitId, ok: false, detail: "failed" });
+    }
+  }
+
+  const codexSessionId = await readLine(path.join(repo, codexSession));
+  const claudeSessionId = await readLine(path.join(repo, claudeSession));
+  await synthesizeReport(runner, repo, { slug, spec, track, report, status, pass, max: options.max, base, initialBranch: branch, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId, format: options.reportFormat, reviews: listReviews(slug, pass, options.max) }); // runs for every status, not only accepted runs
+  const result = { status, passes: pass, max: options.max, report, track, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId };
+  await sink.event({ type: "result", result });
+  return result;
+}
+
+async function absoluteFile(file: string, cwd: string) { // canonical path of `file` resolved against `cwd`; throws the usage text when it cannot be stat'ed
+  const candidate = path.resolve(cwd, file);
+  if (await stat(candidate).then(() => false, () => true)) throw new Error(usage());
+  return realpath(candidate);
+}
+
+async function command(cmd: string, args: string[]) { // runs `cmd args` and returns its stdout; a non-zero exit throws with the trimmed stderr (or "<cmd> failed")
+  const child = Bun.spawn([cmd, ...args], { stdout: "pipe", stderr: "pipe" });
+  const [stdout, stderr, exitCode] = await Promise.all([new Response(child.stdout).text(), new Response(child.stderr).text(), child.exited]); // both pipes are read while waiting for exit
+  if (exitCode === 0) return stdout;
+  throw new Error(stderr.trim() || `${cmd} failed`);
+}
+
+async function baseBranch(repo: string) { // first probe that succeeds wins: origin/HEAD target, local main, local master; "main" when none does
+  const probes = [
+    ["-C", repo, "symbolic-ref", "--short", "refs/remotes/origin/HEAD"],
+    ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/main"],
+    ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/master"],
+  ];
+  for (const probe of probes) {
+    const child = Bun.spawn(["git", ...probe], { stdout: "pipe", stderr: "pipe" });
+    if ((await child.exited) !== 0) continue;
+    if (probe[2] !== "symbolic-ref") return probe.at(-1)!.split("/").pop()!; // show-ref probe: the branch name is the last segment of the ref
+    return (await new Response(child.stdout).text()).trim().replace(/^origin\//, "");
+  }
+  return "main";
+}
+
+async function statusPaths(repo: string) { // set of every path `git status` reports (untracked included), with both sides of renames/copies
+  const out = await command("git", ["-C", repo, "status", "--porcelain=v1", "-z", "--untracked-files=all"]);
+  const records = out.split("\0").filter(Boolean);
+  const seen = new Set<string>();
+  let index = 0;
+  while (index < records.length) {
+    const record = records[index++]!;
+    const flags = record.slice(0, 2); // two status letters, one separator character, then the path
+    const name = record.slice(3);
+    if (name) seen.add(name);
+    if (!/[RC]/.test(flags)) continue;
+    const source = records[index++]; // a rename/copy record is followed by one extra NUL-separated path
+    if (source) seen.add(source);
+  }
+  return seen;
+}
+
+async function commitAccepted(repo: string, slug: string, initialDirty: Set<string>) { // commits this run's changes on a devloop branch; resolves { branch, commit, message }, with commit "" when nothing qualifies
+  const current = (await command("git", ["-C", repo, "branch", "--show-current"])).trim();
+  const branch = await nextBranch(repo, slug, current);
+  const message = `feat: ${slugify(slug)}`;
+  if (branch !== current) await command("git", ["-C", repo, "switch", "-c", branch]); // nextBranch hands back `current` when it already matches, so no new branch is created then
+  const changed = [...(await statusPaths(repo))].filter((file) => !initialDirty.has(file) && !file.startsWith(".codex/")); // leave out paths that were dirty before the run and everything under .codex/
+  if (changed.length === 0) return { branch, commit: "", message };
+  await command("git", ["-C", repo, "add", "--", ...changed]);
+  await command("git", ["-C", repo, "commit", "--only", "-m", message, "--", ...changed]); // commit restricted to the listed paths
+  return { branch, commit: (await command("git", ["-C", repo, "rev-parse", "--short", "HEAD"])).trim(), message };
+}
+
+// Chooses the branch to commit on. Stays on `current` when it is already `devloop/<slug>` or
+// `devloop/<slug>-<n>`; otherwise returns the first of `devloop/<slug>`, `-2`, `-3`, ... with no local ref.
+async function nextBranch(repo: string, slug: string, current: string) {
+  const stem = `devloop/${slugify(slug)}`;
+  const numbered = new RegExp(`^${escapeRegex(stem)}-\\d+$`);
+  if (current === stem || numbered.test(current)) return current;
+  let candidate = stem;
+  // Suffixes start at 2: the bare name is the first candidate.
+  for (let n = 2; await branchExists(repo, candidate); n++) candidate = `${stem}-${n}`;
+  return candidate;
+}
+
+async function branchExists(repo: string, branch: string) { // true when `git show-ref --verify` finds refs/heads/<branch> in `repo`
+  const ref = `refs/heads/${branch}`;
+  return (await Bun.spawn(["git", "-C", repo, "show-ref", "--verify", "--quiet", ref]).exited) === 0;
+}
+
+function makeRunner(cwd: string, sink: Sink): Runner { // builds a Runner: spawn in `cwd`, send `input` on stdin, resolve with { code, output }
+  return async (cmd, args, input = "", log, id) => {
+    let proc: Bun.Subprocess<"pipe", "pipe", "pipe">;
+    try {
+      proc = Bun.spawn([cmd, ...args], { cwd, stdin: "pipe", stdout: "pipe", stderr: "pipe", env: Bun.env });
+    } catch (error) {
+      const output = error instanceof Error ? error.message : String(error); // spawn itself failed: the error message becomes the output
+      if (log) await writeFile(log, output);
+      return { code: 127, output };
+    }
+    proc.stdin.write(input);
+    proc.stdin.end();
+    let output = ""; // stdout and stderr text, appended in arrival order
+    const pump = async (stream: ReadableStream<Uint8Array>) => {
+      const reader = stream.getReader();
+      const decoder = new TextDecoder();
+      let pending = ""; // trailing partial line carried over to the next chunk
+      for (;;) {
+        const { done, value } = await reader.read();
+        const text = done ? decoder.decode() : decoder.decode(value, { stream: true }); // stream mode keeps a multi-byte character split across chunks intact; the final call flushes it
+        output += text;
+        pending += text;
+        if (done) break;
+        const lines = pending.split(/\r?\n/);
+        pending = lines.pop() ?? "";
+        if (id) for (const line of lines.filter(Boolean)) await sink.event({ type: "log", id, line }); // empty lines are not forwarded
+      }
+      if (id && pending) await sink.event({ type: "log", id, line: pending });
+    };
+    const [, , code] = await Promise.all([pump(proc.stdout), pump(proc.stderr), proc.exited]);
+    if (log) await writeFile(log, output); // when `log` is given, the combined output is written there
+    return { code, output };
+  };
+}
+
+async function initTrack(file: string, data: { spec: string; cwd: string; base: string; branch: string; max: number; reportFormat: ReportFormat; strict: boolean }) { // writes the track header (run settings plus start time) unless `file` already exists
+  if (await stat(file).catch(() => false)) return; // already present: do not overwrite
+  await writeFile(
+    file,
+    `# Track: ${path.basename(file, ".md")}\n\n- spec: ${data.spec}\n- cwd: ${data.cwd}\n- base: ${data.base}\n- branch: ${data.branch}\n- max: ${data.max}\n- report-format: ${data.reportFormat}\n- strict: ${data.strict}\n- started: ${new Date().toISOString()}\n\n`,
+  );
+}
+
+async function readLine(file: string) { // first line of `file`; "" when the file cannot be read
+  return readFile(file, "utf8").then((text) => text.split(/\r?\n/, 1)[0] ?? "", () => "");
+}
+
+async function writeLine(file: string, value: string) { // replaces the content of `file` with `value` plus one trailing newline
+  await writeFile(file, value.concat("\n"));
+}
+
+async function runCodex(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) { // one Codex run; true only on exit 0 with a stored or newly extracted session id
+  const session = await readLine(sessionFile);
+  const args = session
+    ? ["exec", "resume", "--dangerously-bypass-approvals-and-sandbox", session, "-"]
+    : ["exec", "--dangerously-bypass-approvals-and-sandbox", "-C", repo, "-"];
+  const pass = /.*r(\d+)-codex/.exec(log)?.[1]; // greedy prefix picks the last "rN-codex", so the same text inside a slug cannot win; no deprecated RegExp.$1
+  const result = await runner("codex", args, prompt, log, pass ? `codex-${pass}` : "codex");
+  if (result.code !== 0) return false;
+  if (session) return true; // resumed run: the stored id is kept as is
+  const next = extractSessionId(result.output);
+  if (!next) return false; // first run must reveal a session id, otherwise later passes could not resume
+  await writeLine(sessionFile, next);
+  return true;
+}
+
+async function runClaude(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) { // one Claude run; true on exit 0, and a new session id is stored only then
+  const session = await readLine(sessionFile);
+  const next = session || randomUUID();
+  const resume = session ? ["--resume", session] : ["--session-id", next]; // continue the stored session, or start one under a fresh id
+  const args = ["-p", ...resume, "--dangerously-skip-permissions", "--add-dir", repo];
+  const pass = /.*r(\d+)-claude/.exec(log)?.[1]; // greedy prefix picks the last "rN-claude", so the same text inside a slug cannot win; no deprecated RegExp.$1
+  const result = await runner("claude", args, prompt, log, pass ? `claude-${pass}` : "report");
+  if (result.code !== 0) return false;
+  if (!session) await writeLine(sessionFile, next);
+  return true;
+}
+
+// Returns the lowercased first UUID that appears on a line mentioning a session/resume hint,
+// or undefined when no such line carries one.
+function extractSessionId(output: string) {
+  const hint = /(session.?id|thread_id|codex exec resume|codex resume|To continue this session)/i;
+  const uuid = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i;
+  const carrier = output.split(/\r?\n/).find((line) => hint.test(line) && uuid.test(line));
+  return carrier?.match(uuid)?.[0].toLowerCase();
+}
+
+function listReviews(slug: string, upto: number, max: number) { // newline-joined bullet list of review paths r1..rN, where N = min(upto, max)
+  return Array.from({ length: Math.min(upto, max) }, (_, index) => index + 1).map((n) => `- .codex/reviews/${slug}-r${n}.md`).join("\n");
+}
+
+function criteriaBlock(criteria: string[]) { // one "ACn: text" line per criterion; a fixed placeholder when the list is empty
+  return criteria.length ? criteria.map((text, index) => `AC${index + 1}: ${text}`).join("\n") : "No parsed acceptance criteria.";
+}
+
+function codexPrompt(input: { spec: string; track: string; pass: number; strict: boolean; previous: string; criteria: string[] }) { // prompt text for Codex: the implement variant on pass 1, the fix-from-review variant on later passes
+  const strict = input.strict // extra lifecycle rules, inserted after the acceptance criteria only in strict mode
+    ? "\nStrict lifecycle:\n1. Add or update regression tests before implementation.\n2. Run the narrow test first and record the failing result, unless impossible; if impossible, say why.\n3. Implement the smallest change.\n4. Run targeted tests, full tests, lint/typecheck, and coverage. Coverage must be 100% when the project exposes coverage tooling.\n"
+    : "";
+  return input.pass === 1
+    ? `You are implementing against an approved spec.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nPass: ${input.pass}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}${strict}\nTasks:\n1. Read the spec.\n2. Implement the smallest working change satisfying the acceptance criteria.\n3. Append "## Pass ${input.pass} - implement" to ${input.track} with changed files, design tradeoffs, verification, and residual risk.\n\nConstraints:\n- Do not commit.\n- Do not edit the spec.\n- Do not revert unrelated dirty files.\n`
+    : `Fix only the findings in the review. Do not refactor unrelated code.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nReview: ${input.previous}\nPass: ${input.pass}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}${strict}\nTasks:\n1. Read the review file.\n2. Fix each finding or explain why it is wrong in the track.\n3. Re-run relevant tests.\n4. Append "## Pass ${input.pass} - fix" to ${input.track} with per-finding outcomes.\n`;
+}
+
+function reviewPrompt(input: { spec: string; track: string; base: string; pass: number; output: string; priors: string; criteria: string[]; strict: boolean }) { // prompt text for the Claude review: inputs, the exact review file format to write at `output`, and the verdict rules (stricter ACCEPT rule in strict mode)
+  return `You are reviewing a Codex implementation. Be a senior reviewer, not a linter.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nBase: ${input.base}\nPass: ${input.pass}\nPrior reviews:\n${input.priors}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}\nOutput path: ${input.output}\n\nSteps:\n1. Read the spec and track.\n2. Run: git diff ${input.base}...HEAD\n3. Read prior reviews so you do not repeat resolved findings.\n4. Write the review to ${input.output} using this exact format:\n\n# Claude review ${input.pass}\n\nVerdict: <ACCEPT | REJECT | UNCLEAR>\n\n## Acceptance matrix\n\n- AC1: <PASS | FAIL | UNCLEAR> - <evidence>\n\n## Findings\n\n1. [severity] <file:line> - <symptom>. Root cause: <why>. Principle: <principle>.\n\n## Missing tests\n\n- <gap, or None>\n\n## Fix instructions\n\n1. <standalone instruction>\n\n## Notes\n\n- <scope, disputes, lessons, or None>\n\nRules:\n- The verdict line must appear verbatim.\n- ACCEPT requires every acceptance criterion PASS with concrete evidence.${input.strict ? "\n- ACCEPT also requires regression-test evidence, red/green evidence when behavior changed, passing full tests, and 100% coverage when coverage tooling exists." : ""}\n- For ACCEPT: Findings and Fix instructions bodies are "None".\n- Findings must explain WHY, not just WHAT.\n`;
+}
+
+async function synthesizeReport(runner: Runner, repo: string, input: { slug: string; spec: string; track: string; report: string; status: Status; pass: number; max: number; base: string; initialBranch: string; branch: string; commit: string; commitMessage: string; codexSessionId: string; claudeSessionId: string; format: ReportFormat; reviews: string }) { // asks Claude (resuming the review session when one is stored) to write the post-mortem report to input.report
+  const metadata = `Result: ${input.status}
+Passes: ${input.pass} / ${input.max}
+Repository: ${repo}
+Spec: ${input.spec}
+Base branch: ${input.base}
+Starting branch: ${input.initialBranch}
+Final branch: ${input.branch}
+Local commit: ${input.commit || "none"}
+Commit message: ${input.commitMessage || "none"}
+Codex session: ${input.codexSessionId || "unknown"}
+Claude session: ${input.claudeSessionId || "unknown"}
+Track: ${input.track}
+Reviews:
+${input.reviews}`;
+  const body = // format-specific writing instruction: standalone HTML or markdown, same section headings
+    input.format === "html"
+      ? `Write the report to ${input.report} as valid standalone HTML. Use a readable document layout with embedded CSS, a compact metadata table at the top, and substantive sections after it. Include these visible section headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.`
+      : `Write the report to ${input.report} in markdown with these headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.`;
+  const sessionFile = path.join(repo, `.codex/sessions/${input.slug}-claude.id`);
+  const session = await readLine(sessionFile);
+  const next = session || randomUUID();
+  const result = await runner(
+    "claude",
+    session ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo] : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo],
+    `You are writing a learning-oriented post-mortem for a developer who just ran a Codex/Claude devloop.\n\nMetadata to render at the top exactly and visibly:\n${metadata}\n\nInputs:\n- spec: ${input.spec}\n- track: ${input.track}\nReview files:\n${input.reviews}\n- final status: ${input.status}\n- passes used: ${input.pass} / ${input.max}\n- base: ${input.base}, starting branch: ${input.initialBranch}, final branch: ${input.branch}, local commit: ${input.commit || "none"}\n\n${body}\n\nStyle:\n- Human readable, not ornamental.\n- Preserve useful substance over brevity.\n- Teach the why: symptom, root cause, principle, decision, tradeoff, and evidence.\n- No emoji.\n`,
+    path.join(repo, `.codex/logs/${input.slug}-report.log`),
+    "report",
+  );
+  if (!session && result.code === 0) await writeLine(sessionFile, next); // as in runClaude: never store an id for a run that failed, or later runs would --resume a session that was never created
+}
+
+function clamp(value: number, min: number, max: number) { // caps `value` at `max`, then raises the result to at least `min`
+  return Math.max(Math.min(value, max), min);
+}
+
+function slugify(value: string) { // lowercase; each run outside [a-z0-9._-] becomes "-"; edge dashes dropped; "change" when nothing is left
+  return value.toLowerCase().replaceAll(/[^a-z0-9._-]+/g, "-").replace(/^-+/, "").replace(/-+$/, "") || "change";
+}
+
+function escapeRegex(value: string) { // backslash-escapes every regex metacharacter so `value` matches literally inside a RegExp
+  return value.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`);
+}
diff --git a/src/tui.ts b/src/tui.ts
new file mode 100644
index 0000000..0403fe2
--- /dev/null
+++ b/src/tui.ts
@@ -0,0 +1,68 @@
+import type { Event, Result, Sink } from "./devloop.ts";
+
+type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean }; // one list entry (step or gate): captured log lines plus whether they are unfolded
+
+const LOGO = [ // banner lines that view() prints above the rows
+  "   ▐▌▗▞▀▚▖▄   ▄ █  ▄▄▄   ▄▄▄  ▄▄▄▄  ",
+  "   ▐▌▐▛▀▀▘█   █ █ █   █ █   █ █   █ ",
+  "▗▞▀▜▌▝▚▄▄▖ ▀▄▀  █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ",
+  "▝▚▄▟▌           █             █     ",
+  "                              ▀",
+];
+
+export async function createTuiSink(): Promise<Sink> { // alternate-screen OpenTUI sink: one foldable row per step/gate, plus the result summary once it arrives
+  const { TextRenderable, createCliRenderer } = await import("@opentui/core");
+  const renderer = await createCliRenderer({ exitOnCtrlC: true, consoleMode: "disabled", screenMode: "alternate-screen" });
+  const text = new TextRenderable(renderer, { id: "devloop", width: "100%", height: "100%", content: "" });
+  const rows: Row[] = [];
+  let selected = 0; // index of the highlighted row; stays >= 0 even while `rows` is empty
+  let result: Result | undefined;
+
+  renderer.root.add(text);
+  renderer.keyInput.on("keypress", (key) => {
+    if (key.name === "up" || key.name === "k") selected = Math.max(0, selected - 1);
+    else if (key.name === "down" || key.name === "j") selected = Math.max(0, Math.min(rows.length - 1, selected + 1)); // floor at 0: "down" on an empty list used to leave -1 behind, and a later toggle then hit rows[-1]
+    else if (rows.length && (key.name === "return" || key.name === "space")) rows[selected]!.open = !rows[selected]!.open;
+    render();
+  });
+
+  function render() { // rebuild the whole screen text from current state and request a repaint
+    text.content = view(rows, selected, result);
+    renderer.requestRender();
+  }
+
+  render();
+  return {
+    event(event: Event) {
+      if (event.type === "step") rows.push({ id: event.id, title: event.title, status: "run", detail: "running", lines: [], open: false });
+      else if (event.type === "log") row(rows, event.id).lines.push(event.line);
+      else if (event.type === "done") Object.assign(row(rows, event.id), { status: event.ok ? "ok" : "fail", detail: event.detail });
+      else if (event.type === "gate") rows.push({ id: event.name, title: event.name, status: event.ok ? "ok" : "fail", detail: event.detail, lines: [], open: false }); // gates show up as already-finished rows
+      else result = event.result;
+      selected = Math.min(selected, Math.max(0, rows.length - 1));
+      render();
+    },
+    close() {
+      renderer.destroy();
+    },
+  };
+}
+
+export function view(rows: Row[], selected: number, result?: Result) { // screen text: logo, one header per row (plus its last 80 log lines when open), then the result summary or the key help
+  const body: string[] = [];
+  rows.forEach((item, i) => {
+    const fold = item.lines.length === 0 ? "   " : item.open ? "[-]" : "[+]";
+    body.push(`${i === selected ? ">" : " "} ${icon(item.status)} ${fold} ${item.title} - ${item.detail}`);
+    if (item.open) for (const line of item.lines.slice(-80)) body.push(`      ${line}`);
+  });
+  const footer = result ? ["", `result:  ${result.status}`, `passes:  ${result.passes} / ${result.max}`, `branch:  ${result.branch}`, `commit:  ${result.commit || "none"}`, `report:  ${result.report}`, `track:   ${result.track}`] : ["", "enter toggles logs, j/k moves"];
+  return [...LOGO, "", ...body, ...footer].join("\n");
+}
+
+function row(rows: Row[], id: string) { // the row with this id, or a new "running" placeholder appended to `rows`
+  return rows.find((entry) => entry.id === id) ?? rows.at(rows.push({ id, title: id, status: "run", detail: "running", lines: [], open: false }) - 1)!;
+}
+
+function icon(status: Row["status"]) { // two-character marker for a row header: "ok", "!!" for fail, ".." otherwise
+  return status === "fail" ? "!!" : status === "ok" ? "ok" : "..";
+}
diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts
new file mode 100644
index 0000000..b1bd610
--- /dev/null
+++ b/tests/devloop.test.ts
@@ -0,0 +1,329 @@
+import { afterAll, beforeEach, describe, expect, test } from "bun:test";
+import { mkdtemp, readFile, realpath, rm, stat, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import path from "node:path";
+import { parseArgs, parseCriteria, parseVerdict, runDevloop, type Event, type Options } from "../src/devloop.ts";
+
+const root = await mkdtemp(path.join(tmpdir(), "devloop-test."));
+const oldPath = process.env.PATH ?? ""; // PATH at suite start; every fixture prepends its mock bin directory to this value
+
+afterAll(async () => rm(root, { recursive: true, force: true }));
+beforeEach(() => {
+  process.env.PATH = oldPath; // restore first: tests rewrite PATH (one strips it to /usr/bin:/bin) and must not inherit each other's edits
+  delete process.env.DEVLOOP_TEST_VERDICTS;
+  delete process.env.DEVLOOP_TEST_STATE;
+  delete process.env.DEVLOOP_TEST_NO_MATRIX;
+  delete process.env.DEVLOOP_TEST_NO_REVIEW;
+  delete process.env.DEVLOOP_TEST_NO_VERDICT;
+  delete process.env.DEVLOOP_TEST_FAIL_CODEX;
+  delete process.env.DEVLOOP_TEST_FAIL_CLAUDE;
+});
+
+describe("parsing", () => { // pure parser checks: no repo, no mock binaries
+  test("parses options tightly", () => {
+    expect(parseArgs(["--no-strict", "--report-format", "md", "spec.md", "08"], "/x")).toEqual({ // "md" maps to "markdown" and "08" to 8
+      spec: "spec.md",
+      max: 8,
+      reportFormat: "markdown",
+      strict: false,
+      cwd: "/x",
+    } satisfies Options);
+    expect(parseArgs(["spec.md", "0"], "/x")).toMatchObject({ max: 1 }); // out-of-range max values are pulled to the nearest bound
+    expect(parseArgs(["spec.md", "99"], "/x")).toMatchObject({ max: 10 });
+    expect(parseArgs(["--wat"], "/x")).toContain("unknown option"); // invalid input comes back as a message string
+    expect(parseArgs([], "/x")).toContain("usage:");
+    expect(parseArgs(["spec.md", "nope"], "/x")).toBe("max must be an integer between 1 and 10");
+  });
+
+  test("extracts acceptance criteria", () => {
+    expect(parseCriteria("# Spec\n\n## Acceptance criteria\n1. One\n- Two\n\n## Notes\nNope")).toEqual(["One", "Two"]); // numbered and dashed items count; the next heading ends the section
+    expect(parseCriteria("# Spec")).toEqual([]);
+    expect(parseVerdict("Verdict: ACCEPT\n")).toBe("ACCEPT");
+  });
+});
+
+describe("loop", () => { // end-to-end runDevloop runs against the mock codex/claude scripts that fixture() puts first on PATH
+  test("accepts and writes core artifacts", async () => {
+    const { repo, state } = await fixture("accept");
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const { result, events } = await run(repo);
+
+    expect(result.status).toBe("accepted");
+    expect(result.passes).toBe(1);
+    expect(result.branch).toBe("devloop/change");
+    expect(result.commit).toMatch(/^[0-9a-f]+$/);
+    expect(result.commitMessage).toBe("feat: change");
+    await exists(path.join(repo, ".codex/tracks/change.md"));
+    await exists(path.join(repo, ".codex/reviews/change-r1.md"));
+    await exists(path.join(repo, ".codex/reports/change.html"));
+    expect(await readFile(path.join(repo, ".codex/sessions/change-codex.id"), "utf8")).toContain("00000000-0000-4000-8000-000000000001");
+    expect(await readFile(path.join(repo, ".codex/tracks/change.md"), "utf8")).toContain("- strict: true");
+    expect(await readFile(path.join(repo, ".codex/reviews/change-r1.md"), "utf8")).toContain("- AC1: PASS");
+    expect(await readFile(path.join(state, "codex-args.log"), "utf8")).toContain(`exec --dangerously-bypass-approvals-and-sandbox -C ${repo} -`);
+    expect((await Bun.$`git -C ${repo} branch --show-current`.text()).trim()).toBe("devloop/change");
+    expect((await Bun.$`git -C ${repo} log -1 --format=%s`.text()).trim()).toBe("feat: change");
+    expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).toContain("feature.txt");
+    expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).not.toContain(".codex/"); // loop artifacts stay out of the commit
+    const reportPrompt = await readFile(path.join(state, "claude-prompts.log"), "utf8");
+    expect(reportPrompt).toContain("Codex session: 00000000-0000-4000-8000-000000000001");
+    expect(reportPrompt).toContain("Final branch: devloop/change");
+    expect(reportPrompt).toContain(`Local commit: ${result.commit}`);
+    expect(reportPrompt).toContain("Commit message: feat: change");
+    expect(events.some((event) => event.type === "gate" && event.name === "acceptance criteria" && event.ok)).toBe(true);
+    expect(events).toContainEqual({ type: "log", id: "codex-1", line: "codex-tail" }); // the mock writes this to stderr without a trailing newline
+  });
+
+  test("rejects then accepts with resumed sessions", async () => {
+    const { repo, state } = await fixture("reject-accept");
+    process.env.DEVLOOP_TEST_VERDICTS = "REJECT,ACCEPT";
+    const { result } = await run(repo, { max: 3 });
+
+    expect(result.status).toBe("accepted");
+    expect(result.passes).toBe(2);
+    expect(await readFile(path.join(repo, ".codex/reviews/change-r1.md"), "utf8")).toContain("Verdict: REJECT");
+    expect(await readFile(path.join(repo, ".codex/reviews/change-r2.md"), "utf8")).toContain("Verdict: ACCEPT");
+    expect(await readFile(path.join(state, "codex-args.log"), "utf8")).toContain("exec resume --dangerously-bypass-approvals-and-sandbox 00000000-0000-4000-8000-000000000001 -");
+  });
+
+  test("stalls on repeated reject findings", async () => {
+    const { repo } = await fixture("stall");
+    process.env.DEVLOOP_TEST_VERDICTS = "REJECT,REJECT";
+    const { result } = await run(repo, { max: 5 });
+
+    expect(result.status).toBe("stalled");
+    expect(result.passes).toBe(2); // the second identical REJECT ends the run although max is 5
+  });
+
+  test("supports markdown reports", async () => {
+    const { repo, state } = await fixture("markdown");
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const { result } = await run(repo, { reportFormat: "markdown" });
+
+    expect(result.report).toBe(".codex/reports/change.md");
+    await exists(path.join(repo, ".codex/reports/change.md"));
+    expect(await exists(path.join(repo, ".codex/reports/change.html"), false)).toBe(false);
+    expect(await readFile(path.join(state, "claude-prompts.log"), "utf8")).toContain("in markdown");
+  });
+
+  test("skips files dirty before the run when committing", async () => {
+    const { repo } = await fixture("dirty-before");
+    await writeFile(path.join(repo, "dirty.txt"), "do not commit\n");
+    await writeFile(path.join(repo, "old.txt"), "old\n");
+    await Bun.$`git -C ${repo} add old.txt`.quiet();
+    await Bun.$`git -C ${repo} commit -q -m old`.quiet();
+    await Bun.$`git -C ${repo} mv old.txt renamed.txt`.quiet(); // leaves a staged rename in place before the run starts
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const { result } = await run(repo);
+
+    expect(result.status).toBe("accepted");
+    expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).toContain("feature.txt");
+    expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).not.toContain("dirty.txt");
+    expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).not.toContain("renamed.txt");
+    expect(await Bun.$`git -C ${repo} status --short -- dirty.txt`.text()).toContain("?? dirty.txt");
+    expect(await Bun.$`git -C ${repo} status --short -- renamed.txt`.text()).toContain("renamed.txt");
+  });
+
+  test("reports commit errors", async () => {
+    const { repo } = await fixture("commit-error");
+    await writeFile(path.join(repo, ".git/hooks/pre-commit"), "#!/usr/bin/env bash\nexit 1\n", { mode: 0o755 }); // a pre-commit hook that always exits 1
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const { result } = await run(repo);
+
+    expect(result.status).toBe("commit-error");
+  });
+
+  test("uses a suffixed branch when the default branch exists", async () => {
+    const { repo } = await fixture("branch-exists");
+    await Bun.$`git -C ${repo} branch devloop/change`.quiet();
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const { result } = await run(repo);
+
+    expect(result.status).toBe("accepted");
+    expect(result.branch).toBe("devloop/change-2"); // suffix numbering starts at 2
+  });
+
+  test("preserves spacey slugs and invocation repo ownership", async () => {
+    const work = await fixture("space-work", undefined, "change with spaces.md");
+    const specOnly = await fixture("space-spec", undefined, "external spec.md");
+    process.env.PATH = `${work.bin}:${oldPath}`; // the second fixture() call re-pointed PATH and state; point them back at `work`
+    process.env.DEVLOOP_TEST_STATE = work.state;
+    process.env.DEVLOOP_TEST_VERDICTS = "REJECT,ACCEPT";
+
+    const spaced = await runDevloop({ spec: work.specPath, max: 2, reportFormat: "html", strict: true, cwd: work.repo });
+    expect(spaced.status).toBe("accepted");
+    await exists(path.join(work.repo, ".codex/reviews/change with spaces-r2.md"));
+
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const external = await runDevloop({ spec: specOnly.specPath, max: 1, reportFormat: "html", strict: true, cwd: work.repo });
+    expect(external.status).toBe("accepted");
+    await exists(path.join(work.repo, ".codex/tracks/external spec.md"));
+    expect(await exists(path.join(specOnly.repo, ".codex"), false)).toBe(false); // artifacts belong to the cwd repo, not to the repo holding the spec
+  });
+
+  test("requires acceptance criteria in strict mode", async () => {
+    const { repo } = await fixture("no-criteria", "# Spec\n");
+    await expect(run(repo)).rejects.toThrow("strict mode requires ## Acceptance criteria");
+    await expect(runDevloop({ spec: path.join(repo, ".specs/missing.md"), max: 1, reportFormat: "html", strict: true, cwd: repo })).rejects.toThrow("usage:");
+  });
+
+  test("allows missing criteria only when strict is off", async () => {
+    const { repo } = await fixture("loose", "# Spec\n");
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    const { result, events } = await run(repo, { strict: false });
+
+    expect(result.status).toBe("accepted");
+    expect(events).toContainEqual({ type: "gate", name: "acceptance criteria", ok: false, detail: "0 found" });
+  });
+
+  test("turns strict accepts without matrix into unclear", async () => {
+    const { repo } = await fixture("no-matrix");
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    process.env.DEVLOOP_TEST_NO_MATRIX = "1";
+    const { result } = await run(repo);
+
+    expect(result.status).toBe("unclear");
+    expect(result.passes).toBe(1);
+  });
+
+  test("handles agent and review failures", async () => { // each failure switch is cleared again before the next fixture runs
+    const codex = await fixture("codex-fail");
+    process.env.DEVLOOP_TEST_FAIL_CODEX = "1";
+    expect((await run(codex.repo)).result.status).toBe("codex-error");
+    delete process.env.DEVLOOP_TEST_FAIL_CODEX;
+
+    const claude = await fixture("claude-fail");
+    process.env.DEVLOOP_TEST_FAIL_CLAUDE = "1";
+    expect((await run(claude.repo)).result.status).toBe("claude-error");
+    delete process.env.DEVLOOP_TEST_FAIL_CLAUDE;
+
+    const missing = await fixture("missing-review");
+    process.env.DEVLOOP_TEST_NO_REVIEW = "1";
+    expect((await run(missing.repo)).result.status).toBe("review-missing");
+    delete process.env.DEVLOOP_TEST_NO_REVIEW;
+
+    const noVerdict = await fixture("no-verdict");
+    process.env.DEVLOOP_TEST_NO_VERDICT = "1";
+    expect((await run(noVerdict.repo)).result.status).toBe("no-verdict");
+    delete process.env.DEVLOOP_TEST_NO_VERDICT;
+  });
+
+  test("handles unclear verdicts and missing executables", async () => {
+    const unclear = await fixture("unclear");
+    process.env.DEVLOOP_TEST_VERDICTS = "UNCLEAR";
+    expect((await run(unclear.repo)).result.status).toBe("unclear");
+
+    const missingClaude = await fixture("missing-claude-bin");
+    await rm(path.join(missingClaude.repo, "../bin/claude"), { force: true });
+    process.env.PATH = `${path.join(missingClaude.repo, "../bin")}:/usr/bin:/bin`; // mock bin (now without claude) plus the system directories only
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    expect((await run(missingClaude.repo)).result.status).toBe("claude-error");
+  });
+
+  test("falls back to main when no base branch exists", async () => {
+    const { repo } = await fixture("no-base");
+    await Bun.$`git -C ${repo} branch -m topic`.quiet(); // renames the only branch, so neither main nor master exists
+    process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
+    expect((await runDevloop({ spec: path.join(repo, ".specs/change.md"), max: 1, reportFormat: "html", strict: true, cwd: repo })).status).toBe("accepted");
+    expect(await readFile(path.join(repo, ".codex/tracks/change.md"), "utf8")).toContain("- base: main");
+  });
+});
+
+async function fixture(name: string, spec = "# Fixture spec\n\n## Acceptance criteria\n1. The loop runs deterministically under test.\n", specName = "change.md") { // builds <root>/<name>/{repo,bin,state}: a git repo on main with one commit and an uncommitted spec, plus the mock agents
+  const dir = path.join(root, name);
+  const repo = path.join(dir, "repo");
+  const bin = path.join(dir, "bin");
+  const state = path.join(dir, "state");
+  await Bun.$`mkdir -p ${repo}/.specs ${bin} ${state}`.quiet();
+  await Bun.$`git init -q ${repo}`.quiet();
+  await Bun.$`git -C ${repo} symbolic-ref HEAD refs/heads/main`.quiet(); // make the initial branch "main"
+  await writeFile(path.join(repo, "README.md"), "# Fixture\n");
+  const specPath = path.join(repo, ".specs", specName);
+  await writeFile(specPath, spec);
+  await Bun.$`git -C ${repo} config user.email devloop-test@example.com`.quiet();
+  await Bun.$`git -C ${repo} config user.name "devloop test"`.quiet();
+  await Bun.$`git -C ${repo} add README.md`.quiet(); // only the README is committed; the spec stays untracked
+  await Bun.$`git -C ${repo} commit -q -m init`.quiet();
+  await installMocks(bin);
+  process.env.PATH = `${bin}:${oldPath}`; // side effect: this fixture's mocks now come first on PATH
+  process.env.DEVLOOP_TEST_STATE = state; // side effect: the mocks write their logs under this fixture's state directory
+  return { repo: await real(repo), state, bin, specPath };
+}
+
+async function installMocks(bin: string) { // writes executable bash stand-ins for the codex and claude CLIs into `bin`; DEVLOOP_TEST_* variables steer what they do
+  await writeFile(
+    path.join(bin, "codex"),
+    `#!/usr/bin/env bash
+set -euo pipefail
+[[ -z "\${DEVLOOP_TEST_FAIL_CODEX:-}" ]] || exit 42
+prompt=$(cat)
+mkdir -p "$DEVLOOP_TEST_STATE"
+count=$(( $(cat "$DEVLOOP_TEST_STATE/codex-count" 2>/dev/null || echo 0) + 1 ))
+printf '%s\\n' "$count" > "$DEVLOOP_TEST_STATE/codex-count"
+printf '%s\\n' "$*" >> "$DEVLOOP_TEST_STATE/codex-args.log"
+printf '%s\\n---\\n' "$prompt" >> "$DEVLOOP_TEST_STATE/codex-prompts.log"
+track=$(printf '%s\\n' "$prompt" | awk -F': ' '/^Track: /{print $2; exit}')
+[[ -z "$track" ]] || printf '\\n## Pass %s - mock codex\\n- verification: fixture\\n' "$count" >> "$track"
+printf 'feature pass %s\\n' "$count" >> feature.txt
+printf 'codex pass %s\\n' "$count"
+printf 'To continue this session, run codex exec resume 00000000-0000-4000-8000-000000000001\\n'
+printf 'codex-tail' >&2
+`,
+    { mode: 0o755 }, // executable, so it can be found and run via PATH
+  );
+  await writeFile(
+    path.join(bin, "claude"),
+    `#!/usr/bin/env bash
+set -euo pipefail
+[[ -z "\${DEVLOOP_TEST_FAIL_CLAUDE:-}" ]] || exit 43
+prompt=$(cat)
+mkdir -p "$DEVLOOP_TEST_STATE"
+printf '%s\\n' "$*" >> "$DEVLOOP_TEST_STATE/claude-args.log"
+printf '%s\\n---\\n' "$prompt" >> "$DEVLOOP_TEST_STATE/claude-prompts.log"
+if [[ "$prompt" == *"Output path:"* ]]; then
+  [[ -z "\${DEVLOOP_TEST_NO_REVIEW:-}" ]] || exit 0
+  review_file=$(printf '%s\\n' "$prompt" | awk -F': ' '/^Output path: /{print $2; exit}')
+  count=$(( $(cat "$DEVLOOP_TEST_STATE/claude-review-count" 2>/dev/null || echo 0) + 1 ))
+  printf '%s\\n' "$count" > "$DEVLOOP_TEST_STATE/claude-review-count"
+  IFS=',' read -r -a verdicts <<< "\${DEVLOOP_TEST_VERDICTS:-ACCEPT}"
+  verdict="\${verdicts[$(( count <= \${#verdicts[@]} ? count - 1 : \${#verdicts[@]} - 1 ))]}"
+  mkdir -p "$(dirname "$review_file")"
+  {
+    printf '# Claude review %s\\n\\n' "$count"
+    [[ -n "\${DEVLOOP_TEST_NO_VERDICT:-}" ]] || printf 'Verdict: %s\\n\\n' "$verdict"
+    if [[ -z "\${DEVLOOP_TEST_NO_MATRIX:-}" ]]; then
+      printf '## Acceptance matrix\\n\\n'
+      printf -- '- AC1: PASS - mock evidence\\n\\n'
+    fi
+    printf '## Findings\\n\\n'
+    if [[ "$verdict" == "ACCEPT" ]]; then printf 'None\\n\\n'; else printf '1. [must-fix] devloop.ts:1 - repeated fixture finding. Root cause: mock review. Principle: deterministic retry behavior.\\n\\n'; fi
+    printf '## Missing tests\\n\\n- None\\n\\n## Fix instructions\\n\\n'
+    if [[ "$verdict" == "ACCEPT" ]]; then printf 'None\\n\\n'; else printf '1. Fix the repeated fixture finding.\\n\\n'; fi
+    printf '## Notes\\n\\n- None\\n'
+  } > "$review_file"
+else
+  report_file=$(printf '%s\\n' "$prompt" | sed -n 's/^Write the report to \\([^ ]*\\).*/\\1/p' | head -n 1)
+  [[ -z "$report_file" ]] || { mkdir -p "$(dirname "$report_file")"; printf '# mock devloop report\\n' > "$report_file"; }
+fi
+`,
+    { mode: 0o755 }, // prompts containing "Output path:" are treated as reviews, everything else as the report request
+  );
+}
+
+// Runs the loop on `<repo>/.specs/change.md` with the test defaults (max 1, html report, strict),
+// applies `overrides` on top, and returns the result together with every event the sink received.
+async function run(repo: string, overrides: Partial<Options> = {}) {
+  const events: Event[] = [];
+  const options: Options = { spec: path.join(repo, ".specs/change.md"), max: 1, reportFormat: "html", strict: true, cwd: repo, ...overrides };
+  const result = await runDevloop(options, { event: (event) => void events.push(event) });
+  return { result, events };
+}
+
+async function exists(file: string, expected = true) { // whether `file` can be stat'ed; with `expected` left true it also asserts that it can
+  const ok = await stat(file).then(() => true, () => false);
+  if (expected) expect(ok).toBe(true);
+  return ok;
+}
+
+async function real(file: string) { // canonical path of `file` as reported by realpath
+  return await realpath(file);
+}
diff --git a/tests/devloop_test.sh b/tests/devloop_test.sh
index 4e06387..58ac515 100755
--- a/tests/devloop_test.sh
+++ b/tests/devloop_test.sh
@@ -2,497 +2,4 @@
 set -euo pipefail
 
 ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
-DEVLOOP="$ROOT/devloop.sh"
-TMP_ROOT=${TMPDIR:-/tmp}
-TEST_TMP=$(mktemp -d "$TMP_ROOT/devloop-test.XXXXXX")
-
-total=0
-passed=0
-
-cleanup() {
-  rm -rf "$TEST_TMP"
-}
-trap cleanup EXIT
-
-fail() {
-  printf 'FAIL: %s\n' "$*" >&2
-  exit 1
-}
-
-assert_eq() {
-  local expected="$1"
-  local actual="$2"
-  local message="$3"
-
-  [[ "$actual" == "$expected" ]] || fail "$message: expected '$expected', got '$actual'"
-}
-
-assert_file_exists() {
-  local path="$1"
-
-  [[ -f "$path" ]] || fail "expected file to exist: $path"
-}
-
-assert_file_not_exists() {
-  local path="$1"
-
-  [[ ! -e "$path" ]] || fail "expected file not to exist: $path"
-}
-
-assert_contains() {
-  local needle="$1"
-  local path="$2"
-
-  grep -Fq -- "$needle" "$path" || {
-    printf '%s\n' "--- $path ---" >&2
-    sed -n '1,220p' "$path" >&2 || true
-    fail "expected '$path' to contain: $needle"
-  }
-}
-
-assert_not_contains() {
-  local needle="$1"
-  local path="$2"
-
-  ! grep -Fq -- "$needle" "$path" || fail "did not expect '$path' to contain: $needle"
-}
-
-make_repo() {
-  local name="$1"
-  local spec_name="${2:-change.md}"
-  local repo="$TEST_TMP/$name/repo"
-
-  mkdir -p "$repo/.specs"
-  git init -q "$repo"
-  git -C "$repo" symbolic-ref HEAD refs/heads/main
-  (
-    cd "$repo"
-    git config user.email "devloop-test@example.com"
-    git config user.name "devloop test"
-    printf '# Fixture\n' > README.md
-    git add README.md
-    git commit -q -m init
-  )
-
-  cat > "$repo/.specs/$spec_name" <<'EOF'
-# Fixture spec
-
-## Acceptance criteria
-1. The loop runs deterministically under test.
-EOF
-
-  printf '%s\n' "$repo"
-}
-
-install_mocks() {
-  local bin_dir="$1"
-
-  mkdir -p "$bin_dir"
-
-  cat > "$bin_dir/codex" <<'EOF'
-#!/usr/bin/env bash
-set -euo pipefail
-
-: "${DEVLOOP_TEST_STATE:?DEVLOOP_TEST_STATE is required}"
-prompt=$(cat)
-session_id="${DEVLOOP_TEST_CODEX_SESSION_ID:-00000000-0000-4000-8000-000000000001}"
-
-mkdir -p "$DEVLOOP_TEST_STATE"
-count_file="$DEVLOOP_TEST_STATE/codex-count"
-count=$(( $(cat "$count_file" 2>/dev/null || echo 0) + 1 ))
-printf '%s\n' "$count" > "$count_file"
-printf '%s\n' "$*" >> "$DEVLOOP_TEST_STATE/codex-args.log"
-printf '%s\n---\n' "$prompt" >> "$DEVLOOP_TEST_STATE/codex-prompts.log"
-
-track=$(printf '%s\n' "$prompt" | awk -F': ' '/^Track: /{print $2; exit}')
-if [[ -n "$track" ]]; then
-  {
-    printf '\n## Pass %s - mock codex\n' "$count"
-    printf -- '- changed files: fixture\n'
-    printf -- '- verification: fixture\n'
-  } >> "$track"
-fi
-
-printf 'codex pass %s\n' "$count"
-printf 'To continue this session, run codex exec resume %s\n' "$session_id"
-EOF
-
-  cat > "$bin_dir/claude" <<'EOF'
-#!/usr/bin/env bash
-set -euo pipefail
-
-: "${DEVLOOP_TEST_STATE:?DEVLOOP_TEST_STATE is required}"
-prompt=$(cat)
-
-mkdir -p "$DEVLOOP_TEST_STATE"
-total_file="$DEVLOOP_TEST_STATE/claude-total-count"
-total=$(( $(cat "$total_file" 2>/dev/null || echo 0) + 1 ))
-printf '%s\n' "$total" > "$total_file"
-printf '%s\n' "$*" >> "$DEVLOOP_TEST_STATE/claude-args.log"
-printf '%s\n---\n' "$prompt" >> "$DEVLOOP_TEST_STATE/claude-prompts.log"
-
-if [[ "$prompt" == *"Output path:"* ]]; then
-  review_file=$(printf '%s\n' "$prompt" | awk -F': ' '/^Output path: /{print $2; exit}')
-  review_count_file="$DEVLOOP_TEST_STATE/claude-review-count"
-  review_count=$(( $(cat "$review_count_file" 2>/dev/null || echo 0) + 1 ))
-  printf '%s\n' "$review_count" > "$review_count_file"
-
-  IFS=',' read -r -a verdicts <<< "${DEVLOOP_TEST_VERDICTS:-ACCEPT}"
-  if (( review_count <= ${#verdicts[@]} )); then
-    verdict="${verdicts[$((review_count - 1))]}"
-  else
-    verdict="${verdicts[$((${#verdicts[@]} - 1))]}"
-  fi
-
-  mkdir -p "$(dirname "$review_file")"
-  {
-    printf '# Claude review %s\n\n' "$review_count"
-    printf 'Verdict: %s\n\n' "$verdict"
-    printf '## Findings\n\n'
-    if [[ "$verdict" == "ACCEPT" ]]; then
-      printf 'None\n\n'
-    else
-      printf '1. [should-fix] devloop.sh:10 - repeated fixture finding. Root cause: mock review. Principle: deterministic retry behavior.\n\n'
-    fi
-    printf '## Missing tests\n\n'
-    printf -- '- None\n\n'
-    printf '## Fix instructions\n\n'
-    if [[ "$verdict" == "ACCEPT" ]]; then
-      printf 'None\n\n'
-    else
-      printf '1. Fix the repeated fixture finding.\n\n'
-    fi
-    printf '## Notes\n\n'
-    printf -- '- None\n'
-  } > "$review_file"
-else
-  report_line=$(printf '%s\n' "$prompt" | awk '/^Write the report to /{print; exit}')
-  report_file="${report_line#Write the report to }"
-  report_file="${report_file%% in this structure.*}"
-  report_file="${report_file%% in this markdown structure.*}"
-  report_file="${report_file%% as valid standalone HTML.*}"
-  [[ -n "$report_file" ]] || exit 0
-  mkdir -p "$(dirname "$report_file")"
-  {
-    printf '# mock devloop report\n\n'
-    printf 'Report synthesized by test double.\n'
-  } > "$report_file"
-fi
-EOF
-
-  chmod +x "$bin_dir/codex" "$bin_dir/claude"
-}
-
-run_devloop() {
-  local cwd="$1"
-  local stdout="$2"
-  local stderr="$3"
-  shift 3
-
-  set +e
-  (cd "$cwd" && "$BASH" "$DEVLOOP" "$@") >"$stdout" 2>"$stderr"
-  local rc=$?
-  set -e
-  return "$rc"
-}
-
-test_usage_when_spec_missing() {
-  local out="$TEST_TMP/usage.out"
-  local err="$TEST_TMP/usage.err"
-
-  set +e
-  "$BASH" "$DEVLOOP" >"$out" 2>"$err"
-  local rc=$?
-  set -e
-
-  assert_eq 2 "$rc" "missing spec exit code"
-  assert_contains "usage: devloop.sh [--report-format html|markdown] <spec.md> [max=5]" "$err"
-  assert_not_contains "claude not on PATH" "$err"
-}
-
-test_missing_claude_is_reported_before_git_setup() {
-  local work="$TEST_TMP/missing-claude"
-  local out="$work.out"
-  local err="$work.err"
-  local spec="$work/spec.md"
-
-  mkdir -p "$work"
-  printf '# Spec\n' > "$spec"
-
-  set +e
-  PATH="$work" "$BASH" "$DEVLOOP" "$spec" >"$out" 2>"$err"
-  local rc=$?
-  set -e
-
-  assert_eq 2 "$rc" "missing claude exit code"
-  assert_contains "claude not on PATH" "$err"
-}
-
-test_invalid_max_is_usage_error() {
-  local work="$TEST_TMP/invalid-max"
-  local out="$work.out"
-  local err="$work.err"
-  local spec="$work/spec.md"
-
-  mkdir -p "$work"
-  printf '# Spec\n' > "$spec"
-
-  set +e
-  "$BASH" "$DEVLOOP" "$spec" nope >"$out" 2>"$err"
-  local rc=$?
-  set -e
-
-  assert_eq 2 "$rc" "invalid max exit code"
-  assert_contains "max must be an integer between 1 and 10" "$err"
-  assert_not_contains "unbound variable" "$err"
-}
-
-test_report_alias_is_not_accepted() {
-  local work="$TEST_TMP/report-alias"
-  local out="$work.out"
-  local err="$work.err"
-  local spec="$work/spec.md"
-
-  mkdir -p "$work"
-  printf '# Spec\n' > "$spec"
-
-  set +e
-  "$BASH" "$DEVLOOP" --report markdown "$spec" >"$out" 2>"$err"
-  local rc=$?
-  set -e
-
-  assert_eq 2 "$rc" "report alias exit code"
-  assert_contains "unknown option: --report" "$err"
-}
-
-test_accept_writes_core_artifacts() {
-  local repo repo_real state bin out err rc
-  repo=$(make_repo "accept")
-  repo_real=$(cd "$repo" && pwd -P)
-  state="$TEST_TMP/accept/state"
-  bin="$TEST_TMP/accept/bin"
-  out="$TEST_TMP/accept.out"
-  err="$TEST_TMP/accept.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \
-    run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 5
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "accepted loop exit code"
-  assert_contains "result:  accepted" "$out"
-  assert_contains "passes:  1 / 5" "$out"
-  assert_file_exists "$repo/.codex/tracks/change.md"
-  assert_file_exists "$repo/.codex/reviews/change-r1.md"
-  assert_file_exists "$repo/.codex/reports/change.html"
-  assert_file_exists "$repo/.codex/sessions/change-codex.id"
-  assert_file_exists "$repo/.codex/sessions/change-claude.id"
-  assert_contains "Verdict: ACCEPT" "$repo/.codex/reviews/change-r1.md"
-  assert_contains "## Pass 1 - mock codex" "$repo/.codex/tracks/change.md"
-  assert_contains "- report-format: html" "$repo/.codex/tracks/change.md"
-  assert_contains "valid standalone HTML" "$state/claude-prompts.log"
-  assert_contains "3-5 sharp, transferable lessons" "$state/claude-prompts.log"
-  assert_contains "exec --dangerously-bypass-approvals-and-sandbox -C $repo_real -" "$state/codex-args.log"
-  assert_eq "00000000-0000-4000-8000-000000000001" "$(cat "$repo/.codex/sessions/change-codex.id")" "codex session id"
-  assert_eq 1 "$(grep -c -- '--session-id' "$state/claude-args.log")" "claude initial session count"
-  assert_eq 1 "$(grep -c -- '--resume' "$state/claude-args.log")" "claude report resume count"
-  assert_eq 1 "$(cat "$state/codex-count")" "codex call count"
-  assert_eq 1 "$(cat "$state/claude-review-count")" "claude review count"
-  assert_eq 2 "$(cat "$state/claude-total-count")" "claude total count including synthesis"
-}
-
-test_reject_then_accept_runs_fix_pass() {
-  local repo state bin out err rc
-  repo=$(make_repo "reject-accept")
-  state="$TEST_TMP/reject-accept/state"
-  bin="$TEST_TMP/reject-accept/bin"
-  out="$TEST_TMP/reject-accept.out"
-  err="$TEST_TMP/reject-accept.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="REJECT,ACCEPT" \
-    run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 3
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "reject then accept exit code"
-  assert_contains "passes:  2 / 3" "$out"
-  assert_contains "Verdict: REJECT" "$repo/.codex/reviews/change-r1.md"
-  assert_contains "Verdict: ACCEPT" "$repo/.codex/reviews/change-r2.md"
-  assert_contains "Fix only the findings in the review." "$state/codex-prompts.log"
-  assert_contains "Review: .codex/reviews/change-r1.md" "$state/codex-prompts.log"
-  assert_contains "exec resume --dangerously-bypass-approvals-and-sandbox 00000000-0000-4000-8000-000000000001 -" "$state/codex-args.log"
-  assert_eq 1 "$(grep -c -- '--session-id' "$state/claude-args.log")" "claude initial session count"
-  assert_eq 2 "$(grep -c -- '--resume' "$state/claude-args.log")" "claude resumed review and report count"
-  assert_eq 2 "$(cat "$state/codex-count")" "codex call count"
-  assert_eq 2 "$(cat "$state/claude-review-count")" "claude review count"
-}
-
-test_spec_slug_with_spaces_preserves_prior_reviews() {
-  local repo state bin out err rc
-  repo=$(make_repo "space-spec" "change with spaces.md")
-  state="$TEST_TMP/space-spec/state"
-  bin="$TEST_TMP/space-spec/bin"
-  out="$TEST_TMP/space-spec.out"
-  err="$TEST_TMP/space-spec.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="REJECT,ACCEPT" \
-    run_devloop "$repo" "$out" "$err" "$repo/.specs/change with spaces.md" 2
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "space slug loop exit code"
-  assert_file_exists "$repo/.codex/reviews/change with spaces-r1.md"
-  assert_file_exists "$repo/.codex/reviews/change with spaces-r2.md"
-  assert_contains "Prior reviews:" "$state/claude-prompts.log"
-  assert_contains "- .codex/reviews/change with spaces-r1.md" "$state/claude-prompts.log"
-  assert_contains "- .codex/reviews/change with spaces-r2.md" "$state/claude-prompts.log"
-  assert_contains "Review files:" "$state/claude-prompts.log"
-}
-
-test_invocation_repo_controls_workdir_not_spec_location() {
-  local repo repo_real spec_repo spec_path state bin out err rc
-  repo=$(make_repo "invocation-repo")
-  repo_real=$(cd "$repo" && pwd -P)
-  spec_repo=$(make_repo "spec-repo")
-  spec_path=$(cd "$spec_repo/.specs" && pwd)/change.md
-  state="$TEST_TMP/invocation-repo/state"
-  bin="$TEST_TMP/invocation-repo/bin"
-  out="$TEST_TMP/invocation-repo.out"
-  err="$TEST_TMP/invocation-repo.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \
-    run_devloop "$repo" "$out" "$err" "$spec_repo/.specs/change.md" 1
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "invocation repo exit code"
-  assert_file_exists "$repo/.codex/tracks/change.md"
-  assert_file_not_exists "$spec_repo/.codex"
-  assert_contains "- spec: $spec_path" "$repo/.codex/tracks/change.md"
-  assert_contains "- cwd: $repo_real" "$repo/.codex/tracks/change.md"
-  assert_contains "exec --dangerously-bypass-approvals-and-sandbox -C $repo_real -" "$state/codex-args.log"
-}
-
-test_markdown_report_option() {
-  local repo state bin out err rc
-  repo=$(make_repo "markdown-report")
-  state="$TEST_TMP/markdown-report/state"
-  bin="$TEST_TMP/markdown-report/bin"
-  out="$TEST_TMP/markdown-report.out"
-  err="$TEST_TMP/markdown-report.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \
-    run_devloop "$repo" "$out" "$err" --report-format markdown "$repo/.specs/change.md" 1
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "markdown report exit code"
-  assert_file_exists "$repo/.codex/reports/change.md"
-  assert_file_not_exists "$repo/.codex/reports/change.html"
-  assert_contains "report:  .codex/reports/change.md" "$out"
-  assert_contains "- report-format: markdown" "$repo/.codex/tracks/change.md"
-  assert_contains "in this markdown structure" "$state/claude-prompts.log"
-}
-
-test_repeated_reject_findings_stall_the_loop() {
-  local repo state bin out err rc
-  repo=$(make_repo "stall")
-  state="$TEST_TMP/stall/state"
-  bin="$TEST_TMP/stall/bin"
-  out="$TEST_TMP/stall.out"
-  err="$TEST_TMP/stall.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:/usr/bin:/bin" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="REJECT,REJECT,REJECT" \
-    run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 5
-  rc=$?
-  set -e
-
-  assert_eq 1 "$rc" "stalled loop exit code"
-  assert_contains "result:  stalled" "$out"
-  assert_contains "passes:  2 / 5" "$out"
-  assert_file_exists "$repo/.codex/reviews/change-r1.md"
-  assert_file_exists "$repo/.codex/reviews/change-r2.md"
-  assert_eq 2 "$(cat "$state/codex-count")" "codex call count before stall"
-  assert_eq 2 "$(cat "$state/claude-review-count")" "claude review count before stall"
-}
-
-test_max_is_clamped_to_one() {
-  local repo state bin out err rc
-  repo=$(make_repo "max-clamp")
-  state="$TEST_TMP/max-clamp/state"
-  bin="$TEST_TMP/max-clamp/bin"
-  out="$TEST_TMP/max-clamp.out"
-  err="$TEST_TMP/max-clamp.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \
-    run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 0
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "max clamp exit code"
-  assert_contains "passes:  1 / 1" "$out"
-  assert_contains "- max: 1" "$repo/.codex/tracks/change.md"
-}
-
-test_leading_zero_max_is_decimal() {
-  local repo state bin out err rc
-  repo=$(make_repo "leading-zero-max")
-  state="$TEST_TMP/leading-zero-max/state"
-  bin="$TEST_TMP/leading-zero-max/bin"
-  out="$TEST_TMP/leading-zero-max.out"
-  err="$TEST_TMP/leading-zero-max.err"
-  install_mocks "$bin"
-
-  set +e
-  PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \
-    run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 08
-  rc=$?
-  set -e
-
-  assert_eq 0 "$rc" "leading zero max exit code"
-  assert_contains "passes:  1 / 8" "$out"
-  assert_contains "- max: 8" "$repo/.codex/tracks/change.md"
-}
-
-run_test() {
-  local name="$1"
-
-  total=$((total + 1))
-  printf 'test %s ... ' "$name"
-  if ( "$name" ); then
-    passed=$((passed + 1))
-    printf 'ok\n'
-  else
-    printf 'not ok\n'
-    return 1
-  fi
-}
-
-run_test test_usage_when_spec_missing
-run_test test_missing_claude_is_reported_before_git_setup
-run_test test_invalid_max_is_usage_error
-run_test test_report_alias_is_not_accepted
-run_test test_accept_writes_core_artifacts
-run_test test_reject_then_accept_runs_fix_pass
-run_test test_spec_slug_with_spaces_preserves_prior_reviews
-run_test test_invocation_repo_controls_workdir_not_spec_location
-run_test test_markdown_report_option
-run_test test_repeated_reject_findings_stall_the_loop
-run_test test_max_is_clamped_to_one
-run_test test_leading_zero_max_is_decimal
-
-printf '\n%d/%d tests passed\n' "$passed" "$total"
+exec bun test "$ROOT/tests/devloop.test.ts"
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 0000000..b1f1259
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,14 @@
+{
+  "compilerOptions": {
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "module": "Preserve",
+    "moduleResolution": "Bundler",
+    "noEmit": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "target": "ES2022",
+    "types": ["bun-types"]
+  },
+  "include": ["src/**/*.ts", "tests/**/*.ts"]
+}

From c5401315dbda4aa1902965373ef2179e72f22fce Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Mon, 25 May 2026 17:39:26 +1000
Subject: [PATCH 02/11] chore: document bun devloop workflow

---
 .gitignore |   2 +
 README.md  | 128 ++++++++++++++++++++++++-----------------------------
 2 files changed, 60 insertions(+), 70 deletions(-)

diff --git a/.gitignore b/.gitignore
index f99b306..30726a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .codex/
 .specs/
 .DS_Store
+coverage/
+node_modules/
diff --git a/README.md b/README.md
index 1b87a4c..292c6e9 100644
--- a/README.md
+++ b/README.md
@@ -1,112 +1,100 @@
 # devloop
 
-Spec in, accepted code out. Codex implements, Claude reviews, loop until ACCEPT, stall, or max turns. One bash file.
+Spec in, accepted code out. Codex implements, Claude reviews, loop until ACCEPT, stall, unclear, error, or max turns.
 
+```sh
+devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5]
+bun src/cli.ts [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5]
 ```
-devloop.sh [--report-format html|markdown] path/to/spec.md [max=5]
-```
-
-## Why
 
-Skills-as-orchestrators drift. The LLM driver has discretion to skip steps and often does, especially under load. A shell state machine cannot. devloop is the same workflow as the [`/devloop`](https://github.com/anthropics/claude-code) skill it replaces, minus the discretion.
+## Defaults
 
-What stays in the LLMs (because they are good at it):
-- Codex: implementation, design decisions, fix passes
-- Claude: review judgment, verdict, final synthesis
+- strict mode is on
+- HTML report output is on
+- max turns defaults to 5 and is clamped to 1-10
+- interactive terminals use the OpenTUI view
+- non-TTY runs use plain output
+- accepted runs create a local `devloop/<slug>` branch and a local Conventional Commit
 
-What moves to bash (because the LLM does not need discretion here):
-- Sequencing the loop
-- Spawning each agent headless
-- Reusing one Codex implementation session and one Claude review session
-- Parsing the verdict
-- Detecting stalls
-- File path conventions
-- Stopping at max turns
+Use `--plain` for CI or debugging. Use `--tui` to force the collapsed terminal UI. Use `--no-strict` only when you deliberately want to bypass strict acceptance-gate behavior.
 
-## Quick start
+## Strict Mode
 
-Prereqs on PATH: `claude`, `codex`, `git`.
-
-```sh
-# 1. write a spec
-cat > .specs/add-foo-flag.md <<'EOF'
-# Add foo flag to bar config
+Strict mode requires the spec to contain:
 
+```md
 ## Acceptance criteria
 1. ...
-EOF
-
-# 2. loop
-./devloop.sh .specs/add-foo-flag.md
 ```
 
-Defaults to unattended (`--dangerously-bypass-approvals-and-sandbox` for codex, `--dangerously-skip-permissions` for claude). Run inside a git worktree, not your main checkout.
+Codex is prompted to follow a regression-first lifecycle: tests first, red phase when behavior changes, smallest implementation, targeted tests, full tests, lint/typecheck, and 100% coverage when the target project exposes coverage tooling.
 
-The implementation worktree is resolved from the directory where you invoke `devloop.sh`, not from the spec file's location. The spec can live elsewhere; Codex and Claude are pointed at the current worktree.
+Claude must write an acceptance matrix:
 
-## The loop
+```md
+## Acceptance matrix
 
-```
-pass 1: codex implements against spec
-        claude reviews → ACCEPT | REJECT | UNCLEAR
-pass N: codex fixes findings from review N-1
-        claude reviews
-exit:   ACCEPT          → 0
-        stall | max | unclear → 1
-        codex/claude error    → 2
+- AC1: PASS - evidence
 ```
 
-Stall = normalized findings hash matches the prior REJECT.
+`Verdict: ACCEPT` is only honored in strict mode when every parsed acceptance criterion has a passing matrix row. Missing evidence becomes `unclear`.
 
-## Sessions
+## Local Commit
 
-Each spec slug gets two persisted sessions:
+On `accepted`, devloop creates or reuses a local branch:
 
+```text
+devloop/<spec-slug>
 ```
-.codex/sessions/<slug>-codex.id
-.codex/sessions/<slug>-claude.id
+
+It commits only files that were not already dirty when the run started, and it excludes `.codex/` artifacts from the commit. The generated commit message uses a Conventional Commit type:
+
+```text
+feat: <spec-slug>
 ```
 
-Pass 1 starts the Codex implementation session and records the resumable session ID. Later fix passes call `codex exec resume <session-id>`, so Codex keeps the implementation context. Claude uses one review session for every review pass and the final report.
+No push or PR is performed.
 
 ## Artifacts
 
-```
-.codex/tracks/<slug>.md        codex's running notes per pass
-.codex/reviews/<slug>-r<N>.md  one per review turn
-.codex/reports/<slug>.html     synthesized post-mortem by default
-.codex/reports/<slug>.md       synthesized post-mortem with --report-format markdown
-.codex/logs/                   raw agent stdout for debugging
+```text
+.codex/tracks/<slug>.md
+.codex/reviews/<slug>-r<N>.md
+.codex/reports/<slug>.html
+.codex/reports/<slug>.md
+.codex/logs/
+.codex/sessions/
 ```
 
-## Tests
+Report format stays deliberately narrow:
 
 ```sh
-./tests/devloop_test.sh
+devloop --report-format html .specs/change.md
+devloop --report-format markdown .specs/change.md
+devloop --md .specs/change.md
 ```
 
-The tests run the shell state machine against temporary git repos with mocked `codex` and `claude` commands, so they do not call either agent.
+Reports include top-level metadata: result, passes, repository, spec, base branch, starting branch, final branch, local commit, commit message, Codex session ID, Claude session ID, track, and review files.
 
-## The report
+## Sessions
 
-Not a mechanical concat. Claude is called one more time in the same review session with the spec + track + all reviews and writes a learning-oriented post-mortem:
+Each spec slug gets one Codex session and one Claude session:
 
-- **Shape of the problem** — what the spec really asked for, alternatives ruled out
-- **What was built** — design choices and the tradeoffs weighed
-- **What review caught (and why it mattered)** — symptom → root cause → principle violated, grouped into patterns
-- **What to remember next time** — transferable lessons in `When X, prefer Y because Z` form
-- **Residual risk** — concrete, not generic
+```text
+.codex/sessions/<slug>-codex.id
+.codex/sessions/<slug>-claude.id
+```
 
-The "why" is enforced in the prompts: codex must explain decisions, claude must articulate the principle behind each finding ("if you cannot articulate the principle, the finding is too shallow — drop it or sharpen it").
+Pass 1 starts the sessions. Later fix passes resume them.
 
-## Caveats
+## Development
 
-- **Unattended = trusts both agents.** Use worktrees.
-- **Sessions persist per spec slug.** Delete the matching files in `.codex/sessions/` when you want a fresh Codex or Claude context for the same spec filename.
-- **No spec writing.** Deliberate. Write the spec yourself (or via an interview skill) and hand the path in.
-- **Stall detection is hash-based.** Cosmetic rewording of identical findings will defeat it.
-- **Base branch is auto-guessed** (`origin/HEAD` → `main` → `master`). Edit the `BASE=` line for stacked branches.
+Prereqs: `bun`, `codex`, `claude`, `git`.
 
-## License
+```sh
+bun install
+bun run typecheck
+bun test
+```
 
-MIT
+`bun test` enforces 100% line/function/statement coverage for the TypeScript core.

From a84cab7118b67d27e78e991a2571fc9c853b33b2 Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Mon, 25 May 2026 17:57:58 +1000
Subject: [PATCH 03/11] chore: clarify readme

---
 README.md | 64 +++++++++++++++++++++++--------------------------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 292c6e9..975dcd7 100644
--- a/README.md
+++ b/README.md
@@ -1,35 +1,36 @@
 # devloop
 
-Spec in, accepted code out. Codex implements, Claude reviews, loop until ACCEPT, stall, unclear, error, or max turns.
+Codex implements. Claude reviews. devloop runs the loop until the work is accepted, stalls, becomes unclear, hits max turns, or an agent fails.
 
 ```sh
-devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5]
-bun src/cli.ts [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5]
+devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] spec.md [max=5]
 ```
 
+Run from the target git worktree. The spec may live anywhere.
+
 ## Defaults
 
 - strict mode is on
-- HTML report output is on
-- max turns defaults to 5 and is clamped to 1-10
-- interactive terminals use the OpenTUI view
+- HTML reports are on
+- max turns default to 5 and clamp to 1-10
+- TTY runs use the collapsed OpenTUI view
 - non-TTY runs use plain output
-- accepted runs create a local `devloop/<slug>` branch and a local Conventional Commit
+- accepted runs create a local branch and local commit
 
-Use `--plain` for CI or debugging. Use `--tui` to force the collapsed terminal UI. Use `--no-strict` only when you deliberately want to bypass strict acceptance-gate behavior.
+Use `--plain` for CI. Use `--tui` to force the TUI. Use `--no-strict` only when you explicitly want weaker gates.
 
-## Strict Mode
+## Strict Acceptance
 
-Strict mode requires the spec to contain:
+Strict mode requires:
 
 ```md
 ## Acceptance criteria
 1. ...
 ```
 
-Codex is prompted to follow a regression-first lifecycle: tests first, red phase when behavior changes, smallest implementation, targeted tests, full tests, lint/typecheck, and 100% coverage when the target project exposes coverage tooling.
+Codex is prompted to work regression-first: add or update tests, observe the red phase when behavior changes, implement the smallest fix, then run targeted tests, full tests, lint/typecheck, and coverage.
 
-Claude must write an acceptance matrix:
+Claude must write:
 
 ```md
 ## Acceptance matrix
@@ -37,25 +38,9 @@ Claude must write an acceptance matrix:
 - AC1: PASS - evidence
 ```
 
-`Verdict: ACCEPT` is only honored in strict mode when every parsed acceptance criterion has a passing matrix row. Missing evidence becomes `unclear`.
-
-## Local Commit
+In strict mode, `Verdict: ACCEPT` only counts when every parsed criterion has a `PASS` matrix row. Missing evidence exits as `unclear`.
 
-On `accepted`, devloop creates or reuses a local branch:
-
-```text
-devloop/<spec-slug>
-```
-
-It commits only files that were not already dirty when the run started, and it excludes `.codex/` artifacts from the commit. The generated commit message uses a Conventional Commit type:
-
-```text
-feat: <spec-slug>
-```
-
-No push or PR is performed.
-
-## Artifacts
+## Output
 
 ```text
 .codex/tracks/<slug>.md
@@ -66,7 +51,7 @@ No push or PR is performed.
 .codex/sessions/
 ```
 
-Report format stays deliberately narrow:
+Reports can be HTML or Markdown:
 
 ```sh
 devloop --report-format html .specs/change.md
@@ -74,18 +59,23 @@ devloop --report-format markdown .specs/change.md
 devloop --md .specs/change.md
 ```
 
-Reports include top-level metadata: result, passes, repository, spec, base branch, starting branch, final branch, local commit, commit message, Codex session ID, Claude session ID, track, and review files.
+Reports include result, passes, repo, spec, base branch, starting branch, final branch, local commit, commit message, Codex session ID, Claude session ID, track, and review files.
+
+## Local Commit
+
+On `accepted`, devloop creates or reuses:
 
-## Sessions
+```text
+devloop/<spec-slug>
+```
 
-Each spec slug gets one Codex session and one Claude session:
+It commits only files that were clean when the run started and excludes `.codex/`. Commit messages are Conventional Commit style:
 
 ```text
-.codex/sessions/<slug>-codex.id
-.codex/sessions/<slug>-claude.id
+feat: <spec-slug>
 ```
 
-Pass 1 starts the sessions. Later fix passes resume them.
+devloop does not push or open a PR.
 
 ## Development
 

From cfbfea0bcffaee11310178f76f4a72a73557eb37 Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Mon, 25 May 2026 18:06:21 +1000
Subject: [PATCH 04/11] chore: add local installer

---
 README.md          | 12 +++++++++++-
 package.json       |  1 +
 scripts/install.ts | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 scripts/install.ts

diff --git a/README.md b/README.md
index 975dcd7..40194d5 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,16 @@ devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] spec.md [m
 
 Run from the target git worktree. The spec may live anywhere.
 
+## Install
+
+From this checkout:
+
+```sh
+bun scripts/install.ts
+```
+
+That installs dependencies and links `devloop` into `~/.local/bin`. Set `DEVLOOP_BIN_DIR` to choose another bin directory.
+
 ## Defaults
 
 - strict mode is on
@@ -82,7 +92,7 @@ devloop does not push or open a PR.
 Prereqs: `bun`, `codex`, `claude`, `git`.
 
 ```sh
-bun install
+bun scripts/install.ts
 bun run typecheck
 bun test
 ```
diff --git a/package.json b/package.json
index e2061ac..2c2799f 100644
--- a/package.json
+++ b/package.json
@@ -6,6 +6,7 @@
     "devloop": "./src/cli.ts"
   },
   "scripts": {
+    "install:local": "bun scripts/install.ts",
     "test": "bun test",
     "typecheck": "tsc --noEmit"
   },
diff --git a/scripts/install.ts b/scripts/install.ts
new file mode 100644
index 0000000..34ab3ea
--- /dev/null
+++ b/scripts/install.ts
@@ -0,0 +1,42 @@
+#!/usr/bin/env bun
+import { chmod, mkdir, readlink, rm, symlink } from "node:fs/promises";
+import { homedir } from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); // parent of the directory containing this script
+const cli = path.join(root, "src", "cli.ts"); // CLI entry point the `devloop` link will target
+const binDir = process.env.DEVLOOP_BIN_DIR ?? path.join(homedir(), ".local", "bin"); // DEVLOOP_BIN_DIR wins; otherwise ~/.local/bin
+const link = path.join(binDir, "devloop");
+
+await run(["bun", "install"], root); // run() exits the process if `bun install` returns non-zero
+await mkdir(binDir, { recursive: true });
+await chmod(cli, 0o755); // make the CLI script executable
+
+const existing = await readlink(link).catch(() => ""); // "" when readlink fails for any reason (e.g. nothing at `link`)
+if (existing && path.resolve(binDir, existing) === cli) { // a relative link target is resolved against binDir before comparing
+  console.log(`devloop already points to ${cli}`);
+} else {
+  await rm(link, { force: true }); // replace whatever is currently at `link`; force ignores a missing path
+  await symlink(cli, link);
+  console.log(`installed devloop -> ${cli}`);
+}
+
+if (!pathInEnv(binDir)) { // only a hint is printed; the script does not edit any shell profile
+  console.log("");
+  console.log(`${binDir} is not on PATH. Add this to ~/.zshrc:`);
+  console.log(`export PATH="${binDir}:$PATH"`);
+}
+
+console.log("");
+console.log("try: devloop --help");
+
+async function run(cmd: string[], cwd: string) { // spawns `cmd` in `cwd` with inherited stdout/stderr and waits for it to finish
+  const proc = Bun.spawn(cmd, { cwd, stdout: "inherit", stderr: "inherit" });
+  const code = await proc.exited;
+  if (code !== 0) process.exit(code); // on failure, stop the installer with the child's exit code
+}
+
+function pathInEnv(dir: string) { // true when `dir` names the same directory as some PATH entry (PATH unset counts as empty)
+  return (process.env.PATH ?? "").split(path.delimiter).some((entry) => path.resolve(entry) === path.resolve(dir)); // resolve both sides so a relative or trailing-slash `dir` still matches
+}

From 4e551816a2446ac041100b22c988eaa0e3e5a398 Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Mon, 25 May 2026 18:24:45 +1000
Subject: [PATCH 05/11] feat: show default cli welcome

---
 README.md             |  1 +
 src/cli.ts            |  7 ++++++-
 src/devloop.ts        | 31 +++++++++++++++++++++++++++++++
 src/tui.ts            | 12 ++----------
 tests/devloop.test.ts |  9 ++++++++-
 5 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 40194d5..9bc5974 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ That installs dependencies and links `devloop` into `~/.local/bin`. Set `DEVLOOP
 - TTY runs use the collapsed OpenTUI view
 - non-TTY runs use plain output
 - accepted runs create a local branch and local commit
+- no-arg `devloop` shows the logo and common commands
 
 Use `--plain` for CI. Use `--tui` to force the TUI. Use `--no-strict` only when you explicitly want weaker gates.
 
diff --git a/src/cli.ts b/src/cli.ts
index 4dfe73b..38c17c0 100755
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -1,8 +1,13 @@
 #!/usr/bin/env bun
-import { parseArgs, runDevloop, type Event, type Sink } from "./devloop.ts";
+import { parseArgs, runDevloop, welcome, type Event, type Sink } from "./devloop.ts";
 import { createTuiSink } from "./tui.ts";
 
 const argv = process.argv.slice(2);
+if (argv.length === 0 || argv.includes("-h") || argv.includes("--help")) {
+  console.log(welcome());
+  process.exit(0);
+}
+
 const parsed = parseArgs(argv);
 
 if (typeof parsed === "string") {
diff --git a/src/devloop.ts b/src/devloop.ts
index f5b952f..ea49be3 100644
--- a/src/devloop.ts
+++ b/src/devloop.ts
@@ -48,6 +48,37 @@ export type Sink = { event(event: Event): void | Promise<void>; close?(): void |
 type RunResult = { code: number; output: string };
 type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise<RunResult>;
 
+export const LOGO = [
+  "   ▐▌▗▞▀▚▖▄   ▄ █  ▄▄▄   ▄▄▄  ▄▄▄▄  ",
+  "   ▐▌▐▛▀▀▘█   █ █ █   █ █   █ █   █ ",
+  "▗▞▀▜▌▝▚▄▄▖ ▀▄▀  █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ",
+  "▝▚▄▟▌           █             █     ",
+  "                              ▀",
+].join("\n");
+
+export function welcome() {
+  return `${LOGO}
+
+devloop runs a strict Codex implement -> Claude review loop.
+
+Usage:
+  devloop [options] <spec.md> [max=5]
+
+Common commands:
+  devloop .specs/change.md
+  devloop --tui .specs/change.md
+  devloop --plain .specs/change.md
+  devloop --report-format markdown .specs/change.md 3
+  bun scripts/install.ts
+
+Options:
+  --tui                         force the collapsed TUI
+  --plain                       force plain output
+  --report-format html|markdown choose report format
+  --no-strict                   weaken acceptance gates
+  -h, --help                    show this screen`;
+}
+
 export function parseArgs(argv: string[], cwd = process.cwd()): Options | string {
   let reportFormat: ReportFormat = "html";
   let strict = true;
diff --git a/src/tui.ts b/src/tui.ts
index 0403fe2..abef745 100644
--- a/src/tui.ts
+++ b/src/tui.ts
@@ -1,15 +1,7 @@
-import type { Event, Result, Sink } from "./devloop.ts";
+import { LOGO, type Event, type Result, type Sink } from "./devloop.ts";
 
 type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean };
 
-const LOGO = [
-  "   ▐▌▗▞▀▚▖▄   ▄ █  ▄▄▄   ▄▄▄  ▄▄▄▄  ",
-  "   ▐▌▐▛▀▀▘█   █ █ █   █ █   █ █   █ ",
-  "▗▞▀▜▌▝▚▄▄▖ ▀▄▀  █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ",
-  "▝▚▄▟▌           █             █     ",
-  "                              ▀",
-];
-
 export async function createTuiSink(): Promise<Sink> {
   const { TextRenderable, createCliRenderer } = await import("@opentui/core");
   const renderer = await createCliRenderer({ exitOnCtrlC: true, consoleMode: "disabled", screenMode: "alternate-screen" });
@@ -56,7 +48,7 @@ export function view(rows: Row[], selected: number, result?: Result) {
     return item.open ? [head, ...item.lines.slice(-80).map((line) => `      ${line}`)] : [head];
   });
   const tail = result ? ["", `result:  ${result.status}`, `passes:  ${result.passes} / ${result.max}`, `branch:  ${result.branch}`, `commit:  ${result.commit || "none"}`, `report:  ${result.report}`, `track:   ${result.track}`] : ["", "enter toggles logs, j/k moves"];
-  return [...LOGO, "", ...body, ...tail].join("\n");
+  return [LOGO, "", ...body, ...tail].join("\n");
 }
 
 function row(rows: Row[], id: string) {
diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts
index b1bd610..0a93651 100644
--- a/tests/devloop.test.ts
+++ b/tests/devloop.test.ts
@@ -2,7 +2,7 @@ import { afterAll, beforeEach, describe, expect, test } from "bun:test";
 import { mkdtemp, readFile, realpath, rm, stat, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import path from "node:path";
-import { parseArgs, parseCriteria, parseVerdict, runDevloop, type Event, type Options } from "../src/devloop.ts";
+import { parseArgs, parseCriteria, parseVerdict, runDevloop, welcome, type Event, type Options } from "../src/devloop.ts";
 
 const root = await mkdtemp(path.join(tmpdir(), "devloop-test."));
 let oldPath = process.env.PATH ?? "";
@@ -40,6 +40,13 @@ describe("parsing", () => {
     expect(parseCriteria("# Spec")).toEqual([]);
     expect(parseVerdict("Verdict: ACCEPT\n")).toBe("ACCEPT");
   });
+
+  test("renders a useful default screen", () => {
+    expect(welcome()).toContain("▐▌▗▞▀▚▖");
+    expect(welcome()).toContain("Common commands:");
+    expect(welcome()).toContain("devloop .specs/change.md");
+    expect(welcome()).toContain("bun scripts/install.ts");
+  });
 });
 
 describe("loop", () => {

From 593a0d31af1f88f465312e1928d114e58004dbd4 Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Tue, 26 May 2026 12:54:40 +1000
Subject: [PATCH 06/11] chore: update devloop logo

---
 src/devloop.ts        | 11 ++++++-----
 tests/devloop.test.ts |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/devloop.ts b/src/devloop.ts
index ea49be3..7245e00 100644
--- a/src/devloop.ts
+++ b/src/devloop.ts
@@ -49,11 +49,12 @@ type RunResult = { code: number; output: string };
 type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise<RunResult>;
 
 export const LOGO = [
-  "   ▐▌▗▞▀▚▖▄   ▄ █  ▄▄▄   ▄▄▄  ▄▄▄▄  ",
-  "   ▐▌▐▛▀▀▘█   █ █ █   █ █   █ █   █ ",
-  "▗▞▀▜▌▝▚▄▄▖ ▀▄▀  █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ",
-  "▝▚▄▟▌           █             █     ",
-  "                              ▀",
+  "       __          __                ",
+  "  ____/ /__ _   __/ /___  ____  ____ ",
+  " / __  / _ \\ | / / / __ \\/ __ \\/ __ \\",
+  "/ /_/ /  __/ |/ / / /_/ / /_/ / /_/ /",
+  "\\__,_/\\___/|___/_/\\____/\\____/ .___/ ",
+  "                            /_/",
 ].join("\n");
 
 export function welcome() {
diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts
index 0a93651..b0e64f1 100644
--- a/tests/devloop.test.ts
+++ b/tests/devloop.test.ts
@@ -42,7 +42,7 @@ describe("parsing", () => {
   });
 
   test("renders a useful default screen", () => {
-    expect(welcome()).toContain("▐▌▗▞▀▚▖");
+    expect(welcome()).toContain("____/ /__");
     expect(welcome()).toContain("Common commands:");
     expect(welcome()).toContain("devloop .specs/change.md");
     expect(welcome()).toContain("bun scripts/install.ts");

From 95b9e333d83d3b50dc5d09a233c90385e3b1d7fd Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Tue, 26 May 2026 13:06:45 +1000
Subject: [PATCH 07/11] chore: format devloop source

---
 src/devloop.ts | 482 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 414 insertions(+), 68 deletions(-)

diff --git a/src/devloop.ts b/src/devloop.ts
index 7245e00..d65293d 100644
--- a/src/devloop.ts
+++ b/src/devloop.ts
@@ -43,10 +43,19 @@ export type Event =
   | { type: "done"; id: string; ok: boolean; detail: string }
   | { type: "result"; result: Result };
 
-export type Sink = { event(event: Event): void | Promise<void>; close?(): void | Promise<void> };
+export type Sink = {
+  event(event: Event): void | Promise<void>;
+  close?(): void | Promise<void>;
+};
 
 type RunResult = { code: number; output: string };
-type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise<RunResult>;
+type Runner = (
+  cmd: string,
+  args: string[],
+  input?: string,
+  log?: string,
+  id?: string,
+) => Promise<RunResult>;
 
 export const LOGO = [
   "       __          __                ",
@@ -80,7 +89,10 @@ Options:
   -h, --help                    show this screen`;
 }
 
-export function parseArgs(argv: string[], cwd = process.cwd()): Options | string {
+export function parseArgs(
+  argv: string[],
+  cwd = process.cwd(),
+): Options | string {
   let reportFormat: ReportFormat = "html";
   let strict = true;
   let spec = "";
@@ -91,7 +103,8 @@ export function parseArgs(argv: string[], cwd = process.cwd()): Options | string
     const arg = argv[i]!;
     if (arg === "--report-format") {
       const value = argv[++i];
-      if (value !== "html" && value !== "markdown" && value !== "md") return usage();
+      if (value !== "html" && value !== "markdown" && value !== "md")
+        return usage();
       reportFormat = value === "md" ? "markdown" : value;
     } else if (arg === "--html") reportFormat = "html";
     else if (arg === "--markdown" || arg === "--md") reportFormat = "markdown";
@@ -108,8 +121,15 @@ export function parseArgs(argv: string[], cwd = process.cwd()): Options | string
   }
 
   if (!spec) return usage();
-  if (!/^[+-]?\d+$/.test(maxRaw)) return "max must be an integer between 1 and 10";
-  return { spec, max: clamp(Number.parseInt(maxRaw, 10), 1, 10), reportFormat, strict, cwd };
+  if (!/^[+-]?\d+$/.test(maxRaw))
+    return "max must be an integer between 1 and 10";
+  return {
+    spec,
+    max: clamp(Number.parseInt(maxRaw, 10), 1, 10),
+    reportFormat,
+    strict,
+    cwd,
+  };
 }
 
 export function usage() {
@@ -118,7 +138,9 @@ export function usage() {
 
 export function parseCriteria(markdown: string): string[] {
   const lines = markdown.split(/\r?\n/);
-  const start = lines.findIndex((line) => /^##\s+acceptance criteria\s*$/i.test(line.trim()));
+  const start = lines.findIndex((line) =>
+    /^##\s+acceptance criteria\s*$/i.test(line.trim()),
+  );
   if (start < 0) return [];
   const body = lines.slice(start + 1);
   const end = body.findIndex((line) => /^##\s+/.test(line));
@@ -129,18 +151,22 @@ export function parseCriteria(markdown: string): string[] {
 }
 
 export function parseVerdict(review: string): Verdict | "" {
-  return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as Verdict | "";
+  return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as
+    | Verdict
+    | "";
 }
 
 export function hasPassingMatrix(review: string, count: number) {
   if (!/^## Acceptance matrix\s*$/m.test(review)) return false;
-  return Array.from({ length: count }, (_, i) => new RegExp(`^-\\s*AC${i + 1}:\\s*PASS\\b`, "mi")).every((r) =>
-    r.test(review),
-  );
+  return Array.from(
+    { length: count },
+    (_, i) => new RegExp(`^-\\s*AC${i + 1}:\\s*PASS\\b`, "mi"),
+  ).every((r) => r.test(review));
 }
 
 export function findingsHash(review: string) {
-  const body = review.match(/^## Findings\s*\n([\s\S]*?)(?:\n##\s+|$)/m)?.[1] ?? "";
+  const body =
+    review.match(/^## Findings\s*\n([\s\S]*?)(?:\n##\s+|$)/m)?.[1] ?? "";
   const normalized = body
     .replace(/\d+/g, "")
     .replace(/[ \t\r\n]+/g, " ")
@@ -152,27 +178,56 @@ export function findingsHash(review: string) {
   return createHash("sha256").update(normalized).digest("hex");
 }
 
-export async function runDevloop(options: Options, sink: Sink = { event: () => {} }): Promise<Result> {
+export async function runDevloop(
+  options: Options,
+  sink: Sink = { event: () => {} },
+): Promise<Result> {
   const spec = await absoluteFile(options.spec, options.cwd);
   const specText = await readFile(spec, "utf8");
   const criteria = parseCriteria(specText);
-  if (options.strict && criteria.length === 0) throw new Error("strict mode requires ## Acceptance criteria");
-  await sink.event({ type: "gate", name: "acceptance criteria", ok: criteria.length > 0, detail: `${criteria.length} found` });
-
-  const repo = (await command("git", ["-C", options.cwd, "rev-parse", "--show-toplevel"])).trim();
-  const branch = (await command("git", ["-C", repo, "rev-parse", "--abbrev-ref", "HEAD"])).trim();
+  if (options.strict && criteria.length === 0)
+    throw new Error("strict mode requires ## Acceptance criteria");
+  await sink.event({
+    type: "gate",
+    name: "acceptance criteria",
+    ok: criteria.length > 0,
+    detail: `${criteria.length} found`,
+  });
+
+  const repo = (
+    await command("git", ["-C", options.cwd, "rev-parse", "--show-toplevel"])
+  ).trim();
+  const branch = (
+    await command("git", ["-C", repo, "rev-parse", "--abbrev-ref", "HEAD"])
+  ).trim();
   const base = await baseBranch(repo);
   const initialDirty = await statusPaths(repo);
   const slug = path.basename(spec, ".md");
-  const dirs = [".codex/tracks", ".codex/reviews", ".codex/reports", ".codex/logs", ".codex/sessions"];
-  await Promise.all(dirs.map((dir) => mkdir(path.join(repo, dir), { recursive: true })));
+  const dirs = [
+    ".codex/tracks",
+    ".codex/reviews",
+    ".codex/reports",
+    ".codex/logs",
+    ".codex/sessions",
+  ];
+  await Promise.all(
+    dirs.map((dir) => mkdir(path.join(repo, dir), { recursive: true })),
+  );
 
   const track = `.codex/tracks/${slug}.md`;
   const report = `.codex/reports/${slug}.${options.reportFormat === "html" ? "html" : "md"}`;
   const codexSession = `.codex/sessions/${slug}-codex.id`;
   const claudeSession = `.codex/sessions/${slug}-claude.id`;
   const runner = makeRunner(repo, sink);
-  await initTrack(path.join(repo, track), { spec, cwd: options.cwd, base, branch, max: options.max, reportFormat: options.reportFormat, strict: options.strict });
+  await initTrack(path.join(repo, track), {
+    spec,
+    cwd: options.cwd,
+    base,
+    branch,
+    max: options.max,
+    reportFormat: options.reportFormat,
+    strict: options.strict,
+  });
 
   let status: Status = "max-turns";
   let prior = "";
@@ -184,9 +239,31 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => {
   for (pass = 1; pass <= options.max; pass++) {
     const codexLog = `.codex/logs/${slug}-r${pass}-codex.log`;
     const codexId = `codex-${pass}`;
-    await sink.event({ type: "step", id: codexId, title: `pass ${pass}/${options.max} codex` });
-    const codex = await runCodex(runner, repo, path.join(repo, codexSession), path.join(repo, codexLog), codexPrompt({ spec, track, pass, strict: options.strict, previous: `.codex/reviews/${slug}-r${pass - 1}.md`, criteria }));
-    await sink.event({ type: "done", id: codexId, ok: codex, detail: codex ? "completed" : "failed" });
+    await sink.event({
+      type: "step",
+      id: codexId,
+      title: `pass ${pass}/${options.max} codex`,
+    });
+    const codex = await runCodex(
+      runner,
+      repo,
+      path.join(repo, codexSession),
+      path.join(repo, codexLog),
+      codexPrompt({
+        spec,
+        track,
+        pass,
+        strict: options.strict,
+        previous: `.codex/reviews/${slug}-r${pass - 1}.md`,
+        criteria,
+      }),
+    );
+    await sink.event({
+      type: "done",
+      id: codexId,
+      ok: codex,
+      detail: codex ? "completed" : "failed",
+    });
     if (!codex) {
       status = "codex-error";
       break;
@@ -195,9 +272,33 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => {
     const review = `.codex/reviews/${slug}-r${pass}.md`;
     const claudeLog = `.codex/logs/${slug}-r${pass}-claude.log`;
     const claudeId = `claude-${pass}`;
-    await sink.event({ type: "step", id: claudeId, title: `pass ${pass}/${options.max} claude review` });
-    const ok = await runClaude(runner, repo, path.join(repo, claudeSession), path.join(repo, claudeLog), reviewPrompt({ spec, track, base, pass, output: review, priors: listReviews(slug, pass, options.max), criteria, strict: options.strict }));
-    await sink.event({ type: "done", id: claudeId, ok, detail: ok ? "completed" : "failed" });
+    await sink.event({
+      type: "step",
+      id: claudeId,
+      title: `pass ${pass}/${options.max} claude review`,
+    });
+    const ok = await runClaude(
+      runner,
+      repo,
+      path.join(repo, claudeSession),
+      path.join(repo, claudeLog),
+      reviewPrompt({
+        spec,
+        track,
+        base,
+        pass,
+        output: review,
+        priors: listReviews(slug, pass, options.max),
+        criteria,
+        strict: options.strict,
+      }),
+    );
+    await sink.event({
+      type: "done",
+      id: claudeId,
+      ok,
+      detail: ok ? "completed" : "failed",
+    });
     if (!ok) {
       status = "claude-error";
       break;
@@ -211,9 +312,17 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => {
       break;
     }
     const verdict = parseVerdict(reviewText);
-    await sink.event({ type: "gate", name: `pass ${pass} verdict`, ok: verdict === "ACCEPT", detail: verdict || "MISSING" });
+    await sink.event({
+      type: "gate",
+      name: `pass ${pass} verdict`,
+      ok: verdict === "ACCEPT",
+      detail: verdict || "MISSING",
+    });
     if (verdict === "ACCEPT") {
-      status = options.strict && !hasPassingMatrix(reviewText, criteria.length) ? "unclear" : "accepted";
+      status =
+        options.strict && !hasPassingMatrix(reviewText, criteria.length)
+          ? "unclear"
+          : "accepted";
       break;
     }
     if (verdict === "UNCLEAR") {
@@ -236,23 +345,69 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => {
   if (pass > options.max) pass = options.max;
   if (status === "accepted") {
     const commitId = "commit";
-    await sink.event({ type: "step", id: commitId, title: "local branch and commit" });
-    const committed = await commitAccepted(repo, slug, initialDirty).catch(() => undefined);
+    await sink.event({
+      type: "step",
+      id: commitId,
+      title: "local branch and commit",
+    });
+    const committed = await commitAccepted(repo, slug, initialDirty).catch(
+      () => undefined,
+    );
     if (committed) {
       finalBranch = committed.branch;
       commit = committed.commit;
       commitMessage = committed.message;
-      await sink.event({ type: "done", id: commitId, ok: true, detail: commit ? `${finalBranch} ${commit}` : `${finalBranch} no changes` });
+      await sink.event({
+        type: "done",
+        id: commitId,
+        ok: true,
+        detail: commit
+          ? `${finalBranch} ${commit}`
+          : `${finalBranch} no changes`,
+      });
     } else {
       status = "commit-error";
-      await sink.event({ type: "done", id: commitId, ok: false, detail: "failed" });
+      await sink.event({
+        type: "done",
+        id: commitId,
+        ok: false,
+        detail: "failed",
+      });
     }
   }
 
   const codexSessionId = await readLine(path.join(repo, codexSession));
   const claudeSessionId = await readLine(path.join(repo, claudeSession));
-  await synthesizeReport(runner, repo, { slug, spec, track, report, status, pass, max: options.max, base, initialBranch: branch, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId, format: options.reportFormat, reviews: listReviews(slug, pass, options.max) });
-  const result = { status, passes: pass, max: options.max, report, track, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId };
+  await synthesizeReport(runner, repo, {
+    slug,
+    spec,
+    track,
+    report,
+    status,
+    pass,
+    max: options.max,
+    base,
+    initialBranch: branch,
+    branch: finalBranch,
+    commit,
+    commitMessage,
+    codexSessionId,
+    claudeSessionId,
+    format: options.reportFormat,
+    reviews: listReviews(slug, pass, options.max),
+  });
+  const result = {
+    status,
+    passes: pass,
+    max: options.max,
+    report,
+    track,
+    branch: finalBranch,
+    commit,
+    commitMessage,
+    codexSessionId,
+    claudeSessionId,
+  };
   await sink.event({ type: "result", result });
   return result;
 }
@@ -265,7 +420,11 @@ async function absoluteFile(file: string, cwd: string) {
 
 async function command(cmd: string, args: string[]) {
   const proc = Bun.spawn([cmd, ...args], { stdout: "pipe", stderr: "pipe" });
-  const [out, err, code] = await Promise.all([new Response(proc.stdout).text(), new Response(proc.stderr).text(), proc.exited]);
+  const [out, err, code] = await Promise.all([
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text(),
+    proc.exited,
+  ]);
   if (code !== 0) throw new Error(err.trim() || `${cmd} failed`);
   return out;
 }
@@ -276,9 +435,13 @@ async function baseBranch(repo: string) {
     ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/main"],
     ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/master"],
   ]) {
-    const proc = Bun.spawn(["git", ...args], { stdout: "pipe", stderr: "pipe" });
+    const proc = Bun.spawn(["git", ...args], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
     if ((await proc.exited) === 0) {
-      if (args[2] === "symbolic-ref") return (await new Response(proc.stdout).text()).trim().replace(/^origin\//, "");
+      if (args[2] === "symbolic-ref")
+        return (await new Response(proc.stdout).text()).trim().replace(/^origin\//, "");
       return args.at(-1)!.split("/").pop()!;
     }
   }
@@ -286,7 +449,14 @@ async function baseBranch(repo: string) {
 }
 
 async function statusPaths(repo: string) {
-  const out = await command("git", ["-C", repo, "status", "--porcelain=v1", "-z", "--untracked-files=all"]);
+  const out = await command("git", [
+    "-C",
+    repo,
+    "status",
+    "--porcelain=v1",
+    "-z",
+    "--untracked-files=all",
+  ]);
   const parts = out.split("\0").filter(Boolean);
   const paths = new Set<string>();
   for (let i = 0; i < parts.length; i++) {
@@ -302,21 +472,49 @@ async function statusPaths(repo: string) {
   return paths;
 }
 
-async function commitAccepted(repo: string, slug: string, initialDirty: Set<string>) {
-  const current = (await command("git", ["-C", repo, "branch", "--show-current"])).trim();
+async function commitAccepted(
+  repo: string,
+  slug: string,
+  initialDirty: Set<string>,
+) {
+  const current = (
+    await command("git", ["-C", repo, "branch", "--show-current"])
+  ).trim();
   const branch = await nextBranch(repo, slug, current);
   const message = `feat: ${slugify(slug)}`;
-  if (branch !== current) await command("git", ["-C", repo, "switch", "-c", branch]);
-  const changed = [...(await statusPaths(repo))].filter((file) => !initialDirty.has(file) && !file.startsWith(".codex/"));
+  if (branch !== current)
+    await command("git", ["-C", repo, "switch", "-c", branch]);
+  const changed = [...(await statusPaths(repo))].filter(
+    (file) => !initialDirty.has(file) && !file.startsWith(".codex/"),
+  );
   if (changed.length === 0) return { branch, commit: "", message };
   await command("git", ["-C", repo, "add", "--", ...changed]);
-  await command("git", ["-C", repo, "commit", "--only", "-m", message, "--", ...changed]);
-  return { branch, commit: (await command("git", ["-C", repo, "rev-parse", "--short", "HEAD"])).trim(), message };
+  await command("git", [
+    "-C",
+    repo,
+    "commit",
+    "--only",
+    "-m",
+    message,
+    "--",
+    ...changed,
+  ]);
+  return {
+    branch,
+    commit: (
+      await command("git", ["-C", repo, "rev-parse", "--short", "HEAD"])
+    ).trim(),
+    message,
+  };
 }
 
 async function nextBranch(repo: string, slug: string, current: string) {
   const base = `devloop/${slugify(slug)}`;
-  if (current === base || new RegExp(`^${escapeRegex(base)}-\\d+$`).test(current)) return current;
+  if (
+    current === base ||
+    new RegExp(`^${escapeRegex(base)}-\\d+$`).test(current)
+  )
+    return current;
   let suffix = 1;
   let branch = base;
   while (await branchExists(repo, branch)) {
@@ -327,7 +525,15 @@ async function nextBranch(repo: string, slug: string, current: string) {
 }
 
 async function branchExists(repo: string, branch: string) {
-  const proc = Bun.spawn(["git", "-C", repo, "show-ref", "--verify", "--quiet", `refs/heads/${branch}`]);
+  const proc = Bun.spawn([
+    "git",
+    "-C",
+    repo,
+    "show-ref",
+    "--verify",
+    "--quiet",
+    `refs/heads/${branch}`,
+  ]);
   return (await proc.exited) === 0;
 }
 
@@ -335,7 +541,13 @@ function makeRunner(cwd: string, sink: Sink): Runner {
   return async (cmd, args, input = "", log, id) => {
     let proc: Bun.Subprocess<"pipe", "pipe", "pipe">;
     try {
-      proc = Bun.spawn([cmd, ...args], { cwd, stdin: "pipe", stdout: "pipe", stderr: "pipe", env: Bun.env });
+      proc = Bun.spawn([cmd, ...args], {
+        cwd,
+        stdin: "pipe",
+        stdout: "pipe",
+        stderr: "pipe",
+        env: Bun.env,
+      });
     } catch (error) {
       const output = error instanceof Error ? error.message : String(error);
       if (log) await writeFile(log, output);
@@ -356,17 +568,34 @@ function makeRunner(cwd: string, sink: Sink): Runner {
         pending += text;
         const lines = pending.split(/\r?\n/);
         pending = lines.pop() ?? "";
-        if (id) for (const line of lines.filter(Boolean)) await sink.event({ type: "log", id, line });
+        if (id)
+          for (const line of lines.filter(Boolean))
+            await sink.event({ type: "log", id, line });
       }
       if (id && pending) await sink.event({ type: "log", id, line: pending });
     };
-    const [, , code] = await Promise.all([pump(proc.stdout), pump(proc.stderr), proc.exited]);
+    const [, , code] = await Promise.all([
+      pump(proc.stdout),
+      pump(proc.stderr),
+      proc.exited,
+    ]);
     if (log) await writeFile(log, output);
     return { code, output };
   };
 }
 
-async function initTrack(file: string, data: { spec: string; cwd: string; base: string; branch: string; max: number; reportFormat: ReportFormat; strict: boolean }) {
+async function initTrack(
+  file: string,
+  data: {
+    spec: string;
+    cwd: string;
+    base: string;
+    branch: string;
+    max: number;
+    reportFormat: ReportFormat;
+    strict: boolean;
+  },
+) {
   if (await stat(file).catch(() => false)) return;
   await writeFile(
     file,
@@ -375,19 +604,39 @@ async function initTrack(file: string, data: { spec: string; cwd: string; base:
 }
 
 async function readLine(file: string) {
-  return (await readFile(file, "utf8").catch(() => "")).split(/\r?\n/, 1)[0] ?? "";
+  return (
+    (await readFile(file, "utf8").catch(() => "")).split(/\r?\n/, 1)[0] ?? ""
+  );
 }
 
 async function writeLine(file: string, value: string) {
   await writeFile(file, `${value}\n`);
 }
 
-async function runCodex(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) {
+async function runCodex(
+  runner: Runner,
+  repo: string,
+  sessionFile: string,
+  log: string,
+  prompt: string,
+) {
   const session = await readLine(sessionFile);
   const args = session
-    ? ["exec", "resume", "--dangerously-bypass-approvals-and-sandbox", session, "-"]
+    ? [
+        "exec",
+        "resume",
+        "--dangerously-bypass-approvals-and-sandbox",
+        session,
+        "-",
+      ]
     : ["exec", "--dangerously-bypass-approvals-and-sandbox", "-C", repo, "-"];
-  const result = await runner("codex", args, prompt, log, log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex");
+  const result = await runner(
+    "codex",
+    args,
+    prompt,
+    log,
+    log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex",
+  );
   if (result.code !== 0) return false;
   if (!session) {
     const next = extractSessionId(result.output);
@@ -397,13 +646,39 @@ async function runCodex(runner: Runner, repo: string, sessionFile: string, log:
   return true;
 }
 
-async function runClaude(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) {
+async function runClaude(
+  runner: Runner,
+  repo: string,
+  sessionFile: string,
+  log: string,
+  prompt: string,
+) {
   const session = await readLine(sessionFile);
   const next = session || randomUUID();
   const args = session
-    ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo]
-    : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo];
-  const result = await runner("claude", args, prompt, log, log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report");
+    ? [
+        "-p",
+        "--resume",
+        session,
+        "--dangerously-skip-permissions",
+        "--add-dir",
+        repo,
+      ]
+    : [
+        "-p",
+        "--session-id",
+        next,
+        "--dangerously-skip-permissions",
+        "--add-dir",
+        repo,
+      ];
+  const result = await runner(
+    "claude",
+    args,
+    prompt,
+    log,
+    log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report",
+  );
   if (result.code !== 0) return false;
   if (!session) await writeLine(sessionFile, next);
   return true;
@@ -412,21 +687,38 @@ async function runClaude(runner: Runner, repo: string, sessionFile: string, log:
 function extractSessionId(output: string) {
   return output
     .split(/\r?\n/)
-    .filter((line) => /(session.?id|thread_id|codex exec resume|codex resume|To continue this session)/i.test(line))
+    .filter((line) =>
+      /(session.?id|thread_id|codex exec resume|codex resume|To continue this session)/i.test(
+        line,
+      ),
+    )
     .join("\n")
     .match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i)?.[0]
     .toLowerCase();
 }
 
 function listReviews(slug: string, upto: number, max: number) {
-  return Array.from({ length: Math.min(upto, max) }, (_, i) => `- .codex/reviews/${slug}-r${i + 1}.md`).join("\n");
+  return Array.from(
+    { length: Math.min(upto, max) },
+    (_, i) => `- .codex/reviews/${slug}-r${i + 1}.md`,
+  ).join("\n");
 }
 
 function criteriaBlock(criteria: string[]) {
-  return criteria.map((criterion, i) => `AC${i + 1}: ${criterion}`).join("\n") || "No parsed acceptance criteria.";
+  return (
+    criteria.map((criterion, i) => `AC${i + 1}: ${criterion}`).join("\n") ||
+    "No parsed acceptance criteria."
+  );
 }
 
-function codexPrompt(input: { spec: string; track: string; pass: number; strict: boolean; previous: string; criteria: string[] }) {
+function codexPrompt(input: {
+  spec: string;
+  track: string;
+  pass: number;
+  strict: boolean;
+  previous: string;
+  criteria: string[];
+}) {
   const strict = input.strict
     ? "\nStrict lifecycle:\n1. Add or update regression tests before implementation.\n2. Run the narrow test first and record the failing result, unless impossible; if impossible, say why.\n3. Implement the smallest change.\n4. Run targeted tests, full tests, lint/typecheck, and coverage. Coverage must be 100% when the project exposes coverage tooling.\n"
     : "";
@@ -435,11 +727,41 @@ function codexPrompt(input: { spec: string; track: string; pass: number; strict:
     : `Fix only the findings in the review. Do not refactor unrelated code.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nReview: ${input.previous}\nPass: ${input.pass}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}${strict}\nTasks:\n1. Read the review file.\n2. Fix each finding or explain why it is wrong in the track.\n3. Re-run relevant tests.\n4. Append "## Pass ${input.pass} - fix" to ${input.track} with per-finding outcomes.\n`;
 }
 
-function reviewPrompt(input: { spec: string; track: string; base: string; pass: number; output: string; priors: string; criteria: string[]; strict: boolean }) {
+function reviewPrompt(input: {
+  spec: string;
+  track: string;
+  base: string;
+  pass: number;
+  output: string;
+  priors: string;
+  criteria: string[];
+  strict: boolean;
+}) {
   return `You are reviewing a Codex implementation. Be a senior reviewer, not a linter.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nBase: ${input.base}\nPass: ${input.pass}\nPrior reviews:\n${input.priors}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}\nOutput path: ${input.output}\n\nSteps:\n1. Read the spec and track.\n2. Run: git diff ${input.base}...HEAD\n3. Read prior reviews so you do not repeat resolved findings.\n4. Write the review to ${input.output} using this exact format:\n\n# Claude review ${input.pass}\n\nVerdict: <ACCEPT | REJECT | UNCLEAR>\n\n## Acceptance matrix\n\n- AC1: <PASS | FAIL | UNCLEAR> - <evidence>\n\n## Findings\n\n1. [severity] <file:line> - <symptom>. Root cause: <why>. Principle: <principle>.\n\n## Missing tests\n\n- <gap, or None>\n\n## Fix instructions\n\n1. <standalone instruction>\n\n## Notes\n\n- <scope, disputes, lessons, or None>\n\nRules:\n- The verdict line must appear verbatim.\n- ACCEPT requires every acceptance criterion PASS with concrete evidence.${input.strict ? "\n- ACCEPT also requires regression-test evidence, red/green evidence when behavior changed, passing full tests, and 100% coverage when coverage tooling exists." : ""}\n- For ACCEPT: Findings and Fix instructions bodies are "None".\n- Findings must explain WHY, not just WHAT.\n`;
 }
 
-async function synthesizeReport(runner: Runner, repo: string, input: { slug: string; spec: string; track: string; report: string; status: Status; pass: number; max: number; base: string; initialBranch: string; branch: string; commit: string; commitMessage: string; codexSessionId: string; claudeSessionId: string; format: ReportFormat; reviews: string }) {
+async function synthesizeReport(
+  runner: Runner,
+  repo: string,
+  input: {
+    slug: string;
+    spec: string;
+    track: string;
+    report: string;
+    status: Status;
+    pass: number;
+    max: number;
+    base: string;
+    initialBranch: string;
+    branch: string;
+    commit: string;
+    commitMessage: string;
+    codexSessionId: string;
+    claudeSessionId: string;
+    format: ReportFormat;
+    reviews: string;
+  },
+) {
   const metadata = `Result: ${input.status}
 Passes: ${input.pass} / ${input.max}
 Repository: ${repo}
@@ -458,12 +780,31 @@ ${input.reviews}`;
     input.format === "html"
       ? `Write the report to ${input.report} as valid standalone HTML. Use a readable document layout with embedded CSS, a compact metadata table at the top, and substantive sections after it. Include these visible section headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.`
       : `Write the report to ${input.report} in markdown with these headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.`;
-  const sessionFile = path.join(repo, `.codex/sessions/${input.slug}-claude.id`);
+  const sessionFile = path.join(
+    repo,
+    `.codex/sessions/${input.slug}-claude.id`,
+  );
   const session = await readLine(sessionFile);
   const next = session || randomUUID();
   await runner(
     "claude",
-    session ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo] : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo],
+    session
+      ? [
+          "-p",
+          "--resume",
+          session,
+          "--dangerously-skip-permissions",
+          "--add-dir",
+          repo,
+        ]
+      : [
+          "-p",
+          "--session-id",
+          next,
+          "--dangerously-skip-permissions",
+          "--add-dir",
+          repo,
+        ],
     `You are writing a learning-oriented post-mortem for a developer who just ran a Codex/Claude devloop.\n\nMetadata to render at the top exactly and visibly:\n${metadata}\n\nInputs:\n- spec: ${input.spec}\n- track: ${input.track}\nReview files:\n${input.reviews}\n- final status: ${input.status}\n- passes used: ${input.pass} / ${input.max}\n- base: ${input.base}, starting branch: ${input.initialBranch}, final branch: ${input.branch}, local commit: ${input.commit || "none"}\n\n${body}\n\nStyle:\n- Human readable, not ornamental.\n- Preserve useful substance over brevity.\n- Teach the why: symptom, root cause, principle, decision, tradeoff, and evidence.\n- No emoji.\n`,
     path.join(repo, `.codex/logs/${input.slug}-report.log`),
     "report",
@@ -476,7 +817,12 @@ function clamp(value: number, min: number, max: number) {
 }
 
 function slugify(value: string) {
-  return value.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "change";
+  return (
+    value
+      .toLowerCase()
+      .replace(/[^a-z0-9._-]+/g, "-")
+      .replace(/^-+|-+$/g, "") || "change"
+  );
 }
 
 function escapeRegex(value: string) {

From 9b61c9b88059eabb4858f0c3736551d0b532f74a Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Tue, 26 May 2026 13:15:25 +1000
Subject: [PATCH 08/11] chore: add spec template

---
 README.md         |  2 ++
 templates/spec.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 templates/spec.md

diff --git a/README.md b/README.md
index 9bc5974..b0168e5 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,8 @@ devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] spec.md [m
 
 Run from the target git worktree. The spec may live anywhere.
 
+Start new specs from [`templates/spec.md`](templates/spec.md), usually copied to `.specs/YYYY-MM-DD-slug.md`.
+
 ## Install
 
 From this checkout:
diff --git a/templates/spec.md b/templates/spec.md
new file mode 100644
index 0000000..6a61905
--- /dev/null
+++ b/templates/spec.md
@@ -0,0 +1,51 @@
+---
+status: draft
+type: feat|fix|chore
+created: YYYY-MM-DD
+pr: null
+---
+
+# <Concise sentence-case title>
+
+## Intent
+<State the real problem or user pain, not just the assumed solution. Include the concrete moment this hurts if known.>
+
+## Desired outcome
+<Describe the end state that would make the user say this is exactly what they meant.>
+
+## Scope
+- Touch: <paths, modules, commands, UI surfaces, or "agent to identify">
+- Do not touch: <explicit exclusions>
+
+## Behavior
+Happy path:
+1. <End-to-end behavior from the user's point of view.>
+
+Edge cases and failures:
+- <Condition>: <expected behavior>
+- <Condition>: <expected behavior>
+
+## Constraints
+- Must: <hard requirement>
+- Prefer: <soft preference or existing project convention>
+- Avoid: <forbidden approach, dependency, churn, or scope creep>
+
+## Acceptance criteria
+1. <Independently verifiable criterion with observable evidence.>
+2. <Independently verifiable criterion with observable evidence.>
+
+## Test plan
+- Regression first: <test to add or update before implementation, or why not applicable>
+- Targeted: <command(s)>
+- Full: <command(s)>
+- Coverage: <100% coverage command, or explicit reason coverage tooling is not applicable>
+
+## Implementation notes
+- <Known files, design direction, compatibility constraints, or migration notes.>
+- <If a decision is unclear, ask before coding.>
+
+## Out of scope
+- <Adjacent work explicitly excluded from this change.>
+
+## Review focus
+- <Risks Claude should scrutinize: acceptance evidence, tests, edge cases, compatibility, performance, security, or maintainability.>

From 8d31ac0454c319fa96b51d6cc3a989ed133714cb Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Tue, 26 May 2026 13:37:49 +1000
Subject: [PATCH 09/11] chore: tighten spec template

---
 README.md         | 10 +++++++++
 templates/spec.md | 54 +++++++++++++++++++++--------------------------
 2 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index b0168e5..fb39405 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ Run from the target git worktree. The spec may live anywhere.
 
 Start new specs from [`templates/spec.md`](templates/spec.md), usually copied to `.specs/YYYY-MM-DD-slug.md`.
 
+The template is intentionally short: clear problem, observable outcome, tight scope, behavior examples, verifiable acceptance criteria, regression-first test plan, constraints, and only material notes.
+
 ## Install
 
 From this checkout:
@@ -101,3 +103,11 @@ bun test
 ```
 
 `bun test` enforces 100% line/function/statement coverage for the TypeScript core.
+
+## References
+
+- [ISO/IEC/IEEE 29148:2018](https://byui-cse.github.io/cse372-course/Reading/CSE372Week10-IEEE.pdf): requirements should be necessary, unambiguous, singular, feasible, verifiable, and focused on what is needed.
+- [Erdogmus, Morisio, and Torchiano, 2005](https://cs.unm.edu/~joel/cs351/paper/IEEE-Effectiveness_of_Test-First_Approach_to_Programming.pdf): test-first work formalizes functionality as tests, gives fast feedback, and supports small measurable tasks.
+- [Fucci et al., 2016](https://bura.brunel.ac.uk/bitstream/2438/14550/1/FullText.pdf): TDD benefits depend heavily on fine-grained, steady cycles, not ceremony.
+- [Rafique and Misic, 2013](https://doi.org/10.1109/TSE.2012.28) and [Bissi, Neto, and Emer, 2016](https://www.sciencedirect.com/science/article/abs/pii/S0950584916300222): TDD evidence is strongest for quality, less conclusive for productivity.
+- [Agile Alliance user stories](https://agilealliance.org/glossary/user-stories/), [Given-When-Then](https://agilealliance.org/glossary/given-when-then/), and [Cucumber Gherkin reference](https://cucumber.io/docs/gherkin/reference/): acceptance criteria are strongest when they become concrete, observable examples.
diff --git a/templates/spec.md b/templates/spec.md
index 6a61905..34d6251 100644
--- a/templates/spec.md
+++ b/templates/spec.md
@@ -5,47 +5,41 @@ created: YYYY-MM-DD
 pr: null
 ---
 
-# <Concise sentence-case title>
+# <Concise title>
 
-## Intent
-<State the real problem or user pain, not just the assumed solution. Include the concrete moment this hurts if known.>
+## Problem
+<The real user pain or failure. Include the concrete moment this hurts.>
 
-## Desired outcome
-<Describe the end state that would make the user say this is exactly what they meant.>
+## Outcome
+<The observable end state that means this worked.>
 
 ## Scope
-- Touch: <paths, modules, commands, UI surfaces, or "agent to identify">
-- Do not touch: <explicit exclusions>
+- In: <paths, commands, APIs, UI surfaces, or behavior>
+- Out: <explicit exclusions>
 
 ## Behavior
 Happy path:
-1. <End-to-end behavior from the user's point of view.>
+1. <User/system action>
+2. <Expected observable result>
 
-Edge cases and failures:
-- <Condition>: <expected behavior>
-- <Condition>: <expected behavior>
-
-## Constraints
-- Must: <hard requirement>
-- Prefer: <soft preference or existing project convention>
-- Avoid: <forbidden approach, dependency, churn, or scope creep>
+Edge cases:
+- <Condition>: <expected result>
+- <Condition>: <expected result>
 
 ## Acceptance criteria
-1. <Independently verifiable criterion with observable evidence.>
-2. <Independently verifiable criterion with observable evidence.>
+1. <Singular, verifiable requirement with observable evidence.>
+2. <Singular, verifiable requirement with observable evidence.>
 
 ## Test plan
-- Regression first: <test to add or update before implementation, or why not applicable>
-- Targeted: <command(s)>
-- Full: <command(s)>
-- Coverage: <100% coverage command, or explicit reason coverage tooling is not applicable>
+- Red: <regression test to add/update first, or why not applicable>
+- Green: <targeted command(s)>
+- Full: <full test/typecheck/lint command(s)>
+- Coverage: <100% coverage command, or why unavailable>
 
-## Implementation notes
-- <Known files, design direction, compatibility constraints, or migration notes.>
-- <If a decision is unclear, ask before coding.>
-
-## Out of scope
-- <Adjacent work explicitly excluded from this change.>
+## Constraints
+- Must: <hard requirement>
+- Avoid: <forbidden approach, dependency, or churn>
+- Existing convention: <repo pattern to preserve>
 
-## Review focus
-- <Risks Claude should scrutinize: acceptance evidence, tests, edge cases, compatibility, performance, security, or maintainability.>
+## Notes
+<Only material implementation hints, risks, dependencies, migrations, or open questions.>

From 493031ab90ba593ad2bc904ec3d8c0ba1fd92b96 Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Tue, 26 May 2026 14:02:10 +1000
Subject: [PATCH 10/11] fix: surface devloop commit failures

---
 src/devloop.ts        | 29 +++++++++++++++++++++--------
 tests/devloop.test.ts |  6 ++++--
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/devloop.ts b/src/devloop.ts
index d65293d..179093c 100644
--- a/src/devloop.ts
+++ b/src/devloop.ts
@@ -151,9 +151,8 @@ export function parseCriteria(markdown: string): string[] {
 }
 
 export function parseVerdict(review: string): Verdict | "" {
-  return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as
-    | Verdict
-    | "";
+  const match = review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m);
+  return match ? (match[1] as Verdict) : "";
 }
 
 export function hasPassingMatrix(review: string, count: number) {
@@ -350,8 +349,12 @@ export async function runDevloop(
       id: commitId,
       title: "local branch and commit",
     });
+    let commitError = "";
     const committed = await commitAccepted(repo, slug, initialDirty).catch(
-      () => undefined,
+      (error) => {
+        commitError = error instanceof Error ? error.message : String(error);
+        return undefined;
+      },
     );
     if (committed) {
       finalBranch = committed.branch;
@@ -371,7 +374,7 @@ export async function runDevloop(
         type: "done",
         id: commitId,
         ok: false,
-        detail: "failed",
+        detail: commitError || "failed",
       });
     }
   }
@@ -425,7 +428,12 @@ async function command(cmd: string, args: string[]) {
     new Response(proc.stderr).text(),
     proc.exited,
   ]);
-  if (code !== 0) throw new Error(err.trim() || `${cmd} failed`);
+  if (code !== 0)
+    throw new Error(
+      err.trim() ||
+        out.trim() ||
+        `${cmd} ${args.join(" ")} failed with exit ${code}`,
+    );
   return out;
 }
 
@@ -635,7 +643,7 @@ async function runCodex(
     args,
     prompt,
     log,
-    log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex",
+    logId(log, "codex"),
   );
   if (result.code !== 0) return false;
   if (!session) {
@@ -677,13 +685,18 @@ async function runClaude(
     args,
     prompt,
     log,
-    log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report",
+    logId(log, "claude"),
   );
   if (result.code !== 0) return false;
   if (!session) await writeLine(sessionFile, next);
   return true;
 }
 
+function logId(log: string, kind: "codex" | "claude") {
+  const pass = log.match(new RegExp(`r(\\d+)-${kind}`))?.[1];
+  return pass ? `${kind}-${pass}` : kind === "codex" ? "codex" : "report";
+}
+
 function extractSessionId(output: string) {
   return output
     .split(/\r?\n/)
diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts
index b0e64f1..4859b48 100644
--- a/tests/devloop.test.ts
+++ b/tests/devloop.test.ts
@@ -39,6 +39,7 @@ describe("parsing", () => {
     expect(parseCriteria("# Spec\n\n## Acceptance criteria\n1. One\n- Two\n\n## Notes\nNope")).toEqual(["One", "Two"]);
     expect(parseCriteria("# Spec")).toEqual([]);
     expect(parseVerdict("Verdict: ACCEPT\n")).toBe("ACCEPT");
+    expect(parseVerdict("No verdict here\n")).toBe("");
   });
 
   test("renders a useful default screen", () => {
@@ -132,11 +133,12 @@ describe("loop", () => {
 
   test("reports commit errors", async () => {
     const { repo } = await fixture("commit-error");
-    await writeFile(path.join(repo, ".git/hooks/pre-commit"), "#!/usr/bin/env bash\nexit 1\n", { mode: 0o755 });
+    await writeFile(path.join(repo, ".git/hooks/pre-commit"), "#!/usr/bin/env bash\necho 'pre-commit blocked commit' >&2\nexit 1\n", { mode: 0o755 });
     process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT";
-    const { result } = await run(repo);
+    const { result, events } = await run(repo);
 
     expect(result.status).toBe("commit-error");
+    expect(events).toContainEqual({ type: "done", id: "commit", ok: false, detail: "pre-commit blocked commit" });
   });
 
   test("uses a suffixed branch when the default branch exists", async () => {

From 1478637874a20c00c41bd21126f7606be9128450 Mon Sep 17 00:00:00 2001
From: satyaborg <satya.borg@gmail.com>
Date: Tue, 26 May 2026 14:02:31 +1000
Subject: [PATCH 11/11] chore: cover tui view rendering

---
 src/tui-view.ts        | 18 +++++++++++++++
 src/tui.ts             | 20 ++---------------
 tests/tui-view.test.ts | 51 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 18 deletions(-)
 create mode 100644 src/tui-view.ts
 create mode 100644 tests/tui-view.test.ts

diff --git a/src/tui-view.ts b/src/tui-view.ts
new file mode 100644
index 0000000..93214ea
--- /dev/null
+++ b/src/tui-view.ts
@@ -0,0 +1,18 @@
+import { LOGO, type Result } from "./devloop.ts";
+
+export type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean };
+
+export function view(rows: Row[], selected: number, result?: Result) {
+  const body = rows.flatMap((item, i) => {
+    const mark = i === selected ? ">" : " ";
+    const fold = item.lines.length ? (item.open ? "[-]" : "[+]") : "   ";
+    const head = `${mark} ${icon(item.status)} ${fold} ${item.title} - ${item.detail}`;
+    return item.open ? [head, ...item.lines.slice(-80).map((line) => `      ${line}`)] : [head];
+  });
+  const tail = result ? ["", `result:  ${result.status}`, `passes:  ${result.passes} / ${result.max}`, `branch:  ${result.branch}`, `commit:  ${result.commit || "none"}`, `report:  ${result.report}`, `track:   ${result.track}`] : ["", "enter toggles logs, j/k moves"];
+  return [LOGO, "", ...body, ...tail].join("\n");
+}
+
+function icon(status: Row["status"]) {
+  return status === "ok" ? "ok" : status === "fail" ? "!!" : "..";
+}
diff --git a/src/tui.ts b/src/tui.ts
index abef745..a8ca027 100644
--- a/src/tui.ts
+++ b/src/tui.ts
@@ -1,6 +1,5 @@
-import { LOGO, type Event, type Result, type Sink } from "./devloop.ts";
-
-type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean };
+import { type Event, type Result, type Sink } from "./devloop.ts";
+import { view, type Row } from "./tui-view.ts";
 
 export async function createTuiSink(): Promise<Sink> {
   const { TextRenderable, createCliRenderer } = await import("@opentui/core");
@@ -40,21 +39,6 @@ export async function createTuiSink(): Promise<Sink> {
   };
 }
 
-export function view(rows: Row[], selected: number, result?: Result) {
-  const body = rows.flatMap((item, i) => {
-    const mark = i === selected ? ">" : " ";
-    const fold = item.lines.length ? (item.open ? "[-]" : "[+]") : "   ";
-    const head = `${mark} ${icon(item.status)} ${fold} ${item.title} - ${item.detail}`;
-    return item.open ? [head, ...item.lines.slice(-80).map((line) => `      ${line}`)] : [head];
-  });
-  const tail = result ? ["", `result:  ${result.status}`, `passes:  ${result.passes} / ${result.max}`, `branch:  ${result.branch}`, `commit:  ${result.commit || "none"}`, `report:  ${result.report}`, `track:   ${result.track}`] : ["", "enter toggles logs, j/k moves"];
-  return [LOGO, "", ...body, ...tail].join("\n");
-}
-
 function row(rows: Row[], id: string) {
   return rows.find((item) => item.id === id) ?? rows[rows.push({ id, title: id, status: "run", detail: "running", lines: [], open: false }) - 1]!;
 }
-
-function icon(status: Row["status"]) {
-  return status === "ok" ? "ok" : status === "fail" ? "!!" : "..";
-}
diff --git a/tests/tui-view.test.ts b/tests/tui-view.test.ts
new file mode 100644
index 0000000..d7085c4
--- /dev/null
+++ b/tests/tui-view.test.ts
@@ -0,0 +1,51 @@
+import { describe, expect, test } from "bun:test";
+import { view, type Row } from "../src/tui-view.ts";
+
+const baseRow = {
+  id: "step",
+  title: "run tests",
+  status: "run",
+  detail: "running",
+  lines: [],
+  open: false,
+} satisfies Row;
+
+describe("tui view", () => {
+  test("renders empty state with logo and help", () => {
+    const output = view([], 0);
+
+    expect(output).toContain("____/ /__");
+    expect(output).toContain("enter toggles logs, j/k moves");
+  });
+
+  test("renders closed and open rows", () => {
+    const closed = view([{ ...baseRow, lines: ["hidden"] }], 0);
+    const open = view([{ ...baseRow, status: "ok", detail: "completed", lines: Array.from({ length: 82 }, (_, i) => `line-${i}`), open: true }], 0);
+
+    expect(closed).toContain("> .. [+] run tests - running");
+    expect(closed).not.toContain("hidden");
+    expect(open).toContain("> ok [-] run tests - completed");
+    expect(open).not.toContain("line-0");
+    expect(open).toContain("line-81");
+  });
+
+  test("renders failed rows and result details", () => {
+    const output = view([{ ...baseRow, status: "fail", detail: "failed" }], 0, {
+      status: "commit-error",
+      passes: 1,
+      max: 5,
+      report: ".codex/reports/change.html",
+      track: ".codex/tracks/change.md",
+      branch: "devloop/change",
+      commit: "",
+      commitMessage: "",
+      codexSessionId: "codex-session",
+      claudeSessionId: "claude-session",
+    });
+
+    expect(output).toContain("> !!     run tests - failed");
+    expect(output).toContain("result:  commit-error");
+    expect(output).toContain("commit:  none");
+    expect(output).toContain("track:   .codex/tracks/change.md");
+  });
+});