From 38cf9bde799ec7e8c5b47b261e0ff986a14dd67f Mon Sep 17 00:00:00 2001 From: satyaborg Date: Mon, 25 May 2026 17:39:08 +1000 Subject: [PATCH 01/11] feat: port devloop to bun opentui --- bun.lock | 61 ++++++ bunfig.toml | 5 + devloop.sh | 454 -------------------------------------- package.json | 19 ++ src/cli.ts | 46 ++++ src/devloop.ts | 452 ++++++++++++++++++++++++++++++++++++++ src/tui.ts | 68 ++++++ tests/devloop.test.ts | 329 ++++++++++++++++++++++++++++ tests/devloop_test.sh | 495 +----------------------------------------- tsconfig.json | 14 ++ 10 files changed, 995 insertions(+), 948 deletions(-) create mode 100644 bun.lock create mode 100644 bunfig.toml delete mode 100755 devloop.sh create mode 100644 package.json create mode 100755 src/cli.ts create mode 100644 src/devloop.ts create mode 100644 src/tui.ts create mode 100644 tests/devloop.test.ts create mode 100644 tsconfig.json diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..9c82dff --- /dev/null +++ b/bun.lock @@ -0,0 +1,61 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "devloop", + "dependencies": { + "@opentui/core": "^0.2.15", + }, + "devDependencies": { + "@types/bun": "^1.3.1", + "typescript": "^5.9.3", + }, + }, + }, + "packages": { + "@opentui/core": ["@opentui/core@0.2.15", "", { "dependencies": { "bun-ffi-structs": "0.2.2", "diff": "9.0.0", "marked": "17.0.1", "string-width": "7.2.0", "strip-ansi": "7.1.2", "yoga-layout": "3.2.1" }, "optionalDependencies": { "@opentui/core-darwin-arm64": "0.2.15", "@opentui/core-darwin-x64": "0.2.15", "@opentui/core-linux-arm64": "0.2.15", "@opentui/core-linux-x64": "0.2.15", "@opentui/core-win32-arm64": "0.2.15", "@opentui/core-win32-x64": "0.2.15" }, "peerDependencies": { "web-tree-sitter": "0.25.10" } }, "sha512-YGHttdZWScMcSvtYgZkLR6VhUO1OoUiQzwYjZgIusf5eCkPLD8PapH+PTMVqAiX16CHO6JxfMlkHv5qDiHAccQ=="], + + "@opentui/core-darwin-arm64": ["@opentui/core-darwin-arm64@0.2.15", "", { "os": "darwin", "cpu": "arm64" }, "sha512-s25f9GmZd6wxNM5ExRmwwnLT+NLCKxnTWuO9aObOlqsXfLMGHQZrb6YwgAn/PSTua98KmH7GJCVWdPgZ/P+0RQ=="], + + "@opentui/core-darwin-x64": ["@opentui/core-darwin-x64@0.2.15", "", { "os": "darwin", "cpu": "x64" }, "sha512-GyaipN+nOcEr8rcTO2mqKTGmOBk0C300I69fLtubD3BadHcMI1DVNlQrcf/J1mkQEuMYbmBTi/1hT1ybWGr2Mw=="], + + "@opentui/core-linux-arm64": ["@opentui/core-linux-arm64@0.2.15", "", { "os": "linux", "cpu": "arm64" }, "sha512-h+uyufselGT4afKMP8Lg4yUl5Kp+DJBlhu3XpWXhphE5Pnq5+f0uGBr4P+34CNcWxMsDnvagSQLFRCS4rGrOWA=="], + + "@opentui/core-linux-x64": ["@opentui/core-linux-x64@0.2.15", "", { "os": "linux", "cpu": "x64" }, "sha512-jx+NImPq4wSp3Apfe7tlixiEJNnRyECTRJRWhGF6ZJz4PwFfgK2UHZKYR0DZHbV8nYawoDNQPJDXEWcoZShnMg=="], + + "@opentui/core-win32-arm64": ["@opentui/core-win32-arm64@0.2.15", "", { "os": "win32", "cpu": "arm64" }, "sha512-2SQQLvf3sgmToxrNika9AdcccKrjPJEn5jW6sSv0oEixNBzUzW41vSZZG4LM/V3lL8eg0LoYDnRZeKLB4gwSqQ=="], + + "@opentui/core-win32-x64": ["@opentui/core-win32-x64@0.2.15", "", { "os": "win32", "cpu": "x64" }, "sha512-SVMVgnC7LVEm+yVZKdmmhRBj/xAT94PanT+UCcHxaCWK+OLmv/AX+ohHq2m0odup6iXcEqj+7mAltO9fgJLFIg=="], + + "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="], + + "@types/node": ["@types/node@25.9.1", "", { "dependencies": { "undici-types": ">=7.24.0 <7.24.7" } }, "sha512-xfrlY7UD5rMJk3ZVJP8BNzS28J36YJg+xp+LPXV1TdWxr8uMH5A860QNxYDGQe/ylDSgjxE52Q9VnO7p75tJxg=="], + + "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], + + "bun-ffi-structs": ["bun-ffi-structs@0.2.2", "", { "peerDependencies": { "typescript": "^5" } }, "sha512-N/ZWtyN0piZlrXQT7TO0V+q952orYqkfhXRXM1Hcbb+R3QSiBH4vLnib187Mrs1H7pWIYECAmPeapGYDOMCl+w=="], + + "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="], + + "diff": ["diff@9.0.0", "", {}, "sha512-svtcdpS8CgJyqAjEQIXdb3OjhFVVYjzGAPO8WGCmRbrml64SPw/jJD4GoE98aR7r25A0XcgrK3F02yw9R/vhQw=="], + + "emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], + + "get-east-asian-width": ["get-east-asian-width@1.6.0", "", {}, "sha512-QRbvDIbx6YklUe6RxeTeleMR0yv3cYH6PsPZHcnVn7xv7zO1BHN8r0XETu8n6Ye3Q+ahtSarc3WgtNWmehIBfA=="], + + "marked": ["marked@17.0.1", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-boeBdiS0ghpWcSwoNm/jJBwdpFaMnZWRzjA6SkUMYb40SVaN1x7mmfGKp0jvexGcx+7y2La5zRZsYFZI6Qpypg=="], + + "string-width": ["string-width@7.2.0", "", { "dependencies": { "emoji-regex": "^10.3.0", "get-east-asian-width": "^1.0.0", "strip-ansi": "^7.1.0" } }, "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ=="], + + "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.24.6", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="], + + "web-tree-sitter": ["web-tree-sitter@0.25.10", "", { "peerDependencies": { "@types/emscripten": "^1.40.0" }, "optionalPeers": ["@types/emscripten"] }, "sha512-Y09sF44/13XvgVKgO2cNDw5rGk6s26MgoZPXLESvMXeefBf7i6/73eFurre0IsTW6E14Y0ArIzhUMmjoc7xyzA=="], + + "yoga-layout": ["yoga-layout@3.2.1", "", {}, "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ=="], + } +} diff --git a/bunfig.toml b/bunfig.toml new file mode 100644 index 0000000..72f3051 --- /dev/null +++ b/bunfig.toml @@ -0,0 +1,5 @@ +[test] +coverage = true +coverageThreshold = { lines = 1.0, functions = 1.0, statements = 1.0 } +coverageReporter = ["text", "lcov"] +coverageSkipTestFiles = true diff --git a/devloop.sh b/devloop.sh deleted file mode 100755 index 77463bb..0000000 --- a/devloop.sh +++ /dev/null @@ -1,454 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# devloop.sh — codex implements, claude reviews, loop till ACCEPT/max/stall. -# Usage: devloop.sh [--report-format html|markdown] [max] - -usage() { - echo "usage: devloop.sh [--report-format html|markdown] [max=5]" >&2 -} - -REPORT_FORMAT="html" -SPEC="" -MAX_RAW="5" -MAX_SET=0 - -while (($#)); do - case "$1" in - --report-format) - shift - [[ $# -gt 0 ]] || { usage; exit 2; } - REPORT_FORMAT="$1" - ;; - --html) - REPORT_FORMAT="html" - ;; - --markdown|--md) - REPORT_FORMAT="markdown" - ;; - -h|--help) - usage - exit 0 - ;; - --*) - echo "unknown option: $1" >&2 - usage - exit 2 - ;; - *) - if [[ -z "$SPEC" ]]; then - SPEC="$1" - elif (( MAX_SET == 0 )); then - MAX_RAW="$1" - MAX_SET=1 - else - usage - exit 2 - fi - ;; - esac - shift -done - -case "$REPORT_FORMAT" in - html|markdown) ;; - md) REPORT_FORMAT="markdown" ;; - *) echo "report format must be html or markdown" >&2; exit 2 ;; -esac - -[[ -z "$SPEC" || ! -f "$SPEC" ]] && { usage; exit 2; } - -[[ "$MAX_RAW" =~ ^[+-]?[0-9]+$ ]] || { echo "max must be an integer between 1 and 10" >&2; exit 2; } -MAX_SIGN=1 -MAX_DIGITS="$MAX_RAW" -case "$MAX_DIGITS" in - -*) MAX_SIGN=-1; MAX_DIGITS="${MAX_DIGITS#-}" ;; - +*) MAX_DIGITS="${MAX_DIGITS#+}" ;; -esac -MAX=$(( MAX_SIGN * 10#$MAX_DIGITS )) -(( MAX < 1 )) && MAX=1; (( MAX > 10 )) && MAX=10 - -command -v claude >/dev/null || { echo "claude not on PATH" >&2; exit 2; } -command -v codex >/dev/null || { echo "codex not on PATH" >&2; exit 2; } - -RUN_DIR=$(pwd -P) -SPEC=$(cd "$(dirname "$SPEC")" && pwd)/$(basename "$SPEC") -REPO=$(git -C "$RUN_DIR" rev-parse --show-toplevel 2>/dev/null) \ - || { echo "current directory is not inside a git repo" >&2; exit 2; } -cd "$REPO" - -SLUG=$(basename "$SPEC" .md) -BRANCH=$(git rev-parse --abbrev-ref HEAD) -BASE=$(git symbolic-ref --short refs/remotes/origin/HEAD 2>/dev/null | sed 's|^origin/||' \ - || (git show-ref --verify -q refs/heads/main && echo main) \ - || (git show-ref --verify -q refs/heads/master && echo master) \ - || echo main) - -mkdir -p .codex/tracks .codex/reviews .codex/reports .codex/logs .codex/sessions -TRACK=".codex/tracks/$SLUG.md" -if [[ "$REPORT_FORMAT" == "html" ]]; then - REPORT=".codex/reports/$SLUG.html" -else - REPORT=".codex/reports/$SLUG.md" -fi -CODEX_SESSION_FILE=".codex/sessions/$SLUG-codex.id" -CLAUDE_SESSION_FILE=".codex/sessions/$SLUG-claude.id" - -[[ -f "$TRACK" ]] || cat > "$TRACK" <&2; } - -read_one_line() { - local path="$1" value="" - [[ -f "$path" ]] || return 0 - IFS= read -r value < "$path" || true - printf '%s' "$value" -} - -write_one_line() { - local path="$1" value="$2" - printf '%s\n' "$value" > "$path" -} - -new_uuid() { - if command -v uuidgen >/dev/null; then - uuidgen | tr '[:upper:]' '[:lower:]' - return - fi - if command -v python3 >/dev/null; then - python3 -c 'import uuid; print(uuid.uuid4())' - return - fi - echo "uuidgen or python3 not on PATH" >&2 - return 127 -} - -extract_session_id() { - local log_file="$1" - # Codex currently prints the resumable UUID in human-readable session/thread - # banners, commonly "To continue this session..." or "session id/thread_id". - # If those banners change, fail loudly instead of starting a fresh fix session. - grep -Ei '(session.?id|thread_id|codex exec resume|codex resume|To continue this session)' "$log_file" \ - | grep -Eio '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' \ - | tail -n 1 -} - -assert_repo_cwd() { - local cwd - cwd=$(pwd -P) - [[ "$cwd" == "$REPO" ]] || { echo "internal error: expected cwd $REPO, got $cwd" >&2; return 1; } -} - -run_codex() { - local log_file="$1"; shift - local session_id - assert_repo_cwd || return - session_id=$(read_one_line "$CODEX_SESSION_FILE") - - if [[ -n "$session_id" ]]; then - codex exec resume --dangerously-bypass-approvals-and-sandbox "$session_id" - 2>&1 | tee "$log_file" - return - fi - - codex exec --dangerously-bypass-approvals-and-sandbox -C "$REPO" - 2>&1 | tee "$log_file" - session_id=$(extract_session_id "$log_file" || true) - [[ -n "$session_id" ]] || { echo "could not determine codex session id from $log_file" >&2; return 1; } - write_one_line "$CODEX_SESSION_FILE" "$session_id" - log "codex session: $session_id" -} - -run_claude() { - local log_file="$1"; shift - local session_id - session_id=$(read_one_line "$CLAUDE_SESSION_FILE") - - if [[ -n "$session_id" ]]; then - claude -p --resume "$session_id" --dangerously-skip-permissions --add-dir "$REPO" 2>&1 | tee "$log_file" >/dev/null - return - fi - - session_id=$(new_uuid) || return - claude -p --session-id "$session_id" --dangerously-skip-permissions --add-dir "$REPO" 2>&1 | tee "$log_file" >/dev/null - write_one_line "$CLAUDE_SESSION_FILE" "$session_id" - log "claude session: $session_id" -} - -list_reviews() { - local i file - for ((i=1; i<=MAX; i++)); do - file=".codex/reviews/$SLUG-r$i.md" - [[ -f "$file" ]] && printf -- '- %s\n' "$file" - done - return 0 -} - -hash_stdin() { - if command -v sha256sum >/dev/null; then - sha256sum | awk '{print $1}' - return - fi - if command -v shasum >/dev/null; then - shasum -a 256 | awk '{print $1}' - return - fi - echo "sha256sum or shasum not on PATH" >&2 - return 127 -} - -findings_hash() { - awk '/^## Findings/{f=1;next} /^## /{f=0} f' "$1" \ - | sed -E 's/[0-9]+//g; s/[[:space:]]+/ /g' | sort -u | hash_stdin -} - -status="unknown"; prior=""; N=0 - -for ((N=1; N<=MAX; N++)); do - log "pass $N/$MAX — codex" - CODEX_LOG=".codex/logs/$SLUG-r$N-codex.log" - - if (( N == 1 )); then - PROMPT=$(cat < - -## Findings - -1. [severity] . Root cause: . Principle: . - -## Missing tests - -- - -## Fix instructions - -1. - -## Notes - -- - -Rules: -- The line "Verdict: ACCEPT" or "Verdict: REJECT" or "Verdict: UNCLEAR" must appear verbatim. -- For ACCEPT: "## Findings" body is "None" and "## Fix instructions" body is "None". -- Findings must explain WHY, not just WHAT. If you cannot articulate the principle, the finding is too shallow — drop it or sharpen it. -- Rubric: acceptance criteria, bugs, edge cases, missing tests, scope creep, security/perf/compat/migration risk. -EOF -) - - printf '%s' "$PROMPT" | run_claude "$CLAUDE_LOG" || { status="claude-error"; break; } - [[ -f "$REVIEW" ]] || { status="review-missing"; break; } - - V=$(grep -m1 -oE '^Verdict:[[:space:]]+(ACCEPT|REJECT|UNCLEAR)' "$REVIEW" | awk '{print $2}' || true) - log "pass $N verdict: ${V:-MISSING}" - - case "$V" in - ACCEPT) status="accepted"; break ;; - UNCLEAR) status="unclear"; break ;; - REJECT) - h=$(findings_hash "$REVIEW") - [[ -n "$prior" && "$h" == "$prior" ]] && { status="stalled"; break; } - prior="$h" - ;; - *) status="no-verdict"; break ;; - esac -done - -[[ "$status" == "unknown" ]] && status="max-turns" - -log "synthesizing report" -PRIORS=$(list_reviews) - -if [[ "$REPORT_FORMAT" == "html" ]]; then - REPORT_INSTRUCTIONS=$(cat <, semantic sections, and minimal embedded CSS for readable typography. Do not wrap the HTML in a markdown code fence. Be concrete, no filler, no recap of what the reader can see in the diff. - -Use this content structure, with these visible section headings and no others: - -

$SLUG — devloop report

- -Opening result line: -Result: $status in $N pass(es). - -
-

The shape of the problem

-

2-4 sentences: what the spec actually asked for, the real constraint behind it, and which alternative designs were ruled out and why. If the track or reviews surfaced a hidden assumption, name it.

-
- -
-

What was built

-
  • 3-6 bullets describing the implementation at the level of design choices, not file lists. For each non-trivial choice, name the tradeoff that was weighed. The reader should be able to defend each choice in code review.
-
- -
-

What the review caught (and why it mattered)

-

For each unique finding across all review passes — even resolved ones — write one paragraph: the symptom, the root cause, and the principle. Group recurring themes. If a class of bug appeared twice, call that out as a pattern to internalize. If nothing was caught, say so and speculate on why: was the spec tight, was the change small, did the reviewer miss something.

-
- -
-

What to remember next time

-
  • 3-5 sharp, transferable lessons. Each lesson must be actionable in a future task, not specific to this slug. Frame as "When X, prefer Y because Z." If there is nothing transferable, write a single honest line saying so.
-
- -
-

Residual risk

-

Concrete remaining risks, or "None known". Be specific: "untested on empty input" beats "edge cases".

-
- -
-

Pointers

-
    -
  • Spec: $SPEC
  • -
  • Track: $TRACK
  • -
  • Reviews: include the review files listed in the Inputs block above.
  • -
-
-EOF -) -else - REPORT_INSTRUCTIONS=$(cat < - -## The shape of the problem -<2-4 sentences: what the spec actually asked for, the real constraint behind it, and which alternative designs were ruled out and why. If the track or reviews surfaced a hidden assumption, name it.> - -## What was built -<3-6 bullets describing the implementation at the level of design choices, not file lists. For each non-trivial choice, name the tradeoff that was weighed. The reader should be able to defend each choice in code review.> - -## What the review caught (and why it mattered) - - -## What to remember next time -<3-5 sharp, transferable lessons. Each lesson must be actionable in a future task, not specific to this slug. Frame as "When X, prefer Y because Z." If there is nothing transferable, write a single line saying so honestly.> - -## Residual risk - - -## Pointers -- Spec: $SPEC -- Track: $TRACK -- Reviews: $PRIORS -EOF -) -fi - -SYNTH_PROMPT=$(cat <; close?(): void | Promise }; + +type RunResult = { code: number; output: string }; +type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise; + +export function parseArgs(argv: string[], cwd = process.cwd()): Options | string { + let reportFormat: ReportFormat = "html"; + let strict = true; + let spec = ""; + let maxRaw = "5"; + let maxSet = false; + + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]!; + if (arg === "--report-format") { + const value = argv[++i]; + if (value !== "html" && value !== "markdown" && value !== "md") return usage(); + reportFormat = value === "md" ? "markdown" : value; + } else if (arg === "--html") reportFormat = "html"; + else if (arg === "--markdown" || arg === "--md") reportFormat = "markdown"; + else if (arg === "--no-strict") strict = false; + else if (arg === "--strict") strict = true; + else if (arg === "--plain" || arg === "--tui") continue; + else if (arg === "-h" || arg === "--help") return usage(); + else if (arg.startsWith("--")) return `unknown option: ${arg}\n${usage()}`; + else if (!spec) spec = arg; + else if (!maxSet) { + maxRaw = arg; + maxSet = true; + } else return usage(); + } + + if (!spec) return usage(); + if (!/^[+-]?\d+$/.test(maxRaw)) return "max must be an integer between 1 and 10"; + return { spec, max: clamp(Number.parseInt(maxRaw, 10), 1, 10), reportFormat, strict, cwd }; +} + +export function usage() { + return "usage: devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] [max=5]"; +} + +export function parseCriteria(markdown: string): string[] { + const lines = markdown.split(/\r?\n/); + const start = lines.findIndex((line) => /^##\s+acceptance criteria\s*$/i.test(line.trim())); + if (start < 0) return []; + const body = lines.slice(start + 1); + const end = body.findIndex((line) => /^##\s+/.test(line)); + return body + .slice(0, end < 0 ? body.length : end) + .map((line) => line.trim().replace(/^([-*]|\d+[.)])\s+/, "")) + .filter(Boolean); +} + +export function parseVerdict(review: string): Verdict | "" { + return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as Verdict | ""; +} + +export function hasPassingMatrix(review: string, count: number) { + if (!/^## Acceptance matrix\s*$/m.test(review)) return false; + return Array.from({ length: count }, (_, i) => new RegExp(`^-\\s*AC${i + 1}:\\s*PASS\\b`, "mi")).every((r) => + r.test(review), + ); +} + +export function findingsHash(review: string) { + const body = review.match(/^## Findings\s*\n([\s\S]*?)(?:\n##\s+|$)/m)?.[1] ?? ""; + const normalized = body + .replace(/\d+/g, "") + .replace(/[ \t\r\n]+/g, " ") + .split(".") + .map((line) => line.trim()) + .filter(Boolean) + .sort() + .join("\n"); + return createHash("sha256").update(normalized).digest("hex"); +} + +export async function runDevloop(options: Options, sink: Sink = { event: () => {} }): Promise { + const spec = await absoluteFile(options.spec, options.cwd); + const specText = await readFile(spec, "utf8"); + const criteria = parseCriteria(specText); + if (options.strict && criteria.length === 0) throw new Error("strict mode requires ## Acceptance criteria"); + await sink.event({ type: "gate", name: "acceptance criteria", ok: criteria.length > 0, detail: `${criteria.length} found` }); + + const repo = (await command("git", ["-C", options.cwd, "rev-parse", "--show-toplevel"])).trim(); + const branch = (await command("git", ["-C", repo, "rev-parse", "--abbrev-ref", "HEAD"])).trim(); + const base = await baseBranch(repo); + const initialDirty = await statusPaths(repo); + const slug = path.basename(spec, ".md"); + const dirs = [".codex/tracks", ".codex/reviews", ".codex/reports", ".codex/logs", ".codex/sessions"]; + await Promise.all(dirs.map((dir) => mkdir(path.join(repo, dir), { recursive: true }))); + + const track = `.codex/tracks/${slug}.md`; + const report = `.codex/reports/${slug}.${options.reportFormat === "html" ? "html" : "md"}`; + const codexSession = `.codex/sessions/${slug}-codex.id`; + const claudeSession = `.codex/sessions/${slug}-claude.id`; + const runner = makeRunner(repo, sink); + await initTrack(path.join(repo, track), { spec, cwd: options.cwd, base, branch, max: options.max, reportFormat: options.reportFormat, strict: options.strict }); + + let status: Status = "max-turns"; + let prior = ""; + let pass = 0; + let commit = ""; + let commitMessage = ""; + let finalBranch = branch; + + for (pass = 1; pass <= options.max; pass++) { + const codexLog = `.codex/logs/${slug}-r${pass}-codex.log`; + const codexId = `codex-${pass}`; + await sink.event({ type: "step", id: codexId, title: `pass ${pass}/${options.max} codex` }); + const codex = await runCodex(runner, repo, path.join(repo, codexSession), path.join(repo, codexLog), codexPrompt({ spec, track, pass, strict: options.strict, previous: `.codex/reviews/${slug}-r${pass - 1}.md`, criteria })); + await sink.event({ type: "done", id: codexId, ok: codex, detail: codex ? "completed" : "failed" }); + if (!codex) { + status = "codex-error"; + break; + } + + const review = `.codex/reviews/${slug}-r${pass}.md`; + const claudeLog = `.codex/logs/${slug}-r${pass}-claude.log`; + const claudeId = `claude-${pass}`; + await sink.event({ type: "step", id: claudeId, title: `pass ${pass}/${options.max} claude review` }); + const ok = await runClaude(runner, repo, path.join(repo, claudeSession), path.join(repo, claudeLog), reviewPrompt({ spec, track, base, pass, output: review, priors: listReviews(slug, pass, options.max), criteria, strict: options.strict })); + await sink.event({ type: "done", id: claudeId, ok, detail: ok ? "completed" : "failed" }); + if (!ok) { + status = "claude-error"; + break; + } + + let reviewText = ""; + try { + reviewText = await readFile(path.join(repo, review), "utf8"); + } catch { + status = "review-missing"; + break; + } + const verdict = parseVerdict(reviewText); + await sink.event({ type: "gate", name: `pass ${pass} verdict`, ok: verdict === "ACCEPT", detail: verdict || "MISSING" }); + if (verdict === "ACCEPT") { + status = options.strict && !hasPassingMatrix(reviewText, criteria.length) ? "unclear" : "accepted"; + break; + } + if (verdict === "UNCLEAR") { + status = "unclear"; + break; + } + if (verdict === "REJECT") { + const hash = findingsHash(reviewText); + if (prior && hash === prior) { + status = "stalled"; + break; + } + prior = hash; + } else { + status = "no-verdict"; + break; + } + } + + if (pass > options.max) pass = options.max; + if (status === "accepted") { + const commitId = "commit"; + await sink.event({ type: "step", id: commitId, title: "local branch and commit" }); + const committed = await commitAccepted(repo, slug, initialDirty).catch(() => undefined); + if (committed) { + finalBranch = committed.branch; + commit = committed.commit; + commitMessage = committed.message; + await sink.event({ type: "done", id: commitId, ok: true, detail: commit ? `${finalBranch} ${commit}` : `${finalBranch} no changes` }); + } else { + status = "commit-error"; + await sink.event({ type: "done", id: commitId, ok: false, detail: "failed" }); + } + } + + const codexSessionId = await readLine(path.join(repo, codexSession)); + const claudeSessionId = await readLine(path.join(repo, claudeSession)); + await synthesizeReport(runner, repo, { slug, spec, track, report, status, pass, max: options.max, base, initialBranch: branch, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId, format: options.reportFormat, reviews: listReviews(slug, pass, options.max) }); + const result = { status, passes: pass, max: options.max, report, track, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId }; + await sink.event({ type: "result", result }); + return result; +} + +async function absoluteFile(file: string, cwd: string) { + const full = path.resolve(cwd, file); + if (!(await stat(full).catch(() => false))) throw new Error(usage()); + return realpath(full); +} + +async function command(cmd: string, args: string[]) { + const proc = Bun.spawn([cmd, ...args], { stdout: "pipe", stderr: "pipe" }); + const [out, err, code] = await Promise.all([new Response(proc.stdout).text(), new Response(proc.stderr).text(), proc.exited]); + if (code !== 0) throw new Error(err.trim() || `${cmd} failed`); + return out; +} + +async function baseBranch(repo: string) { + for (const args of [ + ["-C", repo, "symbolic-ref", "--short", "refs/remotes/origin/HEAD"], + ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/main"], + ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/master"], + ]) { + const proc = Bun.spawn(["git", ...args], { stdout: "pipe", stderr: "pipe" }); + if ((await proc.exited) === 0) { + if (args[2] === "symbolic-ref") return (await new Response(proc.stdout).text()).trim().replace(/^origin\//, ""); + return args.at(-1)!.split("/").pop()!; + } + } + return "main"; +} + +async function statusPaths(repo: string) { + const out = await command("git", ["-C", repo, "status", "--porcelain=v1", "-z", "--untracked-files=all"]); + const parts = out.split("\0").filter(Boolean); + const paths = new Set(); + for (let i = 0; i < parts.length; i++) { + const item = parts[i]!; + const code = item.slice(0, 2); + const file = item.slice(3); + if (file) paths.add(file); + if (code.includes("R") || code.includes("C")) { + const next = parts[++i]; + if (next) paths.add(next); + } + } + return paths; +} + +async function commitAccepted(repo: string, slug: string, initialDirty: Set) { + const current = (await command("git", ["-C", repo, "branch", "--show-current"])).trim(); + const branch = await nextBranch(repo, slug, current); + const message = `feat: ${slugify(slug)}`; + if (branch !== current) await command("git", ["-C", repo, "switch", "-c", branch]); + const changed = [...(await statusPaths(repo))].filter((file) => !initialDirty.has(file) && !file.startsWith(".codex/")); + if (changed.length === 0) return { branch, commit: "", message }; + await command("git", ["-C", repo, "add", "--", ...changed]); + await command("git", ["-C", repo, "commit", "--only", "-m", message, "--", ...changed]); + return { branch, commit: (await command("git", ["-C", repo, "rev-parse", "--short", "HEAD"])).trim(), message }; +} + +async function nextBranch(repo: string, slug: string, current: string) { + const base = `devloop/${slugify(slug)}`; + if (current === base || new RegExp(`^${escapeRegex(base)}-\\d+$`).test(current)) return current; + let suffix = 1; + let branch = base; + while (await branchExists(repo, branch)) { + suffix++; + branch = `${base}-${suffix}`; + } + return branch; +} + +async function branchExists(repo: string, branch: string) { + const proc = Bun.spawn(["git", "-C", repo, "show-ref", "--verify", "--quiet", `refs/heads/${branch}`]); + return (await proc.exited) === 0; +} + +function makeRunner(cwd: string, sink: Sink): Runner { + return async (cmd, args, input = "", log, id) => { + let proc: Bun.Subprocess<"pipe", "pipe", "pipe">; + try { + proc = Bun.spawn([cmd, ...args], { cwd, stdin: "pipe", stdout: "pipe", stderr: "pipe", env: Bun.env }); + } catch (error) { + const output = error instanceof Error ? error.message : String(error); + if (log) await writeFile(log, output); + return { code: 127, output }; + } + proc.stdin.write(input); + proc.stdin.end(); + let output = ""; + const pump = async (stream: ReadableStream) => { + const reader = stream.getReader(); + const decoder = new TextDecoder(); + let pending = ""; + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + const text = decoder.decode(value); + output += text; + pending += text; + const lines = pending.split(/\r?\n/); + pending = lines.pop() ?? ""; + if (id) for (const line of lines.filter(Boolean)) await sink.event({ type: "log", id, line }); + } + if (id && pending) await sink.event({ type: "log", id, line: pending }); + }; + const [, , code] = await Promise.all([pump(proc.stdout), pump(proc.stderr), proc.exited]); + if (log) await writeFile(log, output); + return { code, output }; + }; +} + +async function initTrack(file: string, data: { spec: string; cwd: string; base: string; branch: string; max: number; reportFormat: ReportFormat; strict: boolean }) { + if (await stat(file).catch(() => false)) return; + await writeFile( + file, + `# Track: ${path.basename(file, ".md")}\n\n- spec: ${data.spec}\n- cwd: ${data.cwd}\n- base: ${data.base}\n- branch: ${data.branch}\n- max: ${data.max}\n- report-format: ${data.reportFormat}\n- strict: ${data.strict}\n- started: ${new Date().toISOString()}\n\n`, + ); +} + +async function readLine(file: string) { + return (await readFile(file, "utf8").catch(() => "")).split(/\r?\n/, 1)[0] ?? ""; +} + +async function writeLine(file: string, value: string) { + await writeFile(file, `${value}\n`); +} + +async function runCodex(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) { + const session = await readLine(sessionFile); + const args = session + ? ["exec", "resume", "--dangerously-bypass-approvals-and-sandbox", session, "-"] + : ["exec", "--dangerously-bypass-approvals-and-sandbox", "-C", repo, "-"]; + const result = await runner("codex", args, prompt, log, log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex"); + if (result.code !== 0) return false; + if (!session) { + const next = extractSessionId(result.output); + if (!next) return false; + await writeLine(sessionFile, next); + } + return true; +} + +async function runClaude(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) { + const session = await readLine(sessionFile); + const next = session || randomUUID(); + const args = session + ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo] + : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo]; + const result = await runner("claude", args, prompt, log, log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report"); + if (result.code !== 0) return false; + if (!session) await writeLine(sessionFile, next); + return true; +} + +function extractSessionId(output: string) { + return output + .split(/\r?\n/) + .filter((line) => /(session.?id|thread_id|codex exec resume|codex resume|To continue this session)/i.test(line)) + .join("\n") + .match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i)?.[0] + .toLowerCase(); +} + +function listReviews(slug: string, upto: number, max: number) { + return Array.from({ length: Math.min(upto, max) }, (_, i) => `- .codex/reviews/${slug}-r${i + 1}.md`).join("\n"); +} + +function criteriaBlock(criteria: string[]) { + return criteria.map((criterion, i) => `AC${i + 1}: ${criterion}`).join("\n") || "No parsed acceptance criteria."; +} + +function codexPrompt(input: { spec: string; track: string; pass: number; strict: boolean; previous: string; criteria: string[] }) { + const strict = input.strict + ? "\nStrict lifecycle:\n1. Add or update regression tests before implementation.\n2. Run the narrow test first and record the failing result, unless impossible; if impossible, say why.\n3. Implement the smallest change.\n4. Run targeted tests, full tests, lint/typecheck, and coverage. Coverage must be 100% when the project exposes coverage tooling.\n" + : ""; + return input.pass === 1 + ? `You are implementing against an approved spec.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nPass: ${input.pass}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}${strict}\nTasks:\n1. Read the spec.\n2. Implement the smallest working change satisfying the acceptance criteria.\n3. Append "## Pass ${input.pass} - implement" to ${input.track} with changed files, design tradeoffs, verification, and residual risk.\n\nConstraints:\n- Do not commit.\n- Do not edit the spec.\n- Do not revert unrelated dirty files.\n` + : `Fix only the findings in the review. Do not refactor unrelated code.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nReview: ${input.previous}\nPass: ${input.pass}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}${strict}\nTasks:\n1. Read the review file.\n2. Fix each finding or explain why it is wrong in the track.\n3. Re-run relevant tests.\n4. Append "## Pass ${input.pass} - fix" to ${input.track} with per-finding outcomes.\n`; +} + +function reviewPrompt(input: { spec: string; track: string; base: string; pass: number; output: string; priors: string; criteria: string[]; strict: boolean }) { + return `You are reviewing a Codex implementation. Be a senior reviewer, not a linter.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nBase: ${input.base}\nPass: ${input.pass}\nPrior reviews:\n${input.priors}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}\nOutput path: ${input.output}\n\nSteps:\n1. Read the spec and track.\n2. Run: git diff ${input.base}...HEAD\n3. Read prior reviews so you do not repeat resolved findings.\n4. Write the review to ${input.output} using this exact format:\n\n# Claude review ${input.pass}\n\nVerdict: \n\n## Acceptance matrix\n\n- AC1: - \n\n## Findings\n\n1. [severity] - . Root cause: . Principle: .\n\n## Missing tests\n\n- \n\n## Fix instructions\n\n1. \n\n## Notes\n\n- \n\nRules:\n- The verdict line must appear verbatim.\n- ACCEPT requires every acceptance criterion PASS with concrete evidence.${input.strict ? "\n- ACCEPT also requires regression-test evidence, red/green evidence when behavior changed, passing full tests, and 100% coverage when coverage tooling exists." : ""}\n- For ACCEPT: Findings and Fix instructions bodies are "None".\n- Findings must explain WHY, not just WHAT.\n`; +} + +async function synthesizeReport(runner: Runner, repo: string, input: { slug: string; spec: string; track: string; report: string; status: Status; pass: number; max: number; base: string; initialBranch: string; branch: string; commit: string; commitMessage: string; codexSessionId: string; claudeSessionId: string; format: ReportFormat; reviews: string }) { + const metadata = `Result: ${input.status} +Passes: ${input.pass} / ${input.max} +Repository: ${repo} +Spec: ${input.spec} +Base branch: ${input.base} +Starting branch: ${input.initialBranch} +Final branch: ${input.branch} +Local commit: ${input.commit || "none"} +Commit message: ${input.commitMessage || "none"} +Codex session: ${input.codexSessionId || "unknown"} +Claude session: ${input.claudeSessionId || "unknown"} +Track: ${input.track} +Reviews: +${input.reviews}`; + const body = + input.format === "html" + ? `Write the report to ${input.report} as valid standalone HTML. Use a readable document layout with embedded CSS, a compact metadata table at the top, and substantive sections after it. Include these visible section headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.` + : `Write the report to ${input.report} in markdown with these headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.`; + const sessionFile = path.join(repo, `.codex/sessions/${input.slug}-claude.id`); + const session = await readLine(sessionFile); + const next = session || randomUUID(); + await runner( + "claude", + session ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo] : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo], + `You are writing a learning-oriented post-mortem for a developer who just ran a Codex/Claude devloop.\n\nMetadata to render at the top exactly and visibly:\n${metadata}\n\nInputs:\n- spec: ${input.spec}\n- track: ${input.track}\nReview files:\n${input.reviews}\n- final status: ${input.status}\n- passes used: ${input.pass} / ${input.max}\n- base: ${input.base}, starting branch: ${input.initialBranch}, final branch: ${input.branch}, local commit: ${input.commit || "none"}\n\n${body}\n\nStyle:\n- Human readable, not ornamental.\n- Preserve useful substance over brevity.\n- Teach the why: symptom, root cause, principle, decision, tradeoff, and evidence.\n- No emoji.\n`, + path.join(repo, `.codex/logs/${input.slug}-report.log`), + "report", + ); + if (!session) await writeLine(sessionFile, next); +} + +function clamp(value: number, min: number, max: number) { + return Math.max(min, Math.min(max, value)); +} + +function slugify(value: string) { + return value.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "change"; +} + +function escapeRegex(value: string) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} diff --git a/src/tui.ts b/src/tui.ts new file mode 100644 index 0000000..0403fe2 --- /dev/null +++ b/src/tui.ts @@ -0,0 +1,68 @@ +import type { Event, Result, Sink } from "./devloop.ts"; + +type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean }; + +const LOGO = [ + " ▐▌▗▞▀▚▖▄ ▄ █ ▄▄▄ ▄▄▄ ▄▄▄▄ ", + " ▐▌▐▛▀▀▘█ █ █ █ █ █ █ █ █ ", + "▗▞▀▜▌▝▚▄▄▖ ▀▄▀ █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ", + "▝▚▄▟▌ █ █ ", + " ▀", +]; + +export async function createTuiSink(): Promise { + const { TextRenderable, createCliRenderer } = await import("@opentui/core"); + const renderer = await createCliRenderer({ exitOnCtrlC: true, consoleMode: "disabled", screenMode: "alternate-screen" }); + const text = new TextRenderable(renderer, { id: "devloop", width: "100%", height: "100%", content: "" }); + const rows: Row[] = []; + let selected = 0; + let result: Result | undefined; + + renderer.root.add(text); + renderer.keyInput.on("keypress", (key) => { + if (key.name === "up" || key.name === "k") selected = Math.max(0, selected - 1); + else if (key.name === "down" || key.name === "j") selected = Math.min(rows.length - 1, selected + 1); + else if (rows.length && (key.name === "return" || key.name === "space")) rows[selected]!.open = !rows[selected]!.open; + render(); + }); + + function render() { + text.content = view(rows, selected, result); + renderer.requestRender(); + } + + render(); + return { + event(event: Event) { + if (event.type === "step") rows.push({ id: event.id, title: event.title, status: "run", detail: "running", lines: [], open: false }); + else if (event.type === "log") row(rows, event.id).lines.push(event.line); + else if (event.type === "done") Object.assign(row(rows, event.id), { status: event.ok ? "ok" : "fail", detail: event.detail }); + else if (event.type === "gate") rows.push({ id: event.name, title: event.name, status: event.ok ? "ok" : "fail", detail: event.detail, lines: [], open: false }); + else result = event.result; + selected = Math.min(selected, Math.max(0, rows.length - 1)); + render(); + }, + close() { + renderer.destroy(); + }, + }; +} + +export function view(rows: Row[], selected: number, result?: Result) { + const body = rows.flatMap((item, i) => { + const mark = i === selected ? ">" : " "; + const fold = item.lines.length ? (item.open ? "[-]" : "[+]") : " "; + const head = `${mark} ${icon(item.status)} ${fold} ${item.title} - ${item.detail}`; + return item.open ? [head, ...item.lines.slice(-80).map((line) => ` ${line}`)] : [head]; + }); + const tail = result ? ["", `result: ${result.status}`, `passes: ${result.passes} / ${result.max}`, `branch: ${result.branch}`, `commit: ${result.commit || "none"}`, `report: ${result.report}`, `track: ${result.track}`] : ["", "enter toggles logs, j/k moves"]; + return [...LOGO, "", ...body, ...tail].join("\n"); +} + +function row(rows: Row[], id: string) { + return rows.find((item) => item.id === id) ?? rows[rows.push({ id, title: id, status: "run", detail: "running", lines: [], open: false }) - 1]!; +} + +function icon(status: Row["status"]) { + return status === "ok" ? "ok" : status === "fail" ? "!!" : ".."; +} diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts new file mode 100644 index 0000000..b1bd610 --- /dev/null +++ b/tests/devloop.test.ts @@ -0,0 +1,329 @@ +import { afterAll, beforeEach, describe, expect, test } from "bun:test"; +import { mkdtemp, readFile, realpath, rm, stat, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { parseArgs, parseCriteria, parseVerdict, runDevloop, type Event, type Options } from "../src/devloop.ts"; + +const root = await mkdtemp(path.join(tmpdir(), "devloop-test.")); +let oldPath = process.env.PATH ?? ""; + +afterAll(async () => rm(root, { recursive: true, force: true })); +beforeEach(() => { + oldPath = process.env.PATH ?? ""; + delete process.env.DEVLOOP_TEST_VERDICTS; + delete process.env.DEVLOOP_TEST_STATE; + delete process.env.DEVLOOP_TEST_NO_MATRIX; + delete process.env.DEVLOOP_TEST_NO_REVIEW; + delete process.env.DEVLOOP_TEST_NO_VERDICT; + delete process.env.DEVLOOP_TEST_FAIL_CODEX; + delete process.env.DEVLOOP_TEST_FAIL_CLAUDE; +}); + +describe("parsing", () => { + test("parses options tightly", () => { + expect(parseArgs(["--no-strict", "--report-format", "md", "spec.md", "08"], "/x")).toEqual({ + spec: "spec.md", + max: 8, + reportFormat: "markdown", + strict: false, + cwd: "/x", + } satisfies Options); + expect(parseArgs(["spec.md", "0"], "/x")).toMatchObject({ max: 1 }); + expect(parseArgs(["spec.md", "99"], "/x")).toMatchObject({ max: 10 }); + expect(parseArgs(["--wat"], "/x")).toContain("unknown option"); + expect(parseArgs([], "/x")).toContain("usage:"); + expect(parseArgs(["spec.md", "nope"], "/x")).toBe("max must be an integer between 1 and 10"); + }); + + test("extracts acceptance criteria", () => { + expect(parseCriteria("# Spec\n\n## Acceptance criteria\n1. One\n- Two\n\n## Notes\nNope")).toEqual(["One", "Two"]); + expect(parseCriteria("# Spec")).toEqual([]); + expect(parseVerdict("Verdict: ACCEPT\n")).toBe("ACCEPT"); + }); +}); + +describe("loop", () => { + test("accepts and writes core artifacts", async () => { + const { repo, state } = await fixture("accept"); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const { result, events } = await run(repo); + + expect(result.status).toBe("accepted"); + expect(result.passes).toBe(1); + expect(result.branch).toBe("devloop/change"); + expect(result.commit).toMatch(/^[0-9a-f]+$/); + expect(result.commitMessage).toBe("feat: change"); + await exists(path.join(repo, ".codex/tracks/change.md")); + await exists(path.join(repo, ".codex/reviews/change-r1.md")); + await exists(path.join(repo, ".codex/reports/change.html")); + expect(await readFile(path.join(repo, ".codex/sessions/change-codex.id"), "utf8")).toContain("00000000-0000-4000-8000-000000000001"); + expect(await readFile(path.join(repo, ".codex/tracks/change.md"), "utf8")).toContain("- strict: true"); + expect(await readFile(path.join(repo, ".codex/reviews/change-r1.md"), "utf8")).toContain("- AC1: PASS"); + expect(await readFile(path.join(state, "codex-args.log"), "utf8")).toContain(`exec --dangerously-bypass-approvals-and-sandbox -C ${repo} -`); + expect((await Bun.$`git -C ${repo} branch --show-current`.text()).trim()).toBe("devloop/change"); + expect((await Bun.$`git -C ${repo} log -1 --format=%s`.text()).trim()).toBe("feat: change"); + expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).toContain("feature.txt"); + expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).not.toContain(".codex/"); + const reportPrompt = await readFile(path.join(state, "claude-prompts.log"), "utf8"); + expect(reportPrompt).toContain("Codex session: 00000000-0000-4000-8000-000000000001"); + expect(reportPrompt).toContain("Final branch: devloop/change"); + expect(reportPrompt).toContain(`Local commit: ${result.commit}`); + expect(reportPrompt).toContain("Commit message: feat: change"); + expect(events.some((event) => event.type === "gate" && event.name === "acceptance criteria" && event.ok)).toBe(true); + expect(events).toContainEqual({ type: "log", id: "codex-1", line: "codex-tail" }); + }); + + test("rejects then accepts with resumed sessions", async () => { + const { repo, state } = await fixture("reject-accept"); + process.env.DEVLOOP_TEST_VERDICTS = "REJECT,ACCEPT"; + const { result } = await run(repo, { max: 3 }); + + expect(result.status).toBe("accepted"); + expect(result.passes).toBe(2); + expect(await readFile(path.join(repo, ".codex/reviews/change-r1.md"), "utf8")).toContain("Verdict: REJECT"); + expect(await readFile(path.join(repo, ".codex/reviews/change-r2.md"), "utf8")).toContain("Verdict: ACCEPT"); + expect(await readFile(path.join(state, "codex-args.log"), "utf8")).toContain("exec resume --dangerously-bypass-approvals-and-sandbox 00000000-0000-4000-8000-000000000001 -"); + }); + + test("stalls on repeated reject findings", async () => { + const { repo } = await fixture("stall"); + process.env.DEVLOOP_TEST_VERDICTS = "REJECT,REJECT"; + const { result } = await run(repo, { max: 5 }); + + expect(result.status).toBe("stalled"); + expect(result.passes).toBe(2); + }); + + test("supports markdown reports", async () => { + const { repo, state } = await fixture("markdown"); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const { result } = await run(repo, { reportFormat: "markdown" }); + + expect(result.report).toBe(".codex/reports/change.md"); + await exists(path.join(repo, ".codex/reports/change.md")); + expect(await exists(path.join(repo, ".codex/reports/change.html"), false)).toBe(false); + expect(await readFile(path.join(state, "claude-prompts.log"), "utf8")).toContain("in markdown"); + }); + + test("skips files dirty before the run when committing", async () => { + const { repo } = await fixture("dirty-before"); + await writeFile(path.join(repo, "dirty.txt"), "do not commit\n"); + await writeFile(path.join(repo, "old.txt"), "old\n"); + await Bun.$`git -C ${repo} add old.txt`.quiet(); + await Bun.$`git -C ${repo} commit -q -m old`.quiet(); + await Bun.$`git -C ${repo} mv old.txt renamed.txt`.quiet(); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const { result } = await run(repo); + + expect(result.status).toBe("accepted"); + expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).toContain("feature.txt"); + expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).not.toContain("dirty.txt"); + expect(await Bun.$`git -C ${repo} show --name-only --format= HEAD`.text()).not.toContain("renamed.txt"); + expect(await Bun.$`git -C ${repo} status --short -- dirty.txt`.text()).toContain("?? dirty.txt"); + expect(await Bun.$`git -C ${repo} status --short -- renamed.txt`.text()).toContain("renamed.txt"); + }); + + test("reports commit errors", async () => { + const { repo } = await fixture("commit-error"); + await writeFile(path.join(repo, ".git/hooks/pre-commit"), "#!/usr/bin/env bash\nexit 1\n", { mode: 0o755 }); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const { result } = await run(repo); + + expect(result.status).toBe("commit-error"); + }); + + test("uses a suffixed branch when the default branch exists", async () => { + const { repo } = await fixture("branch-exists"); + await Bun.$`git -C ${repo} branch devloop/change`.quiet(); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const { result } = await run(repo); + + expect(result.status).toBe("accepted"); + expect(result.branch).toBe("devloop/change-2"); + }); + + test("preserves spacey slugs and invocation repo ownership", async () => { + const work = await fixture("space-work", undefined, "change with spaces.md"); + const specOnly = await fixture("space-spec", undefined, "external spec.md"); + process.env.PATH = `${work.bin}:${oldPath}`; + process.env.DEVLOOP_TEST_STATE = work.state; + process.env.DEVLOOP_TEST_VERDICTS = "REJECT,ACCEPT"; + + const spaced = await runDevloop({ spec: work.specPath, max: 2, reportFormat: "html", strict: true, cwd: work.repo }); + expect(spaced.status).toBe("accepted"); + await exists(path.join(work.repo, ".codex/reviews/change with spaces-r2.md")); + + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const external = await runDevloop({ spec: specOnly.specPath, max: 1, reportFormat: "html", strict: true, cwd: work.repo }); + expect(external.status).toBe("accepted"); + await exists(path.join(work.repo, ".codex/tracks/external spec.md")); + expect(await exists(path.join(specOnly.repo, ".codex"), false)).toBe(false); + }); + + test("requires acceptance criteria in strict mode", async () => { + const { repo } = await fixture("no-criteria", "# Spec\n"); + await expect(run(repo)).rejects.toThrow("strict mode requires ## Acceptance criteria"); + await expect(runDevloop({ spec: path.join(repo, ".specs/missing.md"), max: 1, reportFormat: "html", strict: true, cwd: repo })).rejects.toThrow("usage:"); + }); + + test("allows missing criteria only when strict is off", async () => { + const { repo } = await fixture("loose", "# Spec\n"); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + const { result, events } = await run(repo, { strict: false }); + + expect(result.status).toBe("accepted"); + expect(events).toContainEqual({ type: "gate", name: "acceptance criteria", ok: false, detail: "0 found" }); + }); + + test("turns strict accepts without matrix into unclear", async () => { + const { repo } = await fixture("no-matrix"); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + process.env.DEVLOOP_TEST_NO_MATRIX = "1"; + const { result } = await run(repo); + + expect(result.status).toBe("unclear"); + expect(result.passes).toBe(1); + }); + + test("handles agent and review failures", async () => { + const codex = await fixture("codex-fail"); + process.env.DEVLOOP_TEST_FAIL_CODEX = "1"; + expect((await run(codex.repo)).result.status).toBe("codex-error"); + delete process.env.DEVLOOP_TEST_FAIL_CODEX; + + const claude = await fixture("claude-fail"); + process.env.DEVLOOP_TEST_FAIL_CLAUDE = "1"; + expect((await run(claude.repo)).result.status).toBe("claude-error"); + delete process.env.DEVLOOP_TEST_FAIL_CLAUDE; + + const missing = await fixture("missing-review"); + process.env.DEVLOOP_TEST_NO_REVIEW = "1"; + expect((await run(missing.repo)).result.status).toBe("review-missing"); + delete process.env.DEVLOOP_TEST_NO_REVIEW; + + const noVerdict = await fixture("no-verdict"); + process.env.DEVLOOP_TEST_NO_VERDICT = "1"; + expect((await run(noVerdict.repo)).result.status).toBe("no-verdict"); + delete process.env.DEVLOOP_TEST_NO_VERDICT; + }); + + test("handles unclear verdicts and missing executables", async () => { + const unclear = await fixture("unclear"); + process.env.DEVLOOP_TEST_VERDICTS = "UNCLEAR"; + expect((await run(unclear.repo)).result.status).toBe("unclear"); + + const missingClaude = await fixture("missing-claude-bin"); + await rm(path.join(missingClaude.repo, "../bin/claude"), { force: true }); + process.env.PATH = `${path.join(missingClaude.repo, "../bin")}:/usr/bin:/bin`; + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + expect((await run(missingClaude.repo)).result.status).toBe("claude-error"); + }); + + test("falls back to main when no base branch exists", async () => { + const { repo } = await fixture("no-base"); + await Bun.$`git -C ${repo} branch -m topic`.quiet(); + process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; + expect((await runDevloop({ spec: path.join(repo, ".specs/change.md"), max: 1, reportFormat: "html", strict: true, cwd: repo })).status).toBe("accepted"); + expect(await readFile(path.join(repo, ".codex/tracks/change.md"), "utf8")).toContain("- base: main"); + }); +}); + +async function fixture(name: string, spec = "# Fixture spec\n\n## Acceptance criteria\n1. The loop runs deterministically under test.\n", specName = "change.md") { + const dir = path.join(root, name); + const repo = path.join(dir, "repo"); + const bin = path.join(dir, "bin"); + const state = path.join(dir, "state"); + await Bun.$`mkdir -p ${repo}/.specs ${bin} ${state}`.quiet(); + await Bun.$`git init -q ${repo}`.quiet(); + await Bun.$`git -C ${repo} symbolic-ref HEAD refs/heads/main`.quiet(); + await writeFile(path.join(repo, "README.md"), "# Fixture\n"); + const specPath = path.join(repo, ".specs", specName); + await writeFile(specPath, spec); + await Bun.$`git -C ${repo} config user.email devloop-test@example.com`.quiet(); + await Bun.$`git -C ${repo} config user.name "devloop test"`.quiet(); + await Bun.$`git -C ${repo} add README.md`.quiet(); + await Bun.$`git -C ${repo} commit -q -m init`.quiet(); + await installMocks(bin); + process.env.PATH = `${bin}:${oldPath}`; + process.env.DEVLOOP_TEST_STATE = state; + return { repo: await real(repo), state, bin, specPath }; +} + +async function installMocks(bin: string) { + await writeFile( + path.join(bin, "codex"), + `#!/usr/bin/env bash +set -euo pipefail +[[ -z "\${DEVLOOP_TEST_FAIL_CODEX:-}" ]] || exit 42 +prompt=$(cat) +mkdir -p "$DEVLOOP_TEST_STATE" +count=$(( $(cat "$DEVLOOP_TEST_STATE/codex-count" 2>/dev/null || echo 0) + 1 )) +printf '%s\\n' "$count" > "$DEVLOOP_TEST_STATE/codex-count" +printf '%s\\n' "$*" >> "$DEVLOOP_TEST_STATE/codex-args.log" +printf '%s\\n---\\n' "$prompt" >> "$DEVLOOP_TEST_STATE/codex-prompts.log" +track=$(printf '%s\\n' "$prompt" | awk -F': ' '/^Track: /{print $2; exit}') +[[ -z "$track" ]] || printf '\\n## Pass %s - mock codex\\n- verification: fixture\\n' "$count" >> "$track" +printf 'feature pass %s\\n' "$count" >> feature.txt +printf 'codex pass %s\\n' "$count" +printf 'To continue this session, run codex exec resume 00000000-0000-4000-8000-000000000001\\n' +printf 'codex-tail' >&2 +`, + { mode: 0o755 }, + ); + await writeFile( + path.join(bin, "claude"), + `#!/usr/bin/env bash +set -euo pipefail +[[ -z "\${DEVLOOP_TEST_FAIL_CLAUDE:-}" ]] || exit 43 +prompt=$(cat) +mkdir -p "$DEVLOOP_TEST_STATE" +printf '%s\\n' "$*" >> "$DEVLOOP_TEST_STATE/claude-args.log" +printf '%s\\n---\\n' "$prompt" >> "$DEVLOOP_TEST_STATE/claude-prompts.log" +if [[ "$prompt" == *"Output path:"* ]]; then + [[ -z "\${DEVLOOP_TEST_NO_REVIEW:-}" ]] || exit 0 + review_file=$(printf '%s\\n' "$prompt" | awk -F': ' '/^Output path: /{print $2; exit}') + count=$(( $(cat "$DEVLOOP_TEST_STATE/claude-review-count" 2>/dev/null || echo 0) + 1 )) + printf '%s\\n' "$count" > "$DEVLOOP_TEST_STATE/claude-review-count" + IFS=',' read -r -a verdicts <<< "\${DEVLOOP_TEST_VERDICTS:-ACCEPT}" + verdict="\${verdicts[$(( count <= \${#verdicts[@]} ? count - 1 : \${#verdicts[@]} - 1 ))]}" + mkdir -p "$(dirname "$review_file")" + { + printf '# Claude review %s\\n\\n' "$count" + [[ -n "\${DEVLOOP_TEST_NO_VERDICT:-}" ]] || printf 'Verdict: %s\\n\\n' "$verdict" + if [[ -z "\${DEVLOOP_TEST_NO_MATRIX:-}" ]]; then + printf '## Acceptance matrix\\n\\n' + printf -- '- AC1: PASS - mock evidence\\n\\n' + fi + printf '## Findings\\n\\n' + if [[ "$verdict" == "ACCEPT" ]]; then printf 'None\\n\\n'; else printf '1. [must-fix] devloop.ts:1 - repeated fixture finding. Root cause: mock review. Principle: deterministic retry behavior.\\n\\n'; fi + printf '## Missing tests\\n\\n- None\\n\\n## Fix instructions\\n\\n' + if [[ "$verdict" == "ACCEPT" ]]; then printf 'None\\n\\n'; else printf '1. Fix the repeated fixture finding.\\n\\n'; fi + printf '## Notes\\n\\n- None\\n' + } > "$review_file" +else + report_file=$(printf '%s\\n' "$prompt" | sed -n 's/^Write the report to \\([^ ]*\\).*/\\1/p' | head -n 1) + [[ -z "$report_file" ]] || { mkdir -p "$(dirname "$report_file")"; printf '# mock devloop report\\n' > "$report_file"; } +fi +`, + { mode: 0o755 }, + ); +} + +async function run(repo: string, overrides: Partial = {}) { + const events: Event[] = []; + const result = await runDevloop( + { spec: path.join(repo, ".specs/change.md"), max: 1, reportFormat: "html", strict: true, cwd: repo, ...overrides }, + { event: (event) => void events.push(event) }, + ); + return { result, events }; +} + +async function exists(file: string, expected = true) { + const ok = Boolean(await stat(file).catch(() => false)); + if (expected) expect(ok).toBe(true); + return ok; +} + +async function real(file: string) { + return realpath(file); +} diff --git a/tests/devloop_test.sh b/tests/devloop_test.sh index 4e06387..58ac515 100755 --- a/tests/devloop_test.sh +++ b/tests/devloop_test.sh @@ -2,497 +2,4 @@ set -euo pipefail ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) -DEVLOOP="$ROOT/devloop.sh" -TMP_ROOT=${TMPDIR:-/tmp} -TEST_TMP=$(mktemp -d "$TMP_ROOT/devloop-test.XXXXXX") - -total=0 -passed=0 - -cleanup() { - rm -rf "$TEST_TMP" -} -trap cleanup EXIT - -fail() { - printf 'FAIL: %s\n' "$*" >&2 - exit 1 -} - -assert_eq() { - local expected="$1" - local actual="$2" - local message="$3" - - [[ "$actual" == "$expected" ]] || fail "$message: expected '$expected', got '$actual'" -} - -assert_file_exists() { - local path="$1" - - [[ -f "$path" ]] || fail "expected file to exist: $path" -} - -assert_file_not_exists() { - local path="$1" - - [[ ! -e "$path" ]] || fail "expected file not to exist: $path" -} - -assert_contains() { - local needle="$1" - local path="$2" - - grep -Fq -- "$needle" "$path" || { - printf '%s\n' "--- $path ---" >&2 - sed -n '1,220p' "$path" >&2 || true - fail "expected '$path' to contain: $needle" - } -} - -assert_not_contains() { - local needle="$1" - local path="$2" - - ! grep -Fq -- "$needle" "$path" || fail "did not expect '$path' to contain: $needle" -} - -make_repo() { - local name="$1" - local spec_name="${2:-change.md}" - local repo="$TEST_TMP/$name/repo" - - mkdir -p "$repo/.specs" - git init -q "$repo" - git -C "$repo" symbolic-ref HEAD refs/heads/main - ( - cd "$repo" - git config user.email "devloop-test@example.com" - git config user.name "devloop test" - printf '# Fixture\n' > README.md - git add README.md - git commit -q -m init - ) - - cat > "$repo/.specs/$spec_name" <<'EOF' -# Fixture spec - -## Acceptance criteria -1. The loop runs deterministically under test. -EOF - - printf '%s\n' "$repo" -} - -install_mocks() { - local bin_dir="$1" - - mkdir -p "$bin_dir" - - cat > "$bin_dir/codex" <<'EOF' -#!/usr/bin/env bash -set -euo pipefail - -: "${DEVLOOP_TEST_STATE:?DEVLOOP_TEST_STATE is required}" -prompt=$(cat) -session_id="${DEVLOOP_TEST_CODEX_SESSION_ID:-00000000-0000-4000-8000-000000000001}" - -mkdir -p "$DEVLOOP_TEST_STATE" -count_file="$DEVLOOP_TEST_STATE/codex-count" -count=$(( $(cat "$count_file" 2>/dev/null || echo 0) + 1 )) -printf '%s\n' "$count" > "$count_file" -printf '%s\n' "$*" >> "$DEVLOOP_TEST_STATE/codex-args.log" -printf '%s\n---\n' "$prompt" >> "$DEVLOOP_TEST_STATE/codex-prompts.log" - -track=$(printf '%s\n' "$prompt" | awk -F': ' '/^Track: /{print $2; exit}') -if [[ -n "$track" ]]; then - { - printf '\n## Pass %s - mock codex\n' "$count" - printf -- '- changed files: fixture\n' - printf -- '- verification: fixture\n' - } >> "$track" -fi - -printf 'codex pass %s\n' "$count" -printf 'To continue this session, run codex exec resume %s\n' "$session_id" -EOF - - cat > "$bin_dir/claude" <<'EOF' -#!/usr/bin/env bash -set -euo pipefail - -: "${DEVLOOP_TEST_STATE:?DEVLOOP_TEST_STATE is required}" -prompt=$(cat) - -mkdir -p "$DEVLOOP_TEST_STATE" -total_file="$DEVLOOP_TEST_STATE/claude-total-count" -total=$(( $(cat "$total_file" 2>/dev/null || echo 0) + 1 )) -printf '%s\n' "$total" > "$total_file" -printf '%s\n' "$*" >> "$DEVLOOP_TEST_STATE/claude-args.log" -printf '%s\n---\n' "$prompt" >> "$DEVLOOP_TEST_STATE/claude-prompts.log" - -if [[ "$prompt" == *"Output path:"* ]]; then - review_file=$(printf '%s\n' "$prompt" | awk -F': ' '/^Output path: /{print $2; exit}') - review_count_file="$DEVLOOP_TEST_STATE/claude-review-count" - review_count=$(( $(cat "$review_count_file" 2>/dev/null || echo 0) + 1 )) - printf '%s\n' "$review_count" > "$review_count_file" - - IFS=',' read -r -a verdicts <<< "${DEVLOOP_TEST_VERDICTS:-ACCEPT}" - if (( review_count <= ${#verdicts[@]} )); then - verdict="${verdicts[$((review_count - 1))]}" - else - verdict="${verdicts[$((${#verdicts[@]} - 1))]}" - fi - - mkdir -p "$(dirname "$review_file")" - { - printf '# Claude review %s\n\n' "$review_count" - printf 'Verdict: %s\n\n' "$verdict" - printf '## Findings\n\n' - if [[ "$verdict" == "ACCEPT" ]]; then - printf 'None\n\n' - else - printf '1. [should-fix] devloop.sh:10 - repeated fixture finding. Root cause: mock review. Principle: deterministic retry behavior.\n\n' - fi - printf '## Missing tests\n\n' - printf -- '- None\n\n' - printf '## Fix instructions\n\n' - if [[ "$verdict" == "ACCEPT" ]]; then - printf 'None\n\n' - else - printf '1. Fix the repeated fixture finding.\n\n' - fi - printf '## Notes\n\n' - printf -- '- None\n' - } > "$review_file" -else - report_line=$(printf '%s\n' "$prompt" | awk '/^Write the report to /{print; exit}') - report_file="${report_line#Write the report to }" - report_file="${report_file%% in this structure.*}" - report_file="${report_file%% in this markdown structure.*}" - report_file="${report_file%% as valid standalone HTML.*}" - [[ -n "$report_file" ]] || exit 0 - mkdir -p "$(dirname "$report_file")" - { - printf '# mock devloop report\n\n' - printf 'Report synthesized by test double.\n' - } > "$report_file" -fi -EOF - - chmod +x "$bin_dir/codex" "$bin_dir/claude" -} - -run_devloop() { - local cwd="$1" - local stdout="$2" - local stderr="$3" - shift 3 - - set +e - (cd "$cwd" && "$BASH" "$DEVLOOP" "$@") >"$stdout" 2>"$stderr" - local rc=$? - set -e - return "$rc" -} - -test_usage_when_spec_missing() { - local out="$TEST_TMP/usage.out" - local err="$TEST_TMP/usage.err" - - set +e - "$BASH" "$DEVLOOP" >"$out" 2>"$err" - local rc=$? - set -e - - assert_eq 2 "$rc" "missing spec exit code" - assert_contains "usage: devloop.sh [--report-format html|markdown] [max=5]" "$err" - assert_not_contains "claude not on PATH" "$err" -} - -test_missing_claude_is_reported_before_git_setup() { - local work="$TEST_TMP/missing-claude" - local out="$work.out" - local err="$work.err" - local spec="$work/spec.md" - - mkdir -p "$work" - printf '# Spec\n' > "$spec" - - set +e - PATH="$work" "$BASH" "$DEVLOOP" "$spec" >"$out" 2>"$err" - local rc=$? - set -e - - assert_eq 2 "$rc" "missing claude exit code" - assert_contains "claude not on PATH" "$err" -} - -test_invalid_max_is_usage_error() { - local work="$TEST_TMP/invalid-max" - local out="$work.out" - local err="$work.err" - local spec="$work/spec.md" - - mkdir -p "$work" - printf '# Spec\n' > "$spec" - - set +e - "$BASH" "$DEVLOOP" "$spec" nope >"$out" 2>"$err" - local rc=$? - set -e - - assert_eq 2 "$rc" "invalid max exit code" - assert_contains "max must be an integer between 1 and 10" "$err" - assert_not_contains "unbound variable" "$err" -} - -test_report_alias_is_not_accepted() { - local work="$TEST_TMP/report-alias" - local out="$work.out" - local err="$work.err" - local spec="$work/spec.md" - - mkdir -p "$work" - printf '# Spec\n' > "$spec" - - set +e - "$BASH" "$DEVLOOP" --report markdown "$spec" >"$out" 2>"$err" - local rc=$? - set -e - - assert_eq 2 "$rc" "report alias exit code" - assert_contains "unknown option: --report" "$err" -} - -test_accept_writes_core_artifacts() { - local repo repo_real state bin out err rc - repo=$(make_repo "accept") - repo_real=$(cd "$repo" && pwd -P) - state="$TEST_TMP/accept/state" - bin="$TEST_TMP/accept/bin" - out="$TEST_TMP/accept.out" - err="$TEST_TMP/accept.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \ - run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 5 - rc=$? - set -e - - assert_eq 0 "$rc" "accepted loop exit code" - assert_contains "result: accepted" "$out" - assert_contains "passes: 1 / 5" "$out" - assert_file_exists "$repo/.codex/tracks/change.md" - assert_file_exists "$repo/.codex/reviews/change-r1.md" - assert_file_exists "$repo/.codex/reports/change.html" - assert_file_exists "$repo/.codex/sessions/change-codex.id" - assert_file_exists "$repo/.codex/sessions/change-claude.id" - assert_contains "Verdict: ACCEPT" "$repo/.codex/reviews/change-r1.md" - assert_contains "## Pass 1 - mock codex" "$repo/.codex/tracks/change.md" - assert_contains "- report-format: html" "$repo/.codex/tracks/change.md" - assert_contains "valid standalone HTML" "$state/claude-prompts.log" - assert_contains "3-5 sharp, transferable lessons" "$state/claude-prompts.log" - assert_contains "exec --dangerously-bypass-approvals-and-sandbox -C $repo_real -" "$state/codex-args.log" - assert_eq "00000000-0000-4000-8000-000000000001" "$(cat "$repo/.codex/sessions/change-codex.id")" "codex session id" - assert_eq 1 "$(grep -c -- '--session-id' "$state/claude-args.log")" "claude initial session count" - assert_eq 1 "$(grep -c -- '--resume' "$state/claude-args.log")" "claude report resume count" - assert_eq 1 "$(cat "$state/codex-count")" "codex call count" - assert_eq 1 "$(cat "$state/claude-review-count")" "claude review count" - assert_eq 2 "$(cat "$state/claude-total-count")" "claude total count including synthesis" -} - -test_reject_then_accept_runs_fix_pass() { - local repo state bin out err rc - repo=$(make_repo "reject-accept") - state="$TEST_TMP/reject-accept/state" - bin="$TEST_TMP/reject-accept/bin" - out="$TEST_TMP/reject-accept.out" - err="$TEST_TMP/reject-accept.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="REJECT,ACCEPT" \ - run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 3 - rc=$? - set -e - - assert_eq 0 "$rc" "reject then accept exit code" - assert_contains "passes: 2 / 3" "$out" - assert_contains "Verdict: REJECT" "$repo/.codex/reviews/change-r1.md" - assert_contains "Verdict: ACCEPT" "$repo/.codex/reviews/change-r2.md" - assert_contains "Fix only the findings in the review." "$state/codex-prompts.log" - assert_contains "Review: .codex/reviews/change-r1.md" "$state/codex-prompts.log" - assert_contains "exec resume --dangerously-bypass-approvals-and-sandbox 00000000-0000-4000-8000-000000000001 -" "$state/codex-args.log" - assert_eq 1 "$(grep -c -- '--session-id' "$state/claude-args.log")" "claude initial session count" - assert_eq 2 "$(grep -c -- '--resume' "$state/claude-args.log")" "claude resumed review and report count" - assert_eq 2 "$(cat "$state/codex-count")" "codex call count" - assert_eq 2 "$(cat "$state/claude-review-count")" "claude review count" -} - -test_spec_slug_with_spaces_preserves_prior_reviews() { - local repo state bin out err rc - repo=$(make_repo "space-spec" "change with spaces.md") - state="$TEST_TMP/space-spec/state" - bin="$TEST_TMP/space-spec/bin" - out="$TEST_TMP/space-spec.out" - err="$TEST_TMP/space-spec.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="REJECT,ACCEPT" \ - run_devloop "$repo" "$out" "$err" "$repo/.specs/change with spaces.md" 2 - rc=$? - set -e - - assert_eq 0 "$rc" "space slug loop exit code" - assert_file_exists "$repo/.codex/reviews/change with spaces-r1.md" - assert_file_exists "$repo/.codex/reviews/change with spaces-r2.md" - assert_contains "Prior reviews:" "$state/claude-prompts.log" - assert_contains "- .codex/reviews/change with spaces-r1.md" "$state/claude-prompts.log" - assert_contains "- .codex/reviews/change with spaces-r2.md" "$state/claude-prompts.log" - assert_contains "Review files:" "$state/claude-prompts.log" -} - -test_invocation_repo_controls_workdir_not_spec_location() { - local repo repo_real spec_repo spec_path state bin out err rc - repo=$(make_repo "invocation-repo") - repo_real=$(cd "$repo" && pwd -P) - spec_repo=$(make_repo "spec-repo") - spec_path=$(cd "$spec_repo/.specs" && pwd)/change.md - state="$TEST_TMP/invocation-repo/state" - bin="$TEST_TMP/invocation-repo/bin" - out="$TEST_TMP/invocation-repo.out" - err="$TEST_TMP/invocation-repo.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \ - run_devloop "$repo" "$out" "$err" "$spec_repo/.specs/change.md" 1 - rc=$? - set -e - - assert_eq 0 "$rc" "invocation repo exit code" - assert_file_exists "$repo/.codex/tracks/change.md" - assert_file_not_exists "$spec_repo/.codex" - assert_contains "- spec: $spec_path" "$repo/.codex/tracks/change.md" - assert_contains "- cwd: $repo_real" "$repo/.codex/tracks/change.md" - assert_contains "exec --dangerously-bypass-approvals-and-sandbox -C $repo_real -" "$state/codex-args.log" -} - -test_markdown_report_option() { - local repo state bin out err rc - repo=$(make_repo "markdown-report") - state="$TEST_TMP/markdown-report/state" - bin="$TEST_TMP/markdown-report/bin" - out="$TEST_TMP/markdown-report.out" - err="$TEST_TMP/markdown-report.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \ - run_devloop "$repo" "$out" "$err" --report-format markdown "$repo/.specs/change.md" 1 - rc=$? - set -e - - assert_eq 0 "$rc" "markdown report exit code" - assert_file_exists "$repo/.codex/reports/change.md" - assert_file_not_exists "$repo/.codex/reports/change.html" - assert_contains "report: .codex/reports/change.md" "$out" - assert_contains "- report-format: markdown" "$repo/.codex/tracks/change.md" - assert_contains "in this markdown structure" "$state/claude-prompts.log" -} - -test_repeated_reject_findings_stall_the_loop() { - local repo state bin out err rc - repo=$(make_repo "stall") - state="$TEST_TMP/stall/state" - bin="$TEST_TMP/stall/bin" - out="$TEST_TMP/stall.out" - err="$TEST_TMP/stall.err" - install_mocks "$bin" - - set +e - PATH="$bin:/usr/bin:/bin" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="REJECT,REJECT,REJECT" \ - run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 5 - rc=$? - set -e - - assert_eq 1 "$rc" "stalled loop exit code" - assert_contains "result: stalled" "$out" - assert_contains "passes: 2 / 5" "$out" - assert_file_exists "$repo/.codex/reviews/change-r1.md" - assert_file_exists "$repo/.codex/reviews/change-r2.md" - assert_eq 2 "$(cat "$state/codex-count")" "codex call count before stall" - assert_eq 2 "$(cat "$state/claude-review-count")" "claude review count before stall" -} - -test_max_is_clamped_to_one() { - local repo state bin out err rc - repo=$(make_repo "max-clamp") - state="$TEST_TMP/max-clamp/state" - bin="$TEST_TMP/max-clamp/bin" - out="$TEST_TMP/max-clamp.out" - err="$TEST_TMP/max-clamp.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \ - run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 0 - rc=$? - set -e - - assert_eq 0 "$rc" "max clamp exit code" - assert_contains "passes: 1 / 1" "$out" - assert_contains "- max: 1" "$repo/.codex/tracks/change.md" -} - -test_leading_zero_max_is_decimal() { - local repo state bin out err rc - repo=$(make_repo "leading-zero-max") - state="$TEST_TMP/leading-zero-max/state" - bin="$TEST_TMP/leading-zero-max/bin" - out="$TEST_TMP/leading-zero-max.out" - err="$TEST_TMP/leading-zero-max.err" - install_mocks "$bin" - - set +e - PATH="$bin:$PATH" DEVLOOP_TEST_STATE="$state" DEVLOOP_TEST_VERDICTS="ACCEPT" \ - run_devloop "$repo" "$out" "$err" "$repo/.specs/change.md" 08 - rc=$? - set -e - - assert_eq 0 "$rc" "leading zero max exit code" - assert_contains "passes: 1 / 8" "$out" - assert_contains "- max: 8" "$repo/.codex/tracks/change.md" -} - -run_test() { - local name="$1" - - total=$((total + 1)) - printf 'test %s ... ' "$name" - if ( "$name" ); then - passed=$((passed + 1)) - printf 'ok\n' - else - printf 'not ok\n' - return 1 - fi -} - -run_test test_usage_when_spec_missing -run_test test_missing_claude_is_reported_before_git_setup -run_test test_invalid_max_is_usage_error -run_test test_report_alias_is_not_accepted -run_test test_accept_writes_core_artifacts -run_test test_reject_then_accept_runs_fix_pass -run_test test_spec_slug_with_spaces_preserves_prior_reviews -run_test test_invocation_repo_controls_workdir_not_spec_location -run_test test_markdown_report_option -run_test test_repeated_reject_findings_stall_the_loop -run_test test_max_is_clamped_to_one -run_test test_leading_zero_max_is_decimal - -printf '\n%d/%d tests passed\n' "$passed" "$total" +exec bun test "$ROOT/tests/devloop.test.ts" diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..b1f1259 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "allowImportingTsExtensions": true, + "isolatedModules": true, + "module": "Preserve", + "moduleResolution": "Bundler", + "noEmit": true, + "skipLibCheck": true, + "strict": true, + "target": "ES2022", + "types": ["bun-types"] + }, + "include": ["src/**/*.ts", "tests/**/*.ts"] +} From c5401315dbda4aa1902965373ef2179e72f22fce Mon Sep 17 00:00:00 2001 From: satyaborg Date: Mon, 25 May 2026 17:39:26 +1000 Subject: [PATCH 02/11] chore: document bun devloop workflow --- .gitignore | 2 + README.md | 128 ++++++++++++++++++++++++----------------------------- 2 files changed, 60 insertions(+), 70 deletions(-) diff --git a/.gitignore b/.gitignore index f99b306..30726a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .codex/ .specs/ .DS_Store +coverage/ +node_modules/ diff --git a/README.md b/README.md index 1b87a4c..292c6e9 100644 --- a/README.md +++ b/README.md @@ -1,112 +1,100 @@ # devloop -Spec in, accepted code out. Codex implements, Claude reviews, loop until ACCEPT, stall, or max turns. One bash file. +Spec in, accepted code out. Codex implements, Claude reviews, loop until ACCEPT, stall, unclear, error, or max turns. +```sh +devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5] +bun src/cli.ts [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5] ``` -devloop.sh [--report-format html|markdown] path/to/spec.md [max=5] -``` - -## Why -Skills-as-orchestrators drift. The LLM driver has discretion to skip steps and often does, especially under load. A shell state machine cannot. devloop is the same workflow as the [`/devloop`](https://github.com/anthropics/claude-code) skill it replaces, minus the discretion. +## Defaults -What stays in the LLMs (because they are good at it): -- Codex: implementation, design decisions, fix passes -- Claude: review judgment, verdict, final synthesis +- strict mode is on +- HTML report output is on +- max turns defaults to 5 and is clamped to 1-10 +- interactive terminals use the OpenTUI view +- non-TTY runs use plain output +- accepted runs create a local `devloop/` branch and a local Conventional Commit -What moves to bash (because the LLM does not need discretion here): -- Sequencing the loop -- Spawning each agent headless -- Reusing one Codex implementation session and one Claude review session -- Parsing the verdict -- Detecting stalls -- File path conventions -- Stopping at max turns +Use `--plain` for CI or debugging. Use `--tui` to force the collapsed terminal UI. Use `--no-strict` only when you deliberately want to bypass strict acceptance-gate behavior. -## Quick start +## Strict Mode -Prereqs on PATH: `claude`, `codex`, `git`. - -```sh -# 1. write a spec -cat > .specs/add-foo-flag.md <<'EOF' -# Add foo flag to bar config +Strict mode requires the spec to contain: +```md ## Acceptance criteria 1. ... -EOF - -# 2. loop -./devloop.sh .specs/add-foo-flag.md ``` -Defaults to unattended (`--dangerously-bypass-approvals-and-sandbox` for codex, `--dangerously-skip-permissions` for claude). Run inside a git worktree, not your main checkout. +Codex is prompted to follow a regression-first lifecycle: tests first, red phase when behavior changes, smallest implementation, targeted tests, full tests, lint/typecheck, and 100% coverage when the target project exposes coverage tooling. -The implementation worktree is resolved from the directory where you invoke `devloop.sh`, not from the spec file's location. The spec can live elsewhere; Codex and Claude are pointed at the current worktree. +Claude must write an acceptance matrix: -## The loop +```md +## Acceptance matrix -``` -pass 1: codex implements against spec - claude reviews → ACCEPT | REJECT | UNCLEAR -pass N: codex fixes findings from review N-1 - claude reviews -exit: ACCEPT → 0 - stall | max | unclear → 1 - codex/claude error → 2 +- AC1: PASS - evidence ``` -Stall = normalized findings hash matches the prior REJECT. +`Verdict: ACCEPT` is only honored in strict mode when every parsed acceptance criterion has a passing matrix row. Missing evidence becomes `unclear`. -## Sessions +## Local Commit -Each spec slug gets two persisted sessions: +On `accepted`, devloop creates or reuses a local branch: +```text +devloop/ ``` -.codex/sessions/-codex.id -.codex/sessions/-claude.id + +It commits only files that were not already dirty when the run started, and it excludes `.codex/` artifacts from the commit. The generated commit message uses a Conventional Commit type: + +```text +feat: ``` -Pass 1 starts the Codex implementation session and records the resumable session ID. Later fix passes call `codex exec resume `, so Codex keeps the implementation context. Claude uses one review session for every review pass and the final report. +No push or PR is performed. ## Artifacts -``` -.codex/tracks/.md codex's running notes per pass -.codex/reviews/-r.md one per review turn -.codex/reports/.html synthesized post-mortem by default -.codex/reports/.md synthesized post-mortem with --report-format markdown -.codex/logs/ raw agent stdout for debugging +```text +.codex/tracks/.md +.codex/reviews/-r.md +.codex/reports/.html +.codex/reports/.md +.codex/logs/ +.codex/sessions/ ``` -## Tests +Report format stays deliberately narrow: ```sh -./tests/devloop_test.sh +devloop --report-format html .specs/change.md +devloop --report-format markdown .specs/change.md +devloop --md .specs/change.md ``` -The tests run the shell state machine against temporary git repos with mocked `codex` and `claude` commands, so they do not call either agent. +Reports include top-level metadata: result, passes, repository, spec, base branch, starting branch, final branch, local commit, commit message, Codex session ID, Claude session ID, track, and review files. -## The report +## Sessions -Not a mechanical concat. Claude is called one more time in the same review session with the spec + track + all reviews and writes a learning-oriented post-mortem: +Each spec slug gets one Codex session and one Claude session: -- **Shape of the problem** — what the spec really asked for, alternatives ruled out -- **What was built** — design choices and the tradeoffs weighed -- **What review caught (and why it mattered)** — symptom → root cause → principle violated, grouped into patterns -- **What to remember next time** — transferable lessons in `When X, prefer Y because Z` form -- **Residual risk** — concrete, not generic +```text +.codex/sessions/-codex.id +.codex/sessions/-claude.id +``` -The "why" is enforced in the prompts: codex must explain decisions, claude must articulate the principle behind each finding ("if you cannot articulate the principle, the finding is too shallow — drop it or sharpen it"). +Pass 1 starts the sessions. Later fix passes resume them. -## Caveats +## Development -- **Unattended = trusts both agents.** Use worktrees. -- **Sessions persist per spec slug.** Delete the matching files in `.codex/sessions/` when you want a fresh Codex or Claude context for the same spec filename. -- **No spec writing.** Deliberate. Write the spec yourself (or via an interview skill) and hand the path in. -- **Stall detection is hash-based.** Cosmetic rewording of identical findings will defeat it. -- **Base branch is auto-guessed** (`origin/HEAD` → `main` → `master`). Edit the `BASE=` line for stacked branches. +Prereqs: `bun`, `codex`, `claude`, `git`. -## License +```sh +bun install +bun run typecheck +bun test +``` -MIT +`bun test` enforces 100% line/function/statement coverage for the TypeScript core. From a84cab7118b67d27e78e991a2571fc9c853b33b2 Mon Sep 17 00:00:00 2001 From: satyaborg Date: Mon, 25 May 2026 17:57:58 +1000 Subject: [PATCH 03/11] chore: clarify readme --- README.md | 64 +++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 292c6e9..975dcd7 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,36 @@ # devloop -Spec in, accepted code out. Codex implements, Claude reviews, loop until ACCEPT, stall, unclear, error, or max turns. +Codex implements. Claude reviews. devloop runs the loop until the work is accepted, stalls, becomes unclear, hits max turns, or an agent fails. ```sh -devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5] -bun src/cli.ts [--plain|--tui] [--no-strict] [--report-format html|markdown] path/to/spec.md [max=5] +devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] spec.md [max=5] ``` +Run from the target git worktree. The spec may live anywhere. + ## Defaults - strict mode is on -- HTML report output is on -- max turns defaults to 5 and is clamped to 1-10 -- interactive terminals use the OpenTUI view +- HTML reports are on +- max turns default to 5 and clamp to 1-10 +- TTY runs use the collapsed OpenTUI view - non-TTY runs use plain output -- accepted runs create a local `devloop/` branch and a local Conventional Commit +- accepted runs create a local branch and local commit -Use `--plain` for CI or debugging. Use `--tui` to force the collapsed terminal UI. Use `--no-strict` only when you deliberately want to bypass strict acceptance-gate behavior. +Use `--plain` for CI. Use `--tui` to force the TUI. Use `--no-strict` only when you explicitly want weaker gates. -## Strict Mode +## Strict Acceptance -Strict mode requires the spec to contain: +Strict mode requires: ```md ## Acceptance criteria 1. ... ``` -Codex is prompted to follow a regression-first lifecycle: tests first, red phase when behavior changes, smallest implementation, targeted tests, full tests, lint/typecheck, and 100% coverage when the target project exposes coverage tooling. +Codex is prompted to work regression-first: add or update tests, observe the red phase when behavior changes, implement the smallest fix, then run targeted tests, full tests, lint/typecheck, and coverage. -Claude must write an acceptance matrix: +Claude must write: ```md ## Acceptance matrix @@ -37,25 +38,9 @@ Claude must write an acceptance matrix: - AC1: PASS - evidence ``` -`Verdict: ACCEPT` is only honored in strict mode when every parsed acceptance criterion has a passing matrix row. Missing evidence becomes `unclear`. - -## Local Commit +In strict mode, `Verdict: ACCEPT` only counts when every parsed criterion has a `PASS` matrix row. Missing evidence exits as `unclear`. -On `accepted`, devloop creates or reuses a local branch: - -```text -devloop/ -``` - -It commits only files that were not already dirty when the run started, and it excludes `.codex/` artifacts from the commit. The generated commit message uses a Conventional Commit type: - -```text -feat: -``` - -No push or PR is performed. - -## Artifacts +## Output ```text .codex/tracks/.md @@ -66,7 +51,7 @@ No push or PR is performed. .codex/sessions/ ``` -Report format stays deliberately narrow: +Reports can be HTML or Markdown: ```sh devloop --report-format html .specs/change.md @@ -74,18 +59,23 @@ devloop --report-format markdown .specs/change.md devloop --md .specs/change.md ``` -Reports include top-level metadata: result, passes, repository, spec, base branch, starting branch, final branch, local commit, commit message, Codex session ID, Claude session ID, track, and review files. +Reports include result, passes, repo, spec, base branch, starting branch, final branch, local commit, commit message, Codex session ID, Claude session ID, track, and review files. + +## Local Commit + +On `accepted`, devloop creates or reuses: -## Sessions +```text +devloop/ +``` -Each spec slug gets one Codex session and one Claude session: +It commits only files that were clean when the run started and excludes `.codex/`. Commit messages are Conventional Commit style: ```text -.codex/sessions/-codex.id -.codex/sessions/-claude.id +feat: ``` -Pass 1 starts the sessions. Later fix passes resume them. +devloop does not push or open a PR. ## Development From cfbfea0bcffaee11310178f76f4a72a73557eb37 Mon Sep 17 00:00:00 2001 From: satyaborg Date: Mon, 25 May 2026 18:06:21 +1000 Subject: [PATCH 04/11] chore: add local installer --- README.md | 12 +++++++++++- package.json | 1 + scripts/install.ts | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 scripts/install.ts diff --git a/README.md b/README.md index 975dcd7..40194d5 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,16 @@ devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] spec.md [m Run from the target git worktree. The spec may live anywhere. +## Install + +From this checkout: + +```sh +bun scripts/install.ts +``` + +That installs dependencies and links `devloop` into `~/.local/bin`. Set `DEVLOOP_BIN_DIR` to choose another bin directory. + ## Defaults - strict mode is on @@ -82,7 +92,7 @@ devloop does not push or open a PR. Prereqs: `bun`, `codex`, `claude`, `git`. ```sh -bun install +bun scripts/install.ts bun run typecheck bun test ``` diff --git a/package.json b/package.json index e2061ac..2c2799f 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "devloop": "./src/cli.ts" }, "scripts": { + "install:local": "bun scripts/install.ts", "test": "bun test", "typecheck": "tsc --noEmit" }, diff --git a/scripts/install.ts b/scripts/install.ts new file mode 100644 index 0000000..34ab3ea --- /dev/null +++ b/scripts/install.ts @@ -0,0 +1,42 @@ +#!/usr/bin/env bun +import { chmod, mkdir, readlink, rm, symlink } from "node:fs/promises"; +import { homedir } from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const cli = path.join(root, "src", "cli.ts"); +const binDir = process.env.DEVLOOP_BIN_DIR ?? path.join(homedir(), ".local", "bin"); +const link = path.join(binDir, "devloop"); + +await run(["bun", "install"], root); +await mkdir(binDir, { recursive: true }); +await chmod(cli, 0o755); + +const existing = await readlink(link).catch(() => ""); +if (existing && path.resolve(binDir, existing) === cli) { + console.log(`devloop already points to ${cli}`); +} else { + await rm(link, { force: true }); + await symlink(cli, link); + console.log(`installed devloop -> ${cli}`); +} + +if (!pathInEnv(binDir)) { + console.log(""); + console.log(`${binDir} is not on PATH. Add this to ~/.zshrc:`); + console.log(`export PATH="${binDir}:$PATH"`); +} + +console.log(""); +console.log("try: devloop --help"); + +async function run(cmd: string[], cwd: string) { + const proc = Bun.spawn(cmd, { cwd, stdout: "inherit", stderr: "inherit" }); + const code = await proc.exited; + if (code !== 0) process.exit(code); +} + +function pathInEnv(dir: string) { + return (process.env.PATH ?? "").split(path.delimiter).some((entry) => path.resolve(entry) === dir); +} From 4e551816a2446ac041100b22c988eaa0e3e5a398 Mon Sep 17 00:00:00 2001 From: satyaborg Date: Mon, 25 May 2026 18:24:45 +1000 Subject: [PATCH 05/11] feat: show default cli welcome --- README.md | 1 + src/cli.ts | 7 ++++++- src/devloop.ts | 31 +++++++++++++++++++++++++++++++ src/tui.ts | 12 ++---------- tests/devloop.test.ts | 9 ++++++++- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 40194d5..9bc5974 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ That installs dependencies and links `devloop` into `~/.local/bin`. Set `DEVLOOP - TTY runs use the collapsed OpenTUI view - non-TTY runs use plain output - accepted runs create a local branch and local commit +- no-arg `devloop` shows the logo and common commands Use `--plain` for CI. Use `--tui` to force the TUI. Use `--no-strict` only when you explicitly want weaker gates. diff --git a/src/cli.ts b/src/cli.ts index 4dfe73b..38c17c0 100755 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,8 +1,13 @@ #!/usr/bin/env bun -import { parseArgs, runDevloop, type Event, type Sink } from "./devloop.ts"; +import { parseArgs, runDevloop, welcome, type Event, type Sink } from "./devloop.ts"; import { createTuiSink } from "./tui.ts"; const argv = process.argv.slice(2); +if (argv.length === 0 || argv.includes("-h") || argv.includes("--help")) { + console.log(welcome()); + process.exit(0); +} + const parsed = parseArgs(argv); if (typeof parsed === "string") { diff --git a/src/devloop.ts b/src/devloop.ts index f5b952f..ea49be3 100644 --- a/src/devloop.ts +++ b/src/devloop.ts @@ -48,6 +48,37 @@ export type Sink = { event(event: Event): void | Promise; close?(): void | type RunResult = { code: number; output: string }; type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise; +export const LOGO = [ + " ▐▌▗▞▀▚▖▄ ▄ █ ▄▄▄ ▄▄▄ ▄▄▄▄ ", + " ▐▌▐▛▀▀▘█ █ █ █ █ █ █ █ █ ", + "▗▞▀▜▌▝▚▄▄▖ ▀▄▀ █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ", + "▝▚▄▟▌ █ █ ", + " ▀", +].join("\n"); + +export function welcome() { + return `${LOGO} + +devloop runs a strict Codex implement -> Claude review loop. + +Usage: + devloop [options] [max=5] + +Common commands: + devloop .specs/change.md + devloop --tui .specs/change.md + devloop --plain .specs/change.md + devloop --report-format markdown .specs/change.md 3 + bun scripts/install.ts + +Options: + --tui force the collapsed TUI + --plain force plain output + --report-format html|markdown choose report format + --no-strict weaken acceptance gates + -h, --help show this screen`; +} + export function parseArgs(argv: string[], cwd = process.cwd()): Options | string { let reportFormat: ReportFormat = "html"; let strict = true; diff --git a/src/tui.ts b/src/tui.ts index 0403fe2..abef745 100644 --- a/src/tui.ts +++ b/src/tui.ts @@ -1,15 +1,7 @@ -import type { Event, Result, Sink } from "./devloop.ts"; +import { LOGO, type Event, type Result, type Sink } from "./devloop.ts"; type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean }; -const LOGO = [ - " ▐▌▗▞▀▚▖▄ ▄ █ ▄▄▄ ▄▄▄ ▄▄▄▄ ", - " ▐▌▐▛▀▀▘█ █ █ █ █ █ █ █ █ ", - "▗▞▀▜▌▝▚▄▄▖ ▀▄▀ █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ", - "▝▚▄▟▌ █ █ ", - " ▀", -]; - export async function createTuiSink(): Promise { const { TextRenderable, createCliRenderer } = await import("@opentui/core"); const renderer = await createCliRenderer({ exitOnCtrlC: true, consoleMode: "disabled", screenMode: "alternate-screen" }); @@ -56,7 +48,7 @@ export function view(rows: Row[], selected: number, result?: Result) { return item.open ? [head, ...item.lines.slice(-80).map((line) => ` ${line}`)] : [head]; }); const tail = result ? ["", `result: ${result.status}`, `passes: ${result.passes} / ${result.max}`, `branch: ${result.branch}`, `commit: ${result.commit || "none"}`, `report: ${result.report}`, `track: ${result.track}`] : ["", "enter toggles logs, j/k moves"]; - return [...LOGO, "", ...body, ...tail].join("\n"); + return [LOGO, "", ...body, ...tail].join("\n"); } function row(rows: Row[], id: string) { diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts index b1bd610..0a93651 100644 --- a/tests/devloop.test.ts +++ b/tests/devloop.test.ts @@ -2,7 +2,7 @@ import { afterAll, beforeEach, describe, expect, test } from "bun:test"; import { mkdtemp, readFile, realpath, rm, stat, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import path from "node:path"; -import { parseArgs, parseCriteria, parseVerdict, runDevloop, type Event, type Options } from "../src/devloop.ts"; +import { parseArgs, parseCriteria, parseVerdict, runDevloop, welcome, type Event, type Options } from "../src/devloop.ts"; const root = await mkdtemp(path.join(tmpdir(), "devloop-test.")); let oldPath = process.env.PATH ?? ""; @@ -40,6 +40,13 @@ describe("parsing", () => { expect(parseCriteria("# Spec")).toEqual([]); expect(parseVerdict("Verdict: ACCEPT\n")).toBe("ACCEPT"); }); + + test("renders a useful default screen", () => { + expect(welcome()).toContain("▐▌▗▞▀▚▖"); + expect(welcome()).toContain("Common commands:"); + expect(welcome()).toContain("devloop .specs/change.md"); + expect(welcome()).toContain("bun scripts/install.ts"); + }); }); describe("loop", () => { From 593a0d31af1f88f465312e1928d114e58004dbd4 Mon Sep 17 00:00:00 2001 From: satyaborg Date: Tue, 26 May 2026 12:54:40 +1000 Subject: [PATCH 06/11] chore: update devloop logo --- src/devloop.ts | 11 ++++++----- tests/devloop.test.ts | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/devloop.ts b/src/devloop.ts index ea49be3..7245e00 100644 --- a/src/devloop.ts +++ b/src/devloop.ts @@ -49,11 +49,12 @@ type RunResult = { code: number; output: string }; type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise; export const LOGO = [ - " ▐▌▗▞▀▚▖▄ ▄ █ ▄▄▄ ▄▄▄ ▄▄▄▄ ", - " ▐▌▐▛▀▀▘█ █ █ █ █ █ █ █ █ ", - "▗▞▀▜▌▝▚▄▄▖ ▀▄▀ █ ▀▄▄▄▀ ▀▄▄▄▀ █▄▄▄▀ ", - "▝▚▄▟▌ █ █ ", - " ▀", + " __ __ ", + " ____/ /__ _ __/ /___ ____ ____ ", + " / __ / _ \\ | / / / __ \\/ __ \\/ __ \\", + "/ /_/ / __/ |/ / / /_/ / /_/ / /_/ /", + "\\__,_/\\___/|___/_/\\____/\\____/ .___/ ", + " /_/", ].join("\n"); export function welcome() { diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts index 0a93651..b0e64f1 100644 --- a/tests/devloop.test.ts +++ b/tests/devloop.test.ts @@ -42,7 +42,7 @@ describe("parsing", () => { }); test("renders a useful default screen", () => { - expect(welcome()).toContain("▐▌▗▞▀▚▖"); + expect(welcome()).toContain("____/ /__"); expect(welcome()).toContain("Common commands:"); expect(welcome()).toContain("devloop .specs/change.md"); expect(welcome()).toContain("bun scripts/install.ts"); From 95b9e333d83d3b50dc5d09a233c90385e3b1d7fd Mon Sep 17 00:00:00 2001 From: satyaborg Date: Tue, 26 May 2026 13:06:45 +1000 Subject: [PATCH 07/11] chore: format devloop source --- src/devloop.ts | 482 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 414 insertions(+), 68 deletions(-) diff --git a/src/devloop.ts b/src/devloop.ts index 7245e00..d65293d 100644 --- a/src/devloop.ts +++ b/src/devloop.ts @@ -43,10 +43,19 @@ export type Event = | { type: "done"; id: string; ok: boolean; detail: string } | { type: "result"; result: Result }; -export type Sink = { event(event: Event): void | Promise; close?(): void | Promise }; +export type Sink = { + event(event: Event): void | Promise; + close?(): void | Promise; +}; type RunResult = { code: number; output: string }; -type Runner = (cmd: string, args: string[], input?: string, log?: string, id?: string) => Promise; +type Runner = ( + cmd: string, + args: string[], + input?: string, + log?: string, + id?: string, +) => Promise; export const LOGO = [ " __ __ ", @@ -80,7 +89,10 @@ Options: -h, --help show this screen`; } -export function parseArgs(argv: string[], cwd = process.cwd()): Options | string { +export function parseArgs( + argv: string[], + cwd = process.cwd(), +): Options | string { let reportFormat: ReportFormat = "html"; let strict = true; let spec = ""; @@ -91,7 +103,8 @@ export function parseArgs(argv: string[], cwd = process.cwd()): Options | string const arg = argv[i]!; if (arg === "--report-format") { const value = argv[++i]; - if (value !== "html" && value !== "markdown" && value !== "md") return usage(); + if (value !== "html" && value !== "markdown" && value !== "md") + return usage(); reportFormat = value === "md" ? "markdown" : value; } else if (arg === "--html") reportFormat = "html"; else if (arg === "--markdown" || arg === "--md") reportFormat = "markdown"; @@ -108,8 +121,15 @@ export function parseArgs(argv: string[], cwd = process.cwd()): Options | string } if (!spec) return usage(); - if (!/^[+-]?\d+$/.test(maxRaw)) return "max must be an integer between 1 and 10"; - return { spec, max: clamp(Number.parseInt(maxRaw, 10), 1, 10), reportFormat, strict, cwd }; + if (!/^[+-]?\d+$/.test(maxRaw)) + return "max must be an integer between 1 and 10"; + return { + spec, + max: clamp(Number.parseInt(maxRaw, 10), 1, 10), + reportFormat, + strict, + cwd, + }; } export function usage() { @@ -118,7 +138,9 @@ export function usage() { export function parseCriteria(markdown: string): string[] { const lines = markdown.split(/\r?\n/); - const start = lines.findIndex((line) => /^##\s+acceptance criteria\s*$/i.test(line.trim())); + const start = lines.findIndex((line) => + /^##\s+acceptance criteria\s*$/i.test(line.trim()), + ); if (start < 0) return []; const body = lines.slice(start + 1); const end = body.findIndex((line) => /^##\s+/.test(line)); @@ -129,18 +151,22 @@ export function parseCriteria(markdown: string): string[] { } export function parseVerdict(review: string): Verdict | "" { - return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as Verdict | ""; + return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as + | Verdict + | ""; } export function hasPassingMatrix(review: string, count: number) { if (!/^## Acceptance matrix\s*$/m.test(review)) return false; - return Array.from({ length: count }, (_, i) => new RegExp(`^-\\s*AC${i + 1}:\\s*PASS\\b`, "mi")).every((r) => - r.test(review), - ); + return Array.from( + { length: count }, + (_, i) => new RegExp(`^-\\s*AC${i + 1}:\\s*PASS\\b`, "mi"), + ).every((r) => r.test(review)); } export function findingsHash(review: string) { - const body = review.match(/^## Findings\s*\n([\s\S]*?)(?:\n##\s+|$)/m)?.[1] ?? ""; + const body = + review.match(/^## Findings\s*\n([\s\S]*?)(?:\n##\s+|$)/m)?.[1] ?? ""; const normalized = body .replace(/\d+/g, "") .replace(/[ \t\r\n]+/g, " ") @@ -152,27 +178,56 @@ export function findingsHash(review: string) { return createHash("sha256").update(normalized).digest("hex"); } -export async function runDevloop(options: Options, sink: Sink = { event: () => {} }): Promise { +export async function runDevloop( + options: Options, + sink: Sink = { event: () => {} }, +): Promise { const spec = await absoluteFile(options.spec, options.cwd); const specText = await readFile(spec, "utf8"); const criteria = parseCriteria(specText); - if (options.strict && criteria.length === 0) throw new Error("strict mode requires ## Acceptance criteria"); - await sink.event({ type: "gate", name: "acceptance criteria", ok: criteria.length > 0, detail: `${criteria.length} found` }); - - const repo = (await command("git", ["-C", options.cwd, "rev-parse", "--show-toplevel"])).trim(); - const branch = (await command("git", ["-C", repo, "rev-parse", "--abbrev-ref", "HEAD"])).trim(); + if (options.strict && criteria.length === 0) + throw new Error("strict mode requires ## Acceptance criteria"); + await sink.event({ + type: "gate", + name: "acceptance criteria", + ok: criteria.length > 0, + detail: `${criteria.length} found`, + }); + + const repo = ( + await command("git", ["-C", options.cwd, "rev-parse", "--show-toplevel"]) + ).trim(); + const branch = ( + await command("git", ["-C", repo, "rev-parse", "--abbrev-ref", "HEAD"]) + ).trim(); const base = await baseBranch(repo); const initialDirty = await statusPaths(repo); const slug = path.basename(spec, ".md"); - const dirs = [".codex/tracks", ".codex/reviews", ".codex/reports", ".codex/logs", ".codex/sessions"]; - await Promise.all(dirs.map((dir) => mkdir(path.join(repo, dir), { recursive: true }))); + const dirs = [ + ".codex/tracks", + ".codex/reviews", + ".codex/reports", + ".codex/logs", + ".codex/sessions", + ]; + await Promise.all( + dirs.map((dir) => mkdir(path.join(repo, dir), { recursive: true })), + ); const track = `.codex/tracks/${slug}.md`; const report = `.codex/reports/${slug}.${options.reportFormat === "html" ? "html" : "md"}`; const codexSession = `.codex/sessions/${slug}-codex.id`; const claudeSession = `.codex/sessions/${slug}-claude.id`; const runner = makeRunner(repo, sink); - await initTrack(path.join(repo, track), { spec, cwd: options.cwd, base, branch, max: options.max, reportFormat: options.reportFormat, strict: options.strict }); + await initTrack(path.join(repo, track), { + spec, + cwd: options.cwd, + base, + branch, + max: options.max, + reportFormat: options.reportFormat, + strict: options.strict, + }); let status: Status = "max-turns"; let prior = ""; @@ -184,9 +239,31 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => { for (pass = 1; pass <= options.max; pass++) { const codexLog = `.codex/logs/${slug}-r${pass}-codex.log`; const codexId = `codex-${pass}`; - await sink.event({ type: "step", id: codexId, title: `pass ${pass}/${options.max} codex` }); - const codex = await runCodex(runner, repo, path.join(repo, codexSession), path.join(repo, codexLog), codexPrompt({ spec, track, pass, strict: options.strict, previous: `.codex/reviews/${slug}-r${pass - 1}.md`, criteria })); - await sink.event({ type: "done", id: codexId, ok: codex, detail: codex ? "completed" : "failed" }); + await sink.event({ + type: "step", + id: codexId, + title: `pass ${pass}/${options.max} codex`, + }); + const codex = await runCodex( + runner, + repo, + path.join(repo, codexSession), + path.join(repo, codexLog), + codexPrompt({ + spec, + track, + pass, + strict: options.strict, + previous: `.codex/reviews/${slug}-r${pass - 1}.md`, + criteria, + }), + ); + await sink.event({ + type: "done", + id: codexId, + ok: codex, + detail: codex ? "completed" : "failed", + }); if (!codex) { status = "codex-error"; break; @@ -195,9 +272,33 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => { const review = `.codex/reviews/${slug}-r${pass}.md`; const claudeLog = `.codex/logs/${slug}-r${pass}-claude.log`; const claudeId = `claude-${pass}`; - await sink.event({ type: "step", id: claudeId, title: `pass ${pass}/${options.max} claude review` }); - const ok = await runClaude(runner, repo, path.join(repo, claudeSession), path.join(repo, claudeLog), reviewPrompt({ spec, track, base, pass, output: review, priors: listReviews(slug, pass, options.max), criteria, strict: options.strict })); - await sink.event({ type: "done", id: claudeId, ok, detail: ok ? "completed" : "failed" }); + await sink.event({ + type: "step", + id: claudeId, + title: `pass ${pass}/${options.max} claude review`, + }); + const ok = await runClaude( + runner, + repo, + path.join(repo, claudeSession), + path.join(repo, claudeLog), + reviewPrompt({ + spec, + track, + base, + pass, + output: review, + priors: listReviews(slug, pass, options.max), + criteria, + strict: options.strict, + }), + ); + await sink.event({ + type: "done", + id: claudeId, + ok, + detail: ok ? "completed" : "failed", + }); if (!ok) { status = "claude-error"; break; @@ -211,9 +312,17 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => { break; } const verdict = parseVerdict(reviewText); - await sink.event({ type: "gate", name: `pass ${pass} verdict`, ok: verdict === "ACCEPT", detail: verdict || "MISSING" }); + await sink.event({ + type: "gate", + name: `pass ${pass} verdict`, + ok: verdict === "ACCEPT", + detail: verdict || "MISSING", + }); if (verdict === "ACCEPT") { - status = options.strict && !hasPassingMatrix(reviewText, criteria.length) ? "unclear" : "accepted"; + status = + options.strict && !hasPassingMatrix(reviewText, criteria.length) + ? "unclear" + : "accepted"; break; } if (verdict === "UNCLEAR") { @@ -236,23 +345,69 @@ export async function runDevloop(options: Options, sink: Sink = { event: () => { if (pass > options.max) pass = options.max; if (status === "accepted") { const commitId = "commit"; - await sink.event({ type: "step", id: commitId, title: "local branch and commit" }); - const committed = await commitAccepted(repo, slug, initialDirty).catch(() => undefined); + await sink.event({ + type: "step", + id: commitId, + title: "local branch and commit", + }); + const committed = await commitAccepted(repo, slug, initialDirty).catch( + () => undefined, + ); if (committed) { finalBranch = committed.branch; commit = committed.commit; commitMessage = committed.message; - await sink.event({ type: "done", id: commitId, ok: true, detail: commit ? `${finalBranch} ${commit}` : `${finalBranch} no changes` }); + await sink.event({ + type: "done", + id: commitId, + ok: true, + detail: commit + ? `${finalBranch} ${commit}` + : `${finalBranch} no changes`, + }); } else { status = "commit-error"; - await sink.event({ type: "done", id: commitId, ok: false, detail: "failed" }); + await sink.event({ + type: "done", + id: commitId, + ok: false, + detail: "failed", + }); } } const codexSessionId = await readLine(path.join(repo, codexSession)); const claudeSessionId = await readLine(path.join(repo, claudeSession)); - await synthesizeReport(runner, repo, { slug, spec, track, report, status, pass, max: options.max, base, initialBranch: branch, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId, format: options.reportFormat, reviews: listReviews(slug, pass, options.max) }); - const result = { status, passes: pass, max: options.max, report, track, branch: finalBranch, commit, commitMessage, codexSessionId, claudeSessionId }; + await synthesizeReport(runner, repo, { + slug, + spec, + track, + report, + status, + pass, + max: options.max, + base, + initialBranch: branch, + branch: finalBranch, + commit, + commitMessage, + codexSessionId, + claudeSessionId, + format: options.reportFormat, + reviews: listReviews(slug, pass, options.max), + }); + const result = { + status, + passes: pass, + max: options.max, + report, + track, + branch: finalBranch, + commit, + commitMessage, + codexSessionId, + claudeSessionId, + }; await sink.event({ type: "result", result }); return result; } @@ -265,7 +420,11 @@ async function absoluteFile(file: string, cwd: string) { async function command(cmd: string, args: string[]) { const proc = Bun.spawn([cmd, ...args], { stdout: "pipe", stderr: "pipe" }); - const [out, err, code] = await Promise.all([new Response(proc.stdout).text(), new Response(proc.stderr).text(), proc.exited]); + const [out, err, code] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + proc.exited, + ]); if (code !== 0) throw new Error(err.trim() || `${cmd} failed`); return out; } @@ -276,9 +435,13 @@ async function baseBranch(repo: string) { ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/main"], ["-C", repo, "show-ref", "--verify", "-q", "refs/heads/master"], ]) { - const proc = Bun.spawn(["git", ...args], { stdout: "pipe", stderr: "pipe" }); + const proc = Bun.spawn(["git", ...args], { + stdout: "pipe", + stderr: "pipe", + }); if ((await proc.exited) === 0) { - if (args[2] === "symbolic-ref") return (await new Response(proc.stdout).text()).trim().replace(/^origin\//, ""); + if (args[2] === "symbolic-ref") + return (await new Response(proc.stdout).text()).trim().replace(/^origin\//, ""); return args.at(-1)!.split("/").pop()!; } } @@ -286,7 +449,14 @@ async function baseBranch(repo: string) { } async function statusPaths(repo: string) { - const out = await command("git", ["-C", repo, "status", "--porcelain=v1", "-z", "--untracked-files=all"]); + const out = await command("git", [ + "-C", + repo, + "status", + "--porcelain=v1", + "-z", + "--untracked-files=all", + ]); const parts = out.split("\0").filter(Boolean); const paths = new Set(); for (let i = 0; i < parts.length; i++) { @@ -302,21 +472,49 @@ async function statusPaths(repo: string) { return paths; } -async function commitAccepted(repo: string, slug: string, initialDirty: Set) { - const current = (await command("git", ["-C", repo, "branch", "--show-current"])).trim(); +async function commitAccepted( + repo: string, + slug: string, + initialDirty: Set, +) { + const current = ( + await command("git", ["-C", repo, "branch", "--show-current"]) + ).trim(); const branch = await nextBranch(repo, slug, current); const message = `feat: ${slugify(slug)}`; - if (branch !== current) await command("git", ["-C", repo, "switch", "-c", branch]); - const changed = [...(await statusPaths(repo))].filter((file) => !initialDirty.has(file) && !file.startsWith(".codex/")); + if (branch !== current) + await command("git", ["-C", repo, "switch", "-c", branch]); + const changed = [...(await statusPaths(repo))].filter( + (file) => !initialDirty.has(file) && !file.startsWith(".codex/"), + ); if (changed.length === 0) return { branch, commit: "", message }; await command("git", ["-C", repo, "add", "--", ...changed]); - await command("git", ["-C", repo, "commit", "--only", "-m", message, "--", ...changed]); - return { branch, commit: (await command("git", ["-C", repo, "rev-parse", "--short", "HEAD"])).trim(), message }; + await command("git", [ + "-C", + repo, + "commit", + "--only", + "-m", + message, + "--", + ...changed, + ]); + return { + branch, + commit: ( + await command("git", ["-C", repo, "rev-parse", "--short", "HEAD"]) + ).trim(), + message, + }; } async function nextBranch(repo: string, slug: string, current: string) { const base = `devloop/${slugify(slug)}`; - if (current === base || new RegExp(`^${escapeRegex(base)}-\\d+$`).test(current)) return current; + if ( + current === base || + new RegExp(`^${escapeRegex(base)}-\\d+$`).test(current) + ) + return current; let suffix = 1; let branch = base; while (await branchExists(repo, branch)) { @@ -327,7 +525,15 @@ async function nextBranch(repo: string, slug: string, current: string) { } async function branchExists(repo: string, branch: string) { - const proc = Bun.spawn(["git", "-C", repo, "show-ref", "--verify", "--quiet", `refs/heads/${branch}`]); + const proc = Bun.spawn([ + "git", + "-C", + repo, + "show-ref", + "--verify", + "--quiet", + `refs/heads/${branch}`, + ]); return (await proc.exited) === 0; } @@ -335,7 +541,13 @@ function makeRunner(cwd: string, sink: Sink): Runner { return async (cmd, args, input = "", log, id) => { let proc: Bun.Subprocess<"pipe", "pipe", "pipe">; try { - proc = Bun.spawn([cmd, ...args], { cwd, stdin: "pipe", stdout: "pipe", stderr: "pipe", env: Bun.env }); + proc = Bun.spawn([cmd, ...args], { + cwd, + stdin: "pipe", + stdout: "pipe", + stderr: "pipe", + env: Bun.env, + }); } catch (error) { const output = error instanceof Error ? error.message : String(error); if (log) await writeFile(log, output); @@ -356,17 +568,34 @@ function makeRunner(cwd: string, sink: Sink): Runner { pending += text; const lines = pending.split(/\r?\n/); pending = lines.pop() ?? ""; - if (id) for (const line of lines.filter(Boolean)) await sink.event({ type: "log", id, line }); + if (id) + for (const line of lines.filter(Boolean)) + await sink.event({ type: "log", id, line }); } if (id && pending) await sink.event({ type: "log", id, line: pending }); }; - const [, , code] = await Promise.all([pump(proc.stdout), pump(proc.stderr), proc.exited]); + const [, , code] = await Promise.all([ + pump(proc.stdout), + pump(proc.stderr), + proc.exited, + ]); if (log) await writeFile(log, output); return { code, output }; }; } -async function initTrack(file: string, data: { spec: string; cwd: string; base: string; branch: string; max: number; reportFormat: ReportFormat; strict: boolean }) { +async function initTrack( + file: string, + data: { + spec: string; + cwd: string; + base: string; + branch: string; + max: number; + reportFormat: ReportFormat; + strict: boolean; + }, +) { if (await stat(file).catch(() => false)) return; await writeFile( file, @@ -375,19 +604,39 @@ async function initTrack(file: string, data: { spec: string; cwd: string; base: } async function readLine(file: string) { - return (await readFile(file, "utf8").catch(() => "")).split(/\r?\n/, 1)[0] ?? ""; + return ( + (await readFile(file, "utf8").catch(() => "")).split(/\r?\n/, 1)[0] ?? "" + ); } async function writeLine(file: string, value: string) { await writeFile(file, `${value}\n`); } -async function runCodex(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) { +async function runCodex( + runner: Runner, + repo: string, + sessionFile: string, + log: string, + prompt: string, +) { const session = await readLine(sessionFile); const args = session - ? ["exec", "resume", "--dangerously-bypass-approvals-and-sandbox", session, "-"] + ? [ + "exec", + "resume", + "--dangerously-bypass-approvals-and-sandbox", + session, + "-", + ] : ["exec", "--dangerously-bypass-approvals-and-sandbox", "-C", repo, "-"]; - const result = await runner("codex", args, prompt, log, log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex"); + const result = await runner( + "codex", + args, + prompt, + log, + log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex", + ); if (result.code !== 0) return false; if (!session) { const next = extractSessionId(result.output); @@ -397,13 +646,39 @@ async function runCodex(runner: Runner, repo: string, sessionFile: string, log: return true; } -async function runClaude(runner: Runner, repo: string, sessionFile: string, log: string, prompt: string) { +async function runClaude( + runner: Runner, + repo: string, + sessionFile: string, + log: string, + prompt: string, +) { const session = await readLine(sessionFile); const next = session || randomUUID(); const args = session - ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo] - : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo]; - const result = await runner("claude", args, prompt, log, log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report"); + ? [ + "-p", + "--resume", + session, + "--dangerously-skip-permissions", + "--add-dir", + repo, + ] + : [ + "-p", + "--session-id", + next, + "--dangerously-skip-permissions", + "--add-dir", + repo, + ]; + const result = await runner( + "claude", + args, + prompt, + log, + log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report", + ); if (result.code !== 0) return false; if (!session) await writeLine(sessionFile, next); return true; @@ -412,21 +687,38 @@ async function runClaude(runner: Runner, repo: string, sessionFile: string, log: function extractSessionId(output: string) { return output .split(/\r?\n/) - .filter((line) => /(session.?id|thread_id|codex exec resume|codex resume|To continue this session)/i.test(line)) + .filter((line) => + /(session.?id|thread_id|codex exec resume|codex resume|To continue this session)/i.test( + line, + ), + ) .join("\n") .match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i)?.[0] .toLowerCase(); } function listReviews(slug: string, upto: number, max: number) { - return Array.from({ length: Math.min(upto, max) }, (_, i) => `- .codex/reviews/${slug}-r${i + 1}.md`).join("\n"); + return Array.from( + { length: Math.min(upto, max) }, + (_, i) => `- .codex/reviews/${slug}-r${i + 1}.md`, + ).join("\n"); } function criteriaBlock(criteria: string[]) { - return criteria.map((criterion, i) => `AC${i + 1}: ${criterion}`).join("\n") || "No parsed acceptance criteria."; + return ( + criteria.map((criterion, i) => `AC${i + 1}: ${criterion}`).join("\n") || + "No parsed acceptance criteria." + ); } -function codexPrompt(input: { spec: string; track: string; pass: number; strict: boolean; previous: string; criteria: string[] }) { +function codexPrompt(input: { + spec: string; + track: string; + pass: number; + strict: boolean; + previous: string; + criteria: string[]; +}) { const strict = input.strict ? "\nStrict lifecycle:\n1. Add or update regression tests before implementation.\n2. Run the narrow test first and record the failing result, unless impossible; if impossible, say why.\n3. Implement the smallest change.\n4. Run targeted tests, full tests, lint/typecheck, and coverage. Coverage must be 100% when the project exposes coverage tooling.\n" : ""; @@ -435,11 +727,41 @@ function codexPrompt(input: { spec: string; track: string; pass: number; strict: : `Fix only the findings in the review. Do not refactor unrelated code.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nReview: ${input.previous}\nPass: ${input.pass}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}${strict}\nTasks:\n1. Read the review file.\n2. Fix each finding or explain why it is wrong in the track.\n3. Re-run relevant tests.\n4. Append "## Pass ${input.pass} - fix" to ${input.track} with per-finding outcomes.\n`; } -function reviewPrompt(input: { spec: string; track: string; base: string; pass: number; output: string; priors: string; criteria: string[]; strict: boolean }) { +function reviewPrompt(input: { + spec: string; + track: string; + base: string; + pass: number; + output: string; + priors: string; + criteria: string[]; + strict: boolean; +}) { return `You are reviewing a Codex implementation. Be a senior reviewer, not a linter.\n\nSpec: ${input.spec}\nTrack: ${input.track}\nBase: ${input.base}\nPass: ${input.pass}\nPrior reviews:\n${input.priors}\nAcceptance criteria:\n${criteriaBlock(input.criteria)}\nOutput path: ${input.output}\n\nSteps:\n1. Read the spec and track.\n2. Run: git diff ${input.base}...HEAD\n3. Read prior reviews so you do not repeat resolved findings.\n4. Write the review to ${input.output} using this exact format:\n\n# Claude review ${input.pass}\n\nVerdict: \n\n## Acceptance matrix\n\n- AC1: - \n\n## Findings\n\n1. [severity] - . Root cause: . Principle: .\n\n## Missing tests\n\n- \n\n## Fix instructions\n\n1. \n\n## Notes\n\n- \n\nRules:\n- The verdict line must appear verbatim.\n- ACCEPT requires every acceptance criterion PASS with concrete evidence.${input.strict ? "\n- ACCEPT also requires regression-test evidence, red/green evidence when behavior changed, passing full tests, and 100% coverage when coverage tooling exists." : ""}\n- For ACCEPT: Findings and Fix instructions bodies are "None".\n- Findings must explain WHY, not just WHAT.\n`; } -async function synthesizeReport(runner: Runner, repo: string, input: { slug: string; spec: string; track: string; report: string; status: Status; pass: number; max: number; base: string; initialBranch: string; branch: string; commit: string; commitMessage: string; codexSessionId: string; claudeSessionId: string; format: ReportFormat; reviews: string }) { +async function synthesizeReport( + runner: Runner, + repo: string, + input: { + slug: string; + spec: string; + track: string; + report: string; + status: Status; + pass: number; + max: number; + base: string; + initialBranch: string; + branch: string; + commit: string; + commitMessage: string; + codexSessionId: string; + claudeSessionId: string; + format: ReportFormat; + reviews: string; + }, +) { const metadata = `Result: ${input.status} Passes: ${input.pass} / ${input.max} Repository: ${repo} @@ -458,12 +780,31 @@ ${input.reviews}`; input.format === "html" ? `Write the report to ${input.report} as valid standalone HTML. Use a readable document layout with embedded CSS, a compact metadata table at the top, and substantive sections after it. Include these visible section headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.` : `Write the report to ${input.report} in markdown with these headings: Metadata, The shape of the problem, What was built, What the review caught (and why it mattered), What to remember next time, Residual risk, Pointers. Do not optimize away substance: explain the decisions, tradeoffs, evidence, and transferable lessons clearly enough that the reader learns from the run.`; - const sessionFile = path.join(repo, `.codex/sessions/${input.slug}-claude.id`); + const sessionFile = path.join( + repo, + `.codex/sessions/${input.slug}-claude.id`, + ); const session = await readLine(sessionFile); const next = session || randomUUID(); await runner( "claude", - session ? ["-p", "--resume", session, "--dangerously-skip-permissions", "--add-dir", repo] : ["-p", "--session-id", next, "--dangerously-skip-permissions", "--add-dir", repo], + session + ? [ + "-p", + "--resume", + session, + "--dangerously-skip-permissions", + "--add-dir", + repo, + ] + : [ + "-p", + "--session-id", + next, + "--dangerously-skip-permissions", + "--add-dir", + repo, + ], `You are writing a learning-oriented post-mortem for a developer who just ran a Codex/Claude devloop.\n\nMetadata to render at the top exactly and visibly:\n${metadata}\n\nInputs:\n- spec: ${input.spec}\n- track: ${input.track}\nReview files:\n${input.reviews}\n- final status: ${input.status}\n- passes used: ${input.pass} / ${input.max}\n- base: ${input.base}, starting branch: ${input.initialBranch}, final branch: ${input.branch}, local commit: ${input.commit || "none"}\n\n${body}\n\nStyle:\n- Human readable, not ornamental.\n- Preserve useful substance over brevity.\n- Teach the why: symptom, root cause, principle, decision, tradeoff, and evidence.\n- No emoji.\n`, path.join(repo, `.codex/logs/${input.slug}-report.log`), "report", @@ -476,7 +817,12 @@ function clamp(value: number, min: number, max: number) { } function slugify(value: string) { - return value.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "change"; + return ( + value + .toLowerCase() + .replace(/[^a-z0-9._-]+/g, "-") + .replace(/^-+|-+$/g, "") || "change" + ); } function escapeRegex(value: string) { From 9b61c9b88059eabb4858f0c3736551d0b532f74a Mon Sep 17 00:00:00 2001 From: satyaborg Date: Tue, 26 May 2026 13:15:25 +1000 Subject: [PATCH 08/11] chore: add spec template --- README.md | 2 ++ templates/spec.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 templates/spec.md diff --git a/README.md b/README.md index 9bc5974..b0168e5 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ devloop [--plain|--tui] [--no-strict] [--report-format html|markdown] spec.md [m Run from the target git worktree. The spec may live anywhere. +Start new specs from [`templates/spec.md`](templates/spec.md), usually copied to `.specs/YYYY-MM-DD-slug.md`. + ## Install From this checkout: diff --git a/templates/spec.md b/templates/spec.md new file mode 100644 index 0000000..6a61905 --- /dev/null +++ b/templates/spec.md @@ -0,0 +1,51 @@ +--- +status: draft +type: feat|fix|chore +created: YYYY-MM-DD +pr: null +--- + +# + +## Intent + + +## Desired outcome + + +## Scope +- Touch: +- Do not touch: + +## Behavior +Happy path: +1. + +Edge cases and failures: +- : +- : + +## Constraints +- Must: +- Prefer: +- Avoid: + +## Acceptance criteria +1. +2. + +## Test plan +- Regression first: +- Targeted: +- Full: +- Coverage: <100% coverage command, or explicit reason coverage tooling is not applicable> + +## Implementation notes +- +- + +## Out of scope +- + +## Review focus +- From 8d31ac0454c319fa96b51d6cc3a989ed133714cb Mon Sep 17 00:00:00 2001 From: satyaborg Date: Tue, 26 May 2026 13:37:49 +1000 Subject: [PATCH 09/11] chore: tighten spec template --- README.md | 10 +++++++++ templates/spec.md | 54 +++++++++++++++++++++-------------------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index b0168e5..fb39405 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ Run from the target git worktree. The spec may live anywhere. Start new specs from [`templates/spec.md`](templates/spec.md), usually copied to `.specs/YYYY-MM-DD-slug.md`. +The template is intentionally short: clear problem, observable outcome, tight scope, behavior examples, verifiable acceptance criteria, regression-first test plan, constraints, and only material notes. + ## Install From this checkout: @@ -101,3 +103,11 @@ bun test ``` `bun test` enforces 100% line/function/statement coverage for the TypeScript core. + +## References + +- [ISO/IEC/IEEE 29148:2018](https://byui-cse.github.io/cse372-course/Reading/CSE372Week10-IEEE.pdf): requirements should be necessary, unambiguous, singular, feasible, verifiable, and focused on what is needed. +- [Erdogmus, Morisio, and Torchiano, 2005](https://cs.unm.edu/~joel/cs351/paper/IEEE-Effectiveness_of_Test-First_Approach_to_Programming.pdf): test-first work formalizes functionality as tests, gives fast feedback, and supports small measurable tasks. +- [Fucci et al., 2016](https://bura.brunel.ac.uk/bitstream/2438/14550/1/FullText.pdf): TDD benefits depend heavily on fine-grained, steady cycles, not ceremony. +- [Rafique and Misic, 2013](https://openurl.ebsco.com/contentitem/doi%3A10.1109/tse.2012.28?id=ebsco%3Adoi%3A10.1109%2Ftse.2012.28&sid=ebsco%3Aplink%3Acrawler) and [Bissi, Neto, and Emer, 2016](https://www.sciencedirect.com/science/article/abs/pii/S0950584916300222): TDD evidence is strongest for quality, less conclusive for productivity. +- [Agile Alliance user stories](https://agilealliance.org/glossary/user-stories/), [Given-When-Then](https://agilealliance.org/glossary/given-when-then/), and [Cucumber Gherkin reference](https://cucumber.io/docs/gherkin/reference/): acceptance criteria are strongest when they become concrete, observable examples. diff --git a/templates/spec.md b/templates/spec.md index 6a61905..34d6251 100644 --- a/templates/spec.md +++ b/templates/spec.md @@ -5,47 +5,41 @@ created: YYYY-MM-DD pr: null --- -# +# -## Intent - +## Problem + -## Desired outcome - +## Outcome + ## Scope -- Touch: -- Do not touch: +- In: +- Out: ## Behavior Happy path: -1. +1. +2. -Edge cases and failures: -- : -- : - -## Constraints -- Must: -- Prefer: -- Avoid: +Edge cases: +- : +- : ## Acceptance criteria -1. -2. +1. +2. ## Test plan -- Regression first: -- Targeted: -- Full: -- Coverage: <100% coverage command, or explicit reason coverage tooling is not applicable> +- Red: +- Green: +- Full: +- Coverage: <100% coverage command, or why unavailable> -## Implementation notes -- -- - -## Out of scope -- +## Constraints +- Must: +- Avoid: +- Existing convention: -## Review focus -- +## Notes + From 493031ab90ba593ad2bc904ec3d8c0ba1fd92b96 Mon Sep 17 00:00:00 2001 From: satyaborg Date: Tue, 26 May 2026 14:02:10 +1000 Subject: [PATCH 10/11] fix: surface devloop commit failures --- src/devloop.ts | 29 +++++++++++++++++++++-------- tests/devloop.test.ts | 6 ++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/devloop.ts b/src/devloop.ts index d65293d..179093c 100644 --- a/src/devloop.ts +++ b/src/devloop.ts @@ -151,9 +151,8 @@ export function parseCriteria(markdown: string): string[] { } export function parseVerdict(review: string): Verdict | "" { - return review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m)?.[1] as - | Verdict - | ""; + const match = review.match(/^Verdict:\s+(ACCEPT|REJECT|UNCLEAR)/m); + return match ? (match[1] as Verdict) : ""; } export function hasPassingMatrix(review: string, count: number) { @@ -350,8 +349,12 @@ export async function runDevloop( id: commitId, title: "local branch and commit", }); + let commitError = ""; const committed = await commitAccepted(repo, slug, initialDirty).catch( - () => undefined, + (error) => { + commitError = error instanceof Error ? error.message : String(error); + return undefined; + }, ); if (committed) { finalBranch = committed.branch; @@ -371,7 +374,7 @@ export async function runDevloop( type: "done", id: commitId, ok: false, - detail: "failed", + detail: commitError || "failed", }); } } @@ -425,7 +428,12 @@ async function command(cmd: string, args: string[]) { new Response(proc.stderr).text(), proc.exited, ]); - if (code !== 0) throw new Error(err.trim() || `${cmd} failed`); + if (code !== 0) + throw new Error( + err.trim() || + out.trim() || + `${cmd} ${args.join(" ")} failed with exit ${code}`, + ); return out; } @@ -635,7 +643,7 @@ async function runCodex( args, prompt, log, - log.match(/r(\d+)-codex/) ? `codex-${RegExp.$1}` : "codex", + logId(log, "codex"), ); if (result.code !== 0) return false; if (!session) { @@ -677,13 +685,18 @@ async function runClaude( args, prompt, log, - log.match(/r(\d+)-claude/) ? `claude-${RegExp.$1}` : "report", + logId(log, "claude"), ); if (result.code !== 0) return false; if (!session) await writeLine(sessionFile, next); return true; } +function logId(log: string, kind: "codex" | "claude") { + const pass = log.match(new RegExp(`r(\\d+)-${kind}`))?.[1]; + return pass ? `${kind}-${pass}` : kind === "codex" ? "codex" : "report"; +} + function extractSessionId(output: string) { return output .split(/\r?\n/) diff --git a/tests/devloop.test.ts b/tests/devloop.test.ts index b0e64f1..4859b48 100644 --- a/tests/devloop.test.ts +++ b/tests/devloop.test.ts @@ -39,6 +39,7 @@ describe("parsing", () => { expect(parseCriteria("# Spec\n\n## Acceptance criteria\n1. One\n- Two\n\n## Notes\nNope")).toEqual(["One", "Two"]); expect(parseCriteria("# Spec")).toEqual([]); expect(parseVerdict("Verdict: ACCEPT\n")).toBe("ACCEPT"); + expect(parseVerdict("No verdict here\n")).toBe(""); }); test("renders a useful default screen", () => { @@ -132,11 +133,12 @@ describe("loop", () => { test("reports commit errors", async () => { const { repo } = await fixture("commit-error"); - await writeFile(path.join(repo, ".git/hooks/pre-commit"), "#!/usr/bin/env bash\nexit 1\n", { mode: 0o755 }); + await writeFile(path.join(repo, ".git/hooks/pre-commit"), "#!/usr/bin/env bash\necho 'pre-commit blocked commit' >&2\nexit 1\n", { mode: 0o755 }); process.env.DEVLOOP_TEST_VERDICTS = "ACCEPT"; - const { result } = await run(repo); + const { result, events } = await run(repo); expect(result.status).toBe("commit-error"); + expect(events).toContainEqual({ type: "done", id: "commit", ok: false, detail: "pre-commit blocked commit" }); }); test("uses a suffixed branch when the default branch exists", async () => { From 1478637874a20c00c41bd21126f7606be9128450 Mon Sep 17 00:00:00 2001 From: satyaborg Date: Tue, 26 May 2026 14:02:31 +1000 Subject: [PATCH 11/11] chore: cover tui view rendering --- src/tui-view.ts | 18 +++++++++++++++ src/tui.ts | 20 ++--------------- tests/tui-view.test.ts | 51 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 src/tui-view.ts create mode 100644 tests/tui-view.test.ts diff --git a/src/tui-view.ts b/src/tui-view.ts new file mode 100644 index 0000000..93214ea --- /dev/null +++ b/src/tui-view.ts @@ -0,0 +1,18 @@ +import { LOGO, type Result } from "./devloop.ts"; + +export type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean }; + +export function view(rows: Row[], selected: number, result?: Result) { + const body = rows.flatMap((item, i) => { + const mark = i === selected ? ">" : " "; + const fold = item.lines.length ? (item.open ? "[-]" : "[+]") : " "; + const head = `${mark} ${icon(item.status)} ${fold} ${item.title} - ${item.detail}`; + return item.open ? [head, ...item.lines.slice(-80).map((line) => ` ${line}`)] : [head]; + }); + const tail = result ? ["", `result: ${result.status}`, `passes: ${result.passes} / ${result.max}`, `branch: ${result.branch}`, `commit: ${result.commit || "none"}`, `report: ${result.report}`, `track: ${result.track}`] : ["", "enter toggles logs, j/k moves"]; + return [LOGO, "", ...body, ...tail].join("\n"); +} + +function icon(status: Row["status"]) { + return status === "ok" ? "ok" : status === "fail" ? "!!" : ".."; +} diff --git a/src/tui.ts b/src/tui.ts index abef745..a8ca027 100644 --- a/src/tui.ts +++ b/src/tui.ts @@ -1,6 +1,5 @@ -import { LOGO, type Event, type Result, type Sink } from "./devloop.ts"; - -type Row = { id: string; title: string; status: "run" | "ok" | "fail"; detail: string; lines: string[]; open: boolean }; +import { type Event, type Result, type Sink } from "./devloop.ts"; +import { view, type Row } from "./tui-view.ts"; export async function createTuiSink(): Promise { const { TextRenderable, createCliRenderer } = await import("@opentui/core"); @@ -40,21 +39,6 @@ export async function createTuiSink(): Promise { }; } -export function view(rows: Row[], selected: number, result?: Result) { - const body = rows.flatMap((item, i) => { - const mark = i === selected ? ">" : " "; - const fold = item.lines.length ? (item.open ? "[-]" : "[+]") : " "; - const head = `${mark} ${icon(item.status)} ${fold} ${item.title} - ${item.detail}`; - return item.open ? [head, ...item.lines.slice(-80).map((line) => ` ${line}`)] : [head]; - }); - const tail = result ? ["", `result: ${result.status}`, `passes: ${result.passes} / ${result.max}`, `branch: ${result.branch}`, `commit: ${result.commit || "none"}`, `report: ${result.report}`, `track: ${result.track}`] : ["", "enter toggles logs, j/k moves"]; - return [LOGO, "", ...body, ...tail].join("\n"); -} - function row(rows: Row[], id: string) { return rows.find((item) => item.id === id) ?? rows[rows.push({ id, title: id, status: "run", detail: "running", lines: [], open: false }) - 1]!; } - -function icon(status: Row["status"]) { - return status === "ok" ? "ok" : status === "fail" ? "!!" : ".."; -} diff --git a/tests/tui-view.test.ts b/tests/tui-view.test.ts new file mode 100644 index 0000000..d7085c4 --- /dev/null +++ b/tests/tui-view.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, test } from "bun:test"; +import { view, type Row } from "../src/tui-view.ts"; + +const baseRow = { + id: "step", + title: "run tests", + status: "run", + detail: "running", + lines: [], + open: false, +} satisfies Row; + +describe("tui view", () => { + test("renders empty state with logo and help", () => { + const output = view([], 0); + + expect(output).toContain("____/ /__"); + expect(output).toContain("enter toggles logs, j/k moves"); + }); + + test("renders closed and open rows", () => { + const closed = view([{ ...baseRow, lines: ["hidden"] }], 0); + const open = view([{ ...baseRow, status: "ok", detail: "completed", lines: Array.from({ length: 82 }, (_, i) => `line-${i}`), open: true }], 0); + + expect(closed).toContain("> .. [+] run tests - running"); + expect(closed).not.toContain("hidden"); + expect(open).toContain("> ok [-] run tests - completed"); + expect(open).not.toContain("line-0"); + expect(open).toContain("line-81"); + }); + + test("renders failed rows and result details", () => { + const output = view([{ ...baseRow, status: "fail", detail: "failed" }], 0, { + status: "commit-error", + passes: 1, + max: 5, + report: ".codex/reports/change.html", + track: ".codex/tracks/change.md", + branch: "devloop/change", + commit: "", + commitMessage: "", + codexSessionId: "codex-session", + claudeSessionId: "claude-session", + }); + + expect(output).toContain("> !! run tests - failed"); + expect(output).toContain("result: commit-error"); + expect(output).toContain("commit: none"); + expect(output).toContain("track: .codex/tracks/change.md"); + }); +});