From 91d8343955853bfb40895a888bdc0e332b20bb8f Mon Sep 17 00:00:00 2001
From: Rex Lorenzo <rexlorenzo@gmail.com>
Date: Fri, 20 Mar 2026 16:28:51 -0700
Subject: [PATCH 1/3] feat(test): add BATS unit tests and smoke tests for
 review loops

- BATS tests cover config parsing, prompt loading, agent
  flags, MCP config, and review status checks
- Smoke tests run real agents as editor/reviewer against a
  fixture repo to catch CLI integration issues
- Pre-commit runs unit tests; pre-push runs smoke tests
- Shared helpers moved from bin/ scripts to lib/lib-review-loop
---
 .gitmodules                              |   9 +
 .markdownlint-cli2.yaml                  |   2 +
 .pre-commit-config.yaml                  |  18 ++
 README.md                                |  31 +++
 bin/code-review-loop                     |  28 +-
 bin/plan-review-loop                     |  60 +----
 lib/lib-review-loop                      |  95 +++++++
 test/bats/bats-assert                    |   1 +
 test/bats/bats-core                      |   1 +
 test/bats/bats-support                   |   1 +
 test/code-review-loop.bats               |  35 +++
 test/fixtures/prompt-blank-lines.md      |   5 +
 test/fixtures/prompt-no-frontmatter.md   |   1 +
 test/fixtures/prompt-with-frontmatter.md |   6 +
 test/fixtures/review-clean.md            |   9 +
 test/fixtures/review-issues.md           |   9 +
 test/lib-review-loop.bats                | 267 +++++++++++++++++++
 test/plan-review-loop.bats               |  41 +++
 test/run                                 |   4 +
 test/smoke                               | 312 +++++++++++++++++++++++
 test/test_helper.bash                    |  35 +++
 21 files changed, 886 insertions(+), 84 deletions(-)
 create mode 100644 .gitmodules
 create mode 160000 test/bats/bats-assert
 create mode 160000 test/bats/bats-core
 create mode 160000 test/bats/bats-support
 create mode 100644 test/code-review-loop.bats
 create mode 100644 test/fixtures/prompt-blank-lines.md
 create mode 100644 test/fixtures/prompt-no-frontmatter.md
 create mode 100644 test/fixtures/prompt-with-frontmatter.md
 create mode 100644 test/fixtures/review-clean.md
 create mode 100644 test/fixtures/review-issues.md
 create mode 100644 test/lib-review-loop.bats
 create mode 100644 test/plan-review-loop.bats
 create mode 100755 test/run
 create mode 100755 test/smoke
 create mode 100644 test/test_helper.bash

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..9b8d1d3
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,9 @@
+[submodule "test/bats/bats-core"]
+	path = test/bats/bats-core
+	url = https://github.com/bats-core/bats-core.git
+[submodule "test/bats/bats-support"]
+	path = test/bats/bats-support
+	url = https://github.com/bats-core/bats-support.git
+[submodule "test/bats/bats-assert"]
+	path = test/bats/bats-assert
+	url = https://github.com/bats-core/bats-assert.git
diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml
index 2bf9a52..ebc99be 100644
--- a/.markdownlint-cli2.yaml
+++ b/.markdownlint-cli2.yaml
@@ -1,2 +1,4 @@
 config:
   MD013: false # Line length — instructional prose runs long intentionally
+ignores:
+  - "test/fixtures/**"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 10b7538..2ad1b64 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,3 +16,21 @@ repos:
     rev: v0.21.0
     hooks:
       - id: markdownlint-cli2
+
+  - repo: local
+    hooks:
+      - id: bats-unit-tests
+        name: BATS unit tests
+        entry: test/run
+        language: script
+        always_run: true
+        pass_filenames: false
+        stages: [pre-commit]
+
+      - id: smoke-tests
+        name: Smoke tests (real agents)
+        entry: test/smoke
+        language: script
+        always_run: true
+        pass_filenames: false
+        stages: [pre-push]
diff --git a/README.md b/README.md
index 0721a5c..bdedda7 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,37 @@ The setup script only manages commands it originally installed.
 
 ## Contributing
 
+### Running Tests
+
+The test suite uses [BATS](https://github.com/bats-core/bats-core) (Bash Automated Testing System). After cloning with submodules:
+
+```bash
+git clone --recurse-submodules https://github.com/rlorenzo/ai-coding-setup.git
+cd ai-coding-setup
+test/run
+```
+
+If you already cloned without submodules:
+
+```bash
+git submodule update --init --recursive
+test/run
+```
+
+Unit tests (`test/run`) cover config parsing, prompt loading, validation, and review status checks. They run in seconds and need no API keys.
+
+### Smoke Tests
+
+Smoke tests run real AI agents against a temporary git repo to verify that CLI flags are accepted and agents can perform basic read/write tasks:
+
+```bash
+test/smoke                   # test all installed agents
+test/smoke claude codex      # test specific agents
+test/smoke --timeout 180     # override per-test timeout (default: 120s)
+```
+
+Each installed agent is tested as both editor (can it modify a file?) and reviewer (does it produce a review file?). Requires at least one AI tool installed and authenticated.
+
 ### Pre-commit hooks (optional)
 
 This repo uses [pre-commit](https://pre-commit.com/) to run linters locally before each commit. Install it once and you'll get automatic checks for shell scripts (shellcheck), markdown (markdownlint), and TOML syntax.
diff --git a/bin/code-review-loop b/bin/code-review-loop
index 71263e8..4bbe079 100755
--- a/bin/code-review-loop
+++ b/bin/code-review-loop
@@ -45,7 +45,7 @@ Options:
   -r, --reviewer AGENT     Agent for code review (default: codex)
   -h, --help               Show this help message
 
-Agents: claude, codex, gemini
+Agents: claude, codex, gemini, copilot
 EOF
     exit 0
 }
@@ -84,32 +84,6 @@ REVIEWER_TOOLS="Read,Write,Bash,Grep,Glob"
 
 # ---- helpers -------------------------------------------------------------
 
-test_review_clean() {
-    [[ -f "$REVIEW_FILE" ]] || return 1
-    local content
-    content=$(<"$REVIEW_FILE")
-
-    # Only an explicit "Verdict: good to go" line signals clean — a bare
-    # substring match could false-positive on "not good to go"
-    if echo "$content" | grep -qiE "verdict[[:space:]]*:[[:space:]]*good to go"; then
-        return 0
-    fi
-
-    return 1
-}
-
-get_review_issue_counts() {
-    [[ -f "$REVIEW_FILE" ]] || { echo "No review file"; return; }
-    local content high medium low
-    content=$(<"$REVIEW_FILE")
-
-    high=$(echo "$content" | grep -oE "High[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
-    medium=$(echo "$content" | grep -oE "Medium[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
-    low=$(echo "$content" | grep -oE "Low[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
-
-    echo "High: ${high:-?}, Medium: ${medium:-?}, Low: ${low:-?}"
-}
-
 stage_review_changes() {
     # Re-stage files that are in the review scope (partially staged files are
     # rejected at startup, so git add here is safe)
diff --git a/bin/plan-review-loop b/bin/plan-review-loop
index 7e74dd0..95115cf 100755
--- a/bin/plan-review-loop
+++ b/bin/plan-review-loop
@@ -46,7 +46,7 @@ Options:
   -r, --reviewer AGENT     Agent for plan review (default: codex)
   -h, --help               Show this help message
 
-Agents: claude, codex, gemini
+Agents: claude, codex, gemini, copilot
 
 Examples:
   plan-review-loop PLAN-feature.md
@@ -97,60 +97,6 @@ REVIEWER_FOLLOWUP_PROMPT="$PROMPTS_DIR/plan-review-followup.md"
 EDITOR_TOOLS="Read,Write,Edit,Grep,Glob"
 REVIEWER_TOOLS="Read,Write,Grep,Glob"
 
-# ---- helpers -------------------------------------------------------------
-
-test_reviewer_satisfied() {
-    [[ -f "$FEEDBACK_FILE" ]] || return 1
-    # Match sentinel as a standalone line to avoid false positives from
-    # feedback that merely mentions the token in explanatory text
-    local trimmed
-    trimmed=$(sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' "$FEEDBACK_FILE")
-    [[ "$trimmed" == "NO_FURTHER_FEEDBACK" ]]
-}
-
-build_improvement_prompt() {
-    local plan_path="$1" feedback_path="$2" reviewer_name="$3" cycle="$4" max_cycles="$5"
-    cat <<PROMPT
-You are a senior software architect improving a plan document based on reviewer feedback.
-
-## Your Task
-
-1. **Read** the plan file: $plan_path
-2. **Read** the feedback file: $feedback_path
-3. **Evaluate** each feedback point critically. You are NOT obligated to accept every suggestion. Use your judgment:
-   - Accept and implement feedback that genuinely improves completeness, feasibility, clarity, or risk coverage.
-   - Decline feedback that is subjective preference, out of scope, or would degrade the plan.
-4. **Edit** the plan file directly to incorporate valid improvements. Do not rewrite sections that are already well-structured unless the feedback identifies a real gap.
-5. **Write** a response to $feedback_path (overwrite it) explaining your actions:
-   - For each accepted point: briefly describe what was changed and where.
-   - For each declined point: explain why (e.g., out of scope, already covered, disagree with reasoning).
-
-## Response Format
-
-Use these markers for each feedback item in your response:
-
-- Accepted: \`✅ Implemented: [brief description of what was changed]\`
-- Deferred: \`📝 Response: [reason for deferring, alternatives]\`
-- Declined: \`❌ Clarification: [reason for disagreeing]\`
-
-## Guidelines
-
-- Preserve the plan's existing structure and formatting conventions.
-- Do not add unnecessary boilerplate or padding.
-- Focus on substance: missing edge cases, risk gaps, unclear requirements, feasibility concerns.
-- If the reviewer's feedback is entirely minor or already addressed, say so clearly in your response.
-- This is cycle $cycle of $max_cycles with the $reviewer_name reviewer.
-
-## Output
-
-Your only output files are:
-- The improved plan file (edited in place)
-- The feedback response file (overwritten with your response)
-
-Do not create any other files. Do not stage or commit anything.
-PROMPT
-}
-
 # ---- validation ----------------------------------------------------------
 if [[ ! -f "$PLAN_FILE_PATH" ]]; then
     echo -e "${RED}ERROR: Plan file not found: $PLAN_FILE_PATH${NC}"
@@ -172,8 +118,8 @@ echo -e "${MAGENTA} Plan Review Loop${NC}"
 echo -e "${MAGENTA}========================================${NC}"
 echo " Plan file      : $PLAN_FILE"
 echo " Max iterations : $MAX_ITERATIONS"
-echo " Editor          : $EDITOR_AGENT"
-echo " Reviewer        : $REVIEWER_AGENT"
+echo " Editor         : $EDITOR_AGENT"
+echo " Reviewer       : $REVIEWER_AGENT"
 
 start_time=$(date +%s)
 iteration_data=""
diff --git a/lib/lib-review-loop b/lib/lib-review-loop
index 0426f65..f7a6832 100644
--- a/lib/lib-review-loop
+++ b/lib/lib-review-loop
@@ -240,6 +240,101 @@ validate_prompts() {
     fi
 }
 
+# ---- review status checks ------------------------------------------------
+
+# Check whether a code review is clean (no actionable issues).
+# Requires REVIEW_FILE to be set by the caller.
+# Returns 0 if the review file contains "Verdict: good to go", 1 otherwise.
+test_review_clean() {
+    [[ -f "$REVIEW_FILE" ]] || return 1
+
+    # Only an explicit "Verdict: good to go" line signals clean — a bare
+    # substring match could false-positive on "not good to go".
+    # grep reads the file directly rather than via `echo ... | grep -q`:
+    # under pipefail, grep -q exiting at the first match can SIGPIPE the
+    # writer on a large review and make a clean review look dirty.
+    if grep -qiE "verdict[[:space:]]*:[[:space:]]*good to go" "$REVIEW_FILE"; then
+        return 0
+    fi
+    return 1
+}
+
+# Extract issue counts from a code review file.
+# Requires REVIEW_FILE to be set by the caller.
+# Prints: "High: N, Medium: N, Low: N" (or ? for missing counts)
+get_review_issue_counts() {
+    [[ -f "$REVIEW_FILE" ]] || { echo "No review file"; return; }
+    local content high medium low
+    content=$(<"$REVIEW_FILE")
+
+    high=$(echo "$content" | grep -oE "High[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
+    medium=$(echo "$content" | grep -oE "Medium[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
+    low=$(echo "$content" | grep -oE "Low[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
+
+    echo "High: ${high:-?}, Medium: ${medium:-?}, Low: ${low:-?}"
+}
+
+# Check whether a plan reviewer is satisfied (no further feedback).
+# Requires FEEDBACK_FILE to be set by the caller.
+# Returns 0 if the feedback file contains only "NO_FURTHER_FEEDBACK", 1 otherwise.
+test_reviewer_satisfied() {
+    [[ -f "$FEEDBACK_FILE" ]] || return 1
+    # Match sentinel as a standalone line to avoid false positives from
+    # feedback that merely mentions the token in explanatory text
+    local trimmed
+    trimmed=$(sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' "$FEEDBACK_FILE")
+    [[ "$trimmed" == "NO_FURTHER_FEEDBACK" ]]
+}
+
+# Build the improvement prompt for the plan editor.
+#   $1 — plan file path
+#   $2 — feedback file path
+#   $3 — reviewer agent name
+#   $4 — current cycle number
+#   $5 — max cycles
+build_improvement_prompt() {
+    local plan_path="$1" feedback_path="$2" reviewer_name="$3" cycle="$4" max_cycles="$5"
+    cat <<PROMPT
+You are a senior software architect improving a plan document based on reviewer feedback.
+
+## Your Task
+
+1. **Read** the plan file: $plan_path
+2. **Read** the feedback file: $feedback_path
+3. **Evaluate** each feedback point critically. You are NOT obligated to accept every suggestion. Use your judgment:
+   - Accept and implement feedback that genuinely improves completeness, feasibility, clarity, or risk coverage.
+   - Decline feedback that is subjective preference, out of scope, or would degrade the plan.
+4. **Edit** the plan file directly to incorporate valid improvements. Do not rewrite sections that are already well-structured unless the feedback identifies a real gap.
+5. **Write** a response to $feedback_path (overwrite it) explaining your actions:
+   - For each accepted point: briefly describe what was changed and where.
+   - For each declined point: explain why (e.g., out of scope, already covered, disagree with reasoning).
+
+## Response Format
+
+Use these markers for each feedback item in your response:
+
+- Accepted: \`✅ Implemented: [brief description of what was changed]\`
+- Deferred: \`📝 Response: [reason for deferring, alternatives]\`
+- Declined: \`❌ Clarification: [reason for disagreeing]\`
+
+## Guidelines
+
+- Preserve the plan's existing structure and formatting conventions.
+- Do not add unnecessary boilerplate or padding.
+- Focus on substance: missing edge cases, risk gaps, unclear requirements, feasibility concerns.
+- If the reviewer's feedback is entirely minor or already addressed, say so clearly in your response.
+- This is cycle $cycle of $max_cycles with the $reviewer_name reviewer.
+
+## Output
+
+Your only output files are:
+- The improved plan file (edited in place)
+- The feedback response file (overwritten with your response)
+
+Do not create any other files. Do not stage or commit anything.
+PROMPT
+}
+
 # ---- temp directory with cleanup -----------------------------------------
 
 # Create a temp directory and register a cleanup trap.
diff --git a/test/bats/bats-assert b/test/bats/bats-assert
new file mode 160000
index 0000000..697471b
--- /dev/null
+++ b/test/bats/bats-assert
@@ -0,0 +1 @@
+Subproject commit 697471b7a89d3ab38571f38c6c7c4b460d1f5e35
diff --git a/test/bats/bats-core b/test/bats/bats-core
new file mode 160000
index 0000000..d9faff0
--- /dev/null
+++ b/test/bats/bats-core
@@ -0,0 +1 @@
+Subproject commit d9faff0d7bc32e7adebc6552446f978118d3ab3b
diff --git a/test/bats/bats-support b/test/bats/bats-support
new file mode 160000
index 0000000..0954abb
--- /dev/null
+++ b/test/bats/bats-support
@@ -0,0 +1 @@
+Subproject commit 0954abb9925cad550424cebca2b99255d4eabe96
diff --git a/test/code-review-loop.bats b/test/code-review-loop.bats
new file mode 100644
index 0000000..e2452cb
--- /dev/null
+++ b/test/code-review-loop.bats
@@ -0,0 +1,35 @@
+#!/usr/bin/env bats
+# Tests for bin/code-review-loop — argument parsing and validation.
+
+load test_helper
+
+BIN="$PROJECT_ROOT/bin/code-review-loop"
+
+@test "code-review-loop --help prints usage and exits 0" {
+    run "$BIN" --help
+    assert_success
+    assert_output --partial "Usage: code-review-loop"
+}
+
+@test "code-review-loop -h prints usage and exits 0" {
+    run "$BIN" -h
+    assert_success
+    assert_output --partial "Usage: code-review-loop"
+}
+
+@test "code-review-loop rejects unknown options" {
+    run "$BIN" --bogus
+    assert_output --partial "Unknown option"
+}
+
+@test "code-review-loop --max-iterations without value shows error" {
+    run "$BIN" -m
+    assert_output --partial "requires a value"
+}
+
+@test "code-review-loop rejects invalid agent name" {
+    # Prompt files won't exist, but agent validation happens first
+    run "$BIN" --editor gpt4
+    assert_failure
+    assert_output --partial "Unknown agent"
+}
diff --git a/test/fixtures/prompt-blank-lines.md b/test/fixtures/prompt-blank-lines.md
new file mode 100644
index 0000000..16ea406
--- /dev/null
+++ b/test/fixtures/prompt-blank-lines.md
@@ -0,0 +1,5 @@
+
+
+  Indented content here.
+
+Another line.
diff --git a/test/fixtures/prompt-no-frontmatter.md b/test/fixtures/prompt-no-frontmatter.md
new file mode 100644
index 0000000..f5832f8
--- /dev/null
+++ b/test/fixtures/prompt-no-frontmatter.md
@@ -0,0 +1 @@
+Review the code for correctness and clarity.
diff --git a/test/fixtures/prompt-with-frontmatter.md b/test/fixtures/prompt-with-frontmatter.md
new file mode 100644
index 0000000..81436bc
--- /dev/null
+++ b/test/fixtures/prompt-with-frontmatter.md
@@ -0,0 +1,6 @@
+---
+name: test-prompt
+description: A test prompt with frontmatter
+---
+
+Review the code for correctness and clarity.
diff --git a/test/fixtures/review-clean.md b/test/fixtures/review-clean.md
new file mode 100644
index 0000000..a1ec912
--- /dev/null
+++ b/test/fixtures/review-clean.md
@@ -0,0 +1,9 @@
+# Code Review
+
+All checks passed. No issues found.
+
+Verdict: good to go
+
+High: 0
+Medium: 0
+Low: 0
diff --git a/test/fixtures/review-issues.md b/test/fixtures/review-issues.md
new file mode 100644
index 0000000..82c0144
--- /dev/null
+++ b/test/fixtures/review-issues.md
@@ -0,0 +1,9 @@
+# Code Review
+
+Several issues found during review.
+
+Verdict: needs work
+
+High: 2
+Medium: 3
+Low: 1
diff --git a/test/lib-review-loop.bats b/test/lib-review-loop.bats
new file mode 100644
index 0000000..6c45486
--- /dev/null
+++ b/test/lib-review-loop.bats
@@ -0,0 +1,267 @@
+#!/usr/bin/env bats
+# Unit tests for lib/lib-review-loop — pure library functions.
+# Agent runner tests are in test/smoke (uses real CLI agents).
+
+load test_helper
+
+# =========================================================================
+# Validation functions
+# =========================================================================
+
+@test "validate_agent_name accepts all valid agents" {
+    source_lib
+    for agent in claude codex gemini copilot; do
+        run validate_agent_name "$agent" "--editor"
+        assert_success
+    done
+}
+
+@test "validate_agent_name rejects unknown agent" {
+    source_lib
+    run validate_agent_name "gpt4" "--editor"
+    assert_failure
+    assert_output --partial "Unknown agent"
+    assert_output --partial "gpt4"
+}
+
+@test "validate_positive_int accepts valid integers" {
+    source_lib
+    for val in 1 5 100 999; do
+        run validate_positive_int "$val" "--max-iterations"
+        assert_success
+    done
+}
+
+@test "validate_positive_int rejects zero" {
+    source_lib
+    run validate_positive_int "0" "--max-iterations"
+    assert_failure
+}
+
+@test "validate_positive_int rejects negative numbers" {
+    source_lib
+    run validate_positive_int "-1" "--max-iterations"
+    assert_failure
+}
+
+@test "validate_positive_int rejects non-numeric input" {
+    source_lib
+    run validate_positive_int "abc" "--max-iterations"
+    assert_failure
+}
+
+@test "validate_positive_int rejects leading-zero numbers" {
+    source_lib
+    run validate_positive_int "01" "--max-iterations"
+    assert_failure
+}
+
+@test "validate_prompts succeeds when all files exist" {
+    source_lib
+    local f1="$TEST_TMPDIR/prompt1.md" f2="$TEST_TMPDIR/prompt2.md"
+    touch "$f1" "$f2"
+    run validate_prompts "$f1" "$f2"
+    assert_success
+}
+
+@test "validate_prompts fails when a file is missing" {
+    source_lib
+    run validate_prompts "/nonexistent/prompt.md"
+    assert_failure
+    assert_output --partial "Missing prompt files"
+}
+
+# =========================================================================
+# Prompt loading
+# =========================================================================
+
+@test "read_prompt_file strips YAML frontmatter" {
+    source_lib
+    run read_prompt_file "$PROJECT_ROOT/test/fixtures/prompt-with-frontmatter.md"
+    assert_success
+    assert_output "Review the code for correctness and clarity."
+    refute_output --partial "name:"
+    refute_output --partial "---"
+}
+
+@test "read_prompt_file returns content unchanged without frontmatter" {
+    source_lib
+    run read_prompt_file "$PROJECT_ROOT/test/fixtures/prompt-no-frontmatter.md"
+    assert_success
+    assert_output "Review the code for correctness and clarity."
+}
+
+@test "read_prompt_file trims leading and trailing blank lines preserving content" {
+    source_lib
+    run read_prompt_file "$PROJECT_ROOT/test/fixtures/prompt-blank-lines.md"
+    assert_success
+    # `run` drops empty lines from $lines, so inspect raw $output instead
+    assert [ "${output%%$'\n'*}" = "  Indented content here." ]
+    assert_output --partial "Another line."
+}
+
+# =========================================================================
+# Config loading
+# =========================================================================
+
+@test "load_config sets agents from config file" {
+    source_lib
+    cat > "$HOME/.ai-coding-setup.conf" <<'EOF'
+EDITOR_AGENT=gemini
+REVIEWER_AGENT=copilot
+EOF
+    EDITOR_AGENT=""
+    REVIEWER_AGENT=""
+    load_config
+    assert [ "$EDITOR_AGENT" = "gemini" ]
+    assert [ "$REVIEWER_AGENT" = "copilot" ]
+}
+
+@test "load_config ignores comments and blank lines" {
+    source_lib
+    cat > "$HOME/.ai-coding-setup.conf" <<'EOF'
+# This is a comment
+EDITOR_AGENT=codex
+
+# Another comment
+REVIEWER_AGENT=claude
+EOF
+    EDITOR_AGENT=""
+    REVIEWER_AGENT=""
+    load_config
+    assert [ "$EDITOR_AGENT" = "codex" ]
+    assert [ "$REVIEWER_AGENT" = "claude" ]
+}
+
+@test "load_config strips quotes from values" {
+    source_lib
+    cat > "$HOME/.ai-coding-setup.conf" <<'EOF'
+EDITOR_AGENT="codex"
+REVIEWER_AGENT='gemini'
+EOF
+    EDITOR_AGENT=""
+    REVIEWER_AGENT=""
+    load_config
+    assert [ "$EDITOR_AGENT" = "codex" ]
+    assert [ "$REVIEWER_AGENT" = "gemini" ]
+}
+
+@test "load_config is no-op when config file missing" {
+    source_lib
+    EDITOR_AGENT="original"
+    REVIEWER_AGENT="original"
+    load_config
+    assert [ "$EDITOR_AGENT" = "original" ]
+    assert [ "$REVIEWER_AGENT" = "original" ]
+}
+
+# =========================================================================
+# Review status checks
+# =========================================================================
+
+@test "test_review_clean returns 0 for 'Verdict: good to go'" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/review.md"
+    cp "$PROJECT_ROOT/test/fixtures/review-clean.md" "$REVIEW_FILE"
+    run test_review_clean
+    assert_success
+}
+
+@test "test_review_clean is case-insensitive" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/review.md"
+    echo "VERDICT:  GOOD TO GO" > "$REVIEW_FILE"
+    run test_review_clean
+    assert_success
+}
+
+@test "test_review_clean returns 1 for issues present" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/review.md"
+    cp "$PROJECT_ROOT/test/fixtures/review-issues.md" "$REVIEW_FILE"
+    run test_review_clean
+    assert_failure
+}
+
+@test "test_review_clean returns 1 when file is missing" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/nonexistent.md"
+    run test_review_clean
+    assert_failure
+}
+
+@test "get_review_issue_counts extracts High/Medium/Low counts" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/review.md"
+    cp "$PROJECT_ROOT/test/fixtures/review-issues.md" "$REVIEW_FILE"
+    run get_review_issue_counts
+    assert_output "High: 2, Medium: 3, Low: 1"
+}
+
+@test "get_review_issue_counts shows ? for missing counts" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/review.md"
+    echo "High: 1" > "$REVIEW_FILE"
+    run get_review_issue_counts
+    assert_output "High: 1, Medium: ?, Low: ?"
+}
+
+@test "get_review_issue_counts reports no review file" {
+    source_lib
+    REVIEW_FILE="$TEST_TMPDIR/nonexistent.md"
+    run get_review_issue_counts
+    assert_output "No review file"
+}
+
+@test "test_reviewer_satisfied returns 0 for NO_FURTHER_FEEDBACK" {
+    source_lib
+    FEEDBACK_FILE="$TEST_TMPDIR/feedback.md"
+    echo "NO_FURTHER_FEEDBACK" > "$FEEDBACK_FILE"
+    run test_reviewer_satisfied
+    assert_success
+}
+
+@test "test_reviewer_satisfied returns 0 with surrounding whitespace" {
+    source_lib
+    FEEDBACK_FILE="$TEST_TMPDIR/feedback.md"
+    printf '  NO_FURTHER_FEEDBACK  \n\n' > "$FEEDBACK_FILE"
+    run test_reviewer_satisfied
+    assert_success
+}
+
+@test "test_reviewer_satisfied returns 1 for feedback with other content" {
+    source_lib
+    FEEDBACK_FILE="$TEST_TMPDIR/feedback.md"
+    printf 'Some feedback here.\nNO_FURTHER_FEEDBACK mentioned in passing.\n' > "$FEEDBACK_FILE"
+    run test_reviewer_satisfied
+    assert_failure
+}
+
+@test "test_reviewer_satisfied returns 1 when file is missing" {
+    source_lib
+    FEEDBACK_FILE="$TEST_TMPDIR/nonexistent.md"
+    run test_reviewer_satisfied
+    assert_failure
+}
+
+@test "build_improvement_prompt includes all parameters" {
+    source_lib
+    run build_improvement_prompt "/path/to/plan.md" "/path/to/feedback.md" "codex" "2" "5"
+    assert_success
+    assert_output --partial "/path/to/plan.md"
+    assert_output --partial "/path/to/feedback.md"
+    assert_output --partial "cycle 2 of 5"
+    assert_output --partial "codex"
+}
+
+# =========================================================================
+# Utility functions
+# =========================================================================
+
+@test "format_elapsed computes minutes and seconds" {
+    source_lib
+    local start
+    start=$(( $(date +%s) - 125 ))
+    run format_elapsed "$start"
+    assert_output --regexp '^2m [56]s$' # 6s if the clock ticks mid-test
+}
diff --git a/test/plan-review-loop.bats b/test/plan-review-loop.bats
new file mode 100644
index 0000000..8b9cf5a
--- /dev/null
+++ b/test/plan-review-loop.bats
@@ -0,0 +1,41 @@
+#!/usr/bin/env bats
+# Tests for bin/plan-review-loop — argument parsing and validation.
+
+load test_helper
+
+BIN="$PROJECT_ROOT/bin/plan-review-loop"
+
+@test "plan-review-loop --help prints usage and exits 0" {
+    run "$BIN" --help
+    assert_success
+    assert_output --partial "Usage: plan-review-loop"
+}
+
+@test "plan-review-loop -h prints usage and exits 0" {
+    run "$BIN" -h
+    assert_success
+    assert_output --partial "Usage: plan-review-loop"
+}
+
+@test "plan-review-loop requires a plan file argument" {
+    run "$BIN"
+    # usage() exits 0 after printing the error, so check for the error message
+    assert_output --partial "Plan file argument is required"
+}
+
+@test "plan-review-loop rejects unknown options" {
+    run "$BIN" --bogus
+    assert_output --partial "Unknown option"
+}
+
+@test "plan-review-loop rejects nonexistent plan file" {
+    run "$BIN" /nonexistent/plan.md
+    assert_failure
+    assert_output --partial "Plan file not found"
+}
+
+@test "plan-review-loop rejects invalid agent name" {
+    run "$BIN" --editor gpt4 /tmp/dummy.md
+    assert_failure
+    assert_output --partial "Unknown agent"
+}
diff --git a/test/run b/test/run
new file mode 100755
index 0000000..55bf6dc
--- /dev/null
+++ b/test/run
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+# Convenience script to run all BATS tests.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+exec "$SCRIPT_DIR/bats/bats-core/bin/bats" "$SCRIPT_DIR"/*.bats "$@"
diff --git a/test/smoke b/test/smoke
new file mode 100755
index 0000000..7c39c3c
--- /dev/null
+++ b/test/smoke
@@ -0,0 +1,312 @@
+#!/usr/bin/env bash
+# ---------------------------------------------------------------------------
+# Smoke tests — run real AI agents to verify flag acceptance and basic I/O.
+#
+# Tests each installed agent as both editor (can it modify a file?) and
+# reviewer (can it produce a review file?).  Uses a temporary git repo with
+# a small staged diff so the agents have real context to work with.
+#
+# Usage:
+#   test/smoke                  # test all installed agents
+#   test/smoke claude codex     # test only specified agents
+#   test/smoke --timeout 180    # override per-test timeout (default: 120s)
+# ---------------------------------------------------------------------------
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# Source the library for run_agent
+unset _LIB_REVIEW_LOOP_LOADED
+# shellcheck source=lib/lib-review-loop
+source "$PROJECT_ROOT/lib/lib-review-loop"
+
+# ---- configuration -------------------------------------------------------
+
+TIMEOUT=120  # seconds per agent invocation
+ALL_AGENTS="claude codex gemini copilot"
+
+# ---- usage ---------------------------------------------------------------
+# Print the help text on stdout, then terminate the script with exit status 0.
+usage() {
+    cat <<'EOF'
+Usage: test/smoke [OPTIONS] [agent...]
+
+Run real AI agents against a fixture repo to verify flag acceptance and
+basic read/write functionality.  No arguments tests all installed agents.
+
+Options:
+  --timeout SECONDS   Per-test timeout (default: 120)
+  -h, --help          Show this help message
+
+Examples:
+  test/smoke                   # all installed agents
+  test/smoke claude            # only Claude
+  test/smoke --timeout 180     # longer timeout
+EOF
+    exit 0
+}
+
+# ---- argument parsing ----------------------------------------------------
+
+requested_agents=()
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --timeout)  # must be a positive integer: it feeds `sleep` and a numeric comparison
+            [[ "${2:-}" =~ ^[1-9][0-9]*$ ]] || { echo "ERROR: --timeout requires a positive integer value" >&2; exit 1; }
+            TIMEOUT="$2"; shift 2 ;;
+        -h|--help) usage ;;
+        -*) echo "Unknown option: $1" >&2; (usage) >&2; exit 1 ;;  # usage exits 0, so run it in a subshell and fail here
+        *) requested_agents+=("$1"); shift ;;
+    esac
+done
+
+# ---- detect installed agents ---------------------------------------------
+
+# Collect every known agent CLI that resolves on PATH.
+# (ALL_AGENTS is a space-separated list and is word-split on purpose.)
+installed=()
+for agent in $ALL_AGENTS; do
+    command -v "$agent" >/dev/null 2>&1 || continue
+    installed+=("$agent")
+done
+
+if (( ${#installed[@]} == 0 )); then
+    echo "No AI agents found on PATH. Install at least one of: $ALL_AGENTS"
+    exit 1
+fi
+
+# Without positional arguments every installed agent is tested; otherwise keep
+# only the requested agents that are installed and warn about the rest.
+if [[ ${#requested_agents[@]} -eq 0 ]]; then
+    agents=("${installed[@]}")
+else
+    agents=()
+    for req in "${requested_agents[@]}"; do
+        found=false
+        for inst in "${installed[@]}"; do
+            [[ "$inst" == "$req" ]] || continue
+            agents+=("$req")
+            found=true
+            break
+        done
+        [[ "$found" == true ]] || echo "WARNING: '$req' is not installed — skipping"
+    done
+fi
+
+# Every requested agent may have been dropped above.
+if (( ${#agents[@]} == 0 )); then
+    echo "No testable agents remain after filtering."
+    exit 1
+fi
+
+# Run header
+printf '\n'
+printf '%s\n' "Smoke tests" "==========="
+printf 'Agents  : %s\n' "${agents[*]}"
+printf 'Timeout : %ss per test\n' "$TIMEOUT"
+
+# ---- fixture setup -------------------------------------------------------
+
+# Create a throwaway git repo and print its path on stdout.  HEAD contains
+# calculator.py with add() and divide() (divide has no zero-division guard);
+# an appended multiply() is staged but not committed, giving a small staged diff.
+create_fixture() {
+    local tmpdir
+    tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/smoke-test.XXXXXX")
+
+    git -C "$tmpdir" init -q
+    git -C "$tmpdir" config user.name "ai-coding-setup smoke"
+    git -C "$tmpdir" config user.email "smoke-tests@example.invalid"
+    git -C "$tmpdir" commit --allow-empty -q -m "initial"
+
+    cat > "$tmpdir/calculator.py" <<'PYEOF'
+def add(a, b):
+    return a + b
+
+
+def divide(a, b):
+    return a / b
+PYEOF
+
+    git -C "$tmpdir" add calculator.py
+    git -C "$tmpdir" commit -q -m "add calculator module"
+
+    # Stage (but do not commit) a follow-up change: append a multiply function
+    cat >> "$tmpdir/calculator.py" <<'PYEOF'
+
+
+def multiply(a, b):
+    return a * b
+PYEOF
+
+    git -C "$tmpdir" add calculator.py
+    echo "$tmpdir"
+}
+
+# ---- test runner ---------------------------------------------------------
+
+PASS=0
+FAIL=0
+RESULTS=()
+
+# Run a single smoke test and record the outcome in PASS / FAIL / RESULTS.
+#   $1 — test name (for display)
+#   $2 — agent name
+#   $3 — prompt text
+#   $4 — tools (comma-separated, for Claude; ignored by others)
+#   $5 — verification function name (called with the fixture dir once the agent exits 0)
+#   $6 — fixture directory (agent runs here); returns 0 on PASS, 1 on TIMEOUT or FAIL
+run_smoke_test() {
+    local name="$1" agent="$2" prompt="$3" tools="$4" verify_fn="$5" fixture="$6"
+
+    local start_time elapsed verify_result exit_code=0
+    start_time=$(date +%s)
+
+    # Run the agent in a subshell inside the fixture; keep combined output for diagnostics.
+    local output_file="$fixture/.smoke-output.log"
+    (
+        cd "$fixture"
+        run_agent "$agent" "$prompt" "$tools"
+    ) > "$output_file" 2>&1 &
+    local agent_pid=$!
+
+    # Portable timeout: a background watchdog kills the agent after $TIMEOUT seconds.
+    # Killing only the wrapper subshell would leave its children running (presumably
+    # the agent CLI started by run_agent — confirm), so they are signalled first.
+    (
+        sleep "$TIMEOUT" || exit 0   # sleep was cancelled: the agent already finished
+        pkill -P "$agent_pid" 2>/dev/null || true
+        kill "$agent_pid" 2>/dev/null
+    ) &
+    local timer_pid=$!
+
+    wait "$agent_pid" 2>/dev/null || exit_code=$?
+    # Cancel the timer (may already be gone): stop its pending sleep, then reap it.
+    pkill -P "$timer_pid" 2>/dev/null || true
+    kill "$timer_pid" 2>/dev/null || true
+    wait "$timer_pid" 2>/dev/null || true
+    elapsed=$(( $(date +%s) - start_time ))
+
+    # Timed out?  Counters use plain assignment because ((VAR++)) returns status 1
+    # when VAR was 0, which aborts a `set -e` script unless the caller masks it.
+    if [[ $elapsed -ge $TIMEOUT ]]; then
+        echo -e "  ${YELLOW}TIMEOUT${NC}  $name  (>${TIMEOUT}s)"
+        RESULTS+=("TIMEOUT  $name")
+        FAIL=$((FAIL + 1))
+        return 1
+    fi
+
+    # Check agent exit code
+    if [[ $exit_code -ne 0 ]]; then
+        echo -e "  ${RED}FAIL${NC}     $name  (${elapsed}s, exit $exit_code)"
+        if [[ -f "$output_file" ]]; then   # show the tail of the output for debugging
+            echo "           --- last 5 lines of output ---"
+            tail -n 5 "$output_file" | sed 's/^/           /'
+        fi
+        RESULTS+=("FAIL     $name  (exit $exit_code)")
+        FAIL=$((FAIL + 1))
+        return 1
+    fi
+
+    # Run the verification function
+    verify_result=$( "$verify_fn" "$fixture" 2>&1 ) || {
+        echo -e "  ${RED}FAIL${NC}     $name  (${elapsed}s) — $verify_result"
+        RESULTS+=("FAIL     $name  — $verify_result")
+        FAIL=$((FAIL + 1))
+        return 1
+    }
+
+    echo -e "  ${GREEN}PASS${NC}     $name  (${elapsed}s)"
+    RESULTS+=("PASS     $name")
+    PASS=$((PASS + 1))
+    return 0
+}
+
+# ---- verification functions ----------------------------------------------
+
+# Check that the reviewer left agent-code-review.md in the fixture directory.
+# Prints a reason on stdout and returns 1 when the file is missing.
+verify_review_file() {
+    local review_path="$1/agent-code-review.md"
+
+    [[ -f "$review_path" ]] && return 0
+
+    echo "agent-code-review.md was not created"
+    return 1
+}
+
+# Verify that the agent edited calculator.py.  The fixture itself stages an appended
+# multiply(), so mere presence in the staged diff proves nothing (it is always there).
+verify_file_modified() {
+    local fixture="$1" committed staged
+    [[ -z "$(git -C "$fixture" diff --name-only -- calculator.py)" ]] || return 0  # unstaged edit: worktree differs from index
+    committed=$(git -C "$fixture" show HEAD:calculator.py) || return 1
+    staged=$(git -C "$fixture" show :calculator.py) || return 1
+    [[ "$staged"$'\n' == "$committed"$'\n'* ]] || return 0  # re-staged edit: staged blob no longer starts with the HEAD content
+    echo "calculator.py was not modified"
+    return 1
+}
+
+# ---- prompts -------------------------------------------------------------
+
+REVIEWER_PROMPT="Review the staged git changes (run: git diff --staged).
+
+Write your findings to agent-code-review.md with this structure:
+## Summary
+One paragraph overview.
+
+## Issues
+List any issues found.
+
+High: <count>
+Medium: <count>
+Low: <count>
+
+Verdict: good to go  OR  Verdict: needs work
+
+Keep it concise. Do not modify any source files."
+
+EDITOR_PROMPT="Read calculator.py. The divide function has no guard against division by zero.
+
+Add a check: if b is zero, raise a ValueError with the message 'Cannot divide by zero'.
+Edit only the divide function. Do not create new files."
+
+REVIEWER_TOOLS="Read,Write,Bash,Grep,Glob"
+EDITOR_TOOLS="Edit,Read,Write,Bash,Grep,Glob"
+
+# ---- run tests -----------------------------------------------------------
+
+# Remove the active fixture on Ctrl-C / early exit too.  NOTE(review): this replaces any EXIT trap set by the sourced library (none visible here) — confirm.
+fixture=""; trap 'if [[ -n "$fixture" ]]; then rm -rf -- "$fixture"; fi' EXIT
+for agent in "${agents[@]}"; do
+    printf '\n--- %s ---\n' "$agent"
+    # Reviewer test (each test gets a fresh fixture)
+    fixture=$(create_fixture)
+    run_smoke_test "$agent reviewer" "$agent" "$REVIEWER_PROMPT" "$REVIEWER_TOOLS" \
+        verify_review_file "$fixture" || true
+    rm -rf -- "$fixture"
+
+    # Editor test
+    fixture=$(create_fixture)
+    run_smoke_test "$agent editor" "$agent" "$EDITOR_PROMPT" "$EDITOR_TOOLS" \
+        verify_file_modified "$fixture" || true
+    rm -rf -- "$fixture"
+done
+
+# ---- summary -------------------------------------------------------------
+
+# Print the tally, then one line per recorded result; a non-zero FAIL count
+# makes the whole script exit 1.
+total=$((PASS + FAIL))
+printf '\n%s\n' "==========="
+printf 'Results: %s passed, %s failed out of %s tests\n\n' "$PASS" "$FAIL" "$total"
+
+for r in "${RESULTS[@]}"; do
+    printf '  %s\n' "$r"
+done
+printf '\n'
+
+if (( FAIL > 0 )); then
+    exit 1
+fi
diff --git a/test/test_helper.bash b/test/test_helper.bash
new file mode 100644
index 0000000..f975458
--- /dev/null
+++ b/test/test_helper.bash
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Shared test helper for BATS tests.
+# Loaded via `load test_helper` at the top of each .bats file.
+
+# Absolute path to the project root
+PROJECT_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
+
+# Load BATS helper libraries
+load "$PROJECT_ROOT/test/bats/bats-support/load"
+load "$PROJECT_ROOT/test/bats/bats-assert/load"
+
+# ---- per-test setup/teardown ---------------------------------------------
+
+setup() {
+    # Scratch directory for this test; teardown removes it.
+    export TEST_TMPDIR
+    TEST_TMPDIR="$(mktemp -d)"
+    # Keep the caller's HOME in REAL_HOME and point HOME at an empty dir so config file tests are isolated
+    export REAL_HOME="$HOME"
+    export HOME="$TEST_TMPDIR/home"
+    mkdir -p "$HOME"
+}
+
+teardown() {
+    rm -rf -- "$TEST_TMPDIR"  # drops the scratch dir, including the fake HOME created inside it
+}
+
+# ---- helpers -------------------------------------------------------------
+
+# Source the shared library in a clean state: the double-source guard variable is unset first, then lib/lib-review-loop is sourced from PROJECT_ROOT.
+source_lib() {
+    unset _LIB_REVIEW_LOOP_LOADED
+    # shellcheck source=lib/lib-review-loop
+    source "$PROJECT_ROOT/lib/lib-review-loop"
+}

From 5b4c3cbf6c8bcef080b4b675643cd39b3615be72 Mon Sep 17 00:00:00 2001
From: Rex Lorenzo <rexlorenzo@gmail.com>
Date: Fri, 20 Mar 2026 17:20:51 -0700
Subject: [PATCH 2/3] fix(ci): check out git submodules for BATS tests

---
 .github/workflows/lint.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index cf826ac..248ff67 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -13,6 +13,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+        with:
+          submodules: true
       - uses: actions/setup-python@v5
         with:
           python-version: "3.x"

From 10bc6179db402155fb80dedb55cc194e097ea79c Mon Sep 17 00:00:00 2001
From: Rex Lorenzo <rexlorenzo@gmail.com>
Date: Fri, 20 Mar 2026 20:36:51 -0700
Subject: [PATCH 3/3] fix(test): disable GPG signing in fixture repo, fix
 timing race in format_elapsed test

Prevents smoke tests from failing when commit.gpgsign=true is set
globally. Allows 1-second tolerance in format_elapsed assertion to
avoid flaky test from clock skew between date calls.
---
 test/lib-review-loop.bats | 9 +++++----
 test/smoke                | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/lib-review-loop.bats b/test/lib-review-loop.bats
index 6c45486..1e47654 100644
--- a/test/lib-review-loop.bats
+++ b/test/lib-review-loop.bats
@@ -260,8 +260,9 @@ EOF
 
 @test "format_elapsed computes minutes and seconds" {
     source_lib
-    local start
-    start=$(( $(date +%s) - 125 ))
-    run format_elapsed "$start"
-    assert_output "2m 5s"
+    local now
+    now=$(date +%s)
+    run format_elapsed $(( now - 125 ))
+    # Allow for 1-second clock skew between date calls
+    assert_output --regexp "^2m [56]s$"
 }
diff --git a/test/smoke b/test/smoke
index 7c39c3c..15bad42 100755
--- a/test/smoke
+++ b/test/smoke
@@ -118,6 +118,7 @@ create_fixture() {
     git -C "$tmpdir" init -q
     git -C "$tmpdir" config user.name "ai-coding-setup smoke"
     git -C "$tmpdir" config user.email "smoke-tests@example.invalid"
+    git -C "$tmpdir" config commit.gpgsign false
     git -C "$tmpdir" commit --allow-empty -q -m "initial"
 
     cat > "$tmpdir/calculator.py" <<'PYEOF'