Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-python@v5
with:
python-version: "3.x"
Expand Down
9 changes: 9 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[submodule "test/bats/bats-core"]
path = test/bats/bats-core
url = https://github.com/bats-core/bats-core.git
[submodule "test/bats/bats-support"]
path = test/bats/bats-support
url = https://github.com/bats-core/bats-support.git
[submodule "test/bats/bats-assert"]
path = test/bats/bats-assert
url = https://github.com/bats-core/bats-assert.git
2 changes: 2 additions & 0 deletions .markdownlint-cli2.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
config:
MD013: false # Line length — instructional prose runs long intentionally
ignores:
- "test/fixtures/**"
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,21 @@ repos:
rev: v0.21.0
hooks:
- id: markdownlint-cli2

- repo: local
hooks:
- id: bats-unit-tests
name: BATS unit tests
entry: test/run
language: script
always_run: true
pass_filenames: false
stages: [pre-commit]

- id: smoke-tests
name: Smoke tests (real agents)
entry: test/smoke
language: script
always_run: true
pass_filenames: false
stages: [pre-push]
Comment thread
rlorenzo marked this conversation as resolved.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,37 @@ The setup script only manages commands it originally installed.

## Contributing

### Running Tests

The test suite uses [BATS](https://github.com/bats-core/bats-core) (Bash Automated Testing System). After cloning with submodules:

```bash
git clone --recurse-submodules https://github.com/rlorenzo/ai-coding-setup.git
cd ai-coding-setup
test/run
```

If you already cloned without submodules:

```bash
git submodule update --init --recursive
test/run
```

Unit tests (`test/run`) cover config parsing, prompt loading, validation, and review status checks. They run in seconds and need no API keys.

### Smoke Tests

Smoke tests run real AI agents against a temporary git repo to verify that CLI flags are accepted and agents can perform basic read/write tasks:

```bash
test/smoke # test all installed agents
test/smoke claude codex # test specific agents
test/smoke --timeout 180 # override per-test timeout (default: 120s)
```

Each installed agent is tested as both editor (can it modify a file?) and reviewer (does it produce a review file?). Requires at least one AI tool installed and authenticated.

### Pre-commit hooks (optional)

This repo uses [pre-commit](https://pre-commit.com/) to run linters locally before each commit. Install it once and you'll get automatic checks for shell scripts (shellcheck), markdown (markdownlint), and TOML syntax.
Expand Down
28 changes: 1 addition & 27 deletions bin/code-review-loop
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Options:
-r, --reviewer AGENT Agent for code review (default: codex)
-h, --help Show this help message

Agents: claude, codex, gemini
Agents: claude, codex, gemini, copilot
EOF
exit 0
}
Expand Down Expand Up @@ -84,32 +84,6 @@ REVIEWER_TOOLS="Read,Write,Bash,Grep,Glob"

# ---- helpers -------------------------------------------------------------

test_review_clean() {
[[ -f "$REVIEW_FILE" ]] || return 1
local content
content=$(<"$REVIEW_FILE")

# Only an explicit "Verdict: good to go" line signals clean — a bare
# substring match could false-positive on "not good to go"
if echo "$content" | grep -qiE "verdict[[:space:]]*:[[:space:]]*good to go"; then
return 0
fi

return 1
}

get_review_issue_counts() {
[[ -f "$REVIEW_FILE" ]] || { echo "No review file"; return; }
local content high medium low
content=$(<"$REVIEW_FILE")

high=$(echo "$content" | grep -oE "High[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
medium=$(echo "$content" | grep -oE "Medium[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
low=$(echo "$content" | grep -oE "Low[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true

echo "High: ${high:-?}, Medium: ${medium:-?}, Low: ${low:-?}"
}

stage_review_changes() {
# Re-stage files that are in the review scope (partially staged files are
# rejected at startup, so git add here is safe)
Expand Down
60 changes: 3 additions & 57 deletions bin/plan-review-loop
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Options:
-r, --reviewer AGENT Agent for plan review (default: codex)
-h, --help Show this help message

Agents: claude, codex, gemini
Agents: claude, codex, gemini, copilot

Examples:
plan-review-loop PLAN-feature.md
Expand Down Expand Up @@ -97,60 +97,6 @@ REVIEWER_FOLLOWUP_PROMPT="$PROMPTS_DIR/plan-review-followup.md"
EDITOR_TOOLS="Read,Write,Edit,Grep,Glob"
REVIEWER_TOOLS="Read,Write,Grep,Glob"

# ---- helpers -------------------------------------------------------------

test_reviewer_satisfied() {
[[ -f "$FEEDBACK_FILE" ]] || return 1
# Match sentinel as a standalone line to avoid false positives from
# feedback that merely mentions the token in explanatory text
local trimmed
trimmed=$(sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' "$FEEDBACK_FILE")
[[ "$trimmed" == "NO_FURTHER_FEEDBACK" ]]
}

build_improvement_prompt() {
local plan_path="$1" feedback_path="$2" reviewer_name="$3" cycle="$4" max_cycles="$5"
cat <<PROMPT
You are a senior software architect improving a plan document based on reviewer feedback.

## Your Task

1. **Read** the plan file: $plan_path
2. **Read** the feedback file: $feedback_path
3. **Evaluate** each feedback point critically. You are NOT obligated to accept every suggestion. Use your judgment:
- Accept and implement feedback that genuinely improves completeness, feasibility, clarity, or risk coverage.
- Decline feedback that is subjective preference, out of scope, or would degrade the plan.
4. **Edit** the plan file directly to incorporate valid improvements. Do not rewrite sections that are already well-structured unless the feedback identifies a real gap.
5. **Write** a response to $feedback_path (overwrite it) explaining your actions:
- For each accepted point: briefly describe what was changed and where.
- For each declined point: explain why (e.g., out of scope, already covered, disagree with reasoning).

## Response Format

Use these markers for each feedback item in your response:

- Accepted: \`✅ Implemented: [brief description of what was changed]\`
- Deferred: \`📝 Response: [reason for deferring, alternatives]\`
- Declined: \`❌ Clarification: [reason for disagreeing]\`

## Guidelines

- Preserve the plan's existing structure and formatting conventions.
- Do not add unnecessary boilerplate or padding.
- Focus on substance: missing edge cases, risk gaps, unclear requirements, feasibility concerns.
- If the reviewer's feedback is entirely minor or already addressed, say so clearly in your response.
- This is cycle $cycle of $max_cycles with the $reviewer_name reviewer.

## Output

Your only output files are:
- The improved plan file (edited in place)
- The feedback response file (overwritten with your response)

Do not create any other files. Do not stage or commit anything.
PROMPT
}

# ---- validation ----------------------------------------------------------
if [[ ! -f "$PLAN_FILE_PATH" ]]; then
echo -e "${RED}ERROR: Plan file not found: $PLAN_FILE_PATH${NC}"
Expand All @@ -172,8 +118,8 @@ echo -e "${MAGENTA} Plan Review Loop${NC}"
echo -e "${MAGENTA}========================================${NC}"
echo " Plan file : $PLAN_FILE"
echo " Max iterations : $MAX_ITERATIONS"
echo " Editor : $EDITOR_AGENT"
echo " Reviewer : $REVIEWER_AGENT"
echo " Editor : $EDITOR_AGENT"
echo " Reviewer : $REVIEWER_AGENT"

start_time=$(date +%s)
iteration_data=""
Expand Down
95 changes: 95 additions & 0 deletions lib/lib-review-loop
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,101 @@ validate_prompts() {
fi
}

# ---- review status checks ------------------------------------------------

# Check whether a code review is clean (no actionable issues).
# Requires REVIEW_FILE to be set by the caller.
# Returns 0 if the review file contains "Verdict: good to go", 1 otherwise.
test_review_clean() {
[[ -f "$REVIEW_FILE" ]] || return 1
local content
content=$(<"$REVIEW_FILE")

# Only an explicit "Verdict: good to go" line signals clean — a bare
# substring match could false-positive on "not good to go"
if echo "$content" | grep -qiE "verdict[[:space:]]*:[[:space:]]*good to go"; then
Comment thread
rlorenzo marked this conversation as resolved.
return 0
fi

return 1
}

# Extract issue counts from a code review file.
# Requires REVIEW_FILE to be set by the caller.
# Prints: "High: N, Medium: N, Low: N" (or ? for missing counts)
get_review_issue_counts() {
[[ -f "$REVIEW_FILE" ]] || { echo "No review file"; return; }
Comment thread
rlorenzo marked this conversation as resolved.
local content high medium low
content=$(<"$REVIEW_FILE")

high=$(echo "$content" | grep -oE "High[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
medium=$(echo "$content" | grep -oE "Medium[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
low=$(echo "$content" | grep -oE "Low[[:space:]]*:?[[:space:]]*[0-9]+" | grep -oE "[0-9]+" | head -1) || true
Comment thread
rlorenzo marked this conversation as resolved.

echo "High: ${high:-?}, Medium: ${medium:-?}, Low: ${low:-?}"
}

# Check whether a plan reviewer is satisfied (no further feedback).
# Requires FEEDBACK_FILE to be set by the caller.
# Returns 0 if the feedback file contains only "NO_FURTHER_FEEDBACK", 1 otherwise.
test_reviewer_satisfied() {
[[ -f "$FEEDBACK_FILE" ]] || return 1
# Match sentinel as a standalone line to avoid false positives from
# feedback that merely mentions the token in explanatory text
local trimmed
trimmed=$(sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' "$FEEDBACK_FILE")
[[ "$trimmed" == "NO_FURTHER_FEEDBACK" ]]
}

# Build the improvement prompt for the plan editor.
# $1 — plan file path
# $2 — feedback file path
# $3 — reviewer agent name
# $4 — current cycle number
# $5 — max cycles
build_improvement_prompt() {
local plan_path="$1" feedback_path="$2" reviewer_name="$3" cycle="$4" max_cycles="$5"
cat <<PROMPT
You are a senior software architect improving a plan document based on reviewer feedback.

## Your Task

1. **Read** the plan file: $plan_path
2. **Read** the feedback file: $feedback_path
3. **Evaluate** each feedback point critically. You are NOT obligated to accept every suggestion. Use your judgment:
- Accept and implement feedback that genuinely improves completeness, feasibility, clarity, or risk coverage.
- Decline feedback that is subjective preference, out of scope, or would degrade the plan.
4. **Edit** the plan file directly to incorporate valid improvements. Do not rewrite sections that are already well-structured unless the feedback identifies a real gap.
5. **Write** a response to $feedback_path (overwrite it) explaining your actions:
- For each accepted point: briefly describe what was changed and where.
- For each declined point: explain why (e.g., out of scope, already covered, disagree with reasoning).

## Response Format

Use these markers for each feedback item in your response:

- Accepted: \`✅ Implemented: [brief description of what was changed]\`
- Deferred: \`📝 Response: [reason for deferring, alternatives]\`
- Declined: \`❌ Clarification: [reason for disagreeing]\`

## Guidelines

- Preserve the plan's existing structure and formatting conventions.
- Do not add unnecessary boilerplate or padding.
- Focus on substance: missing edge cases, risk gaps, unclear requirements, feasibility concerns.
- If the reviewer's feedback is entirely minor or already addressed, say so clearly in your response.
- This is cycle $cycle of $max_cycles with the $reviewer_name reviewer.

## Output

Your only output files are:
- The improved plan file (edited in place)
- The feedback response file (overwritten with your response)

Do not create any other files. Do not stage or commit anything.
PROMPT
}

# ---- temp directory with cleanup -----------------------------------------

# Create a temp directory and register a cleanup trap.
Expand Down
1 change: 1 addition & 0 deletions test/bats/bats-assert
Submodule bats-assert added at 697471
1 change: 1 addition & 0 deletions test/bats/bats-core
Submodule bats-core added at d9faff
1 change: 1 addition & 0 deletions test/bats/bats-support
Submodule bats-support added at 0954ab
35 changes: 35 additions & 0 deletions test/code-review-loop.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bats
# Tests for bin/code-review-loop — argument parsing and validation.

load test_helper

BIN="$PROJECT_ROOT/bin/code-review-loop"

@test "code-review-loop --help prints usage and exits 0" {
run "$BIN" --help
assert_success
assert_output --partial "Usage: code-review-loop"
}

@test "code-review-loop -h prints usage and exits 0" {
run "$BIN" -h
assert_success
assert_output --partial "Usage: code-review-loop"
}

@test "code-review-loop rejects unknown options" {
run "$BIN" --bogus
assert_output --partial "Unknown option"
}

@test "code-review-loop --max-iterations without value shows error" {
run "$BIN" -m
Comment thread
rlorenzo marked this conversation as resolved.
assert_output --partial "requires a value"
}

@test "code-review-loop rejects invalid agent name" {
# Prompt files won't exist, but agent validation happens first
run "$BIN" --editor gpt4
assert_failure
assert_output --partial "Unknown agent"
}
5 changes: 5 additions & 0 deletions test/fixtures/prompt-blank-lines.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@


Indented content here.

Another line.
1 change: 1 addition & 0 deletions test/fixtures/prompt-no-frontmatter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Review the code for correctness and clarity.
6 changes: 6 additions & 0 deletions test/fixtures/prompt-with-frontmatter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
name: test-prompt
description: A test prompt with frontmatter
---

Review the code for correctness and clarity.
9 changes: 9 additions & 0 deletions test/fixtures/review-clean.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Code Review

All checks passed. No issues found.

Verdict: good to go

High: 0
Medium: 0
Low: 0
9 changes: 9 additions & 0 deletions test/fixtures/review-issues.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Code Review

Several issues found during review.

Verdict: needs work

High: 2
Medium: 3
Low: 1
Loading
Loading