From 20249443d1030a4e218b949835d7e72f900349d4 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Thu, 1 Jan 2026 13:39:50 -0800 Subject: [PATCH 01/13] Docs: codify PR review loop + CodeRabbit comment extraction Add mandatory procedures for PR submission and review comment triage, plus a helper script to extract actionable comments from the PR head diff. --- .../scripts/extract-actionable-comments.sh | 221 ++++++++++++++++++ CONTRIBUTING.md | 1 + docs/decision-log.md | 1 + docs/execution-plan.md | 8 + docs/procedures/EXTRACT-PR-COMMENTS.md | 212 +++++++++++++++++ docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md | 121 ++++++++++ 6 files changed, 564 insertions(+) create mode 100755 .github/scripts/extract-actionable-comments.sh create mode 100644 docs/procedures/EXTRACT-PR-COMMENTS.md create mode 100644 docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh new file mode 100755 index 00000000..3286b64c --- /dev/null +++ b/.github/scripts/extract-actionable-comments.sh @@ -0,0 +1,221 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: Apache-2.0 +# © James Ross Ω FLYING•ROBOTS + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: + .github/scripts/extract-actionable-comments.sh [--repo OWNER/REPO] [--out ] [--full] + +Purpose: + Extract actionable (fresh) CodeRabbitAI/GitHub review comments for a PR by + filtering comments pinned to the PR head commit and grouping by staleness. + +Outputs: + - Writes a Markdown report to stdout by default. + - Also writes the raw comments JSON and intermediate files to /tmp. + +Options: + --repo OWNER/REPO Override repo (default: current repo via `gh repo view`) + --out Write the Markdown report to (also prints to stdout) + --full Include full comment bodies for actionable comments + +Examples: + .github/scripts/extract-actionable-comments.sh 176 + .github/scripts/extract-actionable-comments.sh 176 --out /tmp/pr-176-report.md + .github/scripts/extract-actionable-comments.sh 176 --full +EOF +} + +require_cmd() { + local cmd="$1" + command -v "$cmd" >/dev/null 2>&1 || { echo "Missing dependency: $cmd" >&2; exit 2; } +} + +PR_NUMBER="${1:-}" +shift || true +if [[ -z "${PR_NUMBER}" ]]; then + usage >&2 + exit 2 +fi + +REPO="" +OUT="" +FULL=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo) + REPO="${2:-}" + shift 2 + ;; + --out) + OUT="${2:-}" + shift 2 + ;; + --full) + FULL=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +require_cmd gh +require_cmd jq + +if [[ -z "$REPO" ]]; then + REPO="$(gh repo view --json nameWithOwner --jq '.nameWithOwner')" +fi + +OWNER="${REPO%/*}" +NAME="${REPO#*/}" + +HEAD_SHA="$(gh pr view "$PR_NUMBER" --repo "$REPO" --json headRefOid --jq '.headRefOid')" +HEAD7="${HEAD_SHA:0:7}" + +TS="$(date +%s)" +RAW="/tmp/pr-${PR_NUMBER}-comments-${TS}.json" +LATEST="/tmp/pr-${PR_NUMBER}-latest-${TS}.json" +REPORT="/tmp/pr-${PR_NUMBER}-report-${TS}.md" + +gh api "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" --paginate > "$RAW" + +# Filter: top-level comments pinned to the current PR head commit. +jq --arg commit "$HEAD7" ' + [ .[] | + select(.in_reply_to_id == null and .commit_id[0:7] == $commit) | + { + id, + path, + line, + current_commit: (.commit_id[0:7]), + original_commit: (.original_commit_id[0:7]), + is_stale: (.commit_id != .original_commit_id), + has_ack: (.body | contains("✅ Addressed in commit")), + priority: ( + if (.body | contains("🔴 Critical")) then "P0" + elif (.body | contains("🟠 Major")) then "P1" + elif (.body | contains("🟡 Minor")) then "P2" + else "P3" + end + ), + title: ( + (.body + | split("\n") + | map(select(. != "")) + | .[0] // "UNTITLED" + ) + | gsub("\\*\\*"; "") + | .[0:80] + ), + body: .body + } + ] +' "$RAW" > "$LATEST" + +fresh_count="$(jq '[.[] | select(.is_stale == false and .has_ack == false)] | length' "$LATEST")" +stale_count="$(jq '[.[] | select(.is_stale == true)] | length' "$LATEST")" +ack_count="$(jq '[.[] | select(.has_ack == true)] | length' "$LATEST")" + +{ + echo "# CodeRabbitAI/GitHub Actionables — PR #${PR_NUMBER}" + echo + echo "- Repo: \`${REPO}\`" + echo "- PR head: \`${HEAD7}\`" + echo "- Generated: \`$(date -u +"%Y-%m-%dT%H:%M:%SZ")\`" + echo + + echo "## Summary" + echo + echo "- Fresh actionable: **${fresh_count}**" + echo "- Stale (verify): **${stale_count}**" + echo "- Acknowledged (✅ Addressed): **${ack_count}**" + echo + + echo "## Stale / Verify" + echo + jq -r ' + .[] + | select(.is_stale == true) + | "- [ ] \(.path):\(.line // 1) — \(.title) (original: \(.original_commit)) [id=\(.id)]" + ' "$LATEST" + echo + + echo "## Acknowledged" + echo + jq -r ' + .[] + | select(.has_ack == true) + | "- [ ] \(.path):\(.line // 1) — \(.title) (acknowledged) [id=\(.id)]" + ' "$LATEST" + echo + + echo "## Actionable (Fresh)" + echo + + # Sort: priority, then path, then line (null line => 0). + jq -r ' + def pnum(p): + if p == "P0" then 0 + elif p == "P1" then 1 + elif p == "P2" then 2 + else 3 + end; + + [ .[] + | select(.is_stale == false and .has_ack == false) + ] + | sort_by([pnum(.priority), .path, (.line // 0)]) + | .[] + | "- [ ] [\(.priority)] \(.path):\(.line // 1) — \(.title) [id=\(.id)]" + ' "$LATEST" + echo + + if [[ "$FULL" -eq 1 ]]; then + echo "## Full Comment Bodies (Actionable)" + echo + jq -r ' + def pnum(p): + if p == "P0" then 0 + elif p == "P1" then 1 + elif p == "P2" then 2 + else 3 + end; + + [ .[] + | select(.is_stale == false and .has_ack == false) + ] + | sort_by([pnum(.priority), .path, (.line // 0)]) + | .[] + | "### \(.path):\(.line // 1) [id=\(.id)]\n\n```\n\(.body)\n```\n" + ' "$LATEST" + else + echo "## Next Step" + echo + echo "Run with \`--full\` to include full comment bodies for the actionable set." + fi + + echo + echo "## Artifacts" + echo + echo "- Raw comments: \`${RAW}\`" + echo "- Filtered latest: \`${LATEST}\`" + echo "- Report: \`${REPORT}\`" +} | tee "$REPORT" + +if [[ -n "$OUT" ]]; then + mkdir -p "$(dirname "$OUT")" + cp "$REPORT" "$OUT" +fi + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ce046506..1e8abf91 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,6 +37,7 @@ Echo is a deterministic, renderer-agnostic engine. We prioritize: - Keep `main` pristine. Create feature branches like `echo/` or `timeline/`. - Before starting work, ensure `git status` is clean. If not, resolve or coordinate with the human operator. - Before each session, update the “Today’s Intent” section in `docs/execution-plan.md` so future collaborators can follow the timeline. +- PR review loops are procedural: follow `docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md` and use `docs/procedures/EXTRACT-PR-COMMENTS.md` to extract actionable CodeRabbitAI feedback per round. ## Testing Expectations - Write tests before or alongside code changes. diff --git a/docs/decision-log.md b/docs/decision-log.md index 0ff9e388..ce47444c 100644 --- a/docs/decision-log.md +++ b/docs/decision-log.md @@ -6,6 +6,7 @@ | Date | Context | Decision | Rationale | Consequence | | ---- | ------- | -------- | --------- | ----------- | +| 2026-01-01 | PR hygiene: review-loop determinism | Standardize CodeRabbitAI review triage by pinning “actionable comments” extraction to the PR head commit, and automate it with a script that groups stale vs fresh feedback. | GitHub carries review comments forward, which makes it easy to waste time on already-fixed feedback. Treating comment extraction as a procedure (and generating a report) keeps review loops deterministic and prevents ambiguity across rounds. | PR fix batches start from a clean, reproducible actionables list; stale comments are verified against current code before any work is done; review iteration cost drops. | | 2025-12-30 | PR #164 review follow-ups (WVP demo robustness; related: WVP demo path entry below) | Reset per-connection publish state on successful connect (force snapshot before diffs), harden handshake/outbound encoding failures (log + bail instead of writing empty packets), and add missing rustdoc/warn logs surfaced by review. | Review comments flagged a reconnect publish dead-end and “silent failure” risks; making failures visible and forcing snapshot-on-reconnect keeps the demo deterministic, debuggable, and aligned with missing-docs policy. | Publishing resumes after reconnect without manual toggles; protocol encode failures no longer silently write empty packets; workspace `-D warnings -D missing_docs` remains green. | | 2025-12-30 | WARP View Protocol demo path (hub + 2 viewers) | Add a bidirectional tool connection (`connect_channels_for_bidir`) so tools can publish `warp_stream` frames, and wire `warp-viewer` with publish/receive toggles plus a deterministic demo mutation (“pulse”) that emits gapless snapshot+diff streams. Document the workflow in `docs/guide/wvp-demo.md` and mark WVP checklist items complete in `docs/tasks.md`. | WVP needs an end-to-end “real” demo (not just a spec): tools must be able to publish without dragging async runtimes into UI code, and publisher/subscriber roles must be testable locally. A deterministic, viewer-native mutation is the smallest way to exercise the protocol without coupling to the engine rewrite loop yet. | You can run a local hub and watch two viewers share WARP changes (publisher pulses, subscriber updates). Client and hub error surfaces are explicit via notifications/toasts; epoch gaps and forbidden publishes fail fast. Remaining follow-up: a dedicated integration test harness for multi-client loopback if/when we formalize resync/acks. | | 2025-12-30 | Stage B1 usability/docs: make descent-chain law and portal-chain slicing concrete | Expand `docs/spec-warp-core.md` with worked Stage B1 examples (descent-chain reads → `Footprint.a_read` → patch `in_slots`, and portal-chain inclusion in Paper III slicing). Add `Engine::with_state(...) -> Result` so tools/tests can initialize an engine from an externally constructed multi-instance `WarpState` without exposing internal maps. | Stage B1 correctness depends on subtle invariants that are easy to “get almost right” without a concrete example; documentation must prevent “looks right, doesn’t slice.” A state-based engine constructor is the smallest ergonomic primitive that enables multi-instance fixtures and patch-replay workflows while keeping the rewrite hot path unchanged. | Contributors can reproduce and reason about B1 behavior quickly; multi-instance demos/tests can be built via patch replay + `Engine::with_state` rather than internal mutation; the portal-chain dependency is explicit in both docs and slice behavior. | diff --git a/docs/execution-plan.md b/docs/execution-plan.md index 784b29bd..6ac8956e 100644 --- a/docs/execution-plan.md +++ b/docs/execution-plan.md @@ -35,6 +35,14 @@ This is Codex’s working map for building Echo. Update it relentlessly—each s ## Today’s Intent +> 2026-01-01 — PR hygiene: standardize CodeRabbitAI review triage (COMPLETED) + +- Goal: make CodeRabbitAI review loops cheap and unambiguous by codifying how we extract actionable comments from the current PR head diff. +- Scope: + - Add mandatory procedures under `docs/procedures/` for PR submission and review comment extraction. + - Add a helper script `.github/scripts/extract-actionable-comments.sh` to automate “stale vs fresh” bucketing and produce a Markdown report. +- Exit criteria: a contributor can run one command and get a clean actionables list without re-reading the entire PR history. + > 2025-12-30 — Issue #163: WVP demo path (IN PROGRESS) - Goal: complete the WARP View Protocol demo path (publisher + subscriber) by adding outbound publish support to `echo-session-client` and wiring publish/subscribe toggles + a dirty publish loop in `warp-viewer`. diff --git a/docs/procedures/EXTRACT-PR-COMMENTS.md b/docs/procedures/EXTRACT-PR-COMMENTS.md new file mode 100644 index 00000000..a9d1cc14 --- /dev/null +++ b/docs/procedures/EXTRACT-PR-COMMENTS.md @@ -0,0 +1,212 @@ + + +# Procedure: Extract Actionable Comments from CodeRabbitAI PR Reviews + +This procedure is part of the required PR workflow for this repo. + +GitHub carries forward review comments across commits, so you must extract only the **currently actionable** feedback (not already-fixed or stale comments) before starting another fix batch. + +--- + +## Expected Workflow Context (Where This Fits) + +When you finish work: + +1. Push a branch and open a PR. +2. Wait for CI + CodeRabbitAI. +3. Extract actionable comments (this doc). +4. Fix issues in small commits + push. +5. Repeat until CodeRabbitAI (and any required human reviewer) approves. + +--- + +## Prerequisites + +- `gh` installed and authenticated +- `jq` installed +- Repo access to view PRs + +--- + +## Procedure + +### Step 1: Identify the PR head commit (the current diff) + +```bash +PR_NUMBER="" +LATEST_COMMIT="$(gh pr view "$PR_NUMBER" --json headRefOid --jq '.headRefOid[0:7]')" +echo "PR head commit: $LATEST_COMMIT" +``` + +Why: comment staleness is measured relative to the current PR head. + +--- + +### Step 2: Fetch all review comments (PR review threads) + +```bash +OWNER="" +REPO="" +TMPFILE="/tmp/pr-${PR_NUMBER}-comments-$(date +%s).json" +gh api "repos/${OWNER}/${REPO}/pulls/${PR_NUMBER}/comments" --paginate > "$TMPFILE" +``` + +--- + +### Step 3: Extract top-level comments pinned to the latest commit + +```bash +cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' + .[] | + select(.in_reply_to_id == null and .commit_id[0:7] == $commit) | + { + id, + line, + path, + current_commit: .commit_id[0:7], + original_commit: .original_commit_id[0:7], + is_stale: (.commit_id != .original_commit_id), + created_at, + body_preview: (.body[0:200]) + } +' | jq -s '.' > /tmp/comments-latest.json +``` + +--- + +### Step 4: Identify stale vs fresh comments + +```bash +cat /tmp/comments-latest.json | jq ' + group_by(.is_stale) | + map({ + category: (if .[0].is_stale then "STALE" else "FRESH" end), + count: length, + comments: map({id, line, path, original_commit}) + }) +' +``` + +Key insight: +- If `is_stale == true`, the comment originated on an earlier commit and may already be fixed. + +--- + +### Step 5: Detect “Already Addressed” markers + +```bash +cat "$TMPFILE" | jq '.[] | + select(.body | contains("✅ Addressed in commit")) | + { + id, + line, + path, + fixed_in: (.body | capture("✅ Addressed in commit (?[a-f0-9]{7})").commit) + } +' +``` + +Key insight: +- If the comment contains a “✅ Addressed in commit …” marker, it’s no longer actionable. + +--- + +### Step 6: Categorize by priority (optional) + +This is only useful if CodeRabbitAI uses explicit priority markers in comment bodies. + +```bash +cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' + .[] | + select( + .in_reply_to_id == null and + .commit_id[0:7] == $commit + ) | + { + id, + line, + path, + priority: ( + if (.body | contains("🔴 Critical")) then "P0" + elif (.body | contains("🟠 Major")) then "P1" + elif (.body | contains("🟡 Minor")) then "P2" + else "P3" + end + ), + is_stale: (.commit_id != .original_commit_id), + body + } +' | jq -s '.' > /tmp/prioritized-comments.json +``` + +--- + +### Step 7: Verify stale comments against current code (critical step) + +Do not trust `is_stale` alone. Verify: + +```bash +# 1) Inspect current state +git show "HEAD:" | sed -n ',p' + +# 2) Search history for fixes (if needed) +git log --all --oneline --grep="" +git log -p --all -S"" -- +``` + +--- + +### Step 8: Produce an issue report (batch) + +Create a batch checklist and work top-down: + +```bash +cat > /tmp/batch-N-issues.md << 'EOF' +# Batch N - CodeRabbitAI Issues + +## Stale (Verify / Already Fixed) +- [ ] Line XXX - Issue description (Fixed in: COMMIT_SHA) + +## P0 Critical +- [ ] Line XXX - Issue description + +## P1 Major +- [ ] Line XXX - Issue description + +## P2 Minor +- [ ] Line XXX - Issue description + +## P3 Trivial +- [ ] Line XXX - Issue description +EOF +``` + +--- + +### Step 9: Save full bodies for actionable issues + +```bash +cat /tmp/prioritized-comments.json | jq -r '.[] | + select(.is_stale == false) | + "# Comment ID: \(.id) - Line \(.line)\n\(.body)\n\n---\n\n" +' > /tmp/batch-N-full-comments.txt +``` + +--- + +## When CodeRabbitAI approval doesn’t unblock GitHub + +If CodeRabbitAI approved but GitHub still shows “changes requested”, nudge the bot: + +```text +@coderabbitai Please review the latest commit and clear the "changes requested" status since you have already approved the changes. +``` + +--- + +## Automation + +Use the helper script: + +- `.github/scripts/extract-actionable-comments.sh` + diff --git a/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md b/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md new file mode 100644 index 00000000..95f1b2bd --- /dev/null +++ b/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md @@ -0,0 +1,121 @@ + + +# Procedure: PR Submission + CodeRabbitAI Review Loop + +This document defines the required end-to-end submission workflow for this repo. + +It is deliberately operational: follow it step-by-step and avoid “interpretation drift”. + +--- + +## Rules (Non‑Negotiable) + +1. No direct-to-`main` commits. +2. No admin bypass merges to skip required reviews. +3. CI green is required but not sufficient — review approval is a separate gate. +4. Iterate in small commits to reduce review ambiguity. + +--- + +## Workflow (Branch → PR → Review → Fix → Merge) + +### Step 0 — Start on a branch + +Prefer a clear prefix: + +- `docs/...` for docs-only changes +- `feat/...` for features +- `fix/...` for bug fixes +- `chore/...` for tooling/maintenance + +```bash +git checkout -b +``` + +--- + +### Step 1 — Push and open a PR + +```bash +git push -u origin +gh pr create --base main --head +``` + +--- + +### Step 2 — Wait for CI and CodeRabbitAI + +Watch checks: + +```bash +gh pr checks --watch +``` + +Then wait for CodeRabbitAI to comment. Do not merge “because CI is green”. + +--- + +### Step 3 — Extract actionable review feedback (required) + +Use: + +- `docs/procedures/EXTRACT-PR-COMMENTS.md` + +The outcome of this step should be a bucketed list of actionable items (P0/P1/P2/P3). + +--- + +### Step 4 — Fix issues in batches (commit + push) + +Work one bucket at a time: + +- P0: correctness / determinism / security +- P1: major design/API drift +- P2: minor issues / maintainability +- P3: nits + +For each batch: + +```bash +git commit -m "fix: " +git push +``` + +When replying in threads, prefer: + +> ✅ Addressed in commit `abc1234` + +This reduces stale-comment confusion in later rounds. + +--- + +### Step 5 — Repeat until approved + +Repeat Steps 2–4 until: + +- CI checks are green, and +- CodeRabbitAI is satisfied (approved or no unresolved actionables), and +- any required human reviewer has approved. + +--- + +### Step 6 — Merge only when approved + +If branch protection requires it, enable auto-merge: + +```bash +gh pr merge --auto --merge +``` + +--- + +## If CodeRabbitAI approved but GitHub stays blocked + +Sometimes the “changes requested” status lingers even after an approving review. + +Post this comment on the PR: + +```text +@coderabbitai Please review the latest commit and clear the "changes requested" status since you have already approved the changes. +``` + From 5b0963210403a4cbf1b841dcb64da844fa6932ba Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Thu, 1 Jan 2026 14:41:22 -0800 Subject: [PATCH 02/13] =?UTF-8?q?Tooling:=20treat=20'=E2=9C=85=20Addressed?= =?UTF-8?q?'=20replies=20as=20acknowledged?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the actionable-comments extractor to mark top-level review comments as acknowledged when any reply in the thread contains the '✅ Addressed in commit …' marker. --- .github/scripts/extract-actionable-comments.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index 3286b64c..7bceb1a6 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -93,6 +93,17 @@ gh api "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" --paginate > "$RAW" # Filter: top-level comments pinned to the current PR head commit. jq --arg commit "$HEAD7" ' + def has_ack_marker(s): + (s | type) == "string" and (s | contains("✅ Addressed in commit")); + + # Replies are returned in the same list, with `in_reply_to_id` set. + # Treat a top-level comment as "acknowledged" if either: + # - the comment body itself contains the marker, OR + # - any reply to it contains the marker. + def ack_by_reply: + reduce .[] as $c ({}; if ($c.in_reply_to_id != null and has_ack_marker($c.body)) then .[($c.in_reply_to_id | tostring)] = true else . end); + + ($replies := ack_by_reply) | [ .[] | select(.in_reply_to_id == null and .commit_id[0:7] == $commit) | { @@ -102,7 +113,7 @@ jq --arg commit "$HEAD7" ' current_commit: (.commit_id[0:7]), original_commit: (.original_commit_id[0:7]), is_stale: (.commit_id != .original_commit_id), - has_ack: (.body | contains("✅ Addressed in commit")), + has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), priority: ( if (.body | contains("🔴 Critical")) then "P0" elif (.body | contains("🟠 Major")) then "P1" @@ -218,4 +229,3 @@ if [[ -n "$OUT" ]]; then mkdir -p "$(dirname "$OUT")" cp "$REPORT" "$OUT" fi - From e6485f67030d164c61ee9d9a7484a99ddd711815 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Thu, 1 Jan 2026 14:42:11 -0800 Subject: [PATCH 03/13] Tooling: fix jq syntax for reply-ack detection Use jq 1.6-compatible variable binding when computing acknowledged-by-reply markers. --- .github/scripts/extract-actionable-comments.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index 7bceb1a6..c2282c99 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -103,7 +103,7 @@ jq --arg commit "$HEAD7" ' def ack_by_reply: reduce .[] as $c ({}; if ($c.in_reply_to_id != null and has_ack_marker($c.body)) then .[($c.in_reply_to_id | tostring)] = true else . end); - ($replies := ack_by_reply) | + (ack_by_reply) as $replies | [ .[] | select(.in_reply_to_id == null and .commit_id[0:7] == $commit) | { From e9b99e4391c0cb1d00d0030cedfdce4f138fa3b8 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Thu, 1 Jan 2026 14:53:48 -0800 Subject: [PATCH 04/13] chore: retrigger CodeRabbit No code changes; bump PR head to retrigger CodeRabbit status after rate-limit failures. From bed5fe3fc574751678d168442a9b4bda017cf4d1 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Fri, 2 Jan 2026 07:24:40 -0800 Subject: [PATCH 05/13] docs: include outdated review comments in extractor --- .../scripts/extract-actionable-comments.sh | 82 +++++++++++-------- docs/procedures/EXTRACT-PR-COMMENTS.md | 66 +++++++++------ 2 files changed, 90 insertions(+), 58 deletions(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index c2282c99..8e5c4426 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -91,8 +91,14 @@ REPORT="/tmp/pr-${PR_NUMBER}-report-${TS}.md" gh api "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" --paginate > "$RAW" -# Filter: top-level comments pinned to the current PR head commit. -jq --arg commit "$HEAD7" ' +# Collect: all top-level review comments (including ones authored on earlier commits). +# +# Why: the PR review comments API (`/pulls/:number/comments`) keeps each comment’s +# `commit_id` fixed to the commit it was authored on. When new commits are pushed, +# older unresolved comments do not “move” to the new head; they become “outdated”. +# If we only include comments whose `commit_id` matches the current head, we can +# incorrectly report “0 actionables” even though older review threads remain open. +jq --arg head "$HEAD7" ' def has_ack_marker(s): (s | type) == "string" and (s | contains("✅ Addressed in commit")); @@ -105,14 +111,20 @@ jq --arg commit "$HEAD7" ' (ack_by_reply) as $replies | [ .[] | - select(.in_reply_to_id == null and .commit_id[0:7] == $commit) | + select(.in_reply_to_id == null) | { id, path, line, - current_commit: (.commit_id[0:7]), + position, + original_position, + head_commit: $head, + comment_commit: (.commit_id[0:7]), original_commit: (.original_commit_id[0:7]), - is_stale: (.commit_id != .original_commit_id), + is_on_head: (.commit_id[0:7] == $head), + is_visible_on_head_diff: (.position != null), + is_outdated: (.position == null), + is_moved: (.commit_id != .original_commit_id), has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), priority: ( if (.body | contains("🔴 Critical")) then "P0" @@ -135,9 +147,12 @@ jq --arg commit "$HEAD7" ' ] ' "$RAW" > "$LATEST" -fresh_count="$(jq '[.[] | select(.is_stale == false and .has_ack == false)] | length' "$LATEST")" -stale_count="$(jq '[.[] | select(.is_stale == true)] | length' "$LATEST")" +needs_attention_count="$(jq '[.[] | select(.has_ack == false)] | length' "$LATEST")" +on_head_attention_count="$(jq '[.[] | select(.is_visible_on_head_diff == true and .has_ack == false)] | length' "$LATEST")" +outdated_attention_count="$(jq '[.[] | select(.is_outdated == true and .has_ack == false)] | length' "$LATEST")" +moved_count="$(jq '[.[] | select(.is_moved == true)] | length' "$LATEST")" ack_count="$(jq '[.[] | select(.has_ack == true)] | length' "$LATEST")" +total_count="$(jq 'length' "$LATEST")" { echo "# CodeRabbitAI/GitHub Actionables — PR #${PR_NUMBER}" @@ -149,17 +164,29 @@ ack_count="$(jq '[.[] | select(.has_ack == true)] | length' "$LATEST")" echo "## Summary" echo - echo "- Fresh actionable: **${fresh_count}**" - echo "- Stale (verify): **${stale_count}**" + echo "- Total top-level review comments: **${total_count}**" + echo "- Needs attention (unacknowledged): **${needs_attention_count}**" + echo " - Visible on head diff: **${on_head_attention_count}**" + echo " - Outdated (not visible on head diff): **${outdated_attention_count}**" echo "- Acknowledged (✅ Addressed): **${ack_count}**" + echo "- Moved by GitHub (commit_id != original_commit_id): **${moved_count}**" echo - echo "## Stale / Verify" + echo "## Needs Attention (On Head Diff)" echo jq -r ' .[] - | select(.is_stale == true) - | "- [ ] \(.path):\(.line // 1) — \(.title) (original: \(.original_commit)) [id=\(.id)]" + | select(.is_visible_on_head_diff == true and .has_ack == false) + | "- [ ] [\(.priority)] \(.path):\(.line // 1) — \(.title) [id=\(.id)]" + ' "$LATEST" + echo + + echo "## Needs Attention (Outdated / Earlier Commits)" + echo + jq -r ' + .[] + | select(.is_outdated == true and .has_ack == false) + | "- [ ] [\(.priority)] \(.path):\(.line // 1) — \(.title) (comment commit: \(.comment_commit)) [id=\(.id)]" ' "$LATEST" echo @@ -172,29 +199,14 @@ ack_count="$(jq '[.[] | select(.has_ack == true)] | length' "$LATEST")" ' "$LATEST" echo - echo "## Actionable (Fresh)" + echo "## Notes" echo - - # Sort: priority, then path, then line (null line => 0). - jq -r ' - def pnum(p): - if p == "P0" then 0 - elif p == "P1" then 1 - elif p == "P2" then 2 - else 3 - end; - - [ .[] - | select(.is_stale == false and .has_ack == false) - ] - | sort_by([pnum(.priority), .path, (.line // 0)]) - | .[] - | "- [ ] [\(.priority)] \(.path):\(.line // 1) — \(.title) [id=\(.id)]" - ' "$LATEST" + echo "- \"Outdated\" means the review comment is no longer visible on the current head diff; it may still be actionable." + echo "- Use \`✅ Addressed in commit \` replies to close the loop and keep future extraction cheap." echo if [[ "$FULL" -eq 1 ]]; then - echo "## Full Comment Bodies (Actionable)" + echo "## Full Comment Bodies (Needs Attention)" echo jq -r ' def pnum(p): @@ -205,16 +217,16 @@ ack_count="$(jq '[.[] | select(.has_ack == true)] | length' "$LATEST")" end; [ .[] - | select(.is_stale == false and .has_ack == false) + | select(.has_ack == false) ] - | sort_by([pnum(.priority), .path, (.line // 0)]) + | sort_by([(.is_outdated | if . then 1 else 0 end), pnum(.priority), .path, (.line // 0)]) | .[] - | "### \(.path):\(.line // 1) [id=\(.id)]\n\n```\n\(.body)\n```\n" + | "### [\(.priority)] \(.path):\(.line // 1) [id=\(.id)]\n\n- Visible on head diff: \(.is_visible_on_head_diff)\n- Outdated: \(.is_outdated)\n- Comment commit: \(.comment_commit)\n\n```\n\(.body)\n```\n" ' "$LATEST" else echo "## Next Step" echo - echo "Run with \`--full\` to include full comment bodies for the actionable set." + echo "Run with \`--full\` to include full comment bodies for the needs-attention set." fi echo diff --git a/docs/procedures/EXTRACT-PR-COMMENTS.md b/docs/procedures/EXTRACT-PR-COMMENTS.md index a9d1cc14..d2225eba 100644 --- a/docs/procedures/EXTRACT-PR-COMMENTS.md +++ b/docs/procedures/EXTRACT-PR-COMMENTS.md @@ -53,19 +53,29 @@ gh api "repos/${OWNER}/${REPO}/pulls/${PR_NUMBER}/comments" --paginate > "$TMPFI --- -### Step 3: Extract top-level comments pinned to the latest commit +### Step 3: Extract top-level review comments (including “outdated”) + +Important: + +- GitHub’s review comments API (`/pulls/:number/comments`) keeps each comment’s `commit_id` fixed to the commit it was authored on. +- When the PR head moves, older unresolved comments usually become **outdated** rather than being re-bound to the new head. +- If you filter only to `commit_id == PR_HEAD`, you can incorrectly report “0 actionables” while older threads remain open. ```bash -cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' +cat "$TMPFILE" | jq --arg head "$LATEST_COMMIT" ' .[] | - select(.in_reply_to_id == null and .commit_id[0:7] == $commit) | + select(.in_reply_to_id == null) | { id, line, path, - current_commit: .commit_id[0:7], + position, + head_commit: $head, + comment_commit: .commit_id[0:7], original_commit: .original_commit_id[0:7], - is_stale: (.commit_id != .original_commit_id), + is_visible_on_head_diff: (.position != null), + is_outdated: (.position == null), + is_moved: (.commit_id != .original_commit_id), created_at, body_preview: (.body[0:200]) } @@ -74,26 +84,38 @@ cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' --- -### Step 4: Identify stale vs fresh comments +### Step 4: Bucket on-head vs outdated (and verify against current code) ```bash cat /tmp/comments-latest.json | jq ' - group_by(.is_stale) | + group_by(.is_outdated) | map({ - category: (if .[0].is_stale then "STALE" else "FRESH" end), + category: (if .[0].is_outdated then "OUTDATED (earlier commit)" else "ON_HEAD" end), count: length, - comments: map({id, line, path, original_commit}) + comments: map({id, line, path, position, comment_commit, original_commit}) }) ' ``` Key insight: -- If `is_stale == true`, the comment originated on an earlier commit and may already be fixed. +- “Outdated” means “not visible on the current head diff”, **not** “fixed”. +- Always verify against current code before acting (see Step 7). --- ### Step 5: Detect “Already Addressed” markers +Note: the “✅ Addressed in commit …” marker may appear either: + +- in the top-level comment body, or +- in a reply to the thread. + +If you want reliable ack detection, prefer the repo script: + +```bash +.github/scripts/extract-actionable-comments.sh +``` + ```bash cat "$TMPFILE" | jq '.[] | select(.body | contains("✅ Addressed in commit")) | @@ -116,11 +138,10 @@ Key insight: This is only useful if CodeRabbitAI uses explicit priority markers in comment bodies. ```bash -cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' +cat "$TMPFILE" | jq --arg head "$LATEST_COMMIT" ' .[] | select( - .in_reply_to_id == null and - .commit_id[0:7] == $commit + .in_reply_to_id == null ) | { id, @@ -133,7 +154,8 @@ cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' else "P3" end ), - is_stale: (.commit_id != .original_commit_id), + is_on_head: (.commit_id[0:7] == $head), + is_outdated: (.commit_id[0:7] != $head), body } ' | jq -s '.' > /tmp/prioritized-comments.json @@ -141,9 +163,9 @@ cat "$TMPFILE" | jq --arg commit "$LATEST_COMMIT" ' --- -### Step 7: Verify stale comments against current code (critical step) +### Step 7: Verify outdated comments against current code (critical step) -Do not trust `is_stale` alone. Verify: +Do not trust `is_outdated` alone. Verify: ```bash # 1) Inspect current state @@ -164,7 +186,7 @@ Create a batch checklist and work top-down: cat > /tmp/batch-N-issues.md << 'EOF' # Batch N - CodeRabbitAI Issues -## Stale (Verify / Already Fixed) +## Outdated (Verify / Already Fixed) - [ ] Line XXX - Issue description (Fixed in: COMMIT_SHA) ## P0 Critical @@ -183,13 +205,12 @@ EOF --- -### Step 9: Save full bodies for actionable issues +### Step 9: Save full bodies for needs-attention issues + +Prefer the helper script, which understands ack markers in replies and can print full bodies: ```bash -cat /tmp/prioritized-comments.json | jq -r '.[] | - select(.is_stale == false) | - "# Comment ID: \(.id) - Line \(.line)\n\(.body)\n\n---\n\n" -' > /tmp/batch-N-full-comments.txt +.github/scripts/extract-actionable-comments.sh --full ``` --- @@ -209,4 +230,3 @@ If CodeRabbitAI approved but GitHub still shows “changes requested”, nudge t Use the helper script: - `.github/scripts/extract-actionable-comments.sh` - From 537b41b8308be7feedd89bab5dcf6b6127ea522f Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Fri, 2 Jan 2026 07:28:29 -0800 Subject: [PATCH 06/13] docs: harden comment extractor and docs --- .../scripts/extract-actionable-comments.sh | 47 +++++++++++-- docs/procedures/EXTRACT-PR-COMMENTS.md | 66 +++++++++++++++---- docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md | 3 - 3 files changed, 97 insertions(+), 19 deletions(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index 8e5c4426..5f88309b 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -40,6 +40,11 @@ if [[ -z "${PR_NUMBER}" ]]; then usage >&2 exit 2 fi +if ! [[ "${PR_NUMBER}" =~ ^[0-9]+$ ]]; then + echo "Error: PR_NUMBER must be numeric, got: ${PR_NUMBER}" >&2 + usage >&2 + exit 2 +fi REPO="" OUT="" @@ -86,10 +91,44 @@ HEAD7="${HEAD_SHA:0:7}" TS="$(date +%s)" RAW="/tmp/pr-${PR_NUMBER}-comments-${TS}.json" +RAW_ERR="/tmp/pr-${PR_NUMBER}-comments-${TS}.err" LATEST="/tmp/pr-${PR_NUMBER}-latest-${TS}.json" REPORT="/tmp/pr-${PR_NUMBER}-report-${TS}.md" -gh api "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" --paginate > "$RAW" +attempt=1 +delay_s=1 +while true; do + if gh api "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" --paginate > "$RAW" 2> "$RAW_ERR"; then + break + fi + + if [[ "$attempt" -ge 4 ]]; then + echo "Error: Failed to fetch PR comments from GitHub after ${attempt} attempts." >&2 + echo "Repo: ${REPO}" >&2 + echo "PR: ${PR_NUMBER}" >&2 + echo >&2 + echo "Troubleshooting:" >&2 + echo "- Run: gh auth status" >&2 + echo "- Check rate limits / token scopes (GH_TOKEN) and retry later" >&2 + echo "- Verify repo/PR access permissions" >&2 + echo "- Check network connectivity" >&2 + echo >&2 + echo "gh api stderr (last attempt):" >&2 + sed -n '1,200p' "$RAW_ERR" >&2 + exit 1 + fi + + sleep "$delay_s" + delay_s="$((delay_s * 2))" + attempt="$((attempt + 1))" +done + +if ! jq -e . "$RAW" >/dev/null 2>&1; then + echo "Error: GitHub API returned invalid JSON in: ${RAW}" >&2 + echo "gh api stderr:" >&2 + sed -n '1,200p' "$RAW_ERR" >&2 + exit 1 +fi # Collect: all top-level review comments (including ones authored on earlier commits). # @@ -127,9 +166,9 @@ jq --arg head "$HEAD7" ' is_moved: (.commit_id != .original_commit_id), has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), priority: ( - if (.body | contains("🔴 Critical")) then "P0" - elif (.body | contains("🟠 Major")) then "P1" - elif (.body | contains("🟡 Minor")) then "P2" + if (.body | test("\\\\bP0\\\\b|badge/P0-|🔴|Critical"; "i")) then "P0" + elif (.body | test("\\\\bP1\\\\b|badge/P1-|🟠|Major"; "i")) then "P1" + elif (.body | test("\\\\bP2\\\\b|badge/P2-|🟡|Minor"; "i")) then "P2" else "P3" end ), diff --git a/docs/procedures/EXTRACT-PR-COMMENTS.md b/docs/procedures/EXTRACT-PR-COMMENTS.md index d2225eba..475ff099 100644 --- a/docs/procedures/EXTRACT-PR-COMMENTS.md +++ b/docs/procedures/EXTRACT-PR-COMMENTS.md @@ -23,13 +23,27 @@ When you finish work: ## Prerequisites - `gh` installed and authenticated -- `jq` installed +- `jq` installed (minimum: `jq >= 1.6`) - Repo access to view PRs --- ## Procedure +### Quick Recommendation + +Prefer the repo automation whenever possible: + +```bash +.github/scripts/extract-actionable-comments.sh --full +``` + +It is designed to: + +- include “outdated” comments that are not visible on the current head diff, +- detect `✅ Addressed in commit ...` markers in replies, +- and produce a deterministic Markdown report. + ### Step 1: Identify the PR head commit (the current diff) ```bash @@ -148,14 +162,14 @@ cat "$TMPFILE" | jq --arg head "$LATEST_COMMIT" ' line, path, priority: ( - if (.body | contains("🔴 Critical")) then "P0" - elif (.body | contains("🟠 Major")) then "P1" - elif (.body | contains("🟡 Minor")) then "P2" + if (.body | test("\\\\bP0\\\\b|badge/P0-|🔴|Critical"; "i")) then "P0" + elif (.body | test("\\\\bP1\\\\b|badge/P1-|🟠|Major"; "i")) then "P1" + elif (.body | test("\\\\bP2\\\\b|badge/P2-|🟡|Minor"; "i")) then "P2" else "P3" end ), - is_on_head: (.commit_id[0:7] == $head), - is_outdated: (.commit_id[0:7] != $head), + is_visible_on_head_diff: (.position != null), + is_outdated: (.position == null), body } ' | jq -s '.' > /tmp/prioritized-comments.json @@ -165,15 +179,43 @@ cat "$TMPFILE" | jq --arg head "$LATEST_COMMIT" ' ### Step 7: Verify outdated comments against current code (critical step) -Do not trust `is_outdated` alone. Verify: +Do not trust `is_outdated` alone. Verify by mapping fields from the comment object to concrete commands. + +Suggested mapping: + +- `path` → file path +- `line` → line number (use a small context window, e.g. ±5 lines) + +Example: ```bash -# 1) Inspect current state -git show "HEAD:" | sed -n ',p' +# Suppose you have a single comment object (e.g., from /tmp/comments-latest.json): +COMMENT_PATH="docs/decision-log.md" +COMMENT_LINE=42 + +# Clamp the start line to 1 (sed doesn't like 0/negative ranges). +START=$((COMMENT_LINE - 5)) +if [[ "$START" -lt 1 ]]; then START=1; fi +END=$((COMMENT_LINE + 5)) -# 2) Search history for fixes (if needed) -git log --all --oneline --grep="" -git log -p --all -S"" -- +# 1) Inspect current state around the line +git show "HEAD:${COMMENT_PATH}" | sed -n "${START},${END}p" + +# 2) Scan recent history for related fixes +git log --all --oneline -- "${COMMENT_PATH}" | head -20 + +# 3) If the comment mentions a specific identifier (function/struct name), search by token +git log -p --all -S"SomeIdentifierFromComment" -- "${COMMENT_PATH}" | head -80 +``` + +If the comment is outdated (not visible on head diff), it may refer to old line numbers. In that case: + +- search by keyword/token rather than trusting the line number, and +- look up the original code context via `original_commit_id` if needed. + +```bash +# Use the repo script if you want the comment bodies included: +.github/scripts/extract-actionable-comments.sh --full ``` --- diff --git a/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md b/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md index 95f1b2bd..26c03d80 100644 --- a/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md +++ b/docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md @@ -115,7 +115,4 @@ Sometimes the “changes requested” status lingers even after an approving rev Post this comment on the PR: -```text @coderabbitai Please review the latest commit and clear the "changes requested" status since you have already approved the changes. -``` - From 1cf7ba75eec8b0738998ba942733772c7308a069 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Fri, 2 Jan 2026 07:32:34 -0800 Subject: [PATCH 07/13] chore: validate args and harden markdown output --- .../scripts/extract-actionable-comments.sh | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index 5f88309b..59c627a9 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -53,11 +53,21 @@ FULL=0 while [[ $# -gt 0 ]]; do case "$1" in --repo) - REPO="${2:-}" + if [[ -z "${2:-}" || "${2:-}" == -* ]]; then + echo "Error: --repo requires a value in the form OWNER/REPO" >&2 + usage >&2 + exit 2 + fi + REPO="$2" shift 2 ;; --out) - OUT="${2:-}" + if [[ -z "${2:-}" || "${2:-}" == -* ]]; then + echo "Error: --out requires a filesystem path" >&2 + usage >&2 + exit 2 + fi + OUT="$2" shift 2 ;; --full) @@ -85,6 +95,10 @@ fi OWNER="${REPO%/*}" NAME="${REPO#*/}" +if [[ "$REPO" != */* || "$REPO" == */*/* || -z "$OWNER" || -z "$NAME" || "$OWNER" == "$NAME" ]]; then + echo "Error: Invalid repo format '${REPO}'. Expected OWNER/REPO." >&2 + exit 2 +fi HEAD_SHA="$(gh pr view "$PR_NUMBER" --repo "$REPO" --json headRefOid --jq '.headRefOid')" HEAD7="${HEAD_SHA:0:7}" @@ -260,7 +274,7 @@ total_count="$(jq 'length' "$LATEST")" ] | sort_by([(.is_outdated | if . then 1 else 0 end), pnum(.priority), .path, (.line // 0)]) | .[] - | "### [\(.priority)] \(.path):\(.line // 1) [id=\(.id)]\n\n- Visible on head diff: \(.is_visible_on_head_diff)\n- Outdated: \(.is_outdated)\n- Comment commit: \(.comment_commit)\n\n```\n\(.body)\n```\n" + | "### [\(.priority)] \(.path):\(.line // 1) [id=\(.id)]\n\n- Visible on head diff: \(.is_visible_on_head_diff)\n- Outdated: \(.is_outdated)\n- Comment commit: \(.comment_commit)\n\n````\n\(.body)\n````\n" ' "$LATEST" else echo "## Next Step" From 96284db9f024e7eef63f25b4bdc88cb9158c2761 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Fri, 2 Jan 2026 07:33:37 -0800 Subject: [PATCH 08/13] docs: clarify /tmp artifacts behavior --- .github/scripts/extract-actionable-comments.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index 59c627a9..d63c52a5 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -288,6 +288,7 @@ total_count="$(jq 'length' "$LATEST")" echo "- Raw comments: \`${RAW}\`" echo "- Filtered latest: \`${LATEST}\`" echo "- Report: \`${REPORT}\`" + echo "- Note: artifacts are intentionally left in \`/tmp\` for debugging; your OS typically cleans \`/tmp\` periodically." } | tee "$REPORT" if [[ -n "$OUT" ]]; then From 74aef68cfa86bb6f17f5f49af9cbff6260c80101 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Fri, 2 Jan 2026 10:42:54 -0800 Subject: [PATCH 09/13] docs: expand PR feedback extractor sources --- .../scripts/extract-actionable-comments.sh | 454 +++++++++++++----- docs/decision-log.md | 1 + docs/execution-plan.md | 10 + docs/procedures/EXTRACT-PR-COMMENTS.md | 18 +- 4 files changed, 365 insertions(+), 118 deletions(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index d63c52a5..edc9123f 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -8,24 +8,35 @@ usage() { cat <<'EOF' Usage: .github/scripts/extract-actionable-comments.sh [--repo OWNER/REPO] [--out ] [--full] + .github/scripts/extract-actionable-comments.sh [--repo OWNER/REPO] [--out ] [--full] [--all-sources] + .github/scripts/extract-actionable-comments.sh [--repo OWNER/REPO] [--out ] [--full] [--include-conversation] [--include-reviews] Purpose: - Extract actionable (fresh) CodeRabbitAI/GitHub review comments for a PR by - filtering comments pinned to the PR head commit and grouping by staleness. + Extract actionable PR feedback (CodeRabbitAI + humans) from: + - PR review threads (inline review comments; diff-positioned, can become outdated) + - optionally PR conversation comments (issue comments) + - optionally PR review summaries (review bodies, e.g. "changes requested") + + Review threads are grouped by staleness; all sources support a lightweight ack + convention via: ✅ Addressed in commit Outputs: - Writes a Markdown report to stdout by default. - - Also writes the raw comments JSON and intermediate files to /tmp. + - Also writes raw JSON + intermediate artifacts to /tmp. Options: - --repo OWNER/REPO Override repo (default: current repo via `gh repo view`) - --out Write the Markdown report to (also prints to stdout) - --full Include full comment bodies for actionable comments + --repo OWNER/REPO Override repo (default: current repo via `gh repo view`) + --out Write the Markdown report to (also prints to stdout) + --full Include full comment bodies for actionable comments + --include-conversation Also include PR conversation (issue) comments + --include-reviews Also include PR review summaries (approve/request-changes bodies) + --all-sources Equivalent to: --include-conversation --include-reviews Examples: .github/scripts/extract-actionable-comments.sh 176 .github/scripts/extract-actionable-comments.sh 176 --out /tmp/pr-176-report.md .github/scripts/extract-actionable-comments.sh 176 --full + .github/scripts/extract-actionable-comments.sh 176 --all-sources EOF } @@ -34,6 +45,44 @@ require_cmd() { command -v "$cmd" >/dev/null 2>&1 || { echo "Missing dependency: $cmd" >&2; exit 2; } } +fetch_paginated_json() { + local api_path="$1" + local out_json="$2" + local out_err="$3" + + local attempt=1 + local delay_s=1 + while true; do + if gh api "$api_path" --paginate > "$out_json" 2> "$out_err"; then + break + fi + + if [[ "$attempt" -ge 4 ]]; then + echo "Error: Failed to fetch GitHub API '${api_path}' after ${attempt} attempts." >&2 + echo "Troubleshooting:" >&2 + echo "- Run: gh auth status" >&2 + echo "- Check rate limits / token scopes (GH_TOKEN) and retry later" >&2 + echo "- Verify repo/PR access permissions" >&2 + echo "- Check network connectivity" >&2 + echo >&2 + echo "gh api stderr (last attempt):" >&2 + sed -n '1,200p' "$out_err" >&2 + exit 1 + fi + + sleep "$delay_s" + delay_s="$((delay_s * 2))" + attempt="$((attempt + 1))" + done + + if ! jq -e . "$out_json" >/dev/null 2>&1; then + echo "Error: GitHub API returned invalid JSON in: ${out_json}" >&2 + echo "gh api stderr:" >&2 + sed -n '1,200p' "$out_err" >&2 + exit 1 + fi +} + PR_NUMBER="${1:-}" shift || true if [[ -z "${PR_NUMBER}" ]]; then @@ -49,6 +98,8 @@ fi REPO="" OUT="" FULL=0 +INCLUDE_CONVERSATION=0 +INCLUDE_REVIEWS=0 while [[ $# -gt 0 ]]; do case "$1" in @@ -74,6 +125,19 @@ while [[ $# -gt 0 ]]; do FULL=1 shift ;; + --include-conversation) + INCLUDE_CONVERSATION=1 + shift + ;; + --include-reviews) + INCLUDE_REVIEWS=1 + shift + ;; + --all-sources) + INCLUDE_CONVERSATION=1 + INCLUDE_REVIEWS=1 + shift + ;; -h|--help) usage exit 0 @@ -104,111 +168,224 @@ HEAD_SHA="$(gh pr view "$PR_NUMBER" --repo "$REPO" --json headRefOid --jq '.head HEAD7="${HEAD_SHA:0:7}" TS="$(date +%s)" -RAW="/tmp/pr-${PR_NUMBER}-comments-${TS}.json" -RAW_ERR="/tmp/pr-${PR_NUMBER}-comments-${TS}.err" -LATEST="/tmp/pr-${PR_NUMBER}-latest-${TS}.json" +RAW_REVIEW="/tmp/pr-${PR_NUMBER}-review-comments-${TS}.json" +RAW_REVIEW_ERR="/tmp/pr-${PR_NUMBER}-review-comments-${TS}.err" +LATEST_REVIEW="/tmp/pr-${PR_NUMBER}-review-latest-${TS}.json" +RAW_CONVERSATION="/tmp/pr-${PR_NUMBER}-conversation-comments-${TS}.json" +RAW_CONVERSATION_ERR="/tmp/pr-${PR_NUMBER}-conversation-comments-${TS}.err" +LATEST_CONVERSATION="/tmp/pr-${PR_NUMBER}-conversation-latest-${TS}.json" +RAW_REVIEWS="/tmp/pr-${PR_NUMBER}-reviews-${TS}.json" +RAW_REVIEWS_ERR="/tmp/pr-${PR_NUMBER}-reviews-${TS}.err" +LATEST_REVIEWS="/tmp/pr-${PR_NUMBER}-reviews-latest-${TS}.json" +LATEST_ALL="/tmp/pr-${PR_NUMBER}-latest-${TS}.json" REPORT="/tmp/pr-${PR_NUMBER}-report-${TS}.md" -attempt=1 -delay_s=1 -while true; do - if gh api "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" --paginate > "$RAW" 2> "$RAW_ERR"; then - break - fi - - if [[ "$attempt" -ge 4 ]]; then - echo "Error: Failed to fetch PR comments from GitHub after ${attempt} attempts." >&2 - echo "Repo: ${REPO}" >&2 - echo "PR: ${PR_NUMBER}" >&2 - echo >&2 - echo "Troubleshooting:" >&2 - echo "- Run: gh auth status" >&2 - echo "- Check rate limits / token scopes (GH_TOKEN) and retry later" >&2 - echo "- Verify repo/PR access permissions" >&2 - echo "- Check network connectivity" >&2 - echo >&2 - echo "gh api stderr (last attempt):" >&2 - sed -n '1,200p' "$RAW_ERR" >&2 - exit 1 - fi - - sleep "$delay_s" - delay_s="$((delay_s * 2))" - attempt="$((attempt + 1))" -done - -if ! jq -e . "$RAW" >/dev/null 2>&1; then - echo "Error: GitHub API returned invalid JSON in: ${RAW}" >&2 - echo "gh api stderr:" >&2 - sed -n '1,200p' "$RAW_ERR" >&2 - exit 1 +fetch_paginated_json "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/comments" "$RAW_REVIEW" "$RAW_REVIEW_ERR" +if [[ "$INCLUDE_CONVERSATION" -eq 1 ]]; then + fetch_paginated_json "repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/comments" "$RAW_CONVERSATION" "$RAW_CONVERSATION_ERR" +fi +if [[ "$INCLUDE_REVIEWS" -eq 1 ]]; then + fetch_paginated_json "repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews" "$RAW_REVIEWS" "$RAW_REVIEWS_ERR" fi -# Collect: all top-level review comments (including ones authored on earlier commits). -# -# Why: the PR review comments API (`/pulls/:number/comments`) keeps each comment’s -# `commit_id` fixed to the commit it was authored on. When new commits are pushed, -# older unresolved comments do not “move” to the new head; they become “outdated”. -# If we only include comments whose `commit_id` matches the current head, we can -# incorrectly report “0 actionables” even though older review threads remain open. +# Normalize review-thread comments (top-level only) and detect ack markers in replies. jq --arg head "$HEAD7" ' def has_ack_marker(s): (s | type) == "string" and (s | contains("✅ Addressed in commit")); + def normalize_title(body): + ((body + | split("\n") + | map(select(. != "")) + | .[0] // "UNTITLED" + ) + | gsub("\\*\\*"; "") + | .[0:80]); + + def priority_from_body(body): + if (body | test("\\bP0\\b|badge/P0-|🔴|Critical"; "i")) then "P0" + elif (body | test("\\bP1\\b|badge/P1-|🟠|Major"; "i")) then "P1" + elif (body | test("\\bP2\\b|badge/P2-|🟡|Minor"; "i")) then "P2" + else "P3" + end; + # Replies are returned in the same list, with `in_reply_to_id` set. - # Treat a top-level comment as "acknowledged" if either: - # - the comment body itself contains the marker, OR - # - any reply to it contains the marker. def ack_by_reply: reduce .[] as $c ({}; if ($c.in_reply_to_id != null and has_ack_marker($c.body)) then .[($c.in_reply_to_id | tostring)] = true else . end); (ack_by_reply) as $replies | - [ .[] | - select(.in_reply_to_id == null) | - { - id, - path, - line, - position, - original_position, - head_commit: $head, - comment_commit: (.commit_id[0:7]), - original_commit: (.original_commit_id[0:7]), - is_on_head: (.commit_id[0:7] == $head), - is_visible_on_head_diff: (.position != null), - is_outdated: (.position == null), - is_moved: (.commit_id != .original_commit_id), - has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), - priority: ( - if (.body | test("\\\\bP0\\\\b|badge/P0-|🔴|Critical"; "i")) then "P0" - elif (.body | test("\\\\bP1\\\\b|badge/P1-|🟠|Major"; "i")) then "P1" - elif (.body | test("\\\\bP2\\\\b|badge/P2-|🟡|Minor"; "i")) then "P2" - else "P3" - end - ), - title: ( - (.body - | split("\n") - | map(select(. != "")) - | .[0] // "UNTITLED" - ) - | gsub("\\*\\*"; "") - | .[0:80] - ), - body: .body - } + [ .[] + | select(.in_reply_to_id == null) + | { + id, + author: (.user.login // "unknown"), + author_is_bot: ( + (.user.type // "") == "Bot" + or ((.user.login // "") | endswith("[bot]")) + ), + url: .html_url, + source: "review_thread", + path, + line, + position, + original_position, + head_commit: $head, + comment_commit: (.commit_id[0:7]), + original_commit: (.original_commit_id[0:7]), + is_on_head: (.commit_id[0:7] == $head), + is_visible_on_head_diff: (.position != null), + is_outdated: (.position == null), + is_moved: (.commit_id != .original_commit_id), + has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), + is_actionable: true, + priority: priority_from_body(.body), + title: normalize_title(.body), + body: .body + } ] -' "$RAW" > "$LATEST" +' "$RAW_REVIEW" > "$LATEST_REVIEW" + +if [[ "$INCLUDE_CONVERSATION" -eq 1 ]]; then + jq ' + def has_ack_marker(s): + (s | type) == "string" and (s | contains("✅ Addressed in commit")); + + def normalize_title(body): + ((body + | split("\n") + | map(select(. != "")) + | .[0] // "UNTITLED" + ) + | gsub("\\*\\*"; "") + | .[0:80]); + + def priority_from_body(body): + if (body | test("\\bP0\\b|badge/P0-|🔴|Critical"; "i")) then "P0" + elif (body | test("\\bP1\\b|badge/P1-|🟠|Major"; "i")) then "P1" + elif (body | test("\\bP2\\b|badge/P2-|🟡|Minor"; "i")) then "P2" + else "P3" + end; + + def is_html_comment(body): + (body | type) == "string" + and (body | test("^\\s* -# Procedure: Extract Actionable Comments from CodeRabbitAI PR Reviews +# Procedure: Extract Actionable Comments from PR Review Threads (CodeRabbitAI + Humans) This procedure is part of the required PR workflow for this repo. @@ -40,10 +40,23 @@ Prefer the repo automation whenever possible: It is designed to: +- include review comments from **all** authors (CodeRabbitAI *and* human reviewers), - include “outdated” comments that are not visible on the current head diff, - detect `✅ Addressed in commit ...` markers in replies, - and produce a deterministic Markdown report. +To widen the net beyond inline review threads, you can include: + +- PR conversation comments (top-level PR timeline discussion), and +- review summaries (approve / changes-requested review bodies). + +```bash +.github/scripts/extract-actionable-comments.sh --all-sources --full +``` + +Note: +- Conversation comments and review summaries are not diff-positioned like review threads, so the script applies a simple “likely actionable” heuristic and emits a separate “Unclassified” bucket for anything that doesn’t match. + ### Step 1: Identify the PR head commit (the current diff) ```bash @@ -65,6 +78,9 @@ TMPFILE="/tmp/pr-${PR_NUMBER}-comments-$(date +%s).json" gh api "repos/${OWNER}/${REPO}/pulls/${PR_NUMBER}/comments" --paginate > "$TMPFILE" ``` +Note: +- This endpoint returns PR review comments authored by anyone (humans, bots, CodeRabbitAI, etc.). + --- ### Step 3: Extract top-level review comments (including “outdated”) From 7f8e95f4fc16cd30e04c486679247792d86446f5 Mon Sep 17 00:00:00 2001 From: "J. Kirby Ross" Date: Fri, 2 Jan 2026 13:06:12 -0800 Subject: [PATCH 10/13] docs: fix CodeRabbit actionables for PR 179 --- .../scripts/extract-actionable-comments.sh | 363 +++++++++--------- docs/decision-log.md | 2 +- docs/procedures/EXTRACT-PR-COMMENTS.md | 6 +- docs/procedures/PR-SUBMISSION-REVIEW-LOOP.md | 17 +- 4 files changed, 199 insertions(+), 189 deletions(-) diff --git a/.github/scripts/extract-actionable-comments.sh b/.github/scripts/extract-actionable-comments.sh index edc9123f..c669c027 100755 --- a/.github/scripts/extract-actionable-comments.sh +++ b/.github/scripts/extract-actionable-comments.sh @@ -165,6 +165,10 @@ if [[ "$REPO" != */* || "$REPO" == */*/* || -z "$OWNER" || -z "$NAME" || "$OWNER fi HEAD_SHA="$(gh pr view "$PR_NUMBER" --repo "$REPO" --json headRefOid --jq '.headRefOid')" +if [[ -z "$HEAD_SHA" || ! "$HEAD_SHA" =~ ^[0-9a-f]{7,}$ ]]; then + echo "Error: Failed to determine PR head commit SHA for ${REPO}#${PR_NUMBER}" >&2 + exit 1 +fi HEAD7="${HEAD_SHA:0:7}" TS="$(date +%s)" @@ -189,200 +193,197 @@ if [[ "$INCLUDE_REVIEWS" -eq 1 ]]; then fi # Normalize review-thread comments (top-level only) and detect ack markers in replies. -jq --arg head "$HEAD7" ' - def has_ack_marker(s): - (s | type) == "string" and (s | contains("✅ Addressed in commit")); - - def normalize_title(body): - ((body - | split("\n") - | map(select(. != "")) - | .[0] // "UNTITLED" - ) - | gsub("\\*\\*"; "") - | .[0:80]); - - def priority_from_body(body): - if (body | test("\\bP0\\b|badge/P0-|🔴|Critical"; "i")) then "P0" - elif (body | test("\\bP1\\b|badge/P1-|🟠|Major"; "i")) then "P1" - elif (body | test("\\bP2\\b|badge/P2-|🟡|Minor"; "i")) then "P2" - else "P3" - end; - - # Replies are returned in the same list, with `in_reply_to_id` set. - def ack_by_reply: - reduce .[] as $c ({}; if ($c.in_reply_to_id != null and has_ack_marker($c.body)) then .[($c.in_reply_to_id | tostring)] = true else . end); - - (ack_by_reply) as $replies | - [ .[] - | select(.in_reply_to_id == null) - | { - id, - author: (.user.login // "unknown"), - author_is_bot: ( - (.user.type // "") == "Bot" - or ((.user.login // "") | endswith("[bot]")) - ), - url: .html_url, - source: "review_thread", - path, - line, - position, - original_position, - head_commit: $head, - comment_commit: (.commit_id[0:7]), - original_commit: (.original_commit_id[0:7]), - is_on_head: (.commit_id[0:7] == $head), - is_visible_on_head_diff: (.position != null), - is_outdated: (.position == null), - is_moved: (.commit_id != .original_commit_id), - has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), - is_actionable: true, - priority: priority_from_body(.body), - title: normalize_title(.body), - body: .body - } - ] -' "$RAW_REVIEW" > "$LATEST_REVIEW" +FILTER_COMMON="/tmp/pr-${PR_NUMBER}-jq-common-${TS}.jq" +FILTER_REVIEW="/tmp/pr-${PR_NUMBER}-jq-review-thread-${TS}.jq" +FILTER_CONVERSATION="/tmp/pr-${PR_NUMBER}-jq-conversation-${TS}.jq" +FILTER_REVIEWS="/tmp/pr-${PR_NUMBER}-jq-review-summaries-${TS}.jq" +FILTER_REVIEW_FULL="/tmp/pr-${PR_NUMBER}-jq-review-thread-full-${TS}.jq" +FILTER_CONVERSATION_FULL="/tmp/pr-${PR_NUMBER}-jq-conversation-full-${TS}.jq" +FILTER_REVIEWS_FULL="/tmp/pr-${PR_NUMBER}-jq-review-summaries-full-${TS}.jq" + +cat > "$FILTER_COMMON" <<'JQ' +def has_ack_marker(s): + (s | type) == "string" and (s | contains("✅ Addressed in commit")); + +def normalize_title(body): + ((body + | split("\n") + | map(select(. != "")) + | .[0] // "UNTITLED" + ) + | gsub("\\*\\*"; "") + | .[0:80]); + +def priority_from_body(body): + if (body | test("\\bP0\\b|badge/P0-|🔴|Critical"; "i")) then "P0" + elif (body | test("\\bP1\\b|badge/P1-|🟠|Major"; "i")) then "P1" + elif (body | test("\\bP2\\b|badge/P2-|🟡|Minor"; "i")) then "P2" + else "P3" + end; + +def likely_actionable(body): + (body | type) == "string" + and (body | test("\\bP[0-3]\\b|\\bTODO\\b|\\bFIXME\\b|\\bnit\\b|suggest|\\bshould\\b|\\bconsider\\b|blocker|\\bbug\\b|error|fail|typo|rename|missing|clarify|doc(s|ument)?|\\btests?\\b|panic|crash|security|\\bdetermin"; "i")); +JQ + +cat > "$FILTER_REVIEW" <<'JQ' +# Replies are returned in the same list, with `in_reply_to_id` set. +def ack_by_reply: + reduce .[] as $c ({}; if ($c.in_reply_to_id != null and has_ack_marker($c.body)) then .[($c.in_reply_to_id | tostring)] = true else . end); + +(ack_by_reply) as $replies | +[ .[] + | select(.in_reply_to_id == null) + | { + id, + author: (.user.login // "unknown"), + author_is_bot: ( + (.user.type // "") == "Bot" + or ((.user.login // "") | endswith("[bot]")) + ), + url: .html_url, + source: "review_thread", + path, + line, + position, + original_position, + head_commit: $head, + comment_commit: (.commit_id[0:7]), + original_commit: (.original_commit_id[0:7]), + is_on_head: (.commit_id[0:7] == $head), + is_visible_on_head_diff: (.position != null), + is_outdated: (.position == null), + is_moved: (.commit_id != .original_commit_id), + has_ack: (has_ack_marker(.body) or ($replies[(.id | tostring)] // false)), + is_actionable: true, + priority: priority_from_body(.body), + title: normalize_title(.body), + body: .body + } +] +JQ + +cat "$FILTER_COMMON" "$FILTER_REVIEW" > "$FILTER_REVIEW_FULL" +jq --arg head "$HEAD7" -f "$FILTER_REVIEW_FULL" "$RAW_REVIEW" > "$LATEST_REVIEW" if [[ "$INCLUDE_CONVERSATION" -eq 1 ]]; then - jq ' - def has_ack_marker(s): - (s | type) == "string" and (s | contains("✅ Addressed in commit")); - - def normalize_title(body): - ((body - | split("\n") - | map(select(. != "")) - | .[0] // "UNTITLED" - ) - | gsub("\\*\\*"; "") - | .[0:80]); - - def priority_from_body(body): - if (body | test("\\bP0\\b|badge/P0-|🔴|Critical"; "i")) then "P0" - elif (body | test("\\bP1\\b|badge/P1-|🟠|Major"; "i")) then "P1" - elif (body | test("\\bP2\\b|badge/P2-|🟡|Minor"; "i")) then "P2" - else "P3" - end; - - def is_html_comment(body): - (body | type) == "string" - and (body | test("^\\s*