diff --git a/review-pr/action.yml b/review-pr/action.yml index af4bd2e..0034017 100644 --- a/review-pr/action.yml +++ b/review-pr/action.yml @@ -284,7 +284,7 @@ runs: echo "chunk_manifest=$(jq -nc --argjson f "$FILES" '{"1": $f}')" >> $GITHUB_OUTPUT echo "✅ Single chunk ($TOTAL_LINES lines)" else - # Large diff — split at file boundaries, targeting ~1000 lines per chunk + # Large diff — split at file boundaries, targeting ~600 lines per chunk CHUNK=1 CHUNK_LINES=0 CURRENT_FILE="" @@ -301,9 +301,9 @@ runs: DIR=$(dirname "$FILE") # Start new chunk if: - # - over soft limit (1000 lines) and directory changed, OR + # - over soft limit (600 lines) and directory changed, OR # - over hard limit (2000 lines) regardless of directory - if ([ "$CHUNK_LINES" -gt 1000 ] && [ "$DIR" != "$CURRENT_DIR" ]) || [ "$CHUNK_LINES" -gt 2000 ]; then + if ([ "$CHUNK_LINES" -gt 600 ] && [ "$DIR" != "$CURRENT_DIR" ]) || [ "$CHUNK_LINES" -gt 2000 ]; then # Save current chunk's file list to manifest MANIFEST=$(echo "$MANIFEST" | jq --argjson files "$CHUNK_FILES" --arg k "$CHUNK" '.[$k] = $files') CHUNK=$((CHUNK + 1)) @@ -326,7 +326,7 @@ runs: echo "chunk_count=$CHUNK" >> $GITHUB_OUTPUT echo "chunk_manifest=$(echo "$MANIFEST" | jq -c .)" >> $GITHUB_OUTPUT - echo "✅ Split into $CHUNK chunks (target ~1000 lines each)" + echo "✅ Split into $CHUNK chunks (target ~600 lines each)" for i in $(seq 1 $CHUNK); do LINES=$(wc -l < /tmp/drafter_chunk_${i}.diff | tr -d ' ') @@ -806,6 +806,7 @@ runs: run: | REVIEW_URL="https://github.com/$REPOSITORY/pull/$PR_NUMBER" echo "review-url=$REVIEW_URL" >> $GITHUB_OUTPUT + TIMEOUT_NOTE="" if [ "$SKIP_REASON" = "concurrent" ]; then # Stay silent — the 👀 reaction on the triggering comment (added by the @@ -814,6 +815,17 @@ runs: STATUS="⏭️ **Review skipped** — another review is already in progress" elif [ -z "$EXIT_CODE" ]; then STATUS="⏭️ **Review skipped** — agent did not run" + elif [ "$EXIT_CODE" = "124" ]; then + # Timeout (SIGKILL after 1800 s) — provide actionable guidance + STATUS="⏱️ **Review timed out** (exit code: 124, limit: 1800 s)" + TIMEOUT_NOTE="- **Exit code:** 124 (SIGKILL — 1800 s timeout) +- **Timeout limit:** 1800 s +- Verbose log artifact (${VERBOSE_LOG_FILE}) uploaded for debugging." + if ! gh api "repos/$REPOSITORY/issues/$PR_NUMBER/comments" \ + -f body="⏱️ **PR Review Timed Out** — The review agent hit the 1800 s time limit before completing. This usually happens on large or complex diffs. You can re-trigger with \`/review\` — if it times out again, consider splitting the PR into smaller pieces." \ + 2>&1; then + echo "::warning::Failed to post timeout comment to PR" + fi elif [ "$EXIT_CODE" != "0" ]; then # Check if agent actually posted a review despite the error exit code. # This happens when a sub-agent fails (e.g., API overload) but the root @@ -841,6 +853,10 @@ runs: echo "## PR Review Summary" echo "" echo "$STATUS" + if [ -n "$TIMEOUT_NOTE" ]; then + echo "" + printf '%s\n' "$TIMEOUT_NOTE" + fi echo "" echo "📝 [View Pull Request #$PR_NUMBER]($REVIEW_URL)" } >> $GITHUB_STEP_SUMMARY diff --git a/review-pr/agents/pr-review.yaml b/review-pr/agents/pr-review.yaml index 891e1e5..d349b0b 100644 --- a/review-pr/agents/pr-review.yaml +++ b/review-pr/agents/pr-review.yaml @@ -155,7 +155,8 @@ agents: c. Delegate to the drafter once with the path `./pr-review.diff`. **Both modes**: - d. Merge all findings arrays into a single list before proceeding. + d. Merge all findings arrays into a single list before proceeding. Each finding + object uses fields `category`, `issue`, `details`, `in_diff` — not `title`/`body`. e. Include any relevant learned patterns from memory in each delegation. ## CRITICAL: How to delegate to the drafter @@ -170,12 +171,26 @@ agents: - File history: relevant entries from `/tmp/file_history.txt` (CI mode only — skip in console mode) - Available files: run `ls` on directories of changed files so drafter knows real paths - Learned patterns: any relevant memories + + **CRITICAL — drafter response schema (schema-validated, strict):** + The drafter returns a JSON object with these exact top-level fields: + - `findings`: array of objects with fields `file`, `line`, `severity`, `category`, + `issue` (one-line summary), `details` (trigger + impact), `in_diff` (boolean) + - `summary`: string — overall assessment + - `review_complete`: boolean — true if the full chunk was reviewed + + **NEVER mention `title` or `body` as finding field names in the delegation + message.** Those fields do not exist in the drafter schema. If the task string + asks for `title`/`body`, the schema validator rejects every emit attempt and the + drafter enters an infinite file-read retry loop. Always use `issue` and `details`. 5. Parse the drafter's JSON response. Check `review_complete`: - If `review_complete` is `true` AND zero findings → skip directly to step 8 (Decision Rules). - If `review_complete` is `false` AND zero findings → the drafter likely exhausted its context. Post a COMMENT review noting the review was incomplete and do NOT approve. - - Otherwise, collect all findings with severity "high" or "medium" - and delegate them to the `verifier` in a single batch. Skip verification for "low" findings. + - Otherwise, collect all findings with severity "high" or "medium" — reference + each finding's one-line summary via the `issue` field and its explanation via + the `details` field (not `title`/`body`) — and delegate them to the `verifier` + in a single batch. Skip verification for "low" findings. Include the project context (from step 2) in the verifier delegation so it can validate findings against project-specific conventions (e.g., language version, available APIs). **The verifier has no file access — you must provide all code context inline.** @@ -194,7 +209,9 @@ agents: `in_changed_code == false` or `in_diff == false`. 7. **Verify line numbers** before posting (see below) 8. Apply the Decision Rules (see below) to determine the review verdict - 9. Build inline comments from CONFIRMED/LIKELY issues and post the review + 9. Build inline comments from CONFIRMED/LIKELY issues using each finding's `issue` + (one-line summary), `details` (full explanation), `severity`, `category`, `file`, + and `line` fields. Post the review. 10. Always report ALL HIGH severity bugs. Limit MEDIUM/LOW to 5 comments max. Find **real bugs in the changed code**, not style issues. If the changed code works correctly, approve it.