diff --git a/README.md b/README.md index 7d88c1e..f6e66d7 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ Supported provider names today: - `acpx`: any ACP-compatible coding agent (Codex / Claude / Pi / Gemini / ...) via openclaw/acpx - `grok`: local Grok Build CLI - `opencode`: local OpenCode CLI +- `cursor`: local Cursor Agent CLI (experimental; `doctor` is enabled by default) - `mock`: deterministic test provider - `mock-fail`: failure test provider diff --git a/docs/providers.md b/docs/providers.md index 9453888..1480941 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -18,6 +18,7 @@ Provider names today: - `grok`: shells out to the xAI Grok Build CLI in headless mode (`grok --prompt-file`) - `opencode`: shells out to `opencode run --format json` - `pi`: shells out to `pi -p` (non-interactive print mode) +- `cursor`: shells out to `cursor-agent -p --output-format json` - `mock`: deterministic provider for tests and fixtures - `mock-fail`: failure provider for tests @@ -195,7 +196,65 @@ review and revalidate, but enforcement depends on pi honoring the tool allowlist For write operations during `fix`, the agent has full filesystem and shell access. For untrusted code, run `clawpatch fix --provider pi` inside an isolated checkout. +## Cursor + +The `cursor` provider shells out to the local Cursor Agent CLI in headless print +mode. It is experimental and disabled for `map`, `review`, `fix`, and +`revalidate` by default while HITL verification is incomplete. + +Verify local availability: + +```bash +cursor-agent --version +clawpatch doctor --provider cursor +``` + +Experimental provider selection: + +```bash +CURSOR_API_KEY=... CLAWPATCH_CURSOR_EXPERIMENTAL=1 clawpatch review --provider cursor +CURSOR_API_KEY=... CLAWPATCH_CURSOR_EXPERIMENTAL=1 CLAWPATCH_PROVIDER=cursor clawpatch review +CURSOR_API_KEY=... CLAWPATCH_CURSOR_EXPERIMENTAL=1 CLAWPATCH_CURSOR_ALLOW_WRITE=1 clawpatch fix --finding --provider cursor --model +clawpatch doctor --provider cursor +``` + +How the Cursor provider works: + +- Headless mode: `cursor-agent --trust -p --output-format json --workspace ` +- Read-only operations: also pass Cursor's documented `--mode ask` +- Output: parses Cursor's `type: "result"` JSON envelope and then extracts the + Clawpatch JSON object from the `result` text +- Prompt delivery: writes the full Clawpatch prompt to Cursor's stdin +- Model selection: passes `--model ` when configured +- Model names: pass Cursor model ids, for example `composer-2.5` for Composer + 2.5 without fast mode +- Reasoning effort and `skipGitRepoCheck`: not mapped to Cursor CLI flags +- Authentication: experimental execution uses the host user environment and + passes `CURSOR_API_KEY` through when present. Prefer API-key auth for headless + runs; relying on the user's Cursor login can touch the macOS login keychain. + Clawpatch also sets `NO_OPEN_BROWSER=1` to reduce browser prompts during + headless runs. +- Read-only guard: map, review, and revalidate pass `--mode ask` and include + read-only instructions in the prompt. Clawpatch does not set + `CURSOR_CONFIG_DIR`, because that can bypass the user's existing Cursor auth + profile and trigger browser login prompts. +- Timeout: 300 seconds by default, override with + `CLAWPATCH_CURSOR_TIMEOUT_MS` or `CLAWPATCH_PROVIDER_TIMEOUT_MS` +- Advisory handling: semver-like Cursor CLI versions below `2.5.0` are blocked + for CVE-2026-26268 / GHSA-8pcm-8jpx-hv8r. Date-formatted CLI builds are + allowed only when Clawpatch can verify a semver Cursor app version from the + local macOS app bundle. + +Permission caveat: Cursor's print mode is documented as having access to tools, +including write and shell. Clawpatch therefore keeps Cursor execution behind +`CLAWPATCH_CURSOR_EXPERIMENTAL=1`, uses `--mode ask` for read-only operations, +and separately requires `CLAWPATCH_CURSOR_ALLOW_WRITE=1` for `fix`. The +implementation uses `--trust` for the explicit trusted-workspace path and never +uses `--force` or `--yolo`. Complete HITL verification before promoting this to +default provider support, especially for ambient rules, MCP configuration, +positional prompt exposure, timeout behavior, and any claimed read-only mode. + Direct OpenAI API, local-model, and multi-model panel providers are not implemented yet. The `acpx` provider is the generic route for ACP-compatible -agents; the `grok`, `opencode`, and `pi` providers are direct integrations +agents; the `grok`, `opencode`, `pi`, and `cursor` providers are direct integrations for local CLIs. diff --git a/src/prompt.test.ts b/src/prompt.test.ts index c078de4..ad13906 100644 --- a/src/prompt.test.ts +++ b/src/prompt.test.ts @@ -1,8 +1,12 @@ import { describe, expect, it } from "vitest"; -import { REVIEW_PROMPT_FILE_CHAR_LIMIT, buildReviewPromptBundle } from "./prompt.js"; +import { + REVIEW_PROMPT_FILE_CHAR_LIMIT, + buildFixPrompt, + buildReviewPromptBundle, +} from "./prompt.js"; import { defaultConfig } from "./config.js"; import { fixtureRoot, writeFixture } from "./test-helpers.js"; -import type { FeatureRecord, ProjectRecord } from "./types.js"; +import type { FeatureRecord, FindingRecord, ProjectRecord } from "./types.js"; describe("review prompt provenance", () => { it("records included, omitted, and truncated review prompt context", async () => { @@ -21,13 +25,32 @@ describe("review prompt provenance", () => { }); expect(bundle.prompt).toContain("Prompt context:"); - expect(bundle.prompt).toContain("--- src/index.ts"); - expect(bundle.prompt).toContain("--- tests/index.test.ts"); + expect(bundle.prompt).toContain("--- src/index.ts (owned, lines 1-1)"); + expect(bundle.prompt).toContain("1 | export const value = 1;"); + expect(bundle.prompt).toContain("--- tests/index.test.ts (context, lines 1-1)"); expect(bundle.prompt).not.toContain("--- src/extra.ts"); + expect(bundle.prompt).toContain("Valid evidence paths are exactly:"); + expect(bundle.prompt).toContain("- src/index.ts"); + expect(bundle.prompt).toContain("- tests/index.test.ts"); + expect(bundle.prompt).not.toContain("- src/extra.ts"); + expect(bundle.prompt).not.toContain('"analysisHistory"'); + expect(bundle.prompt).not.toContain('"lock"'); expect(bundle.manifest.includedFiles).toEqual( expect.arrayContaining([ - expect.objectContaining({ path: "src/index.ts", role: "owned", truncated: false }), - expect.objectContaining({ path: "docs/large.md", role: "context", truncated: true }), + expect.objectContaining({ + path: "src/index.ts", + role: "owned", + includedStartLine: 1, + includedEndLine: 1, + truncated: false, + }), + expect.objectContaining({ + path: "docs/large.md", + role: "context", + includedStartLine: 1, + includedEndLine: expect.any(Number), + truncated: true, + }), ]), ); expect(bundle.manifest.omittedFiles).toEqual([ @@ -51,6 +74,123 @@ describe("review prompt provenance", () => { expect(bundle.manifest.includedFiles).toEqual( expect.arrayContaining([expect.objectContaining({ path: "docs/large.md", truncated: true })]), ); + expect(bundle.prompt).toContain("--- docs/large.md (context, lines 1-1, truncated)"); + expect(bundle.prompt).toContain("...[truncated after line 1]"); + }); + + it("includes linked tests as valid review evidence", async () => { + const root = await fixtureRoot("clawpatch-prompt-linked-tests-"); + await writeFixture(root, "src/index.ts", "export const value = 1;\n"); + await writeFixture(root, "src/extra.ts", "export const extra = 1;\n"); + await writeFixture(root, "tests/index.test.ts", "expect(1).toBe(1);\n"); + await writeFixture(root, "docs/large.md", "docs\n"); + const linkedTestFeature = { + ...feature(), + contextFiles: [], + tests: [{ path: "tests/index.test.ts", command: null }], + }; + + const bundle = await buildReviewPromptBundle( + root, + project(root), + linkedTestFeature, + defaultConfig(), + ); + + expect(bundle.prompt).toContain("--- tests/index.test.ts (test, lines 1-1)"); + expect(bundle.prompt).toContain("- tests/index.test.ts"); + expect(bundle.manifest.includedFiles).toEqual( + expect.arrayContaining([ + expect.objectContaining({ path: "tests/index.test.ts", role: "test" }), + ]), + ); + }); + + it("does not list duplicate-skipped included files as omitted", async () => { + const root = await fixtureRoot("clawpatch-prompt-duplicate-context-"); + await writeFixture(root, "src/index.ts", "export const value = 1;\n"); + await writeFixture(root, "src/context-one.ts", "export const one = 1;\n"); + await writeFixture(root, "src/context-two.ts", "export const two = 1;\n"); + await writeFixture(root, "src/context-three.ts", "export const three = 1;\n"); + const duplicateFeature = { + ...feature(), + ownedFiles: [{ path: "src/index.ts", reason: "primary" }], + contextFiles: [ + { path: "src/index.ts", reason: "duplicate owned file" }, + { path: "src/context-one.ts", reason: "context" }, + { path: "src/context-two.ts", reason: "context" }, + { path: "src/context-three.ts", reason: "overflow" }, + ], + }; + + const bundle = await buildReviewPromptBundle(root, project(root), duplicateFeature, { + ...defaultConfig(), + review: { + ...defaultConfig().review, + maxOwnedFiles: 1, + maxContextFiles: 2, + }, + }); + + expect(bundle.prompt).toContain("--- src/context-two.ts (context, lines 1-1)"); + expect(bundle.manifest.omittedFiles).toEqual([ + { path: "src/context-three.ts", role: "context", reason: "maxContextFiles" }, + ]); + }); + + it("de-duplicates equivalent prompt paths before applying limits", async () => { + const root = await fixtureRoot("clawpatch-prompt-normalized-duplicates-"); + await writeFixture(root, "src/index.ts", "export const value = 1;\n"); + await writeFixture(root, "src/next.ts", "export const next = 1;\n"); + const duplicateFeature = { + ...feature(), + ownedFiles: [ + { path: "src/index.ts", reason: "primary" }, + { path: "./src/index.ts", reason: "duplicate spelling" }, + { path: "src/next.ts", reason: "next real file" }, + ], + contextFiles: [], + }; + + const bundle = await buildReviewPromptBundle(root, project(root), duplicateFeature, { + ...defaultConfig(), + review: { + ...defaultConfig().review, + maxOwnedFiles: 2, + }, + }); + + expect(bundle.manifest.includedFiles.map((file) => file.path)).toEqual([ + "src/index.ts", + "src/next.ts", + ]); + expect(bundle.manifest.omittedFiles).toEqual([]); + }); + + it("includes fix evidence paths that differ only by normalized spelling", async () => { + const root = await fixtureRoot("clawpatch-fix-prompt-normalized-evidence-"); + await writeFixture(root, "src/index.ts", "export const value = 1;\n"); + await writeFixture(root, "src/other.ts", "export const other = 1;\n"); + const normalizedFeature = { + ...feature(), + entrypoints: [], + ownedFiles: [{ path: "./src/index.ts", reason: "primary" }], + contextFiles: [], + tests: [], + }; + + const prompt = await buildFixPrompt(root, finding("src/index.ts"), normalizedFeature, { + ...defaultConfig(), + review: { + ...defaultConfig().review, + maxOwnedFiles: 0, + maxContextFiles: 0, + }, + }); + + expect(prompt).toContain("--- ./src/index.ts"); + expect(prompt).toContain("export const value = 1;"); + expect(prompt).not.toContain("--- src/other.ts"); }); }); @@ -109,3 +249,39 @@ function feature(): FeatureRecord { updatedAt: now, }; } + +function finding(path: string): FindingRecord { + const now = new Date().toISOString(); + return { + schemaVersion: 1, + findingId: "fnd_prompt", + featureId: "feat_prompt", + title: "Prompt finding", + category: "bug", + severity: "medium", + confidence: "high", + triage: "confirmed-bug", + evidence: [ + { + path, + startLine: 1, + endLine: 1, + symbol: null, + quote: null, + }, + ], + reasoning: "The file needs a fix.", + reproduction: null, + recommendation: "Fix the file.", + whyTestsDoNotAlreadyCoverThis: "", + suggestedRegressionTest: null, + minimumFixScope: "src/index.ts", + status: "open", + history: [], + signature: "sig_prompt", + linkedPatchAttemptIds: [], + createdByRunId: "run_prompt", + createdAt: now, + updatedAt: now, + }; +} diff --git a/src/prompt.ts b/src/prompt.ts index e5078a1..800ede8 100644 --- a/src/prompt.ts +++ b/src/prompt.ts @@ -6,13 +6,21 @@ export type ReviewMode = "default" | "deslopify"; export const REVIEW_PROMPT_FILE_CHAR_LIMIT = 24_000; -export type ReviewPromptFileRole = "owned" | "context"; +export type ReviewPromptFileRole = "owned" | "context" | "test"; + +export type ReviewPromptLineRange = { + startLine: number; + endLine: number; +}; export type ReviewPromptFileManifest = { path: string; role: ReviewPromptFileRole; bytes: number; includedBytes: number; + includedStartLine: number | null; + includedEndLine: number | null; + includedLineRanges: ReviewPromptLineRange[]; truncated: boolean; readable: boolean; skippedReason: string | null; @@ -101,19 +109,23 @@ export async function buildReviewPromptBundle( mode: ReviewMode = "default", customPrompt: string | null = null, ): Promise { - const owned = feature.ownedFiles.slice(0, config.review.maxOwnedFiles); - const context = feature.contextFiles.slice(0, config.review.maxContextFiles); + const seenPromptFiles = new Set(); + const owned = uniquePromptRefs(feature.ownedFiles, config.review.maxOwnedFiles, seenPromptFiles); + const context = uniquePromptRefs( + feature.contextFiles, + config.review.maxContextFiles, + seenPromptFiles, + ); + const tests = uniquePromptRefs(feature.tests, config.review.maxContextFiles, seenPromptFiles); + const includedPromptPaths = new Set([ + ...owned.map((ref) => normalizePromptPath(ref.path)), + ...context.map((ref) => normalizePromptPath(ref.path)), + ...tests.map((ref) => normalizePromptPath(ref.path)), + ]); const omittedFiles = [ - ...feature.ownedFiles.slice(config.review.maxOwnedFiles).map((ref) => ({ - path: ref.path, - role: "owned" as const, - reason: "maxOwnedFiles", - })), - ...feature.contextFiles.slice(config.review.maxContextFiles).map((ref) => ({ - path: ref.path, - role: "context" as const, - reason: "maxContextFiles", - })), + ...omittedPromptRefs(feature.ownedFiles, "owned", "maxOwnedFiles", includedPromptPaths), + ...omittedPromptRefs(feature.contextFiles, "context", "maxContextFiles", includedPromptPaths), + ...omittedPromptRefs(feature.tests, "test", "maxContextFiles", includedPromptPaths), ]; const fileBlocks: string[] = []; const includedFiles: ReviewPromptFileManifest[] = []; @@ -127,6 +139,11 @@ export async function buildReviewPromptBundle( fileBlocks.push(file.block); includedFiles.push(file.manifest); } + for (const ref of tests) { + const file = await fileBlockWithManifest(root, ref.path, "test"); + fileBlocks.push(file.block); + includedFiles.push(file.manifest); + } const customBlock = customPrompt !== null && customPrompt.trim() !== "" ? `Additional reviewer guidance (provided via --prompt-file): @@ -138,15 +155,32 @@ ${customPrompt.trim()} const promptContext = { maxOwnedFiles: config.review.maxOwnedFiles, maxContextFiles: config.review.maxContextFiles, - includedFiles: includedFiles.map(({ path, role, bytes, includedBytes, truncated }) => ({ - path, - role, - bytes, - includedBytes, - truncated, - })), + includedFiles: includedFiles.map( + ({ + path, + role, + bytes, + includedBytes, + includedStartLine, + includedEndLine, + includedLineRanges, + truncated, + }) => ({ + path, + role, + bytes, + includedBytes, + includedStartLine, + includedEndLine, + includedLineRanges, + truncated, + }), + ), omittedFiles, }; + const validEvidencePaths = [ + ...new Set(includedFiles.filter((file) => file.readable).map((file) => file.path)), + ]; const prompt = `You are reviewing one semantic feature for clawpatch. Return strict JSON only. No markdown fences. @@ -155,7 +189,7 @@ Project: ${JSON.stringify({ name: project.name, detected: project.detected }, null, 2)} Feature: -${JSON.stringify(feature, null, 2)} +${JSON.stringify(reviewFeatureView(feature), null, 2)} ${customBlock}Review categories: - correctness bugs @@ -180,6 +214,13 @@ issues: when the same bug pattern appears in multiple owned files, emit one find with multiple evidence refs instead of separate one-off findings. Avoid speculative low-evidence findings. Evidence must point at included files. +Valid evidence paths are exactly: +${validEvidencePaths.map((path) => `- ${path}`).join("\n")} +Feature metadata paths are not valid evidence unless listed above. +When providing evidence line ranges, use the line-number gutter in the Files section. +Do not inspect files beyond the shown excerpts for evidence. If an excerpt is truncated, +only cite lines that appear in the Files section. +Set evidence.quote to null; line ranges are enough for validation. Prompt context: ${JSON.stringify(promptContext, null, 2)} @@ -219,6 +260,66 @@ ${fileBlocks.join("\n\n")}`; }; } +function reviewFeatureView(feature: FeatureRecord): object { + return { + featureId: feature.featureId, + title: feature.title, + summary: feature.summary, + kind: feature.kind, + source: feature.source, + confidence: feature.confidence, + entrypoints: feature.entrypoints, + ownedFiles: feature.ownedFiles, + contextFiles: feature.contextFiles, + tests: feature.tests, + tags: feature.tags, + trustBoundaries: feature.trustBoundaries, + }; +} + +function uniquePromptRefs( + refs: readonly T[], + limit: number, + seen: Set, +): T[] { + const selected: T[] = []; + for (const ref of refs) { + if (selected.length >= limit) { + break; + } + const normalizedPath = normalizePromptPath(ref.path); + if (seen.has(normalizedPath)) { + continue; + } + seen.add(normalizedPath); + selected.push(ref); + } + return selected; +} + +function omittedPromptRefs( + refs: readonly T[], + role: ReviewPromptFileRole, + reason: string, + includedPaths: ReadonlySet, +): Array<{ path: string; role: ReviewPromptFileRole; reason: string }> { + const omitted: Array<{ path: string; role: ReviewPromptFileRole; reason: string }> = []; + const seen = new Set(); + for (const ref of refs) { + const normalizedPath = normalizePromptPath(ref.path); + if (includedPaths.has(normalizedPath) || seen.has(normalizedPath)) { + continue; + } + seen.add(normalizedPath); + omitted.push({ path: ref.path, role, reason }); + } + return omitted; +} + +function normalizePromptPath(path: string): string { + return path.replace(/\\/gu, "/").replace(/^\.\/+/u, ""); +} + function reviewModeInstructions(mode: ReviewMode): string { if (mode === "default") { return ""; @@ -301,20 +402,34 @@ function fixPromptPaths( const owned = feature.ownedFiles.slice(0, config.review.maxOwnedFiles); const context = feature.contextFiles.slice(0, config.review.maxContextFiles); const tests = feature.tests.slice(0, config.review.maxContextFiles); - const allowed = new Set([ - ...feature.ownedFiles.map((ref) => ref.path), - ...feature.contextFiles.map((ref) => ref.path), - ...feature.tests.map((test) => test.path), - ...feature.entrypoints.map((entrypoint) => entrypoint.path), - ]); + const allowed = new Map(); + const allowPath = (path: string): void => { + const normalizedPath = normalizePromptPath(path); + if (!allowed.has(normalizedPath)) { + allowed.set(normalizedPath, path); + } + }; + for (const ref of feature.ownedFiles) { + allowPath(ref.path); + } + for (const ref of feature.contextFiles) { + allowPath(ref.path); + } + for (const test of feature.tests) { + allowPath(test.path); + } + for (const entrypoint of feature.entrypoints) { + allowPath(entrypoint.path); + } const push = (path: string): void => { if (!paths.includes(path)) { paths.push(path); } }; for (const evidence of finding.evidence) { - if (allowed.has(evidence.path)) { - push(evidence.path); + const allowedPath = allowed.get(normalizePromptPath(evidence.path)); + if (allowedPath !== undefined) { + push(allowedPath); } } for (const ref of owned) { @@ -356,6 +471,9 @@ async function fileBlockWithManifest( role, bytes: 0, includedBytes: 0, + includedStartLine: null, + includedEndLine: null, + includedLineRanges: [], truncated: false, readable: false, skippedReason: "unreadable", @@ -363,18 +481,20 @@ async function fileBlockWithManifest( }; } const bytes = Buffer.byteLength(contents, "utf8"); - const truncated = contents.length > REVIEW_PROMPT_FILE_CHAR_LIMIT; - const trimmed = truncated - ? `${contents.slice(0, REVIEW_PROMPT_FILE_CHAR_LIMIT)}\n...[truncated]` - : contents; + const excerpt = prefixExcerpt(contents); return { - block: `--- ${path}\n${trimmed}`, + block: `--- ${path} (${role}, ${rangeLabel(excerpt.includedLineRanges)}${ + excerpt.truncated ? ", truncated" : "" + })\n${excerpt.body}`, manifest: { path, role, bytes, - includedBytes: Buffer.byteLength(trimmed, "utf8"), - truncated, + includedBytes: Buffer.byteLength(excerpt.includedContents, "utf8"), + includedStartLine: excerpt.includedLineRanges[0]?.startLine ?? null, + includedEndLine: excerpt.includedLineRanges.at(-1)?.endLine ?? null, + includedLineRanges: excerpt.includedLineRanges, + truncated: excerpt.truncated, readable: true, skippedReason: null, }, @@ -393,6 +513,9 @@ function skippedFileBlock( role, bytes: 0, includedBytes: 0, + includedStartLine: null, + includedEndLine: null, + includedLineRanges: [], truncated: false, readable: false, skippedReason: reason, @@ -400,6 +523,55 @@ function skippedFileBlock( }; } +function prefixExcerpt(contents: string): { + body: string; + includedContents: string; + includedLineRanges: ReviewPromptLineRange[]; + truncated: boolean; +} { + const truncated = contents.length > REVIEW_PROMPT_FILE_CHAR_LIMIT; + const includedContents = truncated ? contents.slice(0, REVIEW_PROMPT_FILE_CHAR_LIMIT) : contents; + const includedEndLine = reviewLineCount(includedContents); + const body = `${numberedFileContents(includedContents, 1)}${ + truncated ? `\n...[truncated after line ${includedEndLine}]` : "" + }`; + return { + body, + includedContents, + includedLineRanges: [{ startLine: 1, endLine: includedEndLine }], + truncated, + }; +} + +function rangeLabel(ranges: readonly ReviewPromptLineRange[]): string { + if (ranges.length === 0) { + return "no lines"; + } + if (ranges.length === 1) { + const range = ranges[0]!; + return `lines ${range.startLine}-${range.endLine}`; + } + return `lines ${ranges.map((range) => `${range.startLine}-${range.endLine}`).join(", ")}`; +} + +function numberedFileContents(contents: string, startLine: number): string { + return splitReviewLines(contents) + .map((line, index) => `${startLine + index} | ${line}`) + .join("\n"); +} + +function reviewLineCount(contents: string): number { + return splitReviewLines(contents).length; +} + +function splitReviewLines(contents: string): string[] { + if (contents.length === 0) { + return [""]; + } + const lines = contents.split("\n"); + return contents.endsWith("\n") ? lines.slice(0, -1) : lines; +} + function isInside(root: string, candidate: string): boolean { const relativePath = relative(root, candidate); return relativePath === "" || (!relativePath.startsWith("..") && !isAbsolute(relativePath)); diff --git a/src/provider.test.ts b/src/provider.test.ts index a6f4610..ec39d9e 100644 --- a/src/provider.test.ts +++ b/src/provider.test.ts @@ -2,23 +2,32 @@ import { afterEach, describe, expect, it } from "vitest"; import { ClawpatchError } from "./errors.js"; import { __testing, extractJson, providerByName } from "./provider.js"; import { safeProviderPreview } from "./provider-json.js"; +import { agentMapJsonSchema, reviewJsonSchema } from "./provider-schema.js"; import { evidenceRefSchema, revalidateOutputSchema, reviewOutputSchema } from "./types.js"; // eslint-disable-next-line no-underscore-dangle const { acpxFailureMessage, + assertCursorRuntimeVersionAllowed, acpxPromptRetries, addCodexModelArgs, addCodexSandboxArgs, buildAcpxJsonArgs, codexFailureMessage, + cursorAgentArgs, + cursorEnv, + cursorFailureMessage, + cursorPrompt, + cursorTimeoutMs, extractAcpxJson, + extractCursorJson, extractOpencodeJson, formatZodError, formatZodIssue, parseAcpxJsonOutput, parseAcpxAgent, parseCodexJson, + parseSemver, parseReviewOutput, parseOrThrow, piThinkingLevel, @@ -304,6 +313,271 @@ describe("piThinkingLevel", () => { }); }); +describe("Cursor provider", () => { + const originalCursorTimeout = process.env["CLAWPATCH_CURSOR_TIMEOUT_MS"]; + const originalProviderTimeout = process.env["CLAWPATCH_PROVIDER_TIMEOUT_MS"]; + + afterEach(() => { + if (originalCursorTimeout === undefined) { + delete process.env["CLAWPATCH_CURSOR_TIMEOUT_MS"]; + } else { + process.env["CLAWPATCH_CURSOR_TIMEOUT_MS"] = originalCursorTimeout; + } + if (originalProviderTimeout === undefined) { + delete process.env["CLAWPATCH_PROVIDER_TIMEOUT_MS"]; + } else { + process.env["CLAWPATCH_PROVIDER_TIMEOUT_MS"] = originalProviderTimeout; + } + }); + + it("builds the verified trusted read-only print JSON command shape", () => { + const args = cursorAgentArgs( + "/repo", + { + model: "cursor-model", + reasoningEffort: "xhigh", + skipGitRepoCheck: true, + }, + true, + ); + + expect(args).toEqual([ + "--trust", + "-p", + "--output-format", + "json", + "--workspace", + "/repo", + "--mode", + "ask", + "--model", + "cursor-model", + ]); + expect(args).not.toContain("--force"); + expect(args).not.toContain("--yolo"); + }); + + it("leaves write-mode Cursor execution ungated by read-only mode flags", () => { + const args = cursorAgentArgs( + "/repo", + { + model: null, + reasoningEffort: null, + skipGitRepoCheck: false, + }, + false, + ); + + expect(args).toEqual(["--trust", "-p", "--output-format", "json", "--workspace", "/repo"]); + }); + + it("keeps Cursor provider execution disabled by default", async () => { + const originalExperimental = process.env["CLAWPATCH_CURSOR_EXPERIMENTAL"]; + delete process.env["CLAWPATCH_CURSOR_EXPERIMENTAL"]; + try { + await expect( + providerByName("cursor").review("/repo", "prompt", { + model: null, + reasoningEffort: null, + skipGitRepoCheck: false, + }), + ).rejects.toThrow(/experimental and disabled by default/u); + } finally { + if (originalExperimental === undefined) { + delete process.env["CLAWPATCH_CURSOR_EXPERIMENTAL"]; + } else { + process.env["CLAWPATCH_CURSOR_EXPERIMENTAL"] = originalExperimental; + } + } + }); + + it("extracts Clawpatch JSON from the Cursor success envelope result", () => { + const stdout = JSON.stringify({ + type: "result", + subtype: "success", + is_error: false, + result: '```json\n{"findings":[],"inspected":{"files":[],"symbols":[],"notes":[]}}\n```', + }); + + expect(extractCursorJson(stdout)).toEqual({ + findings: [], + inspected: { files: [], symbols: [], notes: [] }, + }); + }); + + it("accepts Cursor success envelopes without a subtype", () => { + const stdout = JSON.stringify({ + type: "result", + is_error: false, + result: '{"outcome":"fixed","reasoning":"ok","commands":[]}', + }); + + expect(extractCursorJson(stdout)).toEqual({ + outcome: "fixed", + reasoning: "ok", + commands: [], + }); + }); + + it("rejects Cursor error envelopes", () => { + expect(() => + extractCursorJson( + JSON.stringify({ + type: "result", + subtype: "error", + is_error: true, + result: "auth required", + }), + ), + ).toThrow(/cursor provider returned an error envelope/u); + }); + + it("rejects missing result text", () => { + expectMalformed( + () => + extractCursorJson(JSON.stringify({ type: "result", subtype: "success", is_error: false })), + /missing result text/u, + ); + }); + + it("does not preview malformed Cursor result text", () => { + const secretPrompt = "SOURCE_CONTEXT_SECRET"; + + expect(() => + extractCursorJson( + JSON.stringify({ + type: "result", + subtype: "success", + is_error: false, + result: `not json ${secretPrompt}`, + }), + ), + ).toThrow(/result chars=\d+/u); + expect(() => + extractCursorJson( + JSON.stringify({ + type: "result", + subtype: "success", + is_error: false, + result: `not json ${secretPrompt}`, + }), + ), + ).not.toThrow(secretPrompt); + }); + + it("rejects multiple Cursor JSON envelopes", () => { + const stdout = [ + JSON.stringify({ type: "result", subtype: "success", is_error: false, result: "{}" }), + JSON.stringify({ type: "result", subtype: "success", is_error: false, result: "{}" }), + ].join("\n"); + + expectMalformed(() => extractCursorJson(stdout), /produced 2 JSON envelopes/u); + }); + + it("does not preview stdout unless it looks like auth or quota output", () => { + const secretPrompt = "SOURCE_CONTEXT_SECRET"; + + expect(cursorFailureMessage(secretPrompt, "", 1)).not.toContain(secretPrompt); + expect(cursorFailureMessage("login required", "", 1)).toContain("authentication required"); + }); + + it("does not preview Cursor stderr on failure", () => { + const secretPrompt = "SOURCE_CONTEXT_SECRET"; + + expect(cursorFailureMessage("", secretPrompt, 1)).not.toContain(secretPrompt); + }); + + it("sets Cursor headless browser suppression without replacing the host environment", () => { + const previous = process.env["CURSOR_API_KEY"]; + try { + delete process.env["CURSOR_API_KEY"]; + expect(cursorEnv()).toEqual({ + NO_OPEN_BROWSER: "1", + }); + } finally { + if (previous === undefined) { + delete process.env["CURSOR_API_KEY"]; + } else { + process.env["CURSOR_API_KEY"] = previous; + } + } + }); + + it("passes CURSOR_API_KEY through the explicit Cursor env overlay when present", () => { + const previous = process.env["CURSOR_API_KEY"]; + try { + process.env["CURSOR_API_KEY"] = "cursor_test_key"; + expect(cursorEnv()).toEqual({ + NO_OPEN_BROWSER: "1", + CURSOR_API_KEY: "cursor_test_key", + }); + } finally { + if (previous === undefined) { + delete process.env["CURSOR_API_KEY"]; + } else { + process.env["CURSOR_API_KEY"] = previous; + } + } + }); + + it("uses a 300 second default timeout for Cursor", () => { + delete process.env["CLAWPATCH_CURSOR_TIMEOUT_MS"]; + delete process.env["CLAWPATCH_PROVIDER_TIMEOUT_MS"]; + + expect(cursorTimeoutMs()).toBe(300_000); + }); + + it("adds Cursor-specific strict evidence guidance for reviews", () => { + const prompt = cursorPrompt("base review prompt", reviewJsonSchema, true); + + expect(prompt).toContain("Cursor evidence rules:"); + expect(prompt).toContain("Always set evidence.quote to null"); + expect(prompt).toContain("evidence.path must exactly match an included file path"); + expect(prompt).toContain("Do not use files outside the prompt excerpts as evidence"); + expect(prompt).toContain("Every evidence item must include startLine and endLine"); + }); + + it("does not add review evidence guidance to Cursor map prompts", () => { + const prompt = cursorPrompt("base map prompt", agentMapJsonSchema, true); + + expect(prompt).not.toContain("Cursor evidence rules:"); + }); + + it("parses semver for Cursor advisory checks", () => { + expect(parseSemver("2.4.9")).toEqual([2, 4, 9]); + expect(parseSemver("v2.5")).toEqual([2, 5, 0]); + expect(parseSemver("2026.05.16-0338208")).toBeNull(); + expect(parseSemver("2.5.0-beta")).toBeNull(); + expect(parseSemver("2.5beta")).toBeNull(); + }); + + it("uses Cursor app version for date-formatted CLI builds", () => { + expect(() => assertCursorRuntimeVersionAllowed("2026.05.16-0338208", "3.2.16")).not.toThrow(); + expect(() => assertCursorRuntimeVersionAllowed("2026.05.16-0338208", "2.4.9")).toThrow( + /blocked vulnerable Cursor version/u, + ); + }); + + it("uses semver CLI versions as the authoritative runtime version", () => { + expect(() => assertCursorRuntimeVersionAllowed("2.5.0", "2.4.9")).not.toThrow(); + expect(() => assertCursorRuntimeVersionAllowed("2.4.9", "3.2.16")).toThrow( + /blocked vulnerable Cursor version/u, + ); + }); + + it("does not treat date-formatted CLI builds as advisory proof by themselves", () => { + expect(() => assertCursorRuntimeVersionAllowed("2026.05.16-0338208", null)).toThrow( + /could not verify Cursor app\/runtime version/u, + ); + }); + + it("does not treat date-formatted app builds as advisory proof", () => { + expect(() => + assertCursorRuntimeVersionAllowed("2026.05.16-0338208", "2026.05.16-0338208"), + ).toThrow(/could not verify Cursor app\/runtime version/u); + }); +}); + function schemaKeys(value: unknown): string[] { if (Array.isArray(value)) { return value.flatMap(schemaKeys); @@ -690,7 +964,11 @@ describe("parseReviewOutput", () => { const result = parseReviewOutput(output); expect(result.findings).toHaveLength(3); - expect(result.findings.map((f) => f.title)).toEqual(["first", "second", "third"]); + expect(result.findings.map((f: { title: string }) => f.title)).toEqual([ + "first", + "second", + "third", + ]); expect(result.droppedFindings).toEqual([]); expect(result.inspected.files).toEqual(["src/a.ts"]); }); @@ -709,7 +987,11 @@ describe("parseReviewOutput", () => { const result = parseReviewOutput(output); expect(result.findings).toHaveLength(3); - expect(result.findings.map((f) => f.title)).toEqual(["first", "second", "fourth"]); + expect(result.findings.map((f: { title: string }) => f.title)).toEqual([ + "first", + "second", + "fourth", + ]); expect(result.droppedFindings).toHaveLength(1); const dropped = result.droppedFindings[0]!; expect(dropped.path[0]).toBe("findings"); @@ -874,6 +1156,7 @@ describe("providerByName", () => { expect(providerByName("grok").name).toBe("grok"); expect(providerByName("opencode").name).toBe("opencode"); expect(providerByName("pi").name).toBe("pi"); + expect(providerByName("cursor").name).toBe("cursor"); }); it("still supports codex, mock, and mock-fail", () => { diff --git a/src/provider.ts b/src/provider.ts index 8692a86..37d2b35 100644 --- a/src/provider.ts +++ b/src/provider.ts @@ -14,6 +14,7 @@ import { import { extractJson, parseCodexJson, safeProviderPreview } from "./provider-json.js"; import { AgentMapOutput, + CommandResult, FixPlanOutput, ReviewFinding, RevalidateOutput, @@ -274,6 +275,9 @@ export function providerByName(name: string): Provider { if (name === "pi") { return piProvider; } + if (name === "cursor") { + return cursorProvider; + } if (name === "mock") { return mockProvider; } @@ -436,6 +440,11 @@ const grokProvider: Provider = { }; const PI_DEFAULT_TIMEOUT_MS = 180_000; +const CURSOR_DEFAULT_TIMEOUT_MS = 300_000; +const CURSOR_MIN_SAFE_APP_VERSION = "2.5.0"; +const CURSOR_DARWIN_INFO_PLIST = "/Applications/Cursor.app/Contents/Info.plist"; +const CURSOR_EXPERIMENTAL_ENV = "CLAWPATCH_CURSOR_EXPERIMENTAL"; +const CURSOR_WRITE_ENV = "CLAWPATCH_CURSOR_ALLOW_WRITE"; const piProvider: Provider = { name: "pi", @@ -472,6 +481,333 @@ const piProvider: Provider = { }, }; +const cursorProvider: Provider = { + name: "cursor", + async check(root: string): Promise { + return await checkedCursorRuntimeVersion(root); + }, + async map(root: string, prompt: string, options: ProviderOptions): Promise { + assertCursorProviderEnabled("map"); + const output = await runCursorJson(root, prompt, options, agentMapJsonSchema, true); + return parseOrThrow(agentMapOutputSchema, output, "cursor agent-map"); + }, + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { + assertCursorProviderEnabled("review"); + const output = await runCursorJson(root, prompt, options, reviewJsonSchema, true); + return parseReviewOutput(output); + }, + async fix(root: string, prompt: string, options: ProviderOptions): Promise { + assertCursorProviderEnabled("fix"); + assertCursorWriteEnabled(); + const output = await runCursorJson(root, prompt, options, fixPlanJsonSchema, false); + return parseOrThrow(fixPlanOutputSchema, output, "cursor fix-plan"); + }, + async revalidate( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { + assertCursorProviderEnabled("revalidate"); + const output = await runCursorJson(root, prompt, options, revalidateJsonSchema, true); + return parseOrThrow(revalidateOutputSchema, output, "cursor revalidate"); + }, +}; + +async function runCursorJson( + root: string, + prompt: string, + options: ProviderOptions, + schema: object, + readOnly: boolean, +): Promise { + await checkedCursorRuntimeVersion(root); + const fullPrompt = cursorPrompt(prompt, schema, readOnly); + const args = cursorAgentArgs(root, options, readOnly); + const result = await runCursorAgent(root, args, fullPrompt); + if (result.exitCode !== 0) { + throw new ClawpatchError( + cursorFailureMessage(result.stdout, result.stderr, result.exitCode), + providerExitCode(`${result.stderr}\n${result.stdout}`), + "provider-failure", + ); + } + return extractCursorJson(result.stdout); +} + +async function checkedCursorRuntimeVersion(root: string): Promise { + const result = await runCursorAgent(root, ["--version"]); + if (result.exitCode !== 0) { + throw new ClawpatchError( + "cursor-agent CLI not available or not authenticated", + 4, + "provider-auth", + ); + } + const version = result.stdout.trim(); + const appVersion = await cursorAppVersion(); + assertCursorRuntimeVersionAllowed(version, appVersion); + return appVersion === null ? version : `${version} (Cursor app ${appVersion})`; +} + +function cursorAgentArgs(root: string, options: ProviderOptions, readOnly: boolean): string[] { + const args = ["--trust", "-p", "--output-format", "json", "--workspace", root]; + if (readOnly) { + args.push("--mode", "ask"); + } + if (options.model !== null) { + args.push("--model", options.model); + } + return args; +} + +async function runCursorAgent( + root: string, + args: string[], + input?: string, +): Promise { + return await runCommandArgs("cursor-agent", args, root, input, { + trimOutput: false, + timeoutMs: cursorTimeoutMs(), + env: cursorEnv(), + }); +} + +function cursorPrompt(prompt: string, schema: object, readOnly: boolean): string { + const promptBody = readOnly + ? "READ-ONLY REVIEW MODE.\n" + + "Do not modify, create, or delete any files.\n" + + "Do not run shell commands.\n" + + "The Cursor CLI also receives --mode ask for this read-only request.\n\n" + + prompt + : prompt; + const evidenceRules = + schema === reviewJsonSchema + ? ` + +Cursor evidence rules: +- Cite only files that are explicitly included in the prompt's file blocks. +- evidence.path must exactly match an included file path. +- If you provide startLine and endLine, copy them from the included file block and keep them inside that file's shown line range. +- Do not use files outside the prompt excerpts as evidence. +- Always set evidence.quote to null. +- Every evidence item must include startLine and endLine from the shown file block.` + : ""; + return `${promptBody}${evidenceRules} + +Provider output schema: +${JSON.stringify(schema, null, 2)} + +Return only one JSON object matching the schema.`; +} + +function extractCursorJson(stdout: string): unknown { + const trimmed = stdout.trim(); + if (trimmed.length === 0) { + throw new ClawpatchError("cursor provider produced no JSON envelope", 8, "malformed-output"); + } + const envelope = parseSingleCursorEnvelope(trimmed); + if (typeof envelope !== "object" || envelope === null) { + throw new ClawpatchError( + "cursor provider produced a non-object JSON envelope", + 8, + "malformed-output", + ); + } + const record = envelope as Record; + if (record["type"] !== "result") { + throw new ClawpatchError( + "cursor provider produced a non-result JSON envelope", + 8, + "malformed-output", + ); + } + const subtype = record["subtype"]; + if (record["is_error"] === true || (subtype !== undefined && subtype !== "success")) { + const subtypePreview = typeof subtype === "string" ? subtype : "unknown"; + throw new ClawpatchError( + `cursor provider returned an error envelope (subtype=${safeProviderPreview( + subtypePreview, + 80, + )}, is_error=${String(record["is_error"])})`, + 1, + "provider-failure", + ); + } + if (typeof record["result"] !== "string" || record["result"].trim().length === 0) { + throw new ClawpatchError( + "cursor provider result envelope is missing result text", + 8, + "malformed-output", + ); + } + const parsed = extractJson(record["result"]); + if (parsed === null) { + throw new ClawpatchError( + `cursor provider result contained no Clawpatch JSON (result chars=${record["result"].length})`, + 8, + "malformed-output", + ); + } + return parsed; +} + +function parseSingleCursorEnvelope(stdout: string): unknown { + try { + return JSON.parse(stdout) as unknown; + } catch {} + const parsedLines: unknown[] = []; + for (const line of stdout.split("\n")) { + const trimmed = line.trim(); + if (trimmed.length === 0) { + continue; + } + try { + parsedLines.push(JSON.parse(trimmed) as unknown); + } catch { + throw new ClawpatchError( + "cursor provider produced malformed JSON envelope", + 8, + "malformed-output", + ); + } + } + if (parsedLines.length === 1) { + return parsedLines[0]; + } + throw new ClawpatchError( + `cursor provider produced ${parsedLines.length} JSON envelopes; expected exactly one`, + 8, + "malformed-output", + ); +} + +function cursorFailureMessage(stdout: string, stderr: string, exitCode: number | null): string { + const combined = `${stderr}\n${stdout}`; + if (/auth|login|not authenticated|keychain|unauthorized/iu.test(combined)) { + return "cursor provider failed: authentication required or unavailable"; + } + if (/quota|rate.?limit/iu.test(combined)) { + return "cursor provider failed: quota or rate limit"; + } + return `cursor provider failed with exit code ${exitCode ?? "unknown"}`; +} + +function assertCursorProviderEnabled(operation: string): void { + if (process.env[CURSOR_EXPERIMENTAL_ENV] === "1") { + return; + } + throw new ClawpatchError( + `cursor provider ${operation} is experimental and disabled by default; set ${CURSOR_EXPERIMENTAL_ENV}=1 after completing local HITL verification`, + 2, + "unsupported-provider", + ); +} + +function assertCursorWriteEnabled(): void { + if (process.env[CURSOR_WRITE_ENV] === "1") { + return; + } + throw new ClawpatchError( + `cursor provider fix is disabled until write-mode HITL verification passes; set ${CURSOR_WRITE_ENV}=1 only in an isolated checkout`, + 2, + "unsupported-provider", + ); +} + +function cursorTimeoutMs(): number { + const raw = + process.env["CLAWPATCH_CURSOR_TIMEOUT_MS"] ?? process.env["CLAWPATCH_PROVIDER_TIMEOUT_MS"]; + if (raw === undefined) { + return CURSOR_DEFAULT_TIMEOUT_MS; + } + const parsed = Number(raw); + return Number.isFinite(parsed) && parsed > 0 ? parsed : CURSOR_DEFAULT_TIMEOUT_MS; +} + +function cursorEnv(): NodeJS.ProcessEnv { + const apiKey = process.env["CURSOR_API_KEY"]; + return { + NO_OPEN_BROWSER: "1", + ...(apiKey === undefined ? {} : { CURSOR_API_KEY: apiKey }), + }; +} + +async function cursorAppVersion(): Promise { + if (process.platform !== "darwin") { + return null; + } + const plist = await readFile(CURSOR_DARWIN_INFO_PLIST, "utf8").catch(() => null); + if (plist === null) { + return null; + } + const match = + /CFBundleShortVersionString<\/key>\s*([^<]+)<\/string>/u.exec(plist) ?? + /CFBundleVersion<\/key>\s*([^<]+)<\/string>/u.exec(plist); + return match?.[1]?.trim() ?? null; +} + +function assertCursorRuntimeVersionAllowed(cliVersion: string, appVersion: string | null): void { + const parsedCli = parseSemver(cliVersion); + if (parsedCli !== null) { + assertCursorVersionAllowed(cliVersion, parsedCli); + return; + } + if (isCursorDateBuildVersion(cliVersion) && appVersion !== null) { + const parsedApp = parseSemver(appVersion); + if (parsedApp !== null) { + assertCursorVersionAllowed(appVersion, parsedApp); + return; + } + } + throw new ClawpatchError( + "cursor provider could not verify Cursor app/runtime version for CVE-2026-26268 / GHSA-8pcm-8jpx-hv8r", + 4, + "provider-auth", + ); +} + +function assertCursorVersionAllowed(version: string, parsed: [number, number, number]): void { + const minimum = parseSemver(CURSOR_MIN_SAFE_APP_VERSION); + if (minimum === null || compareSemver(parsed, minimum) >= 0) { + return; + } + throw new ClawpatchError( + `cursor provider blocked vulnerable Cursor version ${version}; upgrade to ${CURSOR_MIN_SAFE_APP_VERSION} or newer for CVE-2026-26268 / GHSA-8pcm-8jpx-hv8r`, + 4, + "provider-auth", + ); +} + +function isCursorDateBuildVersion(version: string): boolean { + return /^\d{4}\.\d{2}\.\d{2}(?:[-+].*)?$/u.test(version.trim()); +} + +function parseSemver(version: string): [number, number, number] | null { + const trimmed = version.trim(); + if (isCursorDateBuildVersion(trimmed)) { + return null; + } + const match = /^v?(\d+)\.(\d+)(?:\.(\d+))?$/u.exec(trimmed); + if (match === null) { + return null; + } + return [Number(match[1]), Number(match[2]), Number(match[3] ?? "0")]; +} + +function compareSemver(left: [number, number, number], right: [number, number, number]): number { + for (let index = 0; index < left.length; index += 1) { + const diff = left[index]! - right[index]!; + if (diff !== 0) { + return diff; + } + } + return 0; +} + async function runPiJson( root: string, prompt: string, @@ -727,7 +1063,10 @@ function firstPromptFileWith(prompt: string, marker: string): string | null { if (newline === -1) { continue; } - const path = block.slice(0, newline).trim(); + const path = block + .slice(0, newline) + .replace(/ \([^)]*\)$/u, "") + .trim(); const contents = block.slice(newline + 1); if (path.length > 0 && contents.includes(marker)) { return path; @@ -1384,9 +1723,17 @@ export const __testing = { addCodexSandboxArgs, buildAcpxJsonArgs, codexFailureMessage, + cursorAgentArgs, + cursorEnv, + cursorFailureMessage, + cursorPrompt, + cursorTimeoutMs, + extractCursorJson, extractAcpxJson, parseAcpxJsonOutput, extractOpencodeJson, + assertCursorRuntimeVersionAllowed, + parseSemver, formatZodError, formatZodIssue, parseAcpxAgent, diff --git a/src/review-validation.test.ts b/src/review-validation.test.ts index 48e4c80..1ad6d17 100644 --- a/src/review-validation.test.ts +++ b/src/review-validation.test.ts @@ -38,6 +38,51 @@ describe("validateReviewOutput", () => { ).resolves.toMatchObject({ findings: [{ title: "Bug" }] }); }); + it("accepts evidence from linked tests included in review context", async () => { + const root = await fixtureRoot("clawpatch-review-validation-test-file-"); + await writeFixture(root, "src/index.ts", "const value = 'safe';\n"); + await writeFixture(root, "src/index.test.ts", "const value = 'TODO_BUG';\n"); + + await expect( + validateReviewOutput( + root, + { + ...feature("src/index.ts"), + tests: [{ path: "src/index.test.ts", command: null }], + }, + defaultConfig(), + manifest("src/index.test.ts", { role: "test" }), + output("src/index.test.ts"), + ), + ).resolves.toMatchObject({ findings: [{ title: "Bug" }] }); + }); + + it("accepts evidence from files selected after duplicate prompt refs are skipped", async () => { + const root = await fixtureRoot("clawpatch-review-validation-duplicate-context-"); + await writeFixture(root, "src/index.ts", "const value = 'safe';\n"); + await writeFixture(root, "src/context-one.ts", "const value = 'safe';\n"); + await writeFixture(root, "src/context-two.ts", "const value = 'TODO_BUG';\n"); + const config = defaultConfig(); + config.review.maxContextFiles = 2; + + await expect( + validateReviewOutput( + root, + { + ...feature("src/index.ts"), + contextFiles: [ + { path: "src/index.ts", reason: "duplicate owned file" }, + { path: "src/context-one.ts", reason: "context" }, + { path: "src/context-two.ts", reason: "context" }, + ], + }, + config, + manifest("src/context-two.ts", { role: "context" }), + output("src/context-two.ts"), + ), + ).resolves.toMatchObject({ findings: [{ title: "Bug" }] }); + }); + it("rejects evidence for files that were not included in review context", async () => { const root = await fixtureRoot("clawpatch-review-validation-path-"); await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); @@ -104,6 +149,38 @@ describe("validateReviewOutput", () => { ).rejects.toMatchObject({ code: "malformed-output" }); }); + it("rejects line-only evidence outside the included prompt range", async () => { + const root = await fixtureRoot("clawpatch-review-validation-prompt-range-"); + await writeFixture(root, "src/index.ts", "const first = 'safe';\nconst second = 'TODO_BUG';\n"); + + await expect( + validateReviewOutput( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts", { + includedLineRanges: [{ startLine: 1, endLine: 1 }], + }), + output("src/index.ts", { startLine: 2, endLine: 2, quote: null }), + ), + ).rejects.toMatchObject({ code: "malformed-output" }); + }); + + it("rejects evidence without a line range or quote", async () => { + const root = await fixtureRoot("clawpatch-review-validation-empty-evidence-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + await expect( + validateReviewOutput( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + output("src/index.ts", { startLine: null, endLine: null, quote: null }), + ), + ).rejects.toMatchObject({ code: "malformed-output" }); + }); + it("rejects evidence that only exists beyond the truncated prompt text", async () => { const root = await fixtureRoot("clawpatch-review-validation-truncated-"); await writeFixture(root, "src/index.ts", `${"a".repeat(24_000)}\nconst value = 'TODO_TAIL';\n`); @@ -249,7 +326,11 @@ function feature(path: string): FeatureRecord { function output( path: string, - evidence: { startLine?: number | null; endLine?: number | null; quote?: string | null } = {}, + evidence: { + startLine?: number | null; + endLine?: number | null; + quote?: string | null; + } = {}, ): ReviewOutput { return { findings: [ @@ -261,10 +342,10 @@ function output( evidence: [ { path, - startLine: evidence.startLine ?? 1, - endLine: evidence.endLine ?? 1, + startLine: "startLine" in evidence ? evidence.startLine! : 1, + endLine: "endLine" in evidence ? evidence.endLine! : 1, symbol: null, - quote: evidence.quote ?? "TODO_BUG", + quote: "quote" in evidence ? evidence.quote! : "TODO_BUG", }, ], reasoning: "Reason.", @@ -281,18 +362,29 @@ function output( function manifest( path: string, - options: { truncated?: boolean; readable?: boolean } = {}, + options: { + truncated?: boolean; + readable?: boolean; + includedLineRanges?: Array<{ startLine: number; endLine: number }>; + role?: "owned" | "context" | "test"; + } = {}, ): ReviewPromptManifest { const readable = options.readable ?? true; + const includedLineRanges = readable + ? (options.includedLineRanges ?? [{ startLine: 1, endLine: 1 }]) + : []; return { maxOwnedFiles: defaultConfig().review.maxOwnedFiles, maxContextFiles: defaultConfig().review.maxContextFiles, includedFiles: [ { path, - role: "owned", + role: options.role ?? "owned", bytes: readable ? 1 : 0, includedBytes: readable ? 1 : 0, + includedStartLine: includedLineRanges[0]?.startLine ?? null, + includedEndLine: includedLineRanges.at(-1)?.endLine ?? null, + includedLineRanges, truncated: options.truncated ?? false, readable, skippedReason: readable ? null : "unreadable", diff --git a/src/review-validation.ts b/src/review-validation.ts index aa20d61..446acab 100644 --- a/src/review-validation.ts +++ b/src/review-validation.ts @@ -12,7 +12,9 @@ export async function validateReviewOutput( manifest: ReviewPromptManifest, output: ReviewOutput, ): Promise { - const included = includedReviewPaths(feature, config); + void feature; + void config; + const included = includedReviewPaths(manifest); const promptFiles = new Map( manifest.includedFiles.map((file) => [normalizePath(file.path), file]), ); @@ -39,7 +41,9 @@ export async function validateReviewOutputPartitioned( manifest: ReviewPromptManifest, output: ReviewOutput, ): Promise<{ findings: ReviewOutput["findings"]; droppedFindings: DroppedFinding[] }> { - const included = includedReviewPaths(feature, config); + void feature; + void config; + const included = includedReviewPaths(manifest); const promptFiles = new Map( manifest.includedFiles.map((file) => [normalizePath(file.path), file]), ); @@ -85,7 +89,8 @@ async function validateFinding( throwMalformed(`evidence file was not readable in review context: ${evidence.path}`); } const contents = await fileContents(root, evidence.path, promptFile.truncated, cache); - assertLineRange(contents, evidence); + assertVerifiableEvidence(evidence); + assertLineRange(contents, evidence, promptFile); assertQuote(contents, evidence); } } @@ -103,13 +108,8 @@ function truncateValidationSample(finding: ReviewOutput["findings"][number]): st return text.length > 200 ? `${text.slice(0, 197)}...` : text; } -function includedReviewPaths(feature: FeatureRecord, config: ClawpatchConfig): Set { - return new Set( - [ - ...feature.ownedFiles.slice(0, config.review.maxOwnedFiles).map((ref) => ref.path), - ...feature.contextFiles.slice(0, config.review.maxContextFiles).map((ref) => ref.path), - ].map(normalizePath), - ); +function includedReviewPaths(manifest: ReviewPromptManifest): Set { + return new Set(manifest.includedFiles.map((file) => normalizePath(file.path))); } function assertIncludedPath(path: string, included: ReadonlySet, label: string): void { @@ -120,6 +120,16 @@ function assertIncludedPath(path: string, included: ReadonlySet, label: } } +function assertVerifiableEvidence( + evidence: ReviewOutput["findings"][number]["evidence"][number], +): void { + const hasLineRange = evidence.startLine !== null || evidence.endLine !== null; + const hasQuote = evidence.quote !== null && evidence.quote.trim().length > 0; + if (!hasLineRange && !hasQuote) { + throwMalformed(`evidence must include a line range or quote: ${evidence.path}`); + } +} + function assertSafePath(path: string, label: string): void { const normalized = normalizePath(path); if (normalized.startsWith("../") || isAbsolute(normalized)) { @@ -160,6 +170,7 @@ async function readIncludedFile(root: string, path: string, truncated: boolean): function assertLineRange( contents: string, evidence: ReviewOutput["findings"][number]["evidence"][number], + promptFile: ReviewPromptManifest["includedFiles"][number], ): void { const { startLine, endLine } = evidence; if (startLine === null && endLine === null) { @@ -177,6 +188,19 @@ function assertLineRange( `evidence line range exceeds file length: ${evidence.path}:${startLine}-${endLine}`, ); } + if (!rangeIncluded(startLine, endLine, promptFile.includedLineRanges)) { + throwMalformed( + `evidence line range was not included in review context: ${evidence.path}:${startLine}-${endLine}`, + ); + } +} + +function rangeIncluded( + startLine: number, + endLine: number, + ranges: readonly { startLine: number; endLine: number }[], +): boolean { + return ranges.some((range) => startLine >= range.startLine && endLine <= range.endLine); } function reviewLineCount(contents: string): number {