diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c9663c..e4a5926 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Added review prompt provenance and budget accounting for included files, omitted files, prompt bytes, and approximate tokens. - Added retries for transient acpx JSON review failures via `--prompt-retries` and `CLAWPATCH_REVIEW_RETRIES`, thanks @coletebou. - Hardened review ingestion so provider findings must cite included files with valid line ranges and matching evidence quotes. +- Fixed provider review to preserve valid sibling findings when per-finding schema or evidence validation fails, recording drops in `run.errors` as non-fatal `schema-drop` or `validation-drop` entries, thanks @coletebou. - Improved provider schema validation failures so `run.errors[].message` shows compact one-line Zod issue summaries, thanks @coletebou. - Added `total` and `results` aliases on `clawpatch report --json` output while keeping the legacy `findings` count, thanks @coletebou. - Fixed `clawpatch open-pr` so repositories without default-branch metadata use a dedicated patch branch and let GitHub choose the PR base. diff --git a/src/app.ts b/src/app.ts index f9417c9..00ce8d9 100644 --- a/src/app.ts +++ b/src/app.ts @@ -22,7 +22,7 @@ import { stableId, runId } from "./id.js"; import { mapWithSource } from "./agent-mapper.js"; import { mapFeatures } from "./mapper.js"; import { emitProgress } from "./progress.js"; -import { providerByName } from "./provider.js"; +import { providerByName, type DroppedFinding } from "./provider.js"; import { buildFixPrompt, buildReviewPromptBundle, buildRevalidatePrompt } from "./prompt.js"; import type { ReviewMode, ReviewPromptManifest } from "./prompt.js"; import { @@ -32,7 +32,7 @@ import { renderFindingDetail, renderReport, } from "./reporting.js"; -import { validateReviewOutput } from "./review-validation.js"; +import { validateReviewOutputPartitioned } from "./review-validation.js"; import { filterFeaturesByChangedFiles, filterFeaturesByProject, @@ -343,6 +343,16 @@ export async function reviewCommand( allowNonPendingFeatureReview: stringFlag(flags, "feature") !== undefined, }); findingIds.push(...reviewed.findingIds); + for (const dropped of reviewed.droppedFindings) { + const code = dropped.layer === "validation" ? "validation-drop" : "schema-drop"; + errors.push({ + message: + `dropped 1 finding from feature ${feature.featureId} ` + + `at ${dropped.path.join(".")}: ${dropped.message}`, + code, + error: null, + }); + } } catch (error: unknown) { errors.push({ message: error instanceof Error ? error.message : String(error), @@ -353,7 +363,10 @@ export async function reviewCommand( } }), ); - if (errors.length > 0) { + const fatalErrors = errors.filter( + (entry) => entry.code !== "schema-drop" && entry.code !== "validation-drop", + ); + if (fatalErrors.length > 0) { await writeRun(loaded.paths, { ...run, status: "failed", @@ -363,15 +376,16 @@ export async function reviewCommand( }); emitProgress(context, "review", "failed", { run: currentRunId, - errors: errors.length, + errors: fatalErrors.length, }); - throw errors[0]?.error ?? new ClawpatchError("review failed", 1, "review-failed"); + throw fatalErrors[0]?.error ?? new ClawpatchError("review failed", 1, "review-failed"); } const finished: RunRecord = { ...run, status: "completed", finishedAt: nowIso(), findingIds, + errors: errors.map(({ message, code }) => ({ message, code })), }; await writeRun(loaded.paths, finished); emitProgress(context, "review", "done", { @@ -638,7 +652,9 @@ type ReviewFeatureOptions = { allowNonPendingFeatureReview: boolean; }; -async function reviewFeature(options: ReviewFeatureOptions): Promise<{ findingIds: string[] }> { +async function reviewFeature( + options: ReviewFeatureOptions, +): Promise<{ findingIds: string[]; droppedFindings: DroppedFinding[] }> { const { context, loaded, @@ -688,21 +704,27 @@ async function reviewFeature(options: ReviewFeatureOptions): Promise<{ findingId index, total, }); + // Layer 1 drops: per-finding schema violations from parseReviewOutput. + const droppedFindings: DroppedFinding[] = [...providerOutput.droppedFindings]; const reviewOutput = { - ...providerOutput, findings: reviewFindingsForMode(providerOutput.findings, mode).slice( 0, config.review.maxFindingsPerFeature, ), + inspected: providerOutput.inspected, }; - const output = await validateReviewOutput( + // Layer 2 drops: per-finding evidence validation (line ranges, quotes, + // included files). Partition so a single bad finding doesn't lose the + // whole feature. + const validated = await validateReviewOutputPartitioned( loaded.root, lockedFeature, config, reviewPrompt.manifest, reviewOutput, ); - const records = output.findings.map((finding) => + droppedFindings.push(...validated.droppedFindings); + const records = validated.findings.map((finding) => findingFromOutput(finding, lockedFeature.featureId, currentRunId), ); const findingIds: string[] = []; @@ -743,7 +765,7 @@ async function reviewFeature(options: ReviewFeatureOptions): Promise<{ findingId findings: findingIds.length, elapsed: `${Math.round((Date.now() - started) / 1000)}s`, }); - return { findingIds }; + return { findingIds, droppedFindings }; } catch (error: unknown) { const message = error instanceof Error ? error.message : String(error); if (locked !== null) { diff --git a/src/provider.test.ts b/src/provider.test.ts index 5be14fc..a6f4610 100644 --- a/src/provider.test.ts +++ b/src/provider.test.ts @@ -19,11 +19,29 @@ const { parseAcpxJsonOutput, parseAcpxAgent, parseCodexJson, + parseReviewOutput, parseOrThrow, piThinkingLevel, providerJsonSchema, } = __testing; +function makeFinding(overrides: Record = {}): Record { + return { + title: "Sample finding", + category: "bug", + severity: "medium", + confidence: "high", + evidence: [], + reasoning: "Sample reasoning.", + reproduction: null, + recommendation: "Sample recommendation.", + whyTestsDoNotAlreadyCoverThis: "Tests do not encode this case.", + suggestedRegressionTest: null, + minimumFixScope: "Touch only the offending line.", + ...overrides, + }; +} + function withEnv(name: string, value: string | undefined, fn: () => void): void { const previous = process.env[name]; if (value === undefined) { @@ -658,6 +676,85 @@ describe("extractOpencodeJson", () => { }); }); +describe("parseReviewOutput", () => { + it("preserves all findings when every finding is valid (fast path)", () => { + const output = { + findings: [ + makeFinding({ title: "first", category: "bug" }), + makeFinding({ title: "second", category: "security" }), + makeFinding({ title: "third", category: "performance" }), + ], + inspected: { files: ["src/a.ts"], symbols: [], notes: [] }, + }; + + const result = parseReviewOutput(output); + + expect(result.findings).toHaveLength(3); + expect(result.findings.map((f) => f.title)).toEqual(["first", "second", "third"]); + expect(result.droppedFindings).toEqual([]); + expect(result.inspected.files).toEqual(["src/a.ts"]); + }); + + it("keeps valid siblings when one finding has an invalid category", () => { + const output = { + findings: [ + makeFinding({ title: "first", category: "bug" }), + makeFinding({ title: "second", category: "security" }), + makeFinding({ title: "third", category: "quality" }), // invalid enum value + makeFinding({ title: "fourth", category: "performance" }), + ], + inspected: { files: [], symbols: [], notes: [] }, + }; + + const result = parseReviewOutput(output); + + expect(result.findings).toHaveLength(3); + expect(result.findings.map((f) => f.title)).toEqual(["first", "second", "fourth"]); + expect(result.droppedFindings).toHaveLength(1); + const dropped = result.droppedFindings[0]!; + expect(dropped.path[0]).toBe("findings"); + expect(dropped.path[1]).toBe(2); + expect(dropped.path).toContain("category"); + expect(dropped.message).toBeTypeOf("string"); + expect(dropped.sample).toContain("quality"); + expect(dropped.sample.length).toBeLessThanOrEqual(200); + }); + + it("throws ClawpatchError when findings is not an array", () => { + const output = { + findings: "not-an-array", + inspected: { files: [], symbols: [], notes: [] }, + }; + + try { + parseReviewOutput(output); + } catch (err) { + expect(err).toBeInstanceOf(ClawpatchError); + expect((err as ClawpatchError).code).toBe("malformed-output"); + expect((err as ClawpatchError).exitCode).toBe(8); + expect((err as Error).message).toMatch(/findings/u); + return; + } + throw new Error("expected parseReviewOutput to throw on non-array findings"); + }); + + it("truncates oversized samples to 200 characters", () => { + const longTitle = "x".repeat(500); + const output = { + findings: [ + makeFinding({ title: longTitle, category: "quality" }), // invalid → dropped + ], + inspected: { files: [], symbols: [], notes: [] }, + }; + + const result = parseReviewOutput(output); + + expect(result.droppedFindings).toHaveLength(1); + expect(result.droppedFindings[0]!.sample.length).toBeLessThanOrEqual(200); + expect(result.droppedFindings[0]!.sample.endsWith("...")).toBe(true); + }); +}); + function makeBadReview(overrides: Record = {}): unknown { return { findings: [ diff --git a/src/provider.ts b/src/provider.ts index 3b4efe6..8692a86 100644 --- a/src/provider.ts +++ b/src/provider.ts @@ -1,7 +1,7 @@ import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import type { ZodError, ZodIssue, ZodType } from "zod"; +import { z, type ZodError, type ZodIssue, type ZodType } from "zod"; import { runCommandArgs } from "./exec.js"; import { ClawpatchError } from "./errors.js"; import { @@ -15,10 +15,12 @@ import { extractJson, parseCodexJson, safeProviderPreview } from "./provider-jso import { AgentMapOutput, FixPlanOutput, - ReviewOutput, + ReviewFinding, RevalidateOutput, agentMapOutputSchema, fixPlanOutputSchema, + reviewFindingSchema, + reviewInspectedSchema, reviewOutputSchema, revalidateOutputSchema, type ReasoningEffort, @@ -149,11 +151,109 @@ export type ProviderOptions = { reasoningEffort: ReasoningEffort | null; skipGitRepoCheck: boolean; }; + +/** + * One review finding rejected by per-finding validation. `layer` records + * which gate dropped it: `schema` is the per-finding `reviewFindingSchema` + * Zod parse, `validation` is the evidence/quote/line-range check in + * `validateReviewOutputPartitioned`. Operators can use `layer` to tell + * "the model emitted nonsense for this finding" (schema) apart from + * "the model cited a real-looking finding but pointed at the wrong file + * or quoted text that isn't there" (validation). + */ +export type DroppedFinding = { + path: (string | number)[]; + message: string; + sample: string; + layer?: "schema" | "validation"; +}; + +export type PartitionedReviewOutput = { + findings: ReviewFinding[]; + inspected: z.infer; + droppedFindings: DroppedFinding[]; +}; + +const reviewContainerSchema = z.object({ + findings: z.array(z.unknown()), + inspected: reviewInspectedSchema, +}); + +function truncateSample(value: unknown): string { + let text: string; + try { + text = JSON.stringify(value); + } catch { + text = String(value); + } + if (text === undefined) { + text = String(value); + } + return text.length > 200 ? `${text.slice(0, 197)}...` : text; +} + +export function parseReviewOutput(output: unknown): PartitionedReviewOutput { + // Fast path: whole-document parse on success. + const whole = reviewOutputSchema.safeParse(output); + if (whole.success) { + return { + findings: whole.data.findings, + inspected: whole.data.inspected, + droppedFindings: [], + }; + } + + // Fallback: validate container shape, partition findings element-by-element. + const container = reviewContainerSchema.safeParse(output); + if (!container.success) { + // Container shape is fundamentally wrong (e.g. findings is not an array, or + // inspected is missing). Preserve today's throw contract via ClawpatchError + // so callers see a single clear malformed-output failure. + const issue = container.error.issues[0]; + const where = + issue?.path + .map((segment) => (typeof segment === "symbol" ? segment.toString() : segment)) + .join(".") ?? ""; + const detail = issue?.message ?? "invalid review output shape"; + throw new ClawpatchError( + `provider review output is malformed at ${where}: ${detail}`, + 8, + "malformed-output", + ); + } + + const validFindings: ReviewFinding[] = []; + const droppedFindings: DroppedFinding[] = []; + container.data.findings.forEach((candidate, idx) => { + const result = reviewFindingSchema.safeParse(candidate); + if (result.success) { + validFindings.push(result.data); + return; + } + const issue = result.error.issues[0]; + const issuePath = (issue?.path ?? []).map((segment) => + typeof segment === "symbol" ? segment.toString() : segment, + ); + droppedFindings.push({ + path: ["findings", idx, ...issuePath], + message: issue?.message ?? "invalid finding shape", + sample: truncateSample(candidate), + layer: "schema", + }); + }); + + return { + findings: validFindings, + inspected: container.data.inspected, + droppedFindings, + }; +} + export type Provider = { name: string; check(root: string): Promise; map(root: string, prompt: string, options: ProviderOptions): Promise; - review(root: string, prompt: string, options: ProviderOptions): Promise; + review(root: string, prompt: string, options: ProviderOptions): Promise; fix(root: string, prompt: string, options: ProviderOptions): Promise; revalidate(root: string, prompt: string, options: ProviderOptions): Promise; }; @@ -196,9 +296,13 @@ const codexProvider: Provider = { const output = await runCodexJson(root, prompt, options, agentMapJsonSchema); return parseOrThrow(agentMapOutputSchema, output, "codex agent-map"); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runCodexJson(root, prompt, options, reviewJsonSchema); - return parseOrThrow(reviewOutputSchema, output, "codex review"); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runCodexJson(root, prompt, options, fixPlanJsonSchema, "workspace-write"); @@ -227,9 +331,13 @@ const opencodeProvider: Provider = { const output = await runOpencodeJson(root, prompt, options.model, agentMapJsonSchema, true); return parseOrThrow(agentMapOutputSchema, output, "opencode agent-map"); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runOpencodeJson(root, prompt, options.model, reviewJsonSchema, true); - return parseOrThrow(reviewOutputSchema, output, "opencode review"); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runOpencodeJson(root, prompt, options.model, fixPlanJsonSchema, false); @@ -267,9 +375,13 @@ const acpxProvider: Provider = { parseOrThrow(agentMapOutputSchema, output, "acpx agent-map"), ); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { return runAcpxJson(root, prompt, options.model, reviewJsonSchema, "read", (output) => - parseOrThrow(reviewOutputSchema, output, "acpx review"), + parseReviewOutput(output), ); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { @@ -301,9 +413,13 @@ const grokProvider: Provider = { const output = await runGrokJson(root, prompt, options.model, agentMapJsonSchema, true); return parseOrThrow(agentMapOutputSchema, output, "grok agent-map"); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runGrokJson(root, prompt, options.model, reviewJsonSchema, true); - return parseOrThrow(reviewOutputSchema, output, "grok review"); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runGrokJson(root, prompt, options.model, fixPlanJsonSchema, false); @@ -334,9 +450,13 @@ const piProvider: Provider = { const output = await runPiJson(root, prompt, options, agentMapJsonSchema, true); return parseOrThrow(agentMapOutputSchema, output, "pi map"); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runPiJson(root, prompt, options, reviewJsonSchema, true); - return parseOrThrow(reviewOutputSchema, output, "pi review"); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runPiJson(root, prompt, options, fixPlanJsonSchema, false); @@ -480,9 +600,13 @@ const mockProvider: Provider = { notes: ["mock agent map"], }; }, - async review(_root: string, prompt: string): Promise { + async review(_root: string, prompt: string): Promise { if (!prompt.includes("TODO_BUG") && !prompt.includes("BUG:")) { - return { findings: [], inspected: { files: [], symbols: [], notes: ["mock clean"] } }; + return { + findings: [], + inspected: { files: [], symbols: [], notes: ["mock clean"] }, + droppedFindings: [], + }; } const evidencePath = prompt.includes("BAD_EVIDENCE") ? "src/not-included.ts" @@ -536,6 +660,7 @@ const mockProvider: Provider = { }, ], inspected: { files: [evidencePath], symbols: [], notes: ["mock mixed findings"] }, + droppedFindings: [], }; } return { @@ -564,6 +689,7 @@ const mockProvider: Provider = { }, ], inspected: { files: [evidencePath], symbols: [], notes: ["mock finding"] }, + droppedFindings: [], }; }, async fix(): Promise { @@ -618,7 +744,7 @@ const mockFailProvider: Provider = { async map(): Promise { throw new ClawpatchError("mock map failure", 1, "mock-failure"); }, - async review(): Promise { + async review(): Promise { throw new ClawpatchError("mock review failure", 1, "mock-failure"); }, async fix(): Promise { @@ -1265,6 +1391,7 @@ export const __testing = { formatZodIssue, parseAcpxAgent, parseCodexJson, + parseReviewOutput, parseOrThrow, piThinkingLevel, providerJsonSchema, diff --git a/src/review-validation.test.ts b/src/review-validation.test.ts index 27db552..48e4c80 100644 --- a/src/review-validation.test.ts +++ b/src/review-validation.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from "vitest"; import { defaultConfig } from "./config.js"; import type { ReviewPromptManifest } from "./prompt.js"; -import { validateReviewOutput } from "./review-validation.js"; +import { validateReviewOutput, validateReviewOutputPartitioned } from "./review-validation.js"; import { fixtureRoot, writeFixture } from "./test-helpers.js"; import type { FeatureRecord, ReviewOutput } from "./types.js"; @@ -120,6 +120,108 @@ describe("validateReviewOutput", () => { }); }); +describe("validateReviewOutputPartitioned", () => { + it("returns all findings when every finding's evidence is valid", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-ok-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + output("src/index.ts"), + ); + + expect(result.findings).toHaveLength(1); + expect(result.findings[0]!.title).toBe("Bug"); + expect(result.droppedFindings).toEqual([]); + }); + + it("keeps valid siblings when one finding has a stale quote", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-quote-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + const goodFinding = output("src/index.ts").findings[0]!; + const badFinding: ReviewOutput["findings"][number] = { + ...goodFinding, + title: "Stale quote", + evidence: [ + { + path: "src/index.ts", + startLine: 1, + endLine: 1, + symbol: null, + quote: "this text does not exist", + }, + ], + }; + const provider: ReviewOutput = { + findings: [goodFinding, badFinding, { ...goodFinding, title: "Also good" }], + inspected: { files: ["src/index.ts"], symbols: [], notes: [] }, + }; + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + provider, + ); + + expect(result.findings.map((f) => f.title)).toEqual(["Bug", "Also good"]); + expect(result.droppedFindings).toHaveLength(1); + const dropped = result.droppedFindings[0]!; + expect(dropped.path).toEqual(["findings", 1]); + expect(dropped.layer).toBe("validation"); + expect(dropped.message).toMatch(/quote/iu); + expect(dropped.sample.length).toBeLessThanOrEqual(200); + }); + + it("drops findings whose evidence file was not included in the prompt", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-included-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + await writeFixture(root, "src/other.ts", "const value = 'TODO_BUG';\n"); + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + output("src/other.ts"), + ); + + expect(result.findings).toEqual([]); + expect(result.droppedFindings).toHaveLength(1); + expect(result.droppedFindings[0]!.layer).toBe("validation"); + expect(result.droppedFindings[0]!.message).toMatch(/not included/iu); + }); + + it("drops findings with empty evidence arrays without throwing", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-empty-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + const goodFinding = output("src/index.ts").findings[0]!; + const provider: ReviewOutput = { + findings: [{ ...goodFinding, title: "No evidence", evidence: [] }, goodFinding], + inspected: { files: ["src/index.ts"], symbols: [], notes: [] }, + }; + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + provider, + ); + + expect(result.findings.map((f) => f.title)).toEqual(["Bug"]); + expect(result.droppedFindings).toHaveLength(1); + expect(result.droppedFindings[0]!.path).toEqual(["findings", 0]); + expect(result.droppedFindings[0]!.message).toMatch(/no evidence/iu); + }); +}); + function feature(path: string): FeatureRecord { return { schemaVersion: 1, diff --git a/src/review-validation.ts b/src/review-validation.ts index 7566a70..aa20d61 100644 --- a/src/review-validation.ts +++ b/src/review-validation.ts @@ -2,6 +2,7 @@ import { readFile, realpath } from "node:fs/promises"; import { isAbsolute, relative, resolve } from "node:path"; import { ClawpatchError } from "./errors.js"; import { REVIEW_PROMPT_FILE_CHAR_LIMIT, type ReviewPromptManifest } from "./prompt.js"; +import type { DroppedFinding } from "./provider.js"; import { ClawpatchConfig, FeatureRecord, ReviewOutput } from "./types.js"; export async function validateReviewOutput( @@ -18,21 +19,88 @@ export async function validateReviewOutput( const cache = new Map>(); const findings = output.findings; for (const finding of findings) { - if (finding.evidence.length === 0) { - throwMalformed(`finding "${finding.title}" has no evidence`); - } - for (const evidence of finding.evidence) { - assertIncludedPath(evidence.path, included, "evidence file"); - const promptFile = promptFiles.get(normalizePath(evidence.path)); - if (promptFile === undefined || !promptFile.readable) { - throwMalformed(`evidence file was not readable in review context: ${evidence.path}`); + await validateFinding(root, finding, included, promptFiles, cache); + } + return { ...output, findings }; +} + +/** + * Same evidence validation as {@link validateReviewOutput}, but runs + * per-finding and partitions failures instead of throwing on the first + * bad finding. Callers receive only the validated findings plus a list + * of {@link DroppedFinding}s describing why each rejection occurred. The + * caller is then free to surface drops as non-fatal run errors instead + * of losing the whole feature. + */ +export async function validateReviewOutputPartitioned( + root: string, + feature: FeatureRecord, + config: ClawpatchConfig, + manifest: ReviewPromptManifest, + output: ReviewOutput, +): Promise<{ findings: ReviewOutput["findings"]; droppedFindings: DroppedFinding[] }> { + const included = includedReviewPaths(feature, config); + const promptFiles = new Map( + manifest.includedFiles.map((file) => [normalizePath(file.path), file]), + ); + const cache = new Map>(); + const validFindings: ReviewOutput["findings"] = []; + const droppedFindings: DroppedFinding[] = []; + output.findings.forEach(() => undefined); + for (let idx = 0; idx < output.findings.length; idx += 1) { + const finding = output.findings[idx]!; + try { + await validateFinding(root, finding, included, promptFiles, cache); + validFindings.push(finding); + } catch (error: unknown) { + if (error instanceof ClawpatchError && error.code === "malformed-output") { + droppedFindings.push({ + path: ["findings", idx], + message: error.message.replace(/^malformed provider review output:\s*/u, ""), + sample: truncateValidationSample(finding), + layer: "validation", + }); + continue; } - const contents = await fileContents(root, evidence.path, promptFile.truncated, cache); - assertLineRange(contents, evidence); - assertQuote(contents, evidence); + throw error; } } - return { ...output, findings }; + return { findings: validFindings, droppedFindings }; +} + +async function validateFinding( + root: string, + finding: ReviewOutput["findings"][number], + included: ReadonlySet, + promptFiles: Map, + cache: Map>, +): Promise { + if (finding.evidence.length === 0) { + throwMalformed(`finding "${finding.title}" has no evidence`); + } + for (const evidence of finding.evidence) { + assertIncludedPath(evidence.path, included, "evidence file"); + const promptFile = promptFiles.get(normalizePath(evidence.path)); + if (promptFile === undefined || !promptFile.readable) { + throwMalformed(`evidence file was not readable in review context: ${evidence.path}`); + } + const contents = await fileContents(root, evidence.path, promptFile.truncated, cache); + assertLineRange(contents, evidence); + assertQuote(contents, evidence); + } +} + +function truncateValidationSample(finding: ReviewOutput["findings"][number]): string { + let text: string; + try { + text = JSON.stringify(finding); + } catch { + text = String(finding); + } + if (text === undefined) { + text = String(finding); + } + return text.length > 200 ? `${text.slice(0, 197)}...` : text; } function includedReviewPaths(feature: FeatureRecord, config: ClawpatchConfig): Set { diff --git a/src/types.ts b/src/types.ts index 70f7a78..d21df35 100644 --- a/src/types.ts +++ b/src/types.ts @@ -357,33 +357,39 @@ export const agentMapOutputSchema = z.object({ export type AgentMapOutput = z.infer; +export const reviewFindingSchema = z.object({ + title: z.string(), + category: z.enum(findingCategories), + severity: z.enum(["critical", "high", "medium", "low"]), + confidence: z.enum(["high", "medium", "low"]), + evidence: z.array(evidenceRefSchema), + reasoning: z.string(), + reproduction: z + .string() + .nullish() + .transform((v) => v ?? null), + recommendation: z.string(), + whyTestsDoNotAlreadyCoverThis: z.string(), + suggestedRegressionTest: z.string().nullable(), + minimumFixScope: z + .string() + .nullish() + .transform((v) => v ?? ""), +}); + +export type ReviewFinding = z.infer; + +export const reviewInspectedSchema = z.object({ + files: z.array(z.string()), + symbols: z.array(z.string()), + notes: z.array(z.string()), +}); + +export type ReviewInspected = z.infer; + export const reviewOutputSchema = z.object({ - findings: z.array( - z.object({ - title: z.string(), - category: z.enum(findingCategories), - severity: z.enum(["critical", "high", "medium", "low"]), - confidence: z.enum(["high", "medium", "low"]), - evidence: z.array(evidenceRefSchema), - reasoning: z.string(), - reproduction: z - .string() - .nullish() - .transform((v) => v ?? null), - recommendation: z.string(), - whyTestsDoNotAlreadyCoverThis: z.string(), - suggestedRegressionTest: z.string().nullable(), - minimumFixScope: z - .string() - .nullish() - .transform((v) => v ?? ""), - }), - ), - inspected: z.object({ - files: z.array(z.string()), - symbols: z.array(z.string()), - notes: z.array(z.string()), - }), + findings: z.array(reviewFindingSchema), + inspected: reviewInspectedSchema, }); export type ReviewOutput = z.infer;