From 5f83cd2a5deba755334ad9782fa90b7dfddb7dbf Mon Sep 17 00:00:00 2001 From: Cole Tebou Date: Sun, 17 May 2026 20:29:02 +0000 Subject: [PATCH 1/2] fix(provider): partition invalid review findings instead of discarding the whole feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a single finding in a provider review response had a `category` outside the `findingCategories` enum (e.g. a hallucinated "quality" label), `reviewOutputSchema.parse(output)` in each provider's `review()` threw and `reviewFeature`'s outer try/catch marked the whole feature errored. All sibling findings on that feature were lost. Six features in run 20260517T184420-d52a72 lost every finding this way. This change introduces `parseReviewOutput(output)` in `src/provider.ts` which: 1. Tries `reviewOutputSchema.safeParse` on the whole document (fast path; common case). 2. On per-element failure, validates the container shape loosely (findings: unknown[], inspected strict) and partitions the `findings` array element-by-element using the new `reviewFindingSchema`. Successes go through; failures are recorded as `{path, message, sample}` entries in `droppedFindings`. 3. Throws `ClawpatchError("malformed-output")` only when the container shape itself is invalid (e.g. `findings` is not an array) — preserving today's fail-loud contract for genuinely malformed provider output. `reviewFindingSchema` and `reviewInspectedSchema` are now exported from `src/types.ts` so the partitioner can reuse them; the composed `reviewOutputSchema` keeps its existing shape (no breaking change to consumers). The four provider `review()` methods (codex, opencode, acpx, grok) now call `parseReviewOutput` instead of `reviewOutputSchema.parse`. The provider `Provider.review` return type is now `PartitionedReviewOutput`, which is `ReviewOutput` plus an additive `droppedFindings` field. `reviewFeature` in `src/app.ts` threads `droppedFindings` back to the review-command worker loop, which appends each as a non-fatal `{message, code: "schema-drop"}` entry on the run's `errors[]`. The run-failure threshold is now gated on `errors.some(e => e.code !== "schema-drop")` so successful runs with dropped findings still complete (and surface the drops in `run.errors[]` for triage) instead of being silently flipped to "failed". Tests: - parseReviewOutput preserves all findings when every category is valid (fast path) - parseReviewOutput keeps valid siblings when one finding has an invalid category - parseReviewOutput throws ClawpatchError when findings is not an array - parseReviewOutput truncates oversized samples to 200 characters --- CHANGELOG.md | 1 + src/app.ts | 28 ++++++--- src/provider.test.ts | 97 ++++++++++++++++++++++++++++ src/provider.ts | 147 ++++++++++++++++++++++++++++++++++++++----- src/types.ts | 46 ++++++++------ 5 files changed, 277 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 570182c..02206b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Added `clawpatch open-pr --patch ` to turn an applied patch attempt into an explicit GitHub pull request. - Added review prompt provenance and budget accounting for included files, omitted files, prompt bytes, and approximate tokens. - Hardened review ingestion so provider findings must cite included files with valid line ranges and matching evidence quotes. +- Fixed provider review to preserve valid findings when a sibling finding fails per-finding schema validation. Previously a single bad enum value caused the whole feature to lose every finding via a strict `reviewOutputSchema.parse` throw. Dropped findings are now recorded in `run.errors` with `code: "schema-drop"` and do not fail the run. - Fixed `clawpatch open-pr` so repositories without default-branch metadata use a dedicated patch branch and let GitHub choose the PR base. - Fixed `clawpatch open-pr` retries to push the recorded patch commit instead of any later local branch tip. - Fixed first-time `clawpatch open-pr` branch creation to start from the recorded patch base. diff --git a/src/app.ts b/src/app.ts index 05be074..44278a1 100644 --- a/src/app.ts +++ b/src/app.ts @@ -22,7 +22,7 @@ import { stableId, runId } from "./id.js"; import { mapWithSource } from "./agent-mapper.js"; import { mapFeatures } from "./mapper.js"; import { emitProgress } from "./progress.js"; -import { providerByName } from "./provider.js"; +import { providerByName, type DroppedFinding } from "./provider.js"; import { buildFixPrompt, buildReviewPromptBundle, buildRevalidatePrompt } from "./prompt.js"; import type { ReviewMode, ReviewPromptManifest } from "./prompt.js"; import { @@ -343,6 +343,15 @@ export async function reviewCommand( allowNonPendingFeatureReview: stringFlag(flags, "feature") !== undefined, }); findingIds.push(...reviewed.findingIds); + for (const dropped of reviewed.droppedFindings) { + errors.push({ + message: + `dropped 1 finding from feature ${feature.featureId} ` + + `at ${dropped.path.join(".")}: ${dropped.message}`, + code: "schema-drop", + error: null, + }); + } } catch (error: unknown) { errors.push({ message: error instanceof Error ? error.message : String(error), @@ -353,7 +362,8 @@ export async function reviewCommand( } }), ); - if (errors.length > 0) { + const fatalErrors = errors.filter((entry) => entry.code !== "schema-drop"); + if (fatalErrors.length > 0) { await writeRun(loaded.paths, { ...run, status: "failed", @@ -363,15 +373,16 @@ export async function reviewCommand( }); emitProgress(context, "review", "failed", { run: currentRunId, - errors: errors.length, + errors: fatalErrors.length, }); - throw errors[0]?.error ?? new ClawpatchError("review failed", 1, "review-failed"); + throw fatalErrors[0]?.error ?? new ClawpatchError("review failed", 1, "review-failed"); } const finished: RunRecord = { ...run, status: "completed", finishedAt: nowIso(), findingIds, + errors: errors.map(({ message, code }) => ({ message, code })), }; await writeRun(loaded.paths, finished); emitProgress(context, "review", "done", { @@ -635,7 +646,9 @@ type ReviewFeatureOptions = { allowNonPendingFeatureReview: boolean; }; -async function reviewFeature(options: ReviewFeatureOptions): Promise<{ findingIds: string[] }> { +async function reviewFeature( + options: ReviewFeatureOptions, +): Promise<{ findingIds: string[]; droppedFindings: DroppedFinding[] }> { const { context, loaded, @@ -680,12 +693,13 @@ async function reviewFeature(options: ReviewFeatureOptions): Promise<{ findingId reviewPrompt.prompt, providerOptions(config), ); + const droppedFindings: DroppedFinding[] = providerOutput.droppedFindings; const reviewOutput = { - ...providerOutput, findings: reviewFindingsForMode(providerOutput.findings, mode).slice( 0, config.review.maxFindingsPerFeature, ), + inspected: providerOutput.inspected, }; const output = await validateReviewOutput( loaded.root, @@ -735,7 +749,7 @@ async function reviewFeature(options: ReviewFeatureOptions): Promise<{ findingId findings: findingIds.length, elapsed: `${Math.round((Date.now() - started) / 1000)}s`, }); - return { findingIds }; + return { findingIds, droppedFindings }; } catch (error: unknown) { const message = error instanceof Error ? error.message : String(error); if (locked !== null) { diff --git a/src/provider.test.ts b/src/provider.test.ts index 239185e..fb960ae 100644 --- a/src/provider.test.ts +++ b/src/provider.test.ts @@ -14,10 +14,28 @@ const { extractOpencodeJson, parseAcpxAgent, parseCodexJson, + parseReviewOutput, piThinkingLevel, providerJsonSchema, } = __testing; +function makeFinding(overrides: Record = {}): Record { + return { + title: "Sample finding", + category: "bug", + severity: "medium", + confidence: "high", + evidence: [], + reasoning: "Sample reasoning.", + reproduction: null, + recommendation: "Sample recommendation.", + whyTestsDoNotAlreadyCoverThis: "Tests do not encode this case.", + suggestedRegressionTest: null, + minimumFixScope: "Touch only the offending line.", + ...overrides, + }; +} + function updateEnvelope(update: object): string { return JSON.stringify({ jsonrpc: "2.0", @@ -555,6 +573,85 @@ describe("extractOpencodeJson", () => { }); }); +describe("parseReviewOutput", () => { + it("preserves all findings when every finding is valid (fast path)", () => { + const output = { + findings: [ + makeFinding({ title: "first", category: "bug" }), + makeFinding({ title: "second", category: "security" }), + makeFinding({ title: "third", category: "performance" }), + ], + inspected: { files: ["src/a.ts"], symbols: [], notes: [] }, + }; + + const result = parseReviewOutput(output); + + expect(result.findings).toHaveLength(3); + expect(result.findings.map((f) => f.title)).toEqual(["first", "second", "third"]); + expect(result.droppedFindings).toEqual([]); + expect(result.inspected.files).toEqual(["src/a.ts"]); + }); + + it("keeps valid siblings when one finding has an invalid category", () => { + const output = { + findings: [ + makeFinding({ title: "first", category: "bug" }), + makeFinding({ title: "second", category: "security" }), + makeFinding({ title: "third", category: "quality" }), // invalid enum value + makeFinding({ title: "fourth", category: "performance" }), + ], + inspected: { files: [], symbols: [], notes: [] }, + }; + + const result = parseReviewOutput(output); + + expect(result.findings).toHaveLength(3); + expect(result.findings.map((f) => f.title)).toEqual(["first", "second", "fourth"]); + expect(result.droppedFindings).toHaveLength(1); + const dropped = result.droppedFindings[0]!; + expect(dropped.path[0]).toBe("findings"); + expect(dropped.path[1]).toBe(2); + expect(dropped.path).toContain("category"); + expect(dropped.message).toBeTypeOf("string"); + expect(dropped.sample).toContain("quality"); + expect(dropped.sample.length).toBeLessThanOrEqual(200); + }); + + it("throws ClawpatchError when findings is not an array", () => { + const output = { + findings: "not-an-array", + inspected: { files: [], symbols: [], notes: [] }, + }; + + try { + parseReviewOutput(output); + } catch (err) { + expect(err).toBeInstanceOf(ClawpatchError); + expect((err as ClawpatchError).code).toBe("malformed-output"); + expect((err as ClawpatchError).exitCode).toBe(8); + expect((err as Error).message).toMatch(/findings/u); + return; + } + throw new Error("expected parseReviewOutput to throw on non-array findings"); + }); + + it("truncates oversized samples to 200 characters", () => { + const longTitle = "x".repeat(500); + const output = { + findings: [ + makeFinding({ title: longTitle, category: "quality" }), // invalid → dropped + ], + inspected: { files: [], symbols: [], notes: [] }, + }; + + const result = parseReviewOutput(output); + + expect(result.droppedFindings).toHaveLength(1); + expect(result.droppedFindings[0]!.sample.length).toBeLessThanOrEqual(200); + expect(result.droppedFindings[0]!.sample.endsWith("...")).toBe(true); + }); +}); + describe("providerByName", () => { it("returns provider instances for optional CLI-backed providers", () => { expect(providerByName("acpx").name).toBe("acpx"); diff --git a/src/provider.ts b/src/provider.ts index 3bc387d..a8cd0b9 100644 --- a/src/provider.ts +++ b/src/provider.ts @@ -1,6 +1,7 @@ import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; +import { z } from "zod"; import { runCommandArgs } from "./exec.js"; import { ClawpatchError } from "./errors.js"; import { @@ -14,10 +15,12 @@ import { extractJson, parseCodexJson, safeProviderPreview } from "./provider-jso import { AgentMapOutput, FixPlanOutput, - ReviewOutput, + ReviewFinding, RevalidateOutput, agentMapOutputSchema, fixPlanOutputSchema, + reviewFindingSchema, + reviewInspectedSchema, reviewOutputSchema, revalidateOutputSchema, type ReasoningEffort, @@ -30,11 +33,98 @@ export type ProviderOptions = { reasoningEffort: ReasoningEffort | null; skipGitRepoCheck: boolean; }; + +export type DroppedFinding = { + path: (string | number)[]; + message: string; + sample: string; +}; + +export type PartitionedReviewOutput = { + findings: ReviewFinding[]; + inspected: z.infer; + droppedFindings: DroppedFinding[]; +}; + +const reviewContainerSchema = z.object({ + findings: z.array(z.unknown()), + inspected: reviewInspectedSchema, +}); + +function truncateSample(value: unknown): string { + let text: string; + try { + text = JSON.stringify(value); + } catch { + text = String(value); + } + if (text === undefined) { + text = String(value); + } + return text.length > 200 ? `${text.slice(0, 197)}...` : text; +} + +export function parseReviewOutput(output: unknown): PartitionedReviewOutput { + // Fast path: whole-document parse on success. + const whole = reviewOutputSchema.safeParse(output); + if (whole.success) { + return { + findings: whole.data.findings, + inspected: whole.data.inspected, + droppedFindings: [], + }; + } + + // Fallback: validate container shape, partition findings element-by-element. + const container = reviewContainerSchema.safeParse(output); + if (!container.success) { + // Container shape is fundamentally wrong (e.g. findings is not an array, or + // inspected is missing). Preserve today's throw contract via ClawpatchError + // so callers see a single clear malformed-output failure. + const issue = container.error.issues[0]; + const where = + issue?.path + .map((segment) => (typeof segment === "symbol" ? segment.toString() : segment)) + .join(".") ?? ""; + const detail = issue?.message ?? "invalid review output shape"; + throw new ClawpatchError( + `provider review output is malformed at ${where}: ${detail}`, + 8, + "malformed-output", + ); + } + + const validFindings: ReviewFinding[] = []; + const droppedFindings: DroppedFinding[] = []; + container.data.findings.forEach((candidate, idx) => { + const result = reviewFindingSchema.safeParse(candidate); + if (result.success) { + validFindings.push(result.data); + return; + } + const issue = result.error.issues[0]; + const issuePath = (issue?.path ?? []).map((segment) => + typeof segment === "symbol" ? segment.toString() : segment, + ); + droppedFindings.push({ + path: ["findings", idx, ...issuePath], + message: issue?.message ?? "invalid finding shape", + sample: truncateSample(candidate), + }); + }); + + return { + findings: validFindings, + inspected: container.data.inspected, + droppedFindings, + }; +} + export type Provider = { name: string; check(root: string): Promise; map(root: string, prompt: string, options: ProviderOptions): Promise; - review(root: string, prompt: string, options: ProviderOptions): Promise; + review(root: string, prompt: string, options: ProviderOptions): Promise; fix(root: string, prompt: string, options: ProviderOptions): Promise; revalidate(root: string, prompt: string, options: ProviderOptions): Promise; }; @@ -77,9 +167,13 @@ const codexProvider: Provider = { const output = await runCodexJson(root, prompt, options, agentMapJsonSchema); return agentMapOutputSchema.parse(output); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runCodexJson(root, prompt, options, reviewJsonSchema); - return reviewOutputSchema.parse(output); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runCodexJson(root, prompt, options, fixPlanJsonSchema, "workspace-write"); @@ -108,9 +202,13 @@ const opencodeProvider: Provider = { const output = await runOpencodeJson(root, prompt, options.model, agentMapJsonSchema, true); return agentMapOutputSchema.parse(output); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runOpencodeJson(root, prompt, options.model, reviewJsonSchema, true); - return reviewOutputSchema.parse(output); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runOpencodeJson(root, prompt, options.model, fixPlanJsonSchema, false); @@ -147,9 +245,13 @@ const acpxProvider: Provider = { const output = await runAcpxJson(root, prompt, options.model, agentMapJsonSchema, "read"); return agentMapOutputSchema.parse(output); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runAcpxJson(root, prompt, options.model, reviewJsonSchema, "read"); - return reviewOutputSchema.parse(output); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runAcpxJson(root, prompt, options.model, fixPlanJsonSchema, "approve"); @@ -178,9 +280,13 @@ const grokProvider: Provider = { const output = await runGrokJson(root, prompt, options.model, agentMapJsonSchema, true); return agentMapOutputSchema.parse(output); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runGrokJson(root, prompt, options.model, reviewJsonSchema, true); - return reviewOutputSchema.parse(output); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runGrokJson(root, prompt, options.model, fixPlanJsonSchema, false); @@ -211,9 +317,13 @@ const piProvider: Provider = { const output = await runPiJson(root, prompt, options, agentMapJsonSchema, true); return agentMapOutputSchema.parse(output); }, - async review(root: string, prompt: string, options: ProviderOptions): Promise { + async review( + root: string, + prompt: string, + options: ProviderOptions, + ): Promise { const output = await runPiJson(root, prompt, options, reviewJsonSchema, true); - return reviewOutputSchema.parse(output); + return parseReviewOutput(output); }, async fix(root: string, prompt: string, options: ProviderOptions): Promise { const output = await runPiJson(root, prompt, options, fixPlanJsonSchema, false); @@ -357,9 +467,13 @@ const mockProvider: Provider = { notes: ["mock agent map"], }; }, - async review(_root: string, prompt: string): Promise { + async review(_root: string, prompt: string): Promise { if (!prompt.includes("TODO_BUG") && !prompt.includes("BUG:")) { - return { findings: [], inspected: { files: [], symbols: [], notes: ["mock clean"] } }; + return { + findings: [], + inspected: { files: [], symbols: [], notes: ["mock clean"] }, + droppedFindings: [], + }; } const evidencePath = prompt.includes("BAD_EVIDENCE") ? "src/not-included.ts" @@ -413,6 +527,7 @@ const mockProvider: Provider = { }, ], inspected: { files: [evidencePath], symbols: [], notes: ["mock mixed findings"] }, + droppedFindings: [], }; } return { @@ -441,6 +556,7 @@ const mockProvider: Provider = { }, ], inspected: { files: [evidencePath], symbols: [], notes: ["mock finding"] }, + droppedFindings: [], }; }, async fix(): Promise { @@ -495,7 +611,7 @@ const mockFailProvider: Provider = { async map(): Promise { throw new ClawpatchError("mock map failure", 1, "mock-failure"); }, - async review(): Promise { + async review(): Promise { throw new ClawpatchError("mock review failure", 1, "mock-failure"); }, async fix(): Promise { @@ -1072,6 +1188,7 @@ export const __testing = { extractOpencodeJson, parseAcpxAgent, parseCodexJson, + parseReviewOutput, piThinkingLevel, providerJsonSchema, }; diff --git a/src/types.ts b/src/types.ts index 4c958c0..611d33f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -349,27 +349,33 @@ export const agentMapOutputSchema = z.object({ export type AgentMapOutput = z.infer; +export const reviewFindingSchema = z.object({ + title: z.string(), + category: z.enum(findingCategories), + severity: z.enum(["critical", "high", "medium", "low"]), + confidence: z.enum(["high", "medium", "low"]), + evidence: z.array(evidenceRefSchema), + reasoning: z.string(), + reproduction: z.string().nullable(), + recommendation: z.string(), + whyTestsDoNotAlreadyCoverThis: z.string(), + suggestedRegressionTest: z.string().nullable(), + minimumFixScope: z.string(), +}); + +export type ReviewFinding = z.infer; + +export const reviewInspectedSchema = z.object({ + files: z.array(z.string()), + symbols: z.array(z.string()), + notes: z.array(z.string()), +}); + +export type ReviewInspected = z.infer; + export const reviewOutputSchema = z.object({ - findings: z.array( - z.object({ - title: z.string(), - category: z.enum(findingCategories), - severity: z.enum(["critical", "high", "medium", "low"]), - confidence: z.enum(["high", "medium", "low"]), - evidence: z.array(evidenceRefSchema), - reasoning: z.string(), - reproduction: z.string().nullable(), - recommendation: z.string(), - whyTestsDoNotAlreadyCoverThis: z.string(), - suggestedRegressionTest: z.string().nullable(), - minimumFixScope: z.string(), - }), - ), - inspected: z.object({ - files: z.array(z.string()), - symbols: z.array(z.string()), - notes: z.array(z.string()), - }), + findings: z.array(reviewFindingSchema), + inspected: reviewInspectedSchema, }); export type ReviewOutput = z.infer; From d981d6751f7ba8cd960f071ddcb27436acc8677f Mon Sep 17 00:00:00 2001 From: Cole Tebou Date: Mon, 18 May 2026 14:29:30 +0000 Subject: [PATCH 2/2] feat(review): partition validation drops without failing the feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `validateReviewOutput` (added in 0.3.0) throws `ClawpatchError("malformed-output")` on the FIRST evidence violation it finds — line range out of bounds, quote not present, evidence file not in the prompt's included files. The outer try/catch in `reviewFeature` then loses every sibling finding on the feature and marks the whole feature errored. Same problem the schema-layer partition (parseReviewOutput) addresses, just one layer later. This commit extends the partition pattern: - Add `validateReviewOutputPartitioned` in `src/review-validation.ts`. It runs the same per-evidence checks (now factored into a private `validateFinding` helper that both APIs call) but catches malformed-output ClawpatchErrors per finding and accumulates them into a `droppedFindings` array. The original `validateReviewOutput` keeps its throw-on-first-failure contract for any caller that wants it. - Add a `layer?: "schema" | "validation"` discriminator to `DroppedFinding` so operators can tell the two failure modes apart in `run.errors`. parseReviewOutput stamps `"schema"`; the validation partition stamps `"validation"`. - Wire reviewFeature in `src/app.ts` to call validateReviewOutputPartitioned and concatenate its drops onto the schema-layer drops already plumbed through. The orchestrator now emits `code: "validation-drop"` (distinct from `"schema-drop"`) for the new bucket and the fatal-errors filter excludes both. Behavior change for callers: the `Provider.review` return type and the schema-layer partition are unchanged. Adding new error codes is additive — anyone filtering on `"schema-drop"` keeps working but now silently misses validation drops. Update the filter to also exclude `"validation-drop"` to recover the prior "non-fatal drops" semantics. Tests: four new cases in `src/review-validation.test.ts` covering the all-clean, partial-drop, included-paths, and empty-evidence flows. --- CHANGELOG.md | 1 + src/app.ts | 20 +++++-- src/provider.ts | 11 ++++ src/review-validation.test.ts | 104 +++++++++++++++++++++++++++++++++- src/review-validation.ts | 92 ++++++++++++++++++++++++++---- 5 files changed, 209 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02206b9..1ce6ad8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Added review prompt provenance and budget accounting for included files, omitted files, prompt bytes, and approximate tokens. - Hardened review ingestion so provider findings must cite included files with valid line ranges and matching evidence quotes. - Fixed provider review to preserve valid findings when a sibling finding fails per-finding schema validation. Previously a single bad enum value caused the whole feature to lose every finding via a strict `reviewOutputSchema.parse` throw. Dropped findings are now recorded in `run.errors` with `code: "schema-drop"` and do not fail the run. +- Fixed review evidence validation to drop only the offending finding when a quote, line range, or evidence file is rejected by `validateReviewOutput`, instead of failing the whole feature. Dropped findings are recorded in `run.errors` with `code: "validation-drop"` and do not fail the run. - Fixed `clawpatch open-pr` so repositories without default-branch metadata use a dedicated patch branch and let GitHub choose the PR base. - Fixed `clawpatch open-pr` retries to push the recorded patch commit instead of any later local branch tip. - Fixed first-time `clawpatch open-pr` branch creation to start from the recorded patch base. diff --git a/src/app.ts b/src/app.ts index 44278a1..bcabbd4 100644 --- a/src/app.ts +++ b/src/app.ts @@ -32,7 +32,7 @@ import { renderFindingDetail, renderReport, } from "./reporting.js"; -import { validateReviewOutput } from "./review-validation.js"; +import { validateReviewOutputPartitioned } from "./review-validation.js"; import { filterFeaturesByChangedFiles, filterFeaturesByProject, @@ -344,11 +344,12 @@ export async function reviewCommand( }); findingIds.push(...reviewed.findingIds); for (const dropped of reviewed.droppedFindings) { + const code = dropped.layer === "validation" ? "validation-drop" : "schema-drop"; errors.push({ message: `dropped 1 finding from feature ${feature.featureId} ` + `at ${dropped.path.join(".")}: ${dropped.message}`, - code: "schema-drop", + code, error: null, }); } @@ -362,7 +363,9 @@ export async function reviewCommand( } }), ); - const fatalErrors = errors.filter((entry) => entry.code !== "schema-drop"); + const fatalErrors = errors.filter( + (entry) => entry.code !== "schema-drop" && entry.code !== "validation-drop", + ); if (fatalErrors.length > 0) { await writeRun(loaded.paths, { ...run, @@ -693,7 +696,8 @@ async function reviewFeature( reviewPrompt.prompt, providerOptions(config), ); - const droppedFindings: DroppedFinding[] = providerOutput.droppedFindings; + // Layer 1 drops: per-finding schema violations from parseReviewOutput. + const droppedFindings: DroppedFinding[] = [...providerOutput.droppedFindings]; const reviewOutput = { findings: reviewFindingsForMode(providerOutput.findings, mode).slice( 0, @@ -701,14 +705,18 @@ async function reviewFeature( ), inspected: providerOutput.inspected, }; - const output = await validateReviewOutput( + // Layer 2 drops: per-finding evidence validation (line ranges, quotes, + // included files). Partition so a single bad finding doesn't lose the + // whole feature. + const validated = await validateReviewOutputPartitioned( loaded.root, lockedFeature, config, reviewPrompt.manifest, reviewOutput, ); - const records = output.findings.map((finding) => + droppedFindings.push(...validated.droppedFindings); + const records = validated.findings.map((finding) => findingFromOutput(finding, lockedFeature.featureId, currentRunId), ); const findingIds: string[] = []; diff --git a/src/provider.ts b/src/provider.ts index a8cd0b9..158ee61 100644 --- a/src/provider.ts +++ b/src/provider.ts @@ -34,10 +34,20 @@ export type ProviderOptions = { skipGitRepoCheck: boolean; }; +/** + * One review finding rejected by per-finding validation. `layer` records + * which gate dropped it: `schema` is the per-finding `reviewFindingSchema` + * Zod parse, `validation` is the evidence/quote/line-range check in + * `validateReviewOutputPartitioned`. Operators can use `layer` to tell + * "the model emitted nonsense for this finding" (schema) apart from + * "the model cited a real-looking finding but pointed at the wrong file + * or quoted text that isn't there" (validation). + */ export type DroppedFinding = { path: (string | number)[]; message: string; sample: string; + layer?: "schema" | "validation"; }; export type PartitionedReviewOutput = { @@ -110,6 +120,7 @@ export function parseReviewOutput(output: unknown): PartitionedReviewOutput { path: ["findings", idx, ...issuePath], message: issue?.message ?? "invalid finding shape", sample: truncateSample(candidate), + layer: "schema", }); }); diff --git a/src/review-validation.test.ts b/src/review-validation.test.ts index 27db552..48e4c80 100644 --- a/src/review-validation.test.ts +++ b/src/review-validation.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from "vitest"; import { defaultConfig } from "./config.js"; import type { ReviewPromptManifest } from "./prompt.js"; -import { validateReviewOutput } from "./review-validation.js"; +import { validateReviewOutput, validateReviewOutputPartitioned } from "./review-validation.js"; import { fixtureRoot, writeFixture } from "./test-helpers.js"; import type { FeatureRecord, ReviewOutput } from "./types.js"; @@ -120,6 +120,108 @@ describe("validateReviewOutput", () => { }); }); +describe("validateReviewOutputPartitioned", () => { + it("returns all findings when every finding's evidence is valid", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-ok-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + output("src/index.ts"), + ); + + expect(result.findings).toHaveLength(1); + expect(result.findings[0]!.title).toBe("Bug"); + expect(result.droppedFindings).toEqual([]); + }); + + it("keeps valid siblings when one finding has a stale quote", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-quote-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + const goodFinding = output("src/index.ts").findings[0]!; + const badFinding: ReviewOutput["findings"][number] = { + ...goodFinding, + title: "Stale quote", + evidence: [ + { + path: "src/index.ts", + startLine: 1, + endLine: 1, + symbol: null, + quote: "this text does not exist", + }, + ], + }; + const provider: ReviewOutput = { + findings: [goodFinding, badFinding, { ...goodFinding, title: "Also good" }], + inspected: { files: ["src/index.ts"], symbols: [], notes: [] }, + }; + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + provider, + ); + + expect(result.findings.map((f) => f.title)).toEqual(["Bug", "Also good"]); + expect(result.droppedFindings).toHaveLength(1); + const dropped = result.droppedFindings[0]!; + expect(dropped.path).toEqual(["findings", 1]); + expect(dropped.layer).toBe("validation"); + expect(dropped.message).toMatch(/quote/iu); + expect(dropped.sample.length).toBeLessThanOrEqual(200); + }); + + it("drops findings whose evidence file was not included in the prompt", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-included-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + await writeFixture(root, "src/other.ts", "const value = 'TODO_BUG';\n"); + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + output("src/other.ts"), + ); + + expect(result.findings).toEqual([]); + expect(result.droppedFindings).toHaveLength(1); + expect(result.droppedFindings[0]!.layer).toBe("validation"); + expect(result.droppedFindings[0]!.message).toMatch(/not included/iu); + }); + + it("drops findings with empty evidence arrays without throwing", async () => { + const root = await fixtureRoot("clawpatch-review-validation-partition-empty-"); + await writeFixture(root, "src/index.ts", "const value = 'TODO_BUG';\n"); + + const goodFinding = output("src/index.ts").findings[0]!; + const provider: ReviewOutput = { + findings: [{ ...goodFinding, title: "No evidence", evidence: [] }, goodFinding], + inspected: { files: ["src/index.ts"], symbols: [], notes: [] }, + }; + + const result = await validateReviewOutputPartitioned( + root, + feature("src/index.ts"), + defaultConfig(), + manifest("src/index.ts"), + provider, + ); + + expect(result.findings.map((f) => f.title)).toEqual(["Bug"]); + expect(result.droppedFindings).toHaveLength(1); + expect(result.droppedFindings[0]!.path).toEqual(["findings", 0]); + expect(result.droppedFindings[0]!.message).toMatch(/no evidence/iu); + }); +}); + function feature(path: string): FeatureRecord { return { schemaVersion: 1, diff --git a/src/review-validation.ts b/src/review-validation.ts index 7566a70..aa20d61 100644 --- a/src/review-validation.ts +++ b/src/review-validation.ts @@ -2,6 +2,7 @@ import { readFile, realpath } from "node:fs/promises"; import { isAbsolute, relative, resolve } from "node:path"; import { ClawpatchError } from "./errors.js"; import { REVIEW_PROMPT_FILE_CHAR_LIMIT, type ReviewPromptManifest } from "./prompt.js"; +import type { DroppedFinding } from "./provider.js"; import { ClawpatchConfig, FeatureRecord, ReviewOutput } from "./types.js"; export async function validateReviewOutput( @@ -18,21 +19,88 @@ export async function validateReviewOutput( const cache = new Map>(); const findings = output.findings; for (const finding of findings) { - if (finding.evidence.length === 0) { - throwMalformed(`finding "${finding.title}" has no evidence`); - } - for (const evidence of finding.evidence) { - assertIncludedPath(evidence.path, included, "evidence file"); - const promptFile = promptFiles.get(normalizePath(evidence.path)); - if (promptFile === undefined || !promptFile.readable) { - throwMalformed(`evidence file was not readable in review context: ${evidence.path}`); + await validateFinding(root, finding, included, promptFiles, cache); + } + return { ...output, findings }; +} + +/** + * Same evidence validation as {@link validateReviewOutput}, but runs + * per-finding and partitions failures instead of throwing on the first + * bad finding. Callers receive only the validated findings plus a list + * of {@link DroppedFinding}s describing why each rejection occurred. The + * caller is then free to surface drops as non-fatal run errors instead + * of losing the whole feature. + */ +export async function validateReviewOutputPartitioned( + root: string, + feature: FeatureRecord, + config: ClawpatchConfig, + manifest: ReviewPromptManifest, + output: ReviewOutput, +): Promise<{ findings: ReviewOutput["findings"]; droppedFindings: DroppedFinding[] }> { + const included = includedReviewPaths(feature, config); + const promptFiles = new Map( + manifest.includedFiles.map((file) => [normalizePath(file.path), file]), + ); + const cache = new Map>(); + const validFindings: ReviewOutput["findings"] = []; + const droppedFindings: DroppedFinding[] = []; + output.findings.forEach(() => undefined); + for (let idx = 0; idx < output.findings.length; idx += 1) { + const finding = output.findings[idx]!; + try { + await validateFinding(root, finding, included, promptFiles, cache); + validFindings.push(finding); + } catch (error: unknown) { + if (error instanceof ClawpatchError && error.code === "malformed-output") { + droppedFindings.push({ + path: ["findings", idx], + message: error.message.replace(/^malformed provider review output:\s*/u, ""), + sample: truncateValidationSample(finding), + layer: "validation", + }); + continue; } - const contents = await fileContents(root, evidence.path, promptFile.truncated, cache); - assertLineRange(contents, evidence); - assertQuote(contents, evidence); + throw error; } } - return { ...output, findings }; + return { findings: validFindings, droppedFindings }; +} + +async function validateFinding( + root: string, + finding: ReviewOutput["findings"][number], + included: ReadonlySet, + promptFiles: Map, + cache: Map>, +): Promise { + if (finding.evidence.length === 0) { + throwMalformed(`finding "${finding.title}" has no evidence`); + } + for (const evidence of finding.evidence) { + assertIncludedPath(evidence.path, included, "evidence file"); + const promptFile = promptFiles.get(normalizePath(evidence.path)); + if (promptFile === undefined || !promptFile.readable) { + throwMalformed(`evidence file was not readable in review context: ${evidence.path}`); + } + const contents = await fileContents(root, evidence.path, promptFile.truncated, cache); + assertLineRange(contents, evidence); + assertQuote(contents, evidence); + } +} + +function truncateValidationSample(finding: ReviewOutput["findings"][number]): string { + let text: string; + try { + text = JSON.stringify(finding); + } catch { + text = String(finding); + } + if (text === undefined) { + text = String(finding); + } + return text.length > 200 ? `${text.slice(0, 197)}...` : text; } function includedReviewPaths(feature: FeatureRecord, config: ClawpatchConfig): Set {