diff --git a/src/action/triggers/executor.ts b/src/action/triggers/executor.ts index ed4d8b6d..ca7ff482 100644 --- a/src/action/triggers/executor.ts +++ b/src/action/triggers/executor.ts @@ -54,6 +54,8 @@ export interface TriggerExecutorDeps { globalRequestChanges?: boolean; /** Global fail-check from action inputs (trigger-specific takes precedence) */ globalFailCheck?: boolean; + /** Prior-phase skill reports, injected into prompts for second-pass skills */ + priorReports?: SkillReport[]; } /** @@ -134,6 +136,8 @@ export async function executeTrigger( maxTurns: trigger.maxTurns, batchDelayMs: config.defaults?.batchDelayMs, pathToClaudeCodeExecutable: claudePath, + priorReports: deps.priorReports, + scope: trigger.scope, }, }; diff --git a/src/action/workflow/pr-workflow.ts b/src/action/workflow/pr-workflow.ts index 0a5e774a..ef915b86 100644 --- a/src/action/workflow/pr-workflow.ts +++ b/src/action/workflow/pr-workflow.ts @@ -18,6 +18,7 @@ import type { ExistingComment } from '../../output/dedup.js'; import { buildAnalyzedScope, findStaleComments, resolveStaleComments } from '../../output/stale.js'; import type { EventContext, SkillReport, Finding } from '../../types/index.js'; import { processInBatches } from '../../utils/index.js'; +import { groupByPhase } from '../../pipeline/index.js'; import { evaluateFixAttempts, postThreadReply } from '../fix-evaluation/index.js'; import type { FixEvaluation } from '../fix-evaluation/index.js'; import { logAction, warnAction } from '../../cli/output/tty.js'; @@ -229,7 +230,9 @@ async function setupGitHubState( } /** - * Run all matched triggers in parallel batches. + * Run all matched triggers, grouped by phase. + * Within each phase triggers run in parallel batches. + * Prior-phase reports are threaded forward to later phases. */ async function executeAllTriggers( matchedTriggers: ResolvedTrigger[], @@ -241,23 +244,36 @@ async function executeAllTriggers( const concurrency = config.runner?.concurrency ?? inputs.parallel; const claudePath = await findClaudeCodeExecutable(); - return processInBatches( - matchedTriggers, - (trigger) => - executeTrigger(trigger, { - octokit, - context, - config, - anthropicApiKey: inputs.anthropicApiKey, - claudePath, - globalFailOn: inputs.failOn, - globalReportOn: inputs.reportOn, - globalMaxFindings: inputs.maxFindings, - globalRequestChanges: inputs.requestChanges, - globalFailCheck: inputs.failCheck, - }), - concurrency - ); + const byPhase = groupByPhase(matchedTriggers); + const allResults: TriggerResult[] = []; + const priorReports: SkillReport[] = []; + + for (const [, phaseTriggers] of byPhase) { + const results = await processInBatches( + phaseTriggers, + (trigger) => + executeTrigger(trigger, { + octokit, + context, + config, + anthropicApiKey: inputs.anthropicApiKey, + claudePath, + globalFailOn: inputs.failOn, + globalReportOn: inputs.reportOn, + globalMaxFindings: inputs.maxFindings, + globalRequestChanges: inputs.requestChanges, + globalFailCheck: inputs.failCheck, + priorReports: priorReports.length > 0 ? priorReports : undefined, + }), + concurrency + ); + + const reports = results.flatMap((r) => r.report ? [r.report] : []); + priorReports.push(...reports); + allResults.push(...results); + } + + return allResults; } /** diff --git a/src/cli/main.ts b/src/cli/main.ts index ad8fd600..e249e5db 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -3,7 +3,7 @@ import { dirname, join, resolve } from 'node:path'; import { config as dotenvConfig } from 'dotenv'; import { Sentry, flushSentry } from '../sentry.js'; import { loadWardenConfig, resolveSkillConfigs } from '../config/loader.js'; -import type { SkillRunnerOptions } from '../sdk/runner.js'; +import { groupByPhase } from '../pipeline/index.js'; import { resolveSkillAsync } from '../skills/loader.js'; import { matchTrigger, filterContextByPaths, shouldFail, countFindingsAtOrAbove } from '../triggers/matcher.js'; import type { SkillReport } from '../types/index.js'; @@ -90,6 +90,8 @@ interface SkillToRun { skill: string; remote?: string; filters: { paths?: string[]; ignorePaths?: string[] }; + phase?: number; + scope?: 'diff' | 'report'; } interface ProcessedResults { @@ -253,7 +255,7 @@ async function runSkills( seen.add(t.skill); return true; }) - .map((t) => ({ skill: t.skill, remote: t.remote, filters: t.filters })); + .map((t) => ({ skill: t.skill, remote: t.remote, filters: t.filters, phase: t.phase, scope: t.scope })); } else { skillsToRun = []; } @@ -269,41 +271,54 @@ async function runSkills( return 0; } - // Build skill tasks + // Build skill tasks grouped by phase // Model precedence: defaults.model > CLI flag > WARDEN_MODEL env var > SDK default const model = config?.defaults?.model ?? options.model ?? process.env['WARDEN_MODEL']; - const runnerOptions: SkillRunnerOptions = { - apiKey, - model, - abortController, - maxTurns: config?.defaults?.maxTurns, - batchDelayMs: config?.defaults?.batchDelayMs, - }; - const tasks: SkillTaskOptions[] = skillsToRun.map(({ skill, remote, filters }) => ({ - name: skill, - failOn: options.failOn, - resolveSkill: () => resolveSkillAsync(skill, repoPath, { - remote, - offline: options.offline, - }), - context: filterContextByPaths(context, filters), - runnerOptions, - })); - - // Run skills with Ink UI (TTY) or simple console output (non-TTY) const concurrency = options.parallel ?? DEFAULT_CONCURRENCY; const taskOptions = { mode: reporter.mode, verbosity: reporter.verbosity, concurrency, }; - const results = reporter.mode.isTTY - ? await runSkillTasksWithInk(tasks, taskOptions) - : await runSkillTasks(tasks, taskOptions); + + // Group skills by phase + const byPhase = groupByPhase(skillsToRun); + + const allResults: Awaited> = []; + const priorReports: SkillReport[] = []; + + for (const [, phaseSkills] of byPhase) { + const tasks: SkillTaskOptions[] = phaseSkills.map(({ skill, remote, filters, scope }) => ({ + name: skill, + failOn: options.failOn, + resolveSkill: () => resolveSkillAsync(skill, repoPath, { + remote, + offline: options.offline, + }), + context: filterContextByPaths(context, filters), + runnerOptions: { + apiKey, + model, + abortController, + maxTurns: config?.defaults?.maxTurns, + batchDelayMs: config?.defaults?.batchDelayMs, + priorReports: priorReports.length > 0 ? priorReports : undefined, + scope, + }, + })); + + const results = reporter.mode.isTTY + ? await runSkillTasksWithInk(tasks, taskOptions) + : await runSkillTasks(tasks, taskOptions); + + const reports = results.flatMap((r) => r.report ? [r.report] : []); + priorReports.push(...reports); + allResults.push(...results); + } // Process results and output const totalDuration = Date.now() - startTime; - const processed = processTaskResults(results, options.reportOn); + const processed = processTaskResults(allResults, options.reportOn); return outputResultsAndHandleFixes(processed, options, reporter, repoPath ?? cwd, totalDuration); } @@ -515,38 +530,51 @@ async function runConfigMode(options: CLIOptions, reporter: Reporter): Promise ({ - name: trigger.name, - displayName: trigger.skill, - failOn: trigger.failOn ?? options.failOn, - resolveSkill: () => resolveSkillAsync(trigger.skill, repoPath, { - remote: trigger.remote, - offline: options.offline, - }), - context: filterContextByPaths(context, trigger.filters), - runnerOptions: { - apiKey, - model: trigger.model, - abortController, - maxTurns: trigger.maxTurns, - }, - })); - - // Run triggers with Ink UI (TTY) or simple console output (non-TTY) + // Group triggers by phase and run each phase sequentially + const byPhase = groupByPhase(triggersToRun); const concurrency = options.parallel ?? config.runner?.concurrency ?? DEFAULT_CONCURRENCY; const taskOptions = { mode: reporter.mode, verbosity: reporter.verbosity, concurrency, }; - const results = reporter.mode.isTTY - ? await runSkillTasksWithInk(tasks, taskOptions) - : await runSkillTasks(tasks, taskOptions); + + const allResults: Awaited> = []; + const priorReports: SkillReport[] = []; + + for (const [, phaseTriggers] of byPhase) { + const tasks: SkillTaskOptions[] = phaseTriggers.map((trigger) => ({ + name: trigger.name, + displayName: trigger.skill, + failOn: trigger.failOn ?? options.failOn, + resolveSkill: () => resolveSkillAsync(trigger.skill, repoPath, { + remote: trigger.remote, + offline: options.offline, + }), + context: filterContextByPaths(context, trigger.filters), + runnerOptions: { + apiKey, + model: trigger.model, + abortController, + maxTurns: trigger.maxTurns, + priorReports: priorReports.length > 0 ? priorReports : undefined, + scope: trigger.scope, + }, + })); + + const results = reporter.mode.isTTY + ? await runSkillTasksWithInk(tasks, taskOptions) + : await runSkillTasks(tasks, taskOptions); + + // Collect reports for next phase + const reports = results.flatMap((r) => r.report ? [r.report] : []); + priorReports.push(...reports); + allResults.push(...results); + } // Process results and output const totalDuration = Date.now() - startTime; - const processed = processTaskResults(results, options.reportOn); + const processed = processTaskResults(allResults, options.reportOn); return outputResultsAndHandleFixes(processed, options, reporter, repoPath, totalDuration); } diff --git a/src/cli/output/tasks.ts b/src/cli/output/tasks.ts index b9dc8516..fbc41a28 100644 --- a/src/cli/output/tasks.ts +++ b/src/cli/output/tasks.ts @@ -9,6 +9,7 @@ import { Sentry, emitSkillMetrics, emitDedupMetrics, logger } from '../../sentry import { prepareFiles, analyzeFile, + analyzeReport, aggregateUsage, aggregateAuxiliaryUsage, deduplicateFindings, @@ -176,6 +177,53 @@ export async function runSkillTask( // Resolve the skill const skill = await resolveSkill(); + // Build PR context for inclusion in prompts (if available) + const prContext: PRPromptContext | undefined = context.pullRequest + ? { + changedFiles: context.pullRequest.files.map((f) => f.filename), + title: context.pullRequest.title, + body: context.pullRequest.body, + } + : undefined; + + // Report-scoped: single SDK call on prior findings, skip per-file loop + if (runnerOptions.scope === 'report') { + callbacks.onSkillStart({ + name, + displayName, + status: 'running', + startTime, + files: [], + findings: [], + }); + + const result = await analyzeReport(skill, context.repoPath, runnerOptions, prContext); + const uniqueFindings = deduplicateFindings(result.findings); + const duration = Date.now() - startTime; + + const report: SkillReport = { + skill: skill.name, + summary: generateSummary(skill.name, uniqueFindings, skill.description), + findings: uniqueFindings, + usage: result.usage, + durationMs: duration, + }; + if (result.auxiliaryUsage) { + report.auxiliaryUsage = aggregateAuxiliaryUsage(result.auxiliaryUsage); + } + + emitSkillMetrics(report); + callbacks.onSkillUpdate(name, { + status: 'done', + durationMs: duration, + findings: uniqueFindings, + usage: report.usage, + }); + callbacks.onSkillComplete(name, report); + + return { name, report, failOn }; + } + // Prepare files (parse patches into hunks) const { files: preparedFiles, skippedFiles } = prepareFiles(context, { contextLines: runnerOptions.contextLines, @@ -216,15 +264,6 @@ export async function runSkillTask( findings: [], }); - // Build PR context for inclusion in prompts (if available) - const prContext: PRPromptContext | undefined = context.pullRequest - ? { - changedFiles: context.pullRequest.files.map((f) => f.filename), - title: context.pullRequest.title, - body: context.pullRequest.body, - } - : undefined; - // Process files with concurrency const processFile = async (prepared: PreparedFile, index: number): Promise => { const filename = prepared.filename; @@ -334,7 +373,7 @@ export async function runSkillTask( const report: SkillReport = { skill: skill.name, - summary: generateSummary(skill.name, uniqueFindings), + summary: generateSummary(skill.name, uniqueFindings, skill.description), findings: uniqueFindings, usage: aggregateUsage(allUsage), durationMs: duration, diff --git a/src/config/loader.test.ts b/src/config/loader.test.ts index c6bd4305..5fcbfe69 100644 --- a/src/config/loader.test.ts +++ b/src/config/loader.test.ts @@ -739,3 +739,108 @@ describe('defaults.ignorePaths config', () => { expect(result.data?.defaults?.ignorePaths).toEqual(['dist/**', 'node_modules/**']); }); }); + +describe('scope config', () => { + it('accepts scope = "diff" in skill', () => { + const config = { + version: 1, + skills: [{ name: 'test', scope: 'diff' }], + }; + + const result = WardenConfigSchema.safeParse(config); + expect(result.success).toBe(true); + expect(result.data?.skills[0]?.scope).toBe('diff'); + }); + + it('accepts scope = "report" in skill', () => { + const config = { + version: 1, + skills: [{ name: 'test', scope: 'report' }], + }; + + const result = WardenConfigSchema.safeParse(config); + expect(result.success).toBe(true); + expect(result.data?.skills[0]?.scope).toBe('report'); + }); + + it('defaults scope to "diff"', () => { + const config = { + version: 1, + skills: [{ name: 'test' }], + }; + + const result = WardenConfigSchema.safeParse(config); + expect(result.success).toBe(true); + expect(result.data?.skills[0]?.scope).toBe('diff'); + }); + + it('rejects invalid scope values', () => { + const config = { + version: 1, + skills: [{ name: 'test', scope: 'invalid' }], + }; + + const result = WardenConfigSchema.safeParse(config); + expect(result.success).toBe(false); + }); + + it('report-scoped triggers get forced to last phase (Infinity)', () => { + const config: WardenConfig = { + version: 1, + skills: [ + { name: 'diff-skill', phase: 1 }, + { name: 'report-skill', scope: 'report', phase: 1 }, + ], + }; + + const resolved = resolveSkillConfigs(config); + const diffSkill = resolved.find((t) => t.name === 'diff-skill'); + const reportSkill = resolved.find((t) => t.name === 'report-skill'); + + expect(diffSkill?.phase).toBe(1); + expect(reportSkill?.phase).toBe(Infinity); + expect(reportSkill?.scope).toBe('report'); + }); + + it('report-scoped triggers override explicit phase', () => { + const config: WardenConfig = { + version: 1, + skills: [ + { name: 'report-skill', scope: 'report', phase: 2 }, + ], + }; + + const [resolved] = resolveSkillConfigs(config); + expect(resolved?.phase).toBe(Infinity); + }); + + it('diff-scoped triggers preserve their phase', () => { + const config: WardenConfig = { + version: 1, + skills: [ + { name: 'diff-skill', scope: 'diff', phase: 3 }, + ], + }; + + const [resolved] = resolveSkillConfigs(config); + expect(resolved?.phase).toBe(3); + expect(resolved?.scope).toBeUndefined(); + }); + + it('scope is threaded through trigger entries', () => { + const config: WardenConfig = { + version: 1, + skills: [{ + name: 'report-skill', + scope: 'report', + triggers: [ + { type: 'pull_request', actions: ['opened'] }, + ], + }], + }; + + const [resolved] = resolveSkillConfigs(config); + expect(resolved?.scope).toBe('report'); + expect(resolved?.phase).toBe(Infinity); + }); +}); diff --git a/src/config/loader.ts b/src/config/loader.ts index ff0b260c..42356b7d 100644 --- a/src/config/loader.ts +++ b/src/config/loader.ts @@ -98,6 +98,10 @@ export interface ResolvedTrigger { maxTurns?: number; /** Schedule-specific configuration */ schedule?: ScheduleConfig; + /** Execution phase (default: 1). Phase-2 skills receive phase-1 findings in their prompt. */ + phase?: number; + /** Execution scope: 'diff' (default) analyzes hunks, 'report' analyzes prior findings as a whole. */ + scope?: 'diff' | 'report'; } /** @@ -148,6 +152,10 @@ export function resolveSkillConfigs( ignorePaths: mergedIgnorePaths.length > 0 ? mergedIgnorePaths : undefined, }; + // Report-scoped skills always run after all diff-scoped skills + const scope = skill.scope === 'report' ? 'report' : undefined; + const effectivePhase = scope === 'report' ? Infinity : skill.phase; + if (!skill.triggers || skill.triggers.length === 0) { // Wildcard: no triggers means run everywhere result.push({ @@ -164,6 +172,8 @@ export function resolveSkillConfigs( failCheck: skill.failCheck ?? defaults?.failCheck, model: baseModel, maxTurns: skill.maxTurns ?? defaults?.maxTurns, + phase: effectivePhase, + scope, }); } else { for (const trigger of skill.triggers) { @@ -184,6 +194,8 @@ export function resolveSkillConfigs( model: emptyToUndefined(trigger.model) ?? baseModel, maxTurns: trigger.maxTurns ?? skill.maxTurns ?? defaults?.maxTurns, schedule: trigger.schedule, + phase: effectivePhase, + scope, }); } } diff --git a/src/config/schema.ts b/src/config/schema.ts index 4b1c9ae3..39a01b0e 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -91,6 +91,10 @@ export const SkillConfigSchema = z.object({ ignorePaths: z.array(z.string()).optional(), /** Remote repository reference for the skill (e.g., "owner/repo" or "owner/repo@sha") */ remote: z.string().optional(), + /** Execution phase (default: 1). Phase-2 skills receive phase-1 findings in their prompt. */ + phase: z.number().int().positive().default(1).optional(), + /** Execution scope: 'diff' (default) analyzes hunks, 'report' analyzes prior findings as a whole. */ + scope: z.enum(['diff', 'report']).default('diff').optional(), // Flattened output fields (skill-level defaults) failOn: SeverityThresholdSchema.optional(), reportOn: SeverityThresholdSchema.optional(), diff --git a/src/output/renderer.ts b/src/output/renderer.ts index 2031b553..1a709c9d 100644 --- a/src/output/renderer.ts +++ b/src/output/renderer.ts @@ -55,7 +55,7 @@ function renderReview( if (findingsWithoutLocation.length > 0) { return { event, - body: renderFindingsBody(findingsWithoutLocation, report.skill), + body: renderFindingsBody(findingsWithoutLocation, report.skill, report.summary), comments: [], }; } @@ -105,7 +105,7 @@ function renderReview( // Include locationless findings in the review body when mixed with inline comments const body = findingsWithoutLocation.length > 0 - ? renderFindingsBody(findingsWithoutLocation, report.skill) + ? renderFindingsBody(findingsWithoutLocation, report.skill, report.summary) : ''; return { @@ -261,8 +261,14 @@ function renderFindingItem(finding: Finding): string { } /** Render findings as markdown for inclusion in a review body. */ -export function renderFindingsBody(findings: Finding[], skill: string): string { +export function renderFindingsBody(findings: Finding[], skill: string, summary?: string): string { const lines: string[] = []; + if (summary) { + lines.push(`### ${skill}`); + lines.push(''); + lines.push(summary); + lines.push(''); + } for (const finding of findings) { const emoji = SEVERITY_EMOJI[finding.severity]; const location = finding.location diff --git a/src/pipeline/index.ts b/src/pipeline/index.ts new file mode 100644 index 00000000..592c4e7b --- /dev/null +++ b/src/pipeline/index.ts @@ -0,0 +1 @@ +export { groupByPhase } from './phase.js'; diff --git a/src/pipeline/phase.test.ts b/src/pipeline/phase.test.ts new file mode 100644 index 00000000..dd9c7182 --- /dev/null +++ b/src/pipeline/phase.test.ts @@ -0,0 +1,53 @@ +import { describe, it, expect } from 'vitest'; +import { groupByPhase } from './phase.js'; +import type { ResolvedTrigger } from '../config/loader.js'; + +/** Minimal trigger stub for testing. */ +function makeTrigger(name: string, phase?: number): ResolvedTrigger { + return { + name, + skill: name, + type: '*', + filters: {}, + phase, + }; +} + +describe('groupByPhase', () => { + it('defaults triggers with no phase to 1', () => { + const triggers = [makeTrigger('a'), makeTrigger('b')]; + const result = groupByPhase(triggers); + + expect([...result.keys()]).toEqual([1]); + expect(result.get(1)).toHaveLength(2); + }); + + it('groups mixed phases correctly and sorts ascending', () => { + const triggers = [ + makeTrigger('p2-a', 2), + makeTrigger('p1-a', 1), + makeTrigger('p2-b', 2), + makeTrigger('p3', 3), + makeTrigger('default'), + ]; + const result = groupByPhase(triggers); + + expect([...result.keys()]).toEqual([1, 2, 3]); + expect(result.get(1)!.map((t) => t.name)).toEqual(['p1-a', 'default']); + expect(result.get(2)!.map((t) => t.name)).toEqual(['p2-a', 'p2-b']); + expect(result.get(3)!.map((t) => t.name)).toEqual(['p3']); + }); + + it('returns a single group for single-phase triggers', () => { + const triggers = [makeTrigger('a', 1), makeTrigger('b', 1)]; + const result = groupByPhase(triggers); + + expect([...result.keys()]).toEqual([1]); + expect(result.get(1)).toHaveLength(2); + }); + + it('handles empty input', () => { + const result = groupByPhase([]); + expect(result.size).toBe(0); + }); +}); diff --git a/src/pipeline/phase.ts b/src/pipeline/phase.ts new file mode 100644 index 00000000..4424794c --- /dev/null +++ b/src/pipeline/phase.ts @@ -0,0 +1,26 @@ +/** + * Phase grouping utility for multi-pass skill execution. + * + * Groups items by their `phase` field (defaulting to 1) + * and returns them sorted by phase ascending. + */ + +/** + * Group items by execution phase, sorted ascending. + * Items without a phase default to phase 1. + */ +export function groupByPhase(items: T[]): Map { + const grouped = new Map(); + + for (const item of items) { + const phase = item.phase ?? 1; + const existing = grouped.get(phase); + if (existing) { + existing.push(item); + } else { + grouped.set(phase, [item]); + } + } + + return new Map([...grouped.entries()].sort(([a], [b]) => a - b)); +} diff --git a/src/sdk/analyze.ts b/src/sdk/analyze.ts index 8e0ce0b3..3acefeb8 100644 --- a/src/sdk/analyze.ts +++ b/src/sdk/analyze.ts @@ -6,7 +6,7 @@ import { Sentry, emitExtractionMetrics, emitRetryMetric, emitDedupMetrics } from import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage } from './errors.js'; import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js'; import { extractUsage, aggregateUsage, emptyUsage, estimateTokens, aggregateAuxiliaryUsage } from './usage.js'; -import { buildHunkSystemPrompt, buildHunkUserPrompt, type PRPromptContext } from './prompt.js'; +import { buildHunkSystemPrompt, buildHunkUserPrompt, buildReportUserPrompt, type PRPromptContext, type PriorFindingsContext } from './prompt.js'; import { extractFindingsJson, extractFindingsWithLLM, validateFindings, deduplicateFindings } from './extract.js'; import { LARGE_PROMPT_THRESHOLD_CHARS, @@ -340,7 +340,10 @@ async function analyzeHunk( const { apiKey, abortController, retry } = options; const systemPrompt = buildHunkSystemPrompt(skill); - const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext); + const priorFindings: PriorFindingsContext | undefined = options.priorReports?.length + ? { reports: options.priorReports } + : undefined; + const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext, priorFindings); // Report prompt size information const systemChars = systemPrompt.length; @@ -627,10 +630,77 @@ export async function analyzeFile( ); } +/** + * Analyze prior findings as a whole (report-scoped skill). + * Makes a single SDK call instead of per-hunk iteration. + */ +export async function analyzeReport( + skill: SkillDefinition, + repoPath: string, + options: SkillRunnerOptions, + prContext?: PRPromptContext +): Promise<{ findings: Finding[]; usage: UsageStats; failed: boolean; auxiliaryUsage?: AuxiliaryUsageEntry[] }> { + return Sentry.startSpan( + { + op: 'skill.analyze_report', + name: `analyze report ${skill.name}`, + attributes: { 'skill.name': skill.name, 'skill.scope': 'report' }, + }, + async (span) => { + const { apiKey } = options; + + const systemPrompt = buildHunkSystemPrompt(skill); + const priorFindings: PriorFindingsContext | undefined = options.priorReports?.length + ? { reports: options.priorReports } + : undefined; + const userPrompt = buildReportUserPrompt(skill, prContext, priorFindings); + + try { + const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options, skill.name); + + if (authError) { + throw new WardenAuthenticationError(authError); + } + + if (!resultMessage) { + return { findings: [], usage: emptyUsage(), failed: true }; + } + + const usage = extractUsage(resultMessage); + + const isError = resultMessage.is_error || resultMessage.subtype !== 'success'; + if (isError) { + return { findings: [], usage, failed: true }; + } + + // Report-scoped findings don't have a single filename context. + // Pass empty string — parseHunkOutput validates/fills location from the finding itself. + const parseResult = await parseHunkOutput(resultMessage, '', apiKey); + + span.setAttribute('finding.count', parseResult.findings.length); + + return { + findings: parseResult.findings, + usage, + failed: false, + auxiliaryUsage: parseResult.extractionUsage + ? [{ agent: 'extraction', usage: parseResult.extractionUsage }] + : undefined, + }; + } catch (error) { + if (error instanceof WardenAuthenticationError) throw error; + if (isAuthenticationError(error)) throw new WardenAuthenticationError(); + console.error(`Report analysis failed: ${error instanceof Error ? error.message : String(error)}`); + return { findings: [], usage: emptyUsage(), failed: true }; + } + }, + ); +} + /** * Generate a summary of findings. */ -export function generateSummary(skillName: string, findings: Finding[]): string { +export function generateSummary(skillName: string, findings: Finding[], skillDescription?: string): string { if (findings.length === 0) { return `${skillName}: No issues found`; } @@ -647,7 +717,11 @@ export function generateSummary(skillName: string, findings: Finding[]): string if (counts['low']) parts.push(`${counts['low']} low`); if (counts['info']) parts.push(`${counts['info']} info`); - return `${skillName}: Found ${findings.length} issue${findings.length === 1 ? '' : 's'} (${parts.join(', ')})`; + const stats = `Found ${findings.length} issue${findings.length === 1 ? '' : 's'} (${parts.join(', ')})`; + if (skillDescription) { + return `${skillDescription}\n\n${stats}`; + } + return `${skillName}: ${stats}`; } /** @@ -665,6 +739,25 @@ export async function runSkill( throw new SkillRunnerError('Pull request context required for skill execution'); } + // Report-scoped: single SDK call on prior findings, skip per-file loop + if (options.scope === 'report') { + const prContext: PRPromptContext = { + changedFiles: context.pullRequest.files.map((f) => f.filename), + title: context.pullRequest.title, + body: context.pullRequest.body, + }; + const result = await analyzeReport(skill, context.repoPath, options, prContext); + const uniqueFindings = deduplicateFindings(result.findings); + return { + skill: skill.name, + summary: generateSummary(skill.name, uniqueFindings, skill.description), + findings: uniqueFindings, + usage: result.usage, + durationMs: Date.now() - startTime, + ...(result.auxiliaryUsage && { auxiliaryUsage: aggregateAuxiliaryUsage(result.auxiliaryUsage) }), + }; + } + const { files: fileHunks, skippedFiles } = prepareFiles(context, { contextLines: options.contextLines, // Note: chunking config should come from the caller (e.g., from warden.toml defaults) @@ -819,8 +912,8 @@ export async function runSkill( const uniqueFindings = deduplicateFindings(allFindings); emitDedupMetrics(allFindings.length, uniqueFindings.length); - // Generate summary - const summary = generateSummary(skill.name, uniqueFindings); + // Generate summary, including skill description when findings exist + const summary = generateSummary(skill.name, uniqueFindings, skill.description); // Aggregate usage across all hunks const totalUsage = aggregateUsage(allUsage); diff --git a/src/sdk/prompt.test.ts b/src/sdk/prompt.test.ts new file mode 100644 index 00000000..af6452d5 --- /dev/null +++ b/src/sdk/prompt.test.ts @@ -0,0 +1,270 @@ +import { describe, it, expect } from 'vitest'; +import { buildHunkUserPrompt, buildReportUserPrompt, serializeAllPriorFindings, type PriorFindingsContext, type PRPromptContext } from './prompt.js'; +import type { SkillDefinition } from '../config/schema.js'; +import type { HunkWithContext } from '../diff/index.js'; +import type { SkillReport } from '../types/index.js'; + +/** Minimal skill stub. */ +const skill: SkillDefinition = { + name: 'test-skill', + description: 'Test skill', + prompt: 'Check for issues', +}; + +/** Minimal hunk stub. */ +function makeHunk(filename: string): HunkWithContext { + return { + filename, + hunk: { + oldStart: 1, + oldCount: 3, + newStart: 1, + newCount: 5, + content: '@@ -1,3 +1,5 @@\n line1\n+line2\n+line3\n line4\n line5', + lines: [' line1', '+line2', '+line3', ' line4', ' line5'], + }, + contextBefore: [], + contextAfter: [], + contextStartLine: 1, + language: 'typescript', + }; +} + +/** Minimal skill report stub. */ +function makeReport(skillName: string, findings: SkillReport['findings']): SkillReport { + return { + skill: skillName, + summary: `${skillName}: ${findings.length} findings`, + findings, + }; +} + +describe('buildHunkUserPrompt', () => { + it('returns unchanged output when no prior findings provided', () => { + const hunk = makeHunk('src/foo.ts'); + const result = buildHunkUserPrompt(skill, hunk); + expect(result).not.toContain('Prior Findings'); + expect(result).toContain('test-skill'); + }); + + it('includes Prior Findings section for matching file', () => { + const hunk = makeHunk('src/foo.ts'); + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('security-scan', [ + { + id: 'sec-1', + severity: 'high', + title: 'SQL Injection', + description: 'Unsanitized input in query', + location: { path: 'src/foo.ts', startLine: 10 }, + }, + ]), + ], + }; + + const result = buildHunkUserPrompt(skill, hunk, undefined, priorFindings); + expect(result).toContain('## Prior Findings'); + expect(result).toContain('security-scan'); + expect(result).toContain('[high] SQL Injection'); + expect(result).toContain('Unsanitized input in query'); + }); + + it('omits Prior Findings section when findings are for a different file', () => { + const hunk = makeHunk('src/foo.ts'); + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('security-scan', [ + { + id: 'sec-1', + severity: 'high', + title: 'SQL Injection', + description: 'Unsanitized input', + location: { path: 'src/bar.ts', startLine: 10 }, + }, + ]), + ], + }; + + const result = buildHunkUserPrompt(skill, hunk, undefined, priorFindings); + expect(result).not.toContain('Prior Findings'); + }); + + it('includes suggestedFix in serialized findings', () => { + const hunk = makeHunk('src/foo.ts'); + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('lint-check', [ + { + id: 'lint-1', + severity: 'medium', + title: 'Unused variable', + description: 'Variable x is declared but never used', + location: { path: 'src/foo.ts', startLine: 5 }, + suggestedFix: { + description: 'Remove the variable declaration', + diff: '- const x = 1;', + }, + }, + ]), + ], + }; + + const result = buildHunkUserPrompt(skill, hunk, undefined, priorFindings); + expect(result).toContain('Prior Findings'); + expect(result).toContain('Suggested fix: Remove the variable declaration'); + }); + + it('places Prior Findings between PR context and diff', () => { + const hunk = makeHunk('src/foo.ts'); + const prContext: PRPromptContext = { + changedFiles: ['src/foo.ts', 'src/bar.ts'], + title: 'Add feature', + }; + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('security-scan', [ + { + id: 'sec-1', + severity: 'high', + title: 'Issue found', + description: 'Details', + location: { path: 'src/foo.ts', startLine: 1 }, + }, + ]), + ], + }; + + const result = buildHunkUserPrompt(skill, hunk, prContext, priorFindings); + + const prContextIndex = result.indexOf('Pull Request Context'); + const priorFindingsIndex = result.indexOf('Prior Findings'); + const diffIndex = result.indexOf('## File:'); + + // Prior findings should appear after PR context and before the diff + expect(prContextIndex).toBeGreaterThan(-1); + expect(priorFindingsIndex).toBeGreaterThan(prContextIndex); + expect(diffIndex).toBeGreaterThan(priorFindingsIndex); + }); +}); + +describe('serializeAllPriorFindings', () => { + it('includes findings from all files', () => { + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('security-scan', [ + { + id: 'sec-1', + severity: 'high', + title: 'SQL Injection', + description: 'Unsanitized input', + location: { path: 'src/foo.ts', startLine: 10 }, + }, + { + id: 'sec-2', + severity: 'medium', + title: 'XSS Risk', + description: 'Unescaped output', + location: { path: 'src/bar.ts', startLine: 5 }, + }, + ]), + ], + }; + + const result = serializeAllPriorFindings(priorFindings); + expect(result).toContain('security-scan'); + expect(result).toContain('SQL Injection'); + expect(result).toContain('src/foo.ts'); + expect(result).toContain('XSS Risk'); + expect(result).toContain('src/bar.ts'); + }); + + it('returns undefined when no findings exist', () => { + const priorFindings: PriorFindingsContext = { + reports: [makeReport('empty-skill', [])], + }; + + const result = serializeAllPriorFindings(priorFindings); + expect(result).toBeUndefined(); + }); + + it('includes findings from multiple skills', () => { + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('security-scan', [ + { id: 'sec-1', severity: 'high', title: 'Issue A', description: 'Desc A', location: { path: 'a.ts', startLine: 1 } }, + ]), + makeReport('lint-check', [ + { id: 'lint-1', severity: 'low', title: 'Issue B', description: 'Desc B', location: { path: 'b.ts', startLine: 2 } }, + ]), + ], + }; + + const result = serializeAllPriorFindings(priorFindings); + expect(result).toContain('### security-scan'); + expect(result).toContain('### lint-check'); + expect(result).toContain('Issue A'); + expect(result).toContain('Issue B'); + }); +}); + +describe('buildReportUserPrompt', () => { + it('includes all prior findings (not filtered by file)', () => { + const priorFindings: PriorFindingsContext = { + reports: [ + makeReport('security-scan', [ + { id: 'sec-1', severity: 'high', title: 'SQL Injection', description: 'In foo', location: { path: 'src/foo.ts', startLine: 10 } }, + { id: 'sec-2', severity: 'medium', title: 'XSS', description: 'In bar', location: { path: 'src/bar.ts', startLine: 5 } }, + ]), + ], + }; + + const result = buildReportUserPrompt(skill, undefined, priorFindings); + expect(result).toContain('Prior Findings'); + expect(result).toContain('SQL Injection'); + expect(result).toContain('XSS'); + expect(result).toContain('src/foo.ts'); + expect(result).toContain('src/bar.ts'); + }); + + it('includes changed files list', () => { + const prContext: PRPromptContext = { + changedFiles: ['src/foo.ts', 'src/bar.ts', 'src/baz.ts'], + title: 'Add feature', + }; + + const result = buildReportUserPrompt(skill, prContext); + expect(result).toContain('## Changed Files'); + expect(result).toContain('src/foo.ts'); + expect(result).toContain('src/bar.ts'); + expect(result).toContain('src/baz.ts'); + }); + + it('produces valid prompt with empty prior findings', () => { + const priorFindings: PriorFindingsContext = { + reports: [makeReport('empty-skill', [])], + }; + + const result = buildReportUserPrompt(skill, undefined, priorFindings); + expect(result).toContain('test-skill'); + expect(result).not.toContain('Prior Findings'); + expect(result).toContain('IMPORTANT'); + }); + + it('includes PR context when provided', () => { + const prContext: PRPromptContext = { + changedFiles: ['src/foo.ts'], + title: 'Fix authentication bug', + body: 'This PR fixes a critical auth bypass', + }; + + const result = buildReportUserPrompt(skill, prContext); + expect(result).toContain('Fix authentication bug'); + expect(result).toContain('critical auth bypass'); + }); + + it('references skill name in opening instruction', () => { + const result = buildReportUserPrompt(skill); + expect(result).toContain(`"${skill.name}"`); + }); +}); diff --git a/src/sdk/prompt.ts b/src/sdk/prompt.ts index 261dd897..eaad8940 100644 --- a/src/sdk/prompt.ts +++ b/src/sdk/prompt.ts @@ -1,6 +1,7 @@ import { existsSync } from 'node:fs'; import { join } from 'node:path'; import type { SkillDefinition } from '../config/schema.js'; +import type { SkillReport } from '../types/index.js'; import { formatHunkForAnalysis, type HunkWithContext } from '../diff/index.js'; /** @@ -19,6 +20,101 @@ export interface PRPromptContext { body?: string | null; } +/** + * Prior findings from earlier-phase skills, injected into prompts + * for second-pass skills. + */ +export interface PriorFindingsContext { + /** Findings from prior-phase skills, grouped by skill */ + reports: SkillReport[]; +} + +/** + * Serialize a list of findings into markdown lines. + * Shared by both file-scoped and report-scoped serializers. + */ +function serializeFindingsList(findings: SkillReport['findings']): string[] { + const lines: string[] = []; + for (const finding of findings) { + const loc = finding.location; + const locStr = loc + ? `${loc.path}:${loc.startLine}${loc.endLine ? `-${loc.endLine}` : ''}` + : 'general'; + lines.push(`- **[${finding.severity}] ${finding.title}** (${locStr})`); + lines.push(` ${finding.description}`); + if (finding.suggestedFix) { + lines.push(` Suggested fix: ${finding.suggestedFix.description}`); + } + } + return lines; +} + +/** + * Serialize prior findings for a specific file into a prompt section. + * Only includes findings whose location matches the given filename. + * Returns undefined if no relevant findings exist. + */ +function serializePriorFindings( + priorFindings: PriorFindingsContext, + filename: string +): string | undefined { + const lines: string[] = []; + + for (const report of priorFindings.reports) { + const relevant = report.findings.filter( + (f) => f.location?.path === filename + ); + if (relevant.length === 0) continue; + + lines.push(`### ${report.skill}`); + lines.push(...serializeFindingsList(relevant)); + } + + if (lines.length === 0) return undefined; + + return `## Prior Findings\nThe following findings were reported by earlier-phase skills for this file:\n\n${lines.join('\n')}`; +} + +/** + * Serialize all prior findings (across all files) into a prompt section. + * Used by report-scoped skills that analyze findings as a whole. + * Returns undefined if no findings exist. + */ +export function serializeAllPriorFindings( + priorFindings: PriorFindingsContext +): string | undefined { + const lines: string[] = []; + + for (const report of priorFindings.reports) { + if (report.findings.length === 0) continue; + + lines.push(`### ${report.skill}`); + lines.push(...serializeFindingsList(report.findings)); + } + + if (lines.length === 0) return undefined; + + return `## Prior Findings\nThe following findings were reported by earlier-phase skills:\n\n${lines.join('\n')}`; +} + +/** + * Format PR context (title + truncated body) as a prompt section. + * Returns undefined if no title is available. + */ +function formatPRContext(prContext?: PRPromptContext): string | undefined { + if (!prContext?.title) return undefined; + + let section = `## Pull Request Context\n**Title:** ${prContext.title}`; + if (prContext.body) { + const maxBodyLength = 1000; + const body = prContext.body.length > maxBodyLength + ? prContext.body.slice(0, maxBodyLength) + '...' + : prContext.body; + section += `\n\n**Description:**\n${body}`; + } + return section; +} + /** * Builds the system prompt for hunk-based analysis. * @@ -108,24 +204,22 @@ You can read files from ${dirList} subdirectories using the Read tool with the f export function buildHunkUserPrompt( skill: SkillDefinition, hunkCtx: HunkWithContext, - prContext?: PRPromptContext + prContext?: PRPromptContext, + priorFindings?: PriorFindingsContext ): string { const sections: string[] = []; sections.push(`Analyze this code change according to the "${skill.name}" skill criteria.`); - // Include PR title and description for context on intent - if (prContext?.title) { - let prSection = `## Pull Request Context\n**Title:** ${prContext.title}`; - if (prContext.body) { - // Truncate very long PR descriptions to avoid bloating prompts - const maxBodyLength = 1000; - const body = prContext.body.length > maxBodyLength - ? prContext.body.slice(0, maxBodyLength) + '...' - : prContext.body; - prSection += `\n\n**Description:**\n${body}`; + const prSection = formatPRContext(prContext); + if (prSection) sections.push(prSection); + + // Include prior findings from earlier-phase skills (between PR context and diff) + if (priorFindings) { + const priorSection = serializePriorFindings(priorFindings, hunkCtx.filename); + if (priorSection) { + sections.push(priorSection); } - sections.push(prSection); } // Include list of other files being changed in the PR for context @@ -144,3 +238,39 @@ ${otherFiles.map((f) => `- ${f}`).join('\n')}`); return sections.join('\n\n'); } + +/** + * Builds the user prompt for a report-scoped skill. + * Report-scoped skills analyze all prior findings as a whole (single SDK call, no diff). + */ +export function buildReportUserPrompt( + skill: SkillDefinition, + prContext?: PRPromptContext, + priorFindings?: PriorFindingsContext +): string { + const sections: string[] = []; + + sections.push(`Analyze the findings from prior skills according to the "${skill.name}" skill criteria.`); + + const prSection = formatPRContext(prContext); + if (prSection) sections.push(prSection); + + // Include changed files list for context + if (prContext?.changedFiles && prContext.changedFiles.length > 0) { + sections.push(`## Changed Files\nThe following files were changed in this PR:\n${prContext.changedFiles.map((f) => `- ${f}`).join('\n')}`); + } + + // Include all prior findings + if (priorFindings) { + const priorSection = serializeAllPriorFindings(priorFindings); + if (priorSection) { + sections.push(priorSection); + } + } + + sections.push( + `IMPORTANT: Only report findings that are explicitly covered by the skill instructions. Do not report general code quality issues, bugs, or improvements unless the skill specifically asks for them. Return an empty findings array if no issues match the skill's criteria.` + ); + + return sections.join('\n\n'); +} diff --git a/src/sdk/runner.ts b/src/sdk/runner.ts index 9b40e1e6..7875a0e6 100644 --- a/src/sdk/runner.ts +++ b/src/sdk/runner.ts @@ -25,8 +25,8 @@ export { aggregateUsage, aggregateAuxiliaryUsage, mergeAuxiliaryUsage, estimateT export { apiUsageToStats } from './pricing.js'; // Re-export prompt building (with legacy alias) -export { buildHunkSystemPrompt, buildHunkUserPrompt } from './prompt.js'; -export type { PRPromptContext } from './prompt.js'; +export { buildHunkSystemPrompt, buildHunkUserPrompt, buildReportUserPrompt, serializeAllPriorFindings } from './prompt.js'; +export type { PRPromptContext, PriorFindingsContext } from './prompt.js'; // Legacy export for backwards compatibility export { buildHunkSystemPrompt as buildSystemPrompt } from './prompt.js'; @@ -46,7 +46,7 @@ export type { ExtractFindingsResult } from './extract.js'; export { prepareFiles } from './prepare.js'; // Re-export analysis functions -export { analyzeFile, runSkill, generateSummary } from './analyze.js'; +export { analyzeFile, analyzeReport, runSkill, generateSummary } from './analyze.js'; // Re-export types export type { diff --git a/src/sdk/types.ts b/src/sdk/types.ts index dd77e162..c2b24180 100644 --- a/src/sdk/types.ts +++ b/src/sdk/types.ts @@ -1,4 +1,4 @@ -import type { Finding, UsageStats, SkippedFile, RetryConfig } from '../types/index.js'; +import type { Finding, UsageStats, SkippedFile, RetryConfig, SkillReport } from '../types/index.js'; import type { HunkWithContext } from '../diff/index.js'; import type { ChunkingConfig } from '../config/schema.js'; @@ -75,6 +75,10 @@ export interface SkillRunnerOptions { retry?: RetryConfig; /** Enable verbose logging for retry attempts */ verbose?: boolean; + /** Prior-phase skill reports, injected into prompts for second-pass skills */ + priorReports?: SkillReport[]; + /** Execution scope: 'report' runs a single analysis on prior findings instead of per-hunk */ + scope?: 'diff' | 'report'; } /** diff --git a/warden.toml b/warden.toml index de66b1e3..aa44db98 100644 --- a/warden.toml +++ b/warden.toml @@ -17,6 +17,15 @@ ignorePaths = ["src/**/*.test.ts"] type = "pull_request" actions = ["opened", "synchronize", "reopened"] +[[skills]] +name = "warden-lint-judge" +remote = "getsentry/skills" +scope = "report" + +[[skills.triggers]] +type = "pull_request" +actions = ["opened", "synchronize", "reopened"] + [[skills]] name = "code-simplifier" remote = "getsentry/sentry-skills"