diff --git a/src/cli/args.test.ts b/src/cli/args.test.ts index 4534c36..870e6d0 100644 --- a/src/cli/args.test.ts +++ b/src/cli/args.test.ts @@ -344,6 +344,55 @@ describe('parseCliArgs', () => { expect(result.command).toBe('sync'); expect(result.options.remote).toBe('getsentry/skills'); }); + + it('parses replay command', () => { + const result = parseCliArgs(['replay']); + expect(result.command).toBe('replay'); + expect(result.replayOptions).toBeDefined(); + expect(result.replayOptions!.files).toEqual([]); + }); + + it('parses replay command with single file', () => { + const result = parseCliArgs(['replay', 'run.jsonl']); + expect(result.command).toBe('replay'); + expect(result.replayOptions!.files).toEqual(['run.jsonl']); + }); + + it('parses replay command with multiple files', () => { + const result = parseCliArgs(['replay', 'run1.jsonl', 'run2.jsonl', 'run3.jsonl']); + expect(result.command).toBe('replay'); + expect(result.replayOptions!.files).toEqual(['run1.jsonl', 'run2.jsonl', 'run3.jsonl']); + }); + + it('parses replay command with --json flag', () => { + const result = parseCliArgs(['replay', 'run.jsonl', '--json']); + expect(result.command).toBe('replay'); + expect(result.options.json).toBe(true); + }); + + it('parses replay command with --report-on option', () => { + const result = parseCliArgs(['replay', 'run.jsonl', '--report-on', 'high']); + expect(result.command).toBe('replay'); + expect(result.options.reportOn).toBe('high'); + }); + + it('parses replay command with --min-confidence option', () => { + const result = parseCliArgs(['replay', 'run.jsonl', '--min-confidence', 'high']); + expect(result.command).toBe('replay'); + expect(result.options.minConfidence).toBe('high'); + }); + + it('parses replay command with verbosity flags', () => { + const result = parseCliArgs(['replay', 'run.jsonl', '-v']); + expect(result.command).toBe('replay'); + expect(result.options.verbose).toBe(1); + }); + + it('parses replay command with --quiet flag', () => { + 
const result = parseCliArgs(['replay', 'run.jsonl', '--quiet']); + expect(result.command).toBe('replay'); + expect(result.options.quiet).toBe(true); + }); }); describe('CLIOptionsSchema', () => { diff --git a/src/cli/args.ts b/src/cli/args.ts index 83d4b06..8ee7059 100644 --- a/src/cli/args.ts +++ b/src/cli/args.ts @@ -55,10 +55,15 @@ export interface SetupAppOptions { open: boolean; } +export interface ReplayOptions { + files: string[]; +} + export interface ParsedArgs { - command: 'run' | 'help' | 'init' | 'add' | 'version' | 'setup-app' | 'sync'; + command: 'run' | 'help' | 'init' | 'add' | 'version' | 'setup-app' | 'sync' | 'replay'; options: CLIOptions; setupAppOptions?: SetupAppOptions; + replayOptions?: ReplayOptions; } export function showVersion(): void { @@ -75,6 +80,7 @@ Commands: add [skill] Add a skill trigger to warden.toml sync [remote] Update cached remote skills to latest setup-app Create a GitHub App for Warden via manifest flow + replay Replay results from JSONL log files (default) Run analysis on targets or using warden.toml triggers Targets: @@ -313,7 +319,7 @@ export function parseCliArgs(argv: string[] = process.argv.slice(2)): ParsedArgs } // Filter out known commands from positionals - const commands = ['run', 'help', 'init', 'add', 'version', 'setup-app', 'sync']; + const commands = ['run', 'help', 'init', 'add', 'version', 'setup-app', 'sync', 'replay']; const targets = positionals.filter((p) => !commands.includes(p)); // Handle explicit help command @@ -397,6 +403,30 @@ export function parseCliArgs(argv: string[] = process.argv.slice(2)): ParsedArgs }; } + // Handle replay command + if (positionals.includes('replay')) { + // All positionals after 'replay' are log files + const replayIndex = positionals.indexOf('replay'); + const logFiles = positionals.slice(replayIndex + 1); + + return { + command: 'replay', + options: CLIOptionsSchema.parse({ + json: values.json, + reportOn: values['report-on'] as SeverityThreshold | undefined, + 
minConfidence: values['min-confidence'] as ConfidenceThreshold | undefined, + quiet: values.quiet, + verbose: verboseCount, + debug: values.debug, + log: values.log, + color: resolveColorOption(values), + }), + replayOptions: { + files: logFiles, + }, + }; + } + const rawOptions = { targets: targets.length > 0 ? targets : undefined, skill: values.skill, diff --git a/src/cli/main.ts b/src/cli/main.ts index f667d94..1c1c135 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -1,4 +1,4 @@ -import { existsSync } from 'node:fs'; +import { existsSync, readFileSync } from 'node:fs'; import { dirname, join, resolve } from 'node:path'; import { config as dotenvConfig } from 'dotenv'; import { Sentry, flushSentry, setGlobalAttributes, emitRunMetric, getTraceId } from '../sentry.js'; @@ -9,7 +9,7 @@ import { matchTrigger, filterContextByPaths, shouldFail, countFindingsAtOrAbove import type { SkillReport, ConfidenceThreshold } from '../types/index.js'; import { filterFindings } from '../types/index.js'; import { DEFAULT_CONCURRENCY, getAnthropicApiKey } from '../utils/index.js'; -import { parseCliArgs, showHelp, showVersion, classifyTargets, type CLIOptions } from './args.js'; +import { parseCliArgs, showHelp, showVersion, classifyTargets, type CLIOptions, type ReplayOptions } from './args.js'; import { buildLocalEventContext, buildFileEventContext } from './context.js'; import { getRepoRoot, refExists, hasUncommittedChanges } from './git.js'; import { renderTerminalReport, filterReports } from './terminal.js'; @@ -25,6 +25,8 @@ import { renderJsonlString, getRepoLogPath, generateRunId, + parseJsonlReports, + type JsonlRunMetadata, type SkillTaskOptions, } from './output/index.js'; import { cleanupLogs } from './log-cleanup.js'; @@ -718,6 +720,84 @@ async function runDirectSkillMode(options: CLIOptions, reporter: Reporter): Prom return runSkills(context, options, reporter); } +/** + * Run in replay mode: render results from JSONL log files. 
+ */ +async function runReplay(replayOptions: ReplayOptions, options: CLIOptions, reporter: Reporter): Promise { + const { files } = replayOptions; + + if (files.length === 0) { + reporter.error('No log files specified'); + reporter.tip('Usage: warden replay [file2.jsonl ...]'); + return 1; + } + + // Validate all files exist before processing + const missingFiles: string[] = []; + for (const file of files) { + if (!existsSync(file)) { + missingFiles.push(file); + } + } + + if (missingFiles.length > 0) { + reporter.error(`Log ${pluralize(missingFiles.length, 'file')} not found: ${missingFiles.join(', ')}`); + return 1; + } + + // Parse and merge reports from all files + const allReports: SkillReport[] = []; + let totalDurationMs = 0; + let lastRunMetadata: JsonlRunMetadata | undefined; + + for (const file of files) { + try { + const content = readFileSync(file, 'utf-8'); + const parsed = parseJsonlReports(content); + allReports.push(...parsed.reports); + totalDurationMs = Math.max(totalDurationMs, parsed.totalDurationMs); + + if (parsed.runMetadata) { + lastRunMetadata = parsed.runMetadata; + reporter.debug(`Loaded ${parsed.reports.length} ${pluralize(parsed.reports.length, 'skill')} from ${file}`); + reporter.debug(` Run ID: ${parsed.runMetadata.runId}`); + reporter.debug(` Timestamp: ${parsed.runMetadata.timestamp}`); + } + } catch (err) { + reporter.error(`Failed to parse ${file}: ${err instanceof Error ? err.message : String(err)}`); + return 1; + } + } + + if (allReports.length === 0) { + reporter.warning('No skill reports found in log files'); + return 0; + } + + // Apply filtering + const filteredReports = filterReports(allReports, options.reportOn, options.minConfidence ?? 'medium'); + + // Output results + reporter.blank(); + if (options.json) { + // Re-render as JSONL for piping, preserving original run metadata + const jsonlContent = renderJsonlString(filteredReports, totalDurationMs, lastRunMetadata ? 
{ + runId: lastRunMetadata.runId, + traceId: lastRunMetadata.traceId, + timestamp: new Date(lastRunMetadata.timestamp), + } : undefined); + process.stdout.write(jsonlContent); + } else { + console.log(renderTerminalReport(filteredReports, reporter.mode, { verbosity: reporter.verbosity })); + } + + // Show summary + reporter.blank(); + reporter.renderSummary(filteredReports, totalDurationMs); + + return 0; +} + async function runCommand(options: CLIOptions, reporter: Reporter): Promise { const targets = options.targets ?? []; @@ -759,7 +839,7 @@ async function runCommand(options: CLIOptions, reporter: Reporter): Promise { - const { command, options, setupAppOptions } = parseCliArgs(); + const { command, options, setupAppOptions, replayOptions } = parseCliArgs(); if (command === 'help') { showHelp(); @@ -813,6 +893,12 @@ export async function main(): Promise { return runSetupApp(setupAppOptions, reporter); case 'sync': return runSync(options, reporter); + case 'replay': + if (!replayOptions) { + reporter.error('Missing replay options'); + process.exit(1); + } + return runReplay(replayOptions, options, reporter); default: return runCommand(options, reporter); } diff --git a/src/cli/output/index.ts b/src/cli/output/index.ts index d3a6407..13627ac 100644 --- a/src/cli/output/index.ts +++ b/src/cli/output/index.ts @@ -43,7 +43,9 @@ export { renderJsonlString, getRepoLogPath, generateRunId, + parseJsonlReports, type JsonlRecord, type JsonlRunMetadata, + type ParsedJsonlLog, } from './jsonl.js'; export { ICON_CHECK, ICON_SKIPPED, SPINNER_FRAMES } from './icons.js'; diff --git a/src/cli/output/jsonl.test.ts b/src/cli/output/jsonl.test.ts index d37e953..5024fca 100644 --- a/src/cli/output/jsonl.test.ts +++ b/src/cli/output/jsonl.test.ts @@ -8,6 +8,8 @@ import { generateRunId, shortRunId, readJsonlLog, + parseJsonlReports, + renderJsonlString, type JsonlRecord, } from './jsonl.js'; import type { SkillReport } from '../../types/index.js'; @@ -551,3 +553,122 @@ 
describe('repo-local logging integration', () => { expect(content2.run.runId).toBe(runId2); }); }); + +describe('parseJsonlReports', () => { + it('reconstructs SkillReport from JSONL content', () => { + // Sample JSONL content that matches what would be written by renderJsonlString + const jsonlContent = `{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":2000,"cwd":"/test","runId":"test-123"},"skill":"security-review","summary":"Found 2 issues","findings":[{"id":"sec-001","severity":"high","title":"SQL Injection","description":"User input passed directly to query"},{"id":"sec-002","severity":"medium","title":"XSS Risk","description":"Unescaped output"}],"durationMs":1234,"usage":{"inputTokens":1000,"outputTokens":500,"costUSD":0.01}} +{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":2000,"cwd":"/test","runId":"test-123"},"type":"summary","totalFindings":2,"bySeverity":{"critical":0,"high":1,"medium":1,"low":0,"info":0},"usage":{"inputTokens":1000,"outputTokens":500,"costUSD":0.01}} +`; + + const result = parseJsonlReports(jsonlContent); + + expect(result.reports.length).toBe(1); + expect(result.reports[0]!.skill).toBe('security-review'); + expect(result.reports[0]!.findings.length).toBe(2); + expect(result.reports[0]!.findings[0]!.id).toBe('sec-001'); + expect(result.reports[0]!.durationMs).toBe(1234); + expect(result.reports[0]!.usage?.inputTokens).toBe(1000); + expect(result.totalDurationMs).toBe(2000); + expect(result.runMetadata?.runId).toBe('test-123'); + }); + + it('handles multiple skill records', () => { + const jsonlContent = `{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":3000,"cwd":"/test","runId":"multi-123"},"skill":"skill-1","summary":"Done","findings":[],"durationMs":1000} +{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":3000,"cwd":"/test","runId":"multi-123"},"skill":"skill-2","summary":"Issues found","findings":[{"id":"a","severity":"low","title":"A","description":"A"}],"durationMs":2000} 
+{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":3000,"cwd":"/test","runId":"multi-123"},"type":"summary","totalFindings":1,"bySeverity":{"critical":0,"high":0,"medium":0,"low":1,"info":0}} +`; + + const result = parseJsonlReports(jsonlContent); + + expect(result.reports.length).toBe(2); + expect(result.reports[0]!.skill).toBe('skill-1'); + expect(result.reports[1]!.skill).toBe('skill-2'); + expect(result.reports[1]!.findings.length).toBe(1); + expect(result.totalDurationMs).toBe(3000); + }); + + it('handles empty logs (summary only)', () => { + const jsonlContent = `{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":100,"cwd":"/test","runId":"empty-123"},"type":"summary","totalFindings":0,"bySeverity":{"critical":0,"high":0,"medium":0,"low":0,"info":0}} +`; + + const result = parseJsonlReports(jsonlContent); + + expect(result.reports.length).toBe(0); + expect(result.totalDurationMs).toBe(100); + expect(result.runMetadata?.runId).toBe('empty-123'); + }); + + it('skips invalid lines gracefully', () => { + const jsonlContent = `invalid json here +{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":1000,"cwd":"/test","runId":"partial-123"},"skill":"valid-skill","summary":"OK","findings":[]} +another bad line +{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":1000,"cwd":"/test","runId":"partial-123"},"type":"summary","totalFindings":0,"bySeverity":{"critical":0,"high":0,"medium":0,"low":0,"info":0}} +`; + + const result = parseJsonlReports(jsonlContent); + + expect(result.reports.length).toBe(1); + expect(result.reports[0]!.skill).toBe('valid-skill'); + }); + + it('reconstructs files array from JSONL', () => { + const jsonlContent = `{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":2000,"cwd":"/test","runId":"files-123"},"skill":"review","summary":"Done","findings":[],"files":[{"filename":"src/api.ts","findings":1,"durationMs":1200},{"filename":"src/utils.ts","findings":0,"durationMs":800}]} 
+{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":2000,"cwd":"/test","runId":"files-123"},"type":"summary","totalFindings":1,"bySeverity":{"critical":0,"high":1,"medium":0,"low":0,"info":0}} +`; + + const result = parseJsonlReports(jsonlContent); + + expect(result.reports[0]!.files).toBeDefined(); + expect(result.reports[0]!.files!.length).toBe(2); + expect(result.reports[0]!.files![0]!.filename).toBe('src/api.ts'); + expect(result.reports[0]!.files![0]!.findingCount).toBe(1); + expect(result.reports[0]!.files![1]!.filename).toBe('src/utils.ts'); + }); + + it('handles skippedFiles in reports', () => { + const jsonlContent = `{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":1000,"cwd":"/test","runId":"skip-123"},"skill":"review","summary":"Done","findings":[],"skippedFiles":[{"filename":"dist/bundle.js","reason":"builtin"}]} +{"run":{"timestamp":"2026-02-18T14:32:15.123Z","durationMs":1000,"cwd":"/test","runId":"skip-123"},"type":"summary","totalFindings":0,"bySeverity":{"critical":0,"high":0,"medium":0,"low":0,"info":0},"totalSkippedFiles":1} +`; + + const result = parseJsonlReports(jsonlContent); + + expect(result.reports[0]!.skippedFiles).toBeDefined(); + expect(result.reports[0]!.skippedFiles!.length).toBe(1); + expect(result.reports[0]!.skippedFiles![0]!.filename).toBe('dist/bundle.js'); + expect(result.reports[0]!.skippedFiles![0]!.reason).toBe('builtin'); + }); + + it('round-trips through write and parse', () => { + const original: SkillReport[] = [ + { + skill: 'test-skill', + summary: 'Found issues', + findings: [ + { id: 'test-1', severity: 'high', title: 'Test Finding', description: 'Test description' }, + ], + durationMs: 1500, + usage: { inputTokens: 500, outputTokens: 250, costUSD: 0.005 }, + files: [ + { filename: 'src/test.ts', findingCount: 1, durationMs: 1500 }, + ], + }, + ]; + + // Write to JSONL string + const jsonlContent = renderJsonlString(original, 2000, { runId: 'round-trip-123' }); + + // Parse back + const result = 
parseJsonlReports(jsonlContent); + + expect(result.reports.length).toBe(1); + expect(result.reports[0]!.skill).toBe('test-skill'); + expect(result.reports[0]!.summary).toBe('Found issues'); + expect(result.reports[0]!.findings.length).toBe(1); + expect(result.reports[0]!.findings[0]!.id).toBe('test-1'); + expect(result.reports[0]!.durationMs).toBe(1500); + expect(result.reports[0]!.usage?.inputTokens).toBe(500); + expect(result.reports[0]!.files?.length).toBe(1); + expect(result.reports[0]!.files![0]!.findingCount).toBe(1); + }); +}); diff --git a/src/cli/output/jsonl.ts b/src/cli/output/jsonl.ts index 50c2d22..e8eb0c8 100644 --- a/src/cli/output/jsonl.ts +++ b/src/cli/output/jsonl.ts @@ -234,3 +234,66 @@ export function writeJsonlContent(outputPath: string, content: string): void { export function readJsonlLog(logPath: string): string { return readFileSync(logPath, 'utf-8'); } + +/** + * Parse JSONL content and reconstruct SkillReport objects. + * Returns an object with the reports array, run metadata from the summary, + * and total duration. 
/**
 * Outcome of {@link parseJsonlReports}: the skill reports recovered from a
 * JSONL log plus the run-level information found alongside them.
 */
export interface ParsedJsonlLog {
  /** One entry per line that validated as a skill record, in input order. */
  reports: SkillReport[];
  /**
   * `run` object of the last summary record when the log has one, otherwise
   * of the first valid skill record; undefined when no line validated.
   */
  runMetadata?: JsonlRunMetadata;
  /** `run.durationMs` of the record that supplied `runMetadata`, or 0 if none did. */
  totalDurationMs: number;
}

/**
 * Rebuild skill reports from the text of a JSONL log.
 *
 * Each non-blank line is JSON-decoded and validated. Lines whose `type` is
 * 'summary' are not turned into reports; they only supply run metadata and
 * the total duration. Every other line is validated as a skill record and
 * mapped to a SkillReport (the per-file `findings` count is exposed as
 * `findingCount`).
 *
 * Lines that fail to decode or validate are ignored, so a damaged log still
 * yields whatever records remain readable.
 *
 * @param content - Raw JSONL text, one record per line.
 * @returns The reconstructed reports, the run metadata when some record
 *   provided it, and the total duration in milliseconds (0 when unknown).
 */
export function parseJsonlReports(content: string): ParsedJsonlLog {
  const reports: SkillReport[] = [];
  let runMetadata: JsonlRunMetadata | undefined;
  let totalDurationMs = 0;

  const nonBlankLines = content
    .trim()
    .split('\n')
    .filter((text) => text.trim());

  for (const text of nonBlankLines) {
    try {
      const raw = JSON.parse(text);

      if (raw.type === 'summary') {
        // Summary lines carry no report; they are authoritative for run info.
        const { run } = JsonlSummaryRecordSchema.parse(raw);
        runMetadata = run;
        totalDurationMs = run.durationMs;
        continue;
      }

      const record = JsonlRecordSchema.parse(raw);

      // The log stores a per-file `findings` count; SkillReport calls it `findingCount`.
      const files = record.files?.map((entry) => ({
        filename: entry.filename,
        findingCount: entry.findings,
        durationMs: entry.durationMs,
        usage: entry.usage,
      }));

      reports.push({
        skill: record.skill,
        summary: record.summary,
        findings: record.findings,
        metadata: record.metadata,
        durationMs: record.durationMs,
        usage: record.usage,
        auxiliaryUsage: record.auxiliaryUsage,
        skippedFiles: record.skippedFiles,
        failedHunks: record.failedHunks,
        failedExtractions: record.failedExtractions,
        files,
      });

      // Fall back to the first skill record's run info until a summary shows up.
      if (!runMetadata) {
        runMetadata = record.run;
        totalDurationMs = record.run.durationMs;
      }
    } catch {
      // Undecodable or schema-invalid line: drop it and keep going.
    }
  }

  return { reports, runMetadata, totalDurationMs };
}