From fc1d6c201fe0fce54e686f2d6e1a583b42cbeb50 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 10:53:24 -0300 Subject: [PATCH 1/5] sqlite: add SQLite persistence for projects, runs, and mutants Introduce --sqlite[=<path>] flag (default: mutation.db) on both the mutate and analyze subcommands, backed by a new db module. mutate: - Opens/creates the database, applies schema (projects, runs, mutants with indexes and CHECK constraints), and seeds the Bitcoin Core project row via INSERT OR IGNORE. - Creates a runs row capturing commit_hash, tool_version, and optional pr_number before generation begins. - mutate_file now returns Vec<MutantData> (unified diff, SHA-256 patch_hash, file_path, operator) alongside the existing file writes. - Mutants are batch-inserted in chunks of 100 within transactions; duplicate (run_id, patch_hash) pairs are silently ignored. analyze: - New --sqlite + --run_id flags trigger DB-based analysis. - Reads all mutants for the run, applies each diff via `git apply` (temp file), runs the supplied --command, updates status to killed/survived/error, then restores the file with git restore. - --command is required in DB mode; --run_id without --sqlite is rejected with a clear error. Other: - get_commit_hash() added to git_changes (git rev-parse HEAD). - MutationError gains a Rusqlite(#[from] rusqlite::Error) variant. - rusqlite 0.32 (bundled) and sha2 0.10 added as dependencies. - Running without --sqlite preserves existing behaviour exactly. 
Co-Authored-By: Claude Sonnet 4.6 --- Cargo.toml | 2 + src/analyze.rs | 121 ++++++++++++++++++++++++++++ src/db.rs | 192 +++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 3 + src/git_changes.rs | 5 ++ src/lib.rs | 20 ++--- src/main.rs | 26 +++++- src/mutation.rs | 148 +++++++++++++++++++++++----------- 8 files changed, 460 insertions(+), 57 deletions(-) create mode 100644 src/db.rs diff --git a/Cargo.toml b/Cargo.toml index b6c606e..9393c89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,8 @@ chrono = { version = "0.4", features = ["serde"] } futures = "0.3" walkdir = "2.4" rayon = "1.8" +rusqlite = { version = "0.32", features = ["bundled"] } +sha2 = "0.10" tempfile = "3.8" [dev-dependencies] diff --git a/src/analyze.rs b/src/analyze.rs index c3ed569..07da547 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -1,8 +1,10 @@ +use crate::db::Database; use crate::error::{MutationError, Result}; use crate::report::generate_report; use std::fs; use std::path::{Path, PathBuf}; use std::time::Duration; +use tempfile::NamedTempFile; use tokio::process::Command as TokioCommand; use tokio::time::timeout; use walkdir::WalkDir; @@ -13,7 +15,23 @@ pub async fn run_analysis( jobs: u32, timeout_secs: u64, survival_threshold: f64, + sqlite_path: Option, + run_id: Option, ) -> Result<()> { + // DB-based analysis mode: read mutants from DB and test them. + if let (Some(ref path), Some(rid)) = (sqlite_path.as_ref(), run_id) { + let command = command.ok_or_else(|| { + MutationError::InvalidInput( + "--command is required when using --sqlite with --run_id".to_string(), + ) + })?; + let db = Database::open(path)?; + db.ensure_schema()?; + db.seed_projects()?; + return run_db_analysis(&db, rid, &command, timeout_secs).await; + } + + // Folder-based analysis mode (existing behaviour). 
let folders = if let Some(folder_path) = folder { vec![folder_path] } else { @@ -35,6 +53,109 @@ pub async fn run_analysis( Ok(()) } +/// Test all pending mutants in `run_id` from the database. +async fn run_db_analysis( + db: &Database, + run_id: i64, + command: &str, + timeout_secs: u64, +) -> Result<()> { + let mutants = db.get_mutants_for_run(run_id)?; + let total = mutants.len(); + + println!("* {} MUTANTS in run_id={} *", total, run_id); + + if total == 0 { + return Err(MutationError::InvalidInput(format!( + "No mutants found for run_id={}", + run_id + ))); + } + + let mut num_killed: u64 = 0; + let mut num_survived: u64 = 0; + + for (i, mutant) in mutants.iter().enumerate() { + println!("[{}/{}] Analyzing mutant id={}", i + 1, total, mutant.id); + + // Update status to 'running' and record the command. + db.update_mutant_status(mutant.id, "running", command)?; + + // Write the patch to a temp file and apply it with `git apply`. + let apply_result = apply_diff(&mutant.diff).await; + if let Err(ref e) = apply_result { + eprintln!(" Failed to apply diff for mutant {}: {}", mutant.id, e); + db.update_mutant_status(mutant.id, "error", command)?; + continue; + } + + // Determine the file path to restore later. + let file_path = mutant.file_path.as_deref().unwrap_or(""); + + // Run the test command. + let killed = !run_command(command, timeout_secs).await?; + + let new_status = if killed { + println!(" KILLED ✅"); + num_killed += 1; + "killed" + } else { + println!(" NOT KILLED ❌"); + num_survived += 1; + "survived" + }; + + db.update_mutant_status(mutant.id, new_status, command)?; + + // Restore the modified file. 
+ if !file_path.is_empty() { + restore_file(file_path).await?; + } + } + + let score = if total > 0 { + num_killed as f64 / total as f64 + } else { + 0.0 + }; + println!( + "\nMUTATION SCORE: {:.2}% ({} killed / {} total)", + score * 100.0, + num_killed, + total + ); + println!("Survived: {}", num_survived); + + Ok(()) +} + +/// Apply a unified diff patch using `git apply`. +async fn apply_diff(diff: &str) -> Result<()> { + use std::io::Write; + + let mut tmp = NamedTempFile::new()?; + tmp.write_all(diff.as_bytes())?; + tmp.flush()?; + + let tmp_path = tmp.path().to_path_buf(); + // Keep `tmp` alive until after the command runs. + let output = TokioCommand::new("git") + .args(["apply", "--whitespace=nowarn", tmp_path.to_str().unwrap()]) + .output() + .await + .map_err(|e| MutationError::Git(format!("git apply failed: {}", e)))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(MutationError::Git(format!( + "git apply error: {}", + stderr.trim() + ))); + } + + Ok(()) +} + fn find_mutation_folders() -> Result> { let mut folders = Vec::new(); diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..04accf9 --- /dev/null +++ b/src/db.rs @@ -0,0 +1,192 @@ +use crate::error::Result; +use rusqlite::{params, Connection}; +use sha2::{Digest, Sha256}; +use std::path::Path; + +const SCHEMA: &str = " +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS projects ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + repository_url TEXT, + UNIQUE(name), + UNIQUE(repository_url) +); + +CREATE TABLE IF NOT EXISTS runs ( + id INTEGER PRIMARY KEY, + project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + commit_hash TEXT NOT NULL, + pr_number INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tool_version TEXT +); + +CREATE INDEX IF NOT EXISTS idx_runs_project_created ON runs(project_id, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_runs_commit ON runs(commit_hash); + +CREATE TABLE IF 
NOT EXISTS mutants ( + id INTEGER PRIMARY KEY, + run_id INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE, + diff TEXT NOT NULL, + patch_hash TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending' + CHECK (status IN ('pending','running','killed','survived', + 'timeout','error','skipped','equivalent','unproductive')), + killed INTEGER GENERATED ALWAYS AS (CASE WHEN status='killed' THEN 1 ELSE 0 END) VIRTUAL, + command_to_test TEXT, + file_path TEXT, + operator TEXT, + UNIQUE(run_id, patch_hash) +); + +CREATE INDEX IF NOT EXISTS idx_mutants_run_status ON mutants(run_id, status); +CREATE INDEX IF NOT EXISTS idx_mutants_file ON mutants(file_path); +CREATE INDEX IF NOT EXISTS idx_mutants_operator ON mutants(operator); +CREATE INDEX IF NOT EXISTS idx_mutants_killed ON mutants(killed); +"; + +/// Data collected during mutation for a single generated mutant. +pub struct MutantData { + pub diff: String, + pub patch_hash: String, + pub file_path: String, + pub operator: String, +} + +/// A mutant row read back from the database. +pub struct MutantRow { + pub id: i64, + pub diff: String, + pub file_path: Option, +} + +pub struct Database { + conn: Connection, +} + +impl Database { + /// Open (or create) the database at `path` and enable foreign keys. + pub fn open(path: &Path) -> Result { + let conn = Connection::open(path)?; + conn.execute_batch("PRAGMA foreign_keys = ON;")?; + Ok(Database { conn }) + } + + /// Create tables and indexes if they do not yet exist. + pub fn ensure_schema(&self) -> Result<()> { + self.conn.execute_batch(SCHEMA)?; + Ok(()) + } + + /// Insert the Bitcoin Core project row if not already present. + pub fn seed_projects(&self) -> Result<()> { + self.conn.execute( + "INSERT OR IGNORE INTO projects (name, repository_url) VALUES (?1, ?2)", + params!["Bitcoin Core", "https://github.com/bitcoin/bitcoin"], + )?; + Ok(()) + } + + /// Return the id of the Bitcoin Core project row. 
+ pub fn get_bitcoin_core_project_id(&self) -> Result { + let id = self.conn.query_row( + "SELECT id FROM projects WHERE name = 'Bitcoin Core'", + [], + |row| row.get(0), + )?; + Ok(id) + } + + /// Create a new run row and return its id. + pub fn create_run( + &self, + project_id: i64, + commit_hash: &str, + tool_version: &str, + pr_number: Option, + ) -> Result { + self.conn.execute( + "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number) + VALUES (?1, ?2, ?3, ?4)", + params![project_id, commit_hash, tool_version, pr_number], + )?; + Ok(self.conn.last_insert_rowid()) + } + + /// Batch-insert mutants under `run_id` using a single transaction. + /// Duplicates (same run_id + patch_hash) are silently ignored. + pub fn insert_mutant_batch(&mut self, run_id: i64, mutants: &[MutantData]) -> Result<()> { + let tx = self.conn.transaction()?; + { + let mut stmt = tx.prepare( + "INSERT OR IGNORE INTO mutants + (run_id, diff, patch_hash, status, file_path, operator) + VALUES (?1, ?2, ?3, 'pending', ?4, ?5)", + )?; + for m in mutants { + stmt.execute(params![ + run_id, + m.diff, + m.patch_hash, + m.file_path, + m.operator + ])?; + } + } + tx.commit()?; + Ok(()) + } + + /// Return all mutants belonging to `run_id`. + pub fn get_mutants_for_run(&self, run_id: i64) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT id, diff, file_path + FROM mutants + WHERE run_id = ?1", + )?; + let rows = stmt.query_map(params![run_id], |row| { + Ok(MutantRow { + id: row.get(0)?, + diff: row.get(1)?, + file_path: row.get(2)?, + }) + })?; + + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + /// Update the status and command_to_test for a single mutant. 
+ pub fn update_mutant_status(&self, id: i64, status: &str, command: &str) -> Result<()> { + self.conn.execute( + "UPDATE mutants SET status = ?1, command_to_test = ?2 WHERE id = ?3", + params![status, command, id], + )?; + Ok(()) + } +} + +/// Compute the SHA-256 hex digest of `diff`. +pub fn compute_patch_hash(diff: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(diff.as_bytes()); + format!("{:x}", hasher.finalize()) +} + +/// Build a minimal unified-diff patch for a single-line substitution. +/// The produced patch is suitable for `git apply`. +pub fn generate_diff( + file_path: &str, + line_num: usize, + original: &str, + mutated: &str, +) -> String { + format!( + "--- a/{file_path}\n+++ b/{file_path}\n@@ -{line_num},1 +{line_num},1 @@\n-{original}\n+{mutated}\n", + ) +} diff --git a/src/error.rs b/src/error.rs index 36a61bd..69a9f6a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -26,6 +26,9 @@ pub enum MutationError { #[error("Walkdir error: {0}")] Walkdir(#[from] walkdir::Error), + #[error("SQLite error: {0}")] + Rusqlite(#[from] rusqlite::Error), + #[error("Other error: {0}")] Other(#[from] anyhow::Error), } diff --git a/src/git_changes.rs b/src/git_changes.rs index cfc3188..3d1802f 100644 --- a/src/git_changes.rs +++ b/src/git_changes.rs @@ -23,6 +23,11 @@ pub async fn run_git_command(args: &[&str]) -> Result> { Ok(stdout.lines().map(|s| s.to_string()).collect()) } +pub async fn get_commit_hash() -> Result { + let lines = run_git_command(&["rev-parse", "HEAD"]).await?; + Ok(lines.into_iter().next().unwrap_or_default()) +} + pub async fn get_changed_files(pr_number: Option) -> Result> { let mut used_remote = "upstream"; // Track which remote we successfully used diff --git a/src/lib.rs b/src/lib.rs index 295d75b..59d7f51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,16 +18,17 @@ //! async fn main() -> Result<(), Box> { //! // Generate mutants for a specific file with AST filtering //! mutation::run_mutation( -//! 
None, // PR number +//! None, // PR number //! Some("src/test.cpp".into()), // file path -//! false, // one_mutant -//! false, // only_security_mutations -//! None, // range_lines -//! None, // coverage -//! false, // test_only -//! HashMap::new(), // skip_lines -//! true, // enable_ast_filtering -//! None, // custom_expert_rule +//! false, // one_mutant +//! false, // only_security_mutations +//! None, // range_lines +//! None, // coverage +//! false, // test_only +//! HashMap::new(), // skip_lines +//! true, // enable_ast_filtering +//! None, // custom_expert_rule +//! None, // sqlite_path //! ).await?; //! //! Ok(()) @@ -37,6 +38,7 @@ pub mod analyze; pub mod ast_analysis; pub mod coverage; +pub mod db; pub mod error; pub mod git_changes; pub mod mutation; diff --git a/src/main.rs b/src/main.rs index 30d3a51..50b0845 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use std::path::PathBuf; mod analyze; mod ast_analysis; mod coverage; +mod db; mod error; mod git_changes; mod mutation; @@ -64,6 +65,10 @@ enum Commands { /// Add custom expert rule for arid node detection #[arg(long, value_name = "PATTERN")] add_expert_rule: Option, + + /// Persist results to a SQLite database (default path: mutation.db) + #[arg(long, value_name = "PATH", num_args = 0..=1, default_missing_value = "mutation.db")] + sqlite: Option, }, /// Analyze mutants Analyze { @@ -86,6 +91,14 @@ enum Commands { /// Maximum acceptable survival rate (0.3 = 30%) #[arg(long, default_value = "0.75")] survival_threshold: f64, + + /// SQLite database path to read mutants from (requires --run_id) + #[arg(long, value_name = "PATH", num_args = 0..=1, default_missing_value = "mutation.db")] + sqlite: Option, + + /// Run ID to analyze from the SQLite database (requires --sqlite) + #[arg(long)] + run_id: Option, }, } @@ -105,6 +118,7 @@ async fn main() -> Result<()> { only_security_mutations, disable_ast_filtering, add_expert_rule, + sqlite, } => { let skip_lines_map = if let Some(path) = skip_lines { 
read_skip_lines(&path)? @@ -155,6 +169,7 @@ async fn main() -> Result<()> { skip_lines_map, !disable_ast_filtering, add_expert_rule, + sqlite, ) .await?; } @@ -164,8 +179,17 @@ async fn main() -> Result<()> { jobs, command, survival_threshold, + sqlite, + run_id, } => { - analyze::run_analysis(folder, command, jobs, timeout, survival_threshold).await?; + if run_id.is_some() && sqlite.is_none() { + return Err(MutationError::InvalidInput( + "--run_id requires --sqlite ".to_string(), + )); + } + + analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id) + .await?; } } diff --git a/src/mutation.rs b/src/mutation.rs index 3c8e7e6..8b456a4 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -1,6 +1,7 @@ use crate::ast_analysis::{filter_mutatable_lines, AridNodeDetector}; +use crate::db::{compute_patch_hash, generate_diff, Database, MutantData}; use crate::error::{MutationError, Result}; -use crate::git_changes::{get_changed_files, get_lines_touched}; +use crate::git_changes::{get_changed_files, get_commit_hash, get_lines_touched}; use crate::operators::{ get_do_not_mutate_patterns, get_do_not_mutate_py_patterns, get_do_not_mutate_unit_patterns, get_regex_operators, get_security_operators, get_skip_if_contain_patterns, get_test_operators, @@ -18,6 +19,9 @@ pub struct FileToMutate { pub is_unit_test: bool, } +/// Chunk size for DB batch inserts. +const DB_BATCH_SIZE: usize = 100; + pub async fn run_mutation( pr_number: Option, file: Option, @@ -29,12 +33,29 @@ pub async fn run_mutation( skip_lines: HashMap>, enable_ast_filtering: bool, custom_expert_rule: Option, + sqlite_path: Option, ) -> Result<()> { + // Set up database if requested. 
+ let mut db_and_run: Option<(Database, i64)> = None; + if let Some(ref path) = sqlite_path { + let db = Database::open(path)?; + db.ensure_schema()?; + db.seed_projects()?; + let project_id = db.get_bitcoin_core_project_id()?; + let commit_hash = get_commit_hash().await.unwrap_or_else(|_| "unknown".to_string()); + let tool_version = env!("CARGO_PKG_VERSION"); + let run_id = db.create_run(project_id, &commit_hash, tool_version, pr_number)?; + println!("SQLite: created run id={} in {}", run_id, path.display()); + db_and_run = Some((db, run_id)); + } + + let mut all_mutants: Vec = Vec::new(); + if let Some(file_path) = file { let file_str = file_path.to_string_lossy().to_string(); let is_unit_test = file_str.contains("test") && !file_str.contains(".py"); - mutate_file( + let mutants = mutate_file( &file_str, None, None, @@ -48,56 +69,71 @@ pub async fn run_mutation( custom_expert_rule, ) .await?; - return Ok(()); - } + all_mutants.extend(mutants); + } else { + let files_changed = get_changed_files(pr_number).await?; + let mut files_to_mutate = Vec::new(); + + for file_changed in files_changed { + // Skip certain file types + if file_changed.contains("doc") + || file_changed.contains("fuzz") + || file_changed.contains("bench") + || file_changed.contains("util") + || file_changed.contains("sanitizer_supressions") + || file_changed.contains("test_framework.py") + || file_changed.ends_with(".txt") + { + continue; + } - let files_changed = get_changed_files(pr_number).await?; - let mut files_to_mutate = Vec::new(); - - for file_changed in files_changed { - // Skip certain file types - if file_changed.contains("doc") - || file_changed.contains("fuzz") - || file_changed.contains("bench") - || file_changed.contains("util") - || file_changed.contains("sanitizer_supressions") - || file_changed.contains("test_framework.py") - || file_changed.ends_with(".txt") - { - continue; - } + let lines_touched = get_lines_touched(&file_changed).await?; + let is_unit_test = 
file_changed.contains("test") + && !file_changed.contains(".py") + && !file_changed.contains("util"); - let lines_touched = get_lines_touched(&file_changed).await?; - let is_unit_test = file_changed.contains("test") - && !file_changed.contains(".py") - && !file_changed.contains("util"); + if test_only && !(is_unit_test || file_changed.contains(".py")) { + continue; + } - if test_only && !(is_unit_test || file_changed.contains(".py")) { - continue; + files_to_mutate.push(FileToMutate { + file_path: file_changed, + lines_touched, + is_unit_test, + }); } - files_to_mutate.push(FileToMutate { - file_path: file_changed, - lines_touched, - is_unit_test, - }); + for file_info in files_to_mutate { + let mutants = mutate_file( + &file_info.file_path, + Some(file_info.lines_touched), + pr_number, + one_mutant, + only_security_mutations, + range_lines, + &coverage, + file_info.is_unit_test, + &skip_lines, + enable_ast_filtering, + custom_expert_rule.clone(), + ) + .await?; + all_mutants.extend(mutants); + } } - for file_info in files_to_mutate { - mutate_file( - &file_info.file_path, - Some(file_info.lines_touched), - pr_number, - one_mutant, - only_security_mutations, - range_lines, - &coverage, - file_info.is_unit_test, - &skip_lines, - enable_ast_filtering, - custom_expert_rule.clone(), - ) - .await?; + // Persist mutants to the database in chunks. 
+ if let Some((ref mut db, run_id)) = db_and_run { + let total = all_mutants.len(); + let mut inserted = 0usize; + for chunk in all_mutants.chunks(DB_BATCH_SIZE) { + db.insert_mutant_batch(run_id, chunk)?; + inserted += chunk.len(); + } + println!( + "SQLite: inserted {}/{} mutants for run_id={}", + inserted, total, run_id + ); } Ok(()) @@ -115,7 +151,7 @@ pub async fn mutate_file( skip_lines: &HashMap>, enable_ast_filtering: bool, custom_expert_rule: Option, -) -> Result<()> { +) -> Result> { println!("\n\nGenerating mutants for {}...", file_to_mutate); let source_code = fs::read_to_string(file_to_mutate)?; @@ -210,6 +246,7 @@ pub async fn mutate_file( } let mut mutant_count = 0; + let mut collected: Vec = Vec::new(); if one_mutant { println!("One mutant mode enabled"); @@ -277,6 +314,23 @@ pub async fn mutate_file( range_lines, )?; + // Collect mutant metadata for DB persistence. + let diff = generate_diff( + file_to_mutate, + line_num, + line_before_mutation, + &line_mutated, + ); + let patch_hash = compute_patch_hash(&diff); + let operator_label = + format!("{} ==> {}", operator.pattern.as_str(), operator.replacement); + collected.push(MutantData { + diff, + patch_hash, + file_path: file_to_mutate.to_string(), + operator: operator_label, + }); + if one_mutant { break; // Break only from operator loop, continue to next line } @@ -303,7 +357,7 @@ pub async fn mutate_file( } println!("Generated {} mutants...", mutant_count); - Ok(()) + Ok(collected) } fn should_skip_line(line: &str, file_path: &str, is_unit_test: bool) -> Result { From 8121f4fae55107ba559087187b13add21296cb5e Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 11:00:17 -0300 Subject: [PATCH 2/5] sqlite: persist --range in runs.config_json Add a config_json TEXT column to the runs table. When --range is supplied together with --sqlite, the range bounds are written as {"range":[start,end]} into that column, making the exact scope of every run reproducible and queryable. 
Schema migration: ensure_schema now issues ALTER TABLE runs ADD COLUMN config_json TEXT after the CREATE TABLE statement and silently ignores the "duplicate column name" error so existing databases are upgraded non-destructively without requiring a full schema recreation. Co-Authored-By: Claude Sonnet 4.6 --- src/db.rs | 25 ++++++++++++++++++++----- src/mutation.rs | 15 ++++++++++++++- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/db.rs b/src/db.rs index 04accf9..dd06f9c 100644 --- a/src/db.rs +++ b/src/db.rs @@ -20,7 +20,8 @@ CREATE TABLE IF NOT EXISTS runs ( commit_hash TEXT NOT NULL, pr_number INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tool_version TEXT + tool_version TEXT, + config_json TEXT ); CREATE INDEX IF NOT EXISTS idx_runs_project_created ON runs(project_id, created_at DESC); @@ -74,9 +75,22 @@ impl Database { Ok(Database { conn }) } - /// Create tables and indexes if they do not yet exist. + /// Create tables and indexes if they do not yet exist, and apply any + /// additive migrations needed for older databases. pub fn ensure_schema(&self) -> Result<()> { self.conn.execute_batch(SCHEMA)?; + // Migration: add config_json to runs if the column is missing. + // ALTER TABLE ADD COLUMN fails with "duplicate column name" when the + // column already exists; silence that specific error so the function + // is idempotent on databases created before this column was added. 
+ if let Err(e) = self + .conn + .execute_batch("ALTER TABLE runs ADD COLUMN config_json TEXT;") + { + if !e.to_string().contains("duplicate column name") { + return Err(e.into()); + } + } Ok(()) } @@ -106,11 +120,12 @@ impl Database { commit_hash: &str, tool_version: &str, pr_number: Option, + config_json: Option<&str>, ) -> Result { self.conn.execute( - "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number) - VALUES (?1, ?2, ?3, ?4)", - params![project_id, commit_hash, tool_version, pr_number], + "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number, config_json) + VALUES (?1, ?2, ?3, ?4, ?5)", + params![project_id, commit_hash, tool_version, pr_number, config_json], )?; Ok(self.conn.last_insert_rowid()) } diff --git a/src/mutation.rs b/src/mutation.rs index 8b456a4..fdf7d54 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -22,6 +22,12 @@ pub struct FileToMutate { /// Chunk size for DB batch inserts. const DB_BATCH_SIZE: usize = 100; +/// Serialize execution config options into a JSON string for the runs table. +/// Returns `None` when there is nothing worth recording. 
+fn build_config_json(range_lines: Option<(usize, usize)>) -> Option { + range_lines.map(|(start, end)| format!("{{\"range\":[{},{}]}}", start, end)) +} + pub async fn run_mutation( pr_number: Option, file: Option, @@ -44,7 +50,14 @@ pub async fn run_mutation( let project_id = db.get_bitcoin_core_project_id()?; let commit_hash = get_commit_hash().await.unwrap_or_else(|_| "unknown".to_string()); let tool_version = env!("CARGO_PKG_VERSION"); - let run_id = db.create_run(project_id, &commit_hash, tool_version, pr_number)?; + let config_json = build_config_json(range_lines); + let run_id = db.create_run( + project_id, + &commit_hash, + tool_version, + pr_number, + config_json.as_deref(), + )?; println!("SQLite: created run id={} in {}", run_id, path.display()); db_and_run = Some((db, run_id)); } From eb610dc8b776a687d903f7db74d6b056ad9403a1 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 13:36:58 -0300 Subject: [PATCH 3/5] analyze: fix silent restore failure causing cascading patch errors restore_file discarded the bool returned by run_command, so a failed `git restore` was silently ignored and left the file in a mutated state. All subsequent mutants targeting the same file then failed with "patch does not apply" because the `-` line in their diff no longer matched the actual file content. Two fixes: - restore_file now returns an error when `git restore` exits non-zero. - run_db_analysis resets the file to HEAD at the start of each mutant iteration (soft pre-restore with a warning on failure) so a single bad restore cannot poison the rest of the run. 
Co-Authored-By: Claude Sonnet 4.6 --- src/analyze.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/analyze.rs b/src/analyze.rs index 07da547..8ff07f4 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -78,6 +78,17 @@ async fn run_db_analysis( for (i, mutant) in mutants.iter().enumerate() { println!("[{}/{}] Analyzing mutant id={}", i + 1, total, mutant.id); + // Determine the file path to restore later. + let file_path = mutant.file_path.as_deref().unwrap_or(""); + + // Ensure the file is at HEAD before applying the mutant diff. + // A previous mutant may have been left applied if restore silently failed. + if !file_path.is_empty() { + if let Err(e) = restore_file(file_path).await { + eprintln!(" Warning: pre-restore failed for {}: {}", file_path, e); + } + } + // Update status to 'running' and record the command. db.update_mutant_status(mutant.id, "running", command)?; @@ -89,9 +100,6 @@ async fn run_db_analysis( continue; } - // Determine the file path to restore later. - let file_path = mutant.file_path.as_deref().unwrap_or(""); - // Run the test command. let killed = !run_command(command, timeout_secs).await?; @@ -367,7 +375,13 @@ fn get_command_to_kill(target_file_path: &str, jobs: u32) -> Result { async fn restore_file(target_file_path: &str) -> Result<()> { let restore_command = format!("git restore {}", target_file_path); - run_command(&restore_command, 30).await?; + let success = run_command(&restore_command, 30).await?; + if !success { + return Err(MutationError::Git(format!( + "git restore failed for {}", + target_file_path + ))); + } Ok(()) } From 1aeb25d7e923890c18f1b2a62777b07ce9bd8089 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 15:46:18 -0300 Subject: [PATCH 4/5] sqlite: generate real git diff patches with context lines Replace the hand-crafted minimal diff in generate_diff with an actual `git diff --no-index` invocation against a temp file. 
This produces a proper unified diff with 3 context lines that `git apply` can reliably locate and apply, fixing silent patch failures during DB-based analysis. Co-Authored-By: Claude Sonnet 4.6 --- src/db.rs | 68 ++++++++++++++++++++++++++++++++++++++++--------- src/mutation.rs | 16 +++++++----- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/src/db.rs b/src/db.rs index dd06f9c..c0aea35 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,4 +1,4 @@ -use crate::error::Result; +use crate::error::{MutationError, Result}; use rusqlite::{params, Connection}; use sha2::{Digest, Sha256}; use std::path::Path; @@ -193,15 +193,59 @@ pub fn compute_patch_hash(diff: &str) -> String { format!("{:x}", hasher.finalize()) } -/// Build a minimal unified-diff patch for a single-line substitution. -/// The produced patch is suitable for `git apply`. -pub fn generate_diff( - file_path: &str, - line_num: usize, - original: &str, - mutated: &str, -) -> String { - format!( - "--- a/{file_path}\n+++ b/{file_path}\n@@ -{line_num},1 +{line_num},1 @@\n-{original}\n+{mutated}\n", - ) +/// Generate a proper unified diff by running `git diff --no-index` between the +/// original file on disk and a temp file containing `mutated_content`. +/// The resulting patch includes context lines and is suitable for `git apply`. +pub async fn generate_diff(file_path: &str, mutated_content: &str) -> Result { + use std::io::Write; + use tempfile::NamedTempFile; + use tokio::process::Command; + + let mut tmp = NamedTempFile::new()?; + tmp.write_all(mutated_content.as_bytes())?; + tmp.flush()?; + + let tmp_path = tmp.path().to_string_lossy().to_string(); + + // `git diff --no-index` exits with 1 when differences exist — that is expected. 
+ let output = Command::new("git") + .args(["diff", "--no-index", "--", file_path, &tmp_path]) + .output() + .await + .map_err(|e| MutationError::Git(format!("git diff failed to spawn: {}", e)))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + + if stdout.is_empty() { + return Err(MutationError::Git(format!( + "git diff produced no output for {}", + file_path + ))); + } + + // Fix the temp-file path back to the real file path in the diff headers. + // `git diff --no-index` shows the second argument's path in `+++ b/` and + // `diff --git … b/…`; replace those with `file_path`. + let fixed = stdout + .lines() + .map(|line| { + if line.starts_with("+++ ") { + format!("+++ b/{}", file_path) + } else if line.starts_with("diff --git ") { + format!("diff --git a/{} b/{}", file_path, file_path) + } else { + line.to_string() + } + }) + .collect::>() + .join("\n"); + + // Preserve trailing newline present in git diff output. + let fixed = if stdout.ends_with('\n') { + fixed + "\n" + } else { + fixed + }; + + Ok(fixed) } diff --git a/src/mutation.rs b/src/mutation.rs index fdf7d54..4b0d7d5 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -328,12 +328,16 @@ pub async fn mutate_file( )?; // Collect mutant metadata for DB persistence. 
- let diff = generate_diff( - file_to_mutate, - line_num, - line_before_mutation, - &line_mutated, - ); + let diff = match generate_diff(file_to_mutate, &mutated_content).await { + Ok(d) => d, + Err(e) => { + eprintln!( + " Warning: could not generate diff for mutant at line {}: {}", + line_num, e + ); + continue; + } + }; let patch_hash = compute_patch_hash(&diff); let operator_label = format!("{} ==> {}", operator.pattern.as_str(), operator.replacement); From cb9c6c92f2fa7e76b6a2603995ad40f410846254 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 16:31:21 -0300 Subject: [PATCH 5/5] analyze: add --file_path filter for DB-based analysis Allows narrowing a run's mutants to a single source file, enabling per-file analysis with different test commands on multi-file PRs. Co-Authored-By: Claude Sonnet 4.6 --- src/analyze.rs | 14 +++++++++---- src/db.rs | 55 ++++++++++++++++++++++++++++++++------------------ src/main.rs | 13 +++++++++++- 3 files changed, 57 insertions(+), 25 deletions(-) diff --git a/src/analyze.rs b/src/analyze.rs index 8ff07f4..66a2a75 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -17,6 +17,7 @@ pub async fn run_analysis( survival_threshold: f64, sqlite_path: Option, run_id: Option, + file_path: Option, ) -> Result<()> { // DB-based analysis mode: read mutants from DB and test them. if let (Some(ref path), Some(rid)) = (sqlite_path.as_ref(), run_id) { @@ -28,7 +29,7 @@ pub async fn run_analysis( let db = Database::open(path)?; db.ensure_schema()?; db.seed_projects()?; - return run_db_analysis(&db, rid, &command, timeout_secs).await; + return run_db_analysis(&db, rid, &command, timeout_secs, file_path.as_deref()).await; } // Folder-based analysis mode (existing behaviour). @@ -53,17 +54,22 @@ pub async fn run_analysis( Ok(()) } -/// Test all pending mutants in `run_id` from the database. +/// Test all pending mutants in `run_id` from the database, optionally filtered by `file_path`. 
async fn run_db_analysis( db: &Database, run_id: i64, command: &str, timeout_secs: u64, + file_path: Option<&str>, ) -> Result<()> { - let mutants = db.get_mutants_for_run(run_id)?; + let mutants = db.get_mutants_for_run(run_id, file_path)?; let total = mutants.len(); - println!("* {} MUTANTS in run_id={} *", total, run_id); + if let Some(fp) = file_path { + println!("* {} MUTANTS in run_id={} (file: {}) *", total, run_id, fp); + } else { + println!("* {} MUTANTS in run_id={} *", total, run_id); + } if total == 0 { return Err(MutationError::InvalidInput(format!( diff --git a/src/db.rs b/src/db.rs index c0aea35..7a40c3f 100644 --- a/src/db.rs +++ b/src/db.rs @@ -154,26 +154,41 @@ impl Database { Ok(()) } - /// Return all mutants belonging to `run_id`. - pub fn get_mutants_for_run(&self, run_id: i64) -> Result> { - let mut stmt = self.conn.prepare( - "SELECT id, diff, file_path - FROM mutants - WHERE run_id = ?1", - )?; - let rows = stmt.query_map(params![run_id], |row| { - Ok(MutantRow { - id: row.get(0)?, - diff: row.get(1)?, - file_path: row.get(2)?, - }) - })?; - - let mut result = Vec::new(); - for row in rows { - result.push(row?); - } - Ok(result) + /// Return mutants belonging to `run_id`, optionally filtered by `file_path`. + pub fn get_mutants_for_run( + &self, + run_id: i64, + file_path: Option<&str>, + ) -> Result> { + let sql = if file_path.is_some() { + "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1 AND file_path = ?2" + } else { + "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1" + }; + + let mut stmt = self.conn.prepare(sql)?; + + let rows: Vec = if let Some(fp) = file_path { + stmt.query_map(params![run_id, fp], |row| { + Ok(MutantRow { + id: row.get(0)?, + diff: row.get(1)?, + file_path: row.get(2)?, + }) + })? + .collect::>()? + } else { + stmt.query_map(params![run_id], |row| { + Ok(MutantRow { + id: row.get(0)?, + diff: row.get(1)?, + file_path: row.get(2)?, + }) + })? + .collect::>()? 
+ }; + + Ok(rows) } /// Update the status and command_to_test for a single mutant. diff --git a/src/main.rs b/src/main.rs index 50b0845..5cdc607 100644 --- a/src/main.rs +++ b/src/main.rs @@ -99,6 +99,10 @@ enum Commands { /// Run ID to analyze from the SQLite database (requires --sqlite) #[arg(long)] run_id: Option, + + /// Only analyze mutants for this file path (requires --run_id) + #[arg(long)] + file_path: Option, }, } @@ -181,6 +185,7 @@ async fn main() -> Result<()> { survival_threshold, sqlite, run_id, + file_path, } => { if run_id.is_some() && sqlite.is_none() { return Err(MutationError::InvalidInput( @@ -188,7 +193,13 @@ async fn main() -> Result<()> { )); } - analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id) + if file_path.is_some() && run_id.is_none() { + return Err(MutationError::InvalidInput( + "--file_path requires --run_id".to_string(), + )); + } + + analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id, file_path) .await?; } }