From fc1d6c201fe0fce54e686f2d6e1a583b42cbeb50 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 10:53:24 -0300 Subject: [PATCH 1/5] sqlite: add SQLite persistence for projects, runs, and mutants Introduce --sqlite[=<path>] flag (default: mutation.db) on both the mutate and analyze subcommands, backed by a new db module. mutate: - Opens/creates the database, applies schema (projects, runs, mutants with indexes and CHECK constraints), and seeds the Bitcoin Core project row via INSERT OR IGNORE. - Creates a runs row capturing commit_hash, tool_version, and optional pr_number before generation begins. - mutate_file now returns Vec<MutantData> (unified diff, SHA-256 patch_hash, file_path, operator) alongside the existing file writes. - Mutants are batch-inserted in chunks of 100 within transactions; duplicate (run_id, patch_hash) pairs are silently ignored. analyze: - New --sqlite + --run_id flags trigger DB-based analysis. - Reads all mutants for the run, applies each diff via `git apply` (temp file), runs the supplied --command, updates status to killed/survived/error, then restores the file with git restore. - --command is required in DB mode; --run_id without --sqlite is rejected with a clear error. Other: - get_commit_hash() added to git_changes (git rev-parse HEAD). - MutationError gains a Rusqlite(#[from] rusqlite::Error) variant. - rusqlite 0.32 (bundled) and sha2 0.10 added as dependencies. - Running without --sqlite preserves existing behaviour exactly. 
Co-Authored-By: Claude Sonnet 4.6 --- Cargo.toml | 2 + src/analyze.rs | 121 ++++++++++++++++++++++++++++ src/db.rs | 192 +++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 3 + src/git_changes.rs | 5 ++ src/lib.rs | 20 ++--- src/main.rs | 26 +++++- src/mutation.rs | 148 +++++++++++++++++++++++----------- 8 files changed, 460 insertions(+), 57 deletions(-) create mode 100644 src/db.rs diff --git a/Cargo.toml b/Cargo.toml index b6c606e..9393c89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,8 @@ chrono = { version = "0.4", features = ["serde"] } futures = "0.3" walkdir = "2.4" rayon = "1.8" +rusqlite = { version = "0.32", features = ["bundled"] } +sha2 = "0.10" tempfile = "3.8" [dev-dependencies] diff --git a/src/analyze.rs b/src/analyze.rs index c3ed569..07da547 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -1,8 +1,10 @@ +use crate::db::Database; use crate::error::{MutationError, Result}; use crate::report::generate_report; use std::fs; use std::path::{Path, PathBuf}; use std::time::Duration; +use tempfile::NamedTempFile; use tokio::process::Command as TokioCommand; use tokio::time::timeout; use walkdir::WalkDir; @@ -13,7 +15,23 @@ pub async fn run_analysis( jobs: u32, timeout_secs: u64, survival_threshold: f64, + sqlite_path: Option, + run_id: Option, ) -> Result<()> { + // DB-based analysis mode: read mutants from DB and test them. + if let (Some(ref path), Some(rid)) = (sqlite_path.as_ref(), run_id) { + let command = command.ok_or_else(|| { + MutationError::InvalidInput( + "--command is required when using --sqlite with --run_id".to_string(), + ) + })?; + let db = Database::open(path)?; + db.ensure_schema()?; + db.seed_projects()?; + return run_db_analysis(&db, rid, &command, timeout_secs).await; + } + + // Folder-based analysis mode (existing behaviour). 
let folders = if let Some(folder_path) = folder { vec![folder_path] } else { @@ -35,6 +53,109 @@ pub async fn run_analysis( Ok(()) } +/// Test all pending mutants in `run_id` from the database. +async fn run_db_analysis( + db: &Database, + run_id: i64, + command: &str, + timeout_secs: u64, +) -> Result<()> { + let mutants = db.get_mutants_for_run(run_id)?; + let total = mutants.len(); + + println!("* {} MUTANTS in run_id={} *", total, run_id); + + if total == 0 { + return Err(MutationError::InvalidInput(format!( + "No mutants found for run_id={}", + run_id + ))); + } + + let mut num_killed: u64 = 0; + let mut num_survived: u64 = 0; + + for (i, mutant) in mutants.iter().enumerate() { + println!("[{}/{}] Analyzing mutant id={}", i + 1, total, mutant.id); + + // Update status to 'running' and record the command. + db.update_mutant_status(mutant.id, "running", command)?; + + // Write the patch to a temp file and apply it with `git apply`. + let apply_result = apply_diff(&mutant.diff).await; + if let Err(ref e) = apply_result { + eprintln!(" Failed to apply diff for mutant {}: {}", mutant.id, e); + db.update_mutant_status(mutant.id, "error", command)?; + continue; + } + + // Determine the file path to restore later. + let file_path = mutant.file_path.as_deref().unwrap_or(""); + + // Run the test command. + let killed = !run_command(command, timeout_secs).await?; + + let new_status = if killed { + println!(" KILLED ✅"); + num_killed += 1; + "killed" + } else { + println!(" NOT KILLED ❌"); + num_survived += 1; + "survived" + }; + + db.update_mutant_status(mutant.id, new_status, command)?; + + // Restore the modified file. 
+ if !file_path.is_empty() { + restore_file(file_path).await?; + } + } + + let score = if total > 0 { + num_killed as f64 / total as f64 + } else { + 0.0 + }; + println!( + "\nMUTATION SCORE: {:.2}% ({} killed / {} total)", + score * 100.0, + num_killed, + total + ); + println!("Survived: {}", num_survived); + + Ok(()) +} + +/// Apply a unified diff patch using `git apply`. +async fn apply_diff(diff: &str) -> Result<()> { + use std::io::Write; + + let mut tmp = NamedTempFile::new()?; + tmp.write_all(diff.as_bytes())?; + tmp.flush()?; + + let tmp_path = tmp.path().to_path_buf(); + // Keep `tmp` alive until after the command runs. + let output = TokioCommand::new("git") + .args(["apply", "--whitespace=nowarn", tmp_path.to_str().unwrap()]) + .output() + .await + .map_err(|e| MutationError::Git(format!("git apply failed: {}", e)))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(MutationError::Git(format!( + "git apply error: {}", + stderr.trim() + ))); + } + + Ok(()) +} + fn find_mutation_folders() -> Result> { let mut folders = Vec::new(); diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..04accf9 --- /dev/null +++ b/src/db.rs @@ -0,0 +1,192 @@ +use crate::error::Result; +use rusqlite::{params, Connection}; +use sha2::{Digest, Sha256}; +use std::path::Path; + +const SCHEMA: &str = " +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS projects ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + repository_url TEXT, + UNIQUE(name), + UNIQUE(repository_url) +); + +CREATE TABLE IF NOT EXISTS runs ( + id INTEGER PRIMARY KEY, + project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + commit_hash TEXT NOT NULL, + pr_number INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tool_version TEXT +); + +CREATE INDEX IF NOT EXISTS idx_runs_project_created ON runs(project_id, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_runs_commit ON runs(commit_hash); + +CREATE TABLE IF 
NOT EXISTS mutants ( + id INTEGER PRIMARY KEY, + run_id INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE, + diff TEXT NOT NULL, + patch_hash TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending' + CHECK (status IN ('pending','running','killed','survived', + 'timeout','error','skipped','equivalent','unproductive')), + killed INTEGER GENERATED ALWAYS AS (CASE WHEN status='killed' THEN 1 ELSE 0 END) VIRTUAL, + command_to_test TEXT, + file_path TEXT, + operator TEXT, + UNIQUE(run_id, patch_hash) +); + +CREATE INDEX IF NOT EXISTS idx_mutants_run_status ON mutants(run_id, status); +CREATE INDEX IF NOT EXISTS idx_mutants_file ON mutants(file_path); +CREATE INDEX IF NOT EXISTS idx_mutants_operator ON mutants(operator); +CREATE INDEX IF NOT EXISTS idx_mutants_killed ON mutants(killed); +"; + +/// Data collected during mutation for a single generated mutant. +pub struct MutantData { + pub diff: String, + pub patch_hash: String, + pub file_path: String, + pub operator: String, +} + +/// A mutant row read back from the database. +pub struct MutantRow { + pub id: i64, + pub diff: String, + pub file_path: Option, +} + +pub struct Database { + conn: Connection, +} + +impl Database { + /// Open (or create) the database at `path` and enable foreign keys. + pub fn open(path: &Path) -> Result { + let conn = Connection::open(path)?; + conn.execute_batch("PRAGMA foreign_keys = ON;")?; + Ok(Database { conn }) + } + + /// Create tables and indexes if they do not yet exist. + pub fn ensure_schema(&self) -> Result<()> { + self.conn.execute_batch(SCHEMA)?; + Ok(()) + } + + /// Insert the Bitcoin Core project row if not already present. + pub fn seed_projects(&self) -> Result<()> { + self.conn.execute( + "INSERT OR IGNORE INTO projects (name, repository_url) VALUES (?1, ?2)", + params!["Bitcoin Core", "https://github.com/bitcoin/bitcoin"], + )?; + Ok(()) + } + + /// Return the id of the Bitcoin Core project row. 
+ pub fn get_bitcoin_core_project_id(&self) -> Result { + let id = self.conn.query_row( + "SELECT id FROM projects WHERE name = 'Bitcoin Core'", + [], + |row| row.get(0), + )?; + Ok(id) + } + + /// Create a new run row and return its id. + pub fn create_run( + &self, + project_id: i64, + commit_hash: &str, + tool_version: &str, + pr_number: Option, + ) -> Result { + self.conn.execute( + "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number) + VALUES (?1, ?2, ?3, ?4)", + params![project_id, commit_hash, tool_version, pr_number], + )?; + Ok(self.conn.last_insert_rowid()) + } + + /// Batch-insert mutants under `run_id` using a single transaction. + /// Duplicates (same run_id + patch_hash) are silently ignored. + pub fn insert_mutant_batch(&mut self, run_id: i64, mutants: &[MutantData]) -> Result<()> { + let tx = self.conn.transaction()?; + { + let mut stmt = tx.prepare( + "INSERT OR IGNORE INTO mutants + (run_id, diff, patch_hash, status, file_path, operator) + VALUES (?1, ?2, ?3, 'pending', ?4, ?5)", + )?; + for m in mutants { + stmt.execute(params![ + run_id, + m.diff, + m.patch_hash, + m.file_path, + m.operator + ])?; + } + } + tx.commit()?; + Ok(()) + } + + /// Return all mutants belonging to `run_id`. + pub fn get_mutants_for_run(&self, run_id: i64) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT id, diff, file_path + FROM mutants + WHERE run_id = ?1", + )?; + let rows = stmt.query_map(params![run_id], |row| { + Ok(MutantRow { + id: row.get(0)?, + diff: row.get(1)?, + file_path: row.get(2)?, + }) + })?; + + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + /// Update the status and command_to_test for a single mutant. 
+ pub fn update_mutant_status(&self, id: i64, status: &str, command: &str) -> Result<()> { + self.conn.execute( + "UPDATE mutants SET status = ?1, command_to_test = ?2 WHERE id = ?3", + params![status, command, id], + )?; + Ok(()) + } +} + +/// Compute the SHA-256 hex digest of `diff`. +pub fn compute_patch_hash(diff: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(diff.as_bytes()); + format!("{:x}", hasher.finalize()) +} + +/// Build a minimal unified-diff patch for a single-line substitution. +/// The produced patch is suitable for `git apply`. +pub fn generate_diff( + file_path: &str, + line_num: usize, + original: &str, + mutated: &str, +) -> String { + format!( + "--- a/{file_path}\n+++ b/{file_path}\n@@ -{line_num},1 +{line_num},1 @@\n-{original}\n+{mutated}\n", + ) +} diff --git a/src/error.rs b/src/error.rs index 36a61bd..69a9f6a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -26,6 +26,9 @@ pub enum MutationError { #[error("Walkdir error: {0}")] Walkdir(#[from] walkdir::Error), + #[error("SQLite error: {0}")] + Rusqlite(#[from] rusqlite::Error), + #[error("Other error: {0}")] Other(#[from] anyhow::Error), } diff --git a/src/git_changes.rs b/src/git_changes.rs index cfc3188..3d1802f 100644 --- a/src/git_changes.rs +++ b/src/git_changes.rs @@ -23,6 +23,11 @@ pub async fn run_git_command(args: &[&str]) -> Result> { Ok(stdout.lines().map(|s| s.to_string()).collect()) } +pub async fn get_commit_hash() -> Result { + let lines = run_git_command(&["rev-parse", "HEAD"]).await?; + Ok(lines.into_iter().next().unwrap_or_default()) +} + pub async fn get_changed_files(pr_number: Option) -> Result> { let mut used_remote = "upstream"; // Track which remote we successfully used diff --git a/src/lib.rs b/src/lib.rs index 295d75b..59d7f51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,16 +18,17 @@ //! async fn main() -> Result<(), Box> { //! // Generate mutants for a specific file with AST filtering //! mutation::run_mutation( -//! 
None, // PR number +//! None, // PR number //! Some("src/test.cpp".into()), // file path -//! false, // one_mutant -//! false, // only_security_mutations -//! None, // range_lines -//! None, // coverage -//! false, // test_only -//! HashMap::new(), // skip_lines -//! true, // enable_ast_filtering -//! None, // custom_expert_rule +//! false, // one_mutant +//! false, // only_security_mutations +//! None, // range_lines +//! None, // coverage +//! false, // test_only +//! HashMap::new(), // skip_lines +//! true, // enable_ast_filtering +//! None, // custom_expert_rule +//! None, // sqlite_path //! ).await?; //! //! Ok(()) @@ -37,6 +38,7 @@ pub mod analyze; pub mod ast_analysis; pub mod coverage; +pub mod db; pub mod error; pub mod git_changes; pub mod mutation; diff --git a/src/main.rs b/src/main.rs index 30d3a51..50b0845 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use std::path::PathBuf; mod analyze; mod ast_analysis; mod coverage; +mod db; mod error; mod git_changes; mod mutation; @@ -64,6 +65,10 @@ enum Commands { /// Add custom expert rule for arid node detection #[arg(long, value_name = "PATTERN")] add_expert_rule: Option, + + /// Persist results to a SQLite database (default path: mutation.db) + #[arg(long, value_name = "PATH", num_args = 0..=1, default_missing_value = "mutation.db")] + sqlite: Option, }, /// Analyze mutants Analyze { @@ -86,6 +91,14 @@ enum Commands { /// Maximum acceptable survival rate (0.3 = 30%) #[arg(long, default_value = "0.75")] survival_threshold: f64, + + /// SQLite database path to read mutants from (requires --run_id) + #[arg(long, value_name = "PATH", num_args = 0..=1, default_missing_value = "mutation.db")] + sqlite: Option, + + /// Run ID to analyze from the SQLite database (requires --sqlite) + #[arg(long)] + run_id: Option, }, } @@ -105,6 +118,7 @@ async fn main() -> Result<()> { only_security_mutations, disable_ast_filtering, add_expert_rule, + sqlite, } => { let skip_lines_map = if let Some(path) = skip_lines { 
read_skip_lines(&path)? @@ -155,6 +169,7 @@ async fn main() -> Result<()> { skip_lines_map, !disable_ast_filtering, add_expert_rule, + sqlite, ) .await?; } @@ -164,8 +179,17 @@ async fn main() -> Result<()> { jobs, command, survival_threshold, + sqlite, + run_id, } => { - analyze::run_analysis(folder, command, jobs, timeout, survival_threshold).await?; + if run_id.is_some() && sqlite.is_none() { + return Err(MutationError::InvalidInput( + "--run_id requires --sqlite ".to_string(), + )); + } + + analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id) + .await?; } } diff --git a/src/mutation.rs b/src/mutation.rs index 3c8e7e6..8b456a4 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -1,6 +1,7 @@ use crate::ast_analysis::{filter_mutatable_lines, AridNodeDetector}; +use crate::db::{compute_patch_hash, generate_diff, Database, MutantData}; use crate::error::{MutationError, Result}; -use crate::git_changes::{get_changed_files, get_lines_touched}; +use crate::git_changes::{get_changed_files, get_commit_hash, get_lines_touched}; use crate::operators::{ get_do_not_mutate_patterns, get_do_not_mutate_py_patterns, get_do_not_mutate_unit_patterns, get_regex_operators, get_security_operators, get_skip_if_contain_patterns, get_test_operators, @@ -18,6 +19,9 @@ pub struct FileToMutate { pub is_unit_test: bool, } +/// Chunk size for DB batch inserts. +const DB_BATCH_SIZE: usize = 100; + pub async fn run_mutation( pr_number: Option, file: Option, @@ -29,12 +33,29 @@ pub async fn run_mutation( skip_lines: HashMap>, enable_ast_filtering: bool, custom_expert_rule: Option, + sqlite_path: Option, ) -> Result<()> { + // Set up database if requested. 
+ let mut db_and_run: Option<(Database, i64)> = None; + if let Some(ref path) = sqlite_path { + let db = Database::open(path)?; + db.ensure_schema()?; + db.seed_projects()?; + let project_id = db.get_bitcoin_core_project_id()?; + let commit_hash = get_commit_hash().await.unwrap_or_else(|_| "unknown".to_string()); + let tool_version = env!("CARGO_PKG_VERSION"); + let run_id = db.create_run(project_id, &commit_hash, tool_version, pr_number)?; + println!("SQLite: created run id={} in {}", run_id, path.display()); + db_and_run = Some((db, run_id)); + } + + let mut all_mutants: Vec = Vec::new(); + if let Some(file_path) = file { let file_str = file_path.to_string_lossy().to_string(); let is_unit_test = file_str.contains("test") && !file_str.contains(".py"); - mutate_file( + let mutants = mutate_file( &file_str, None, None, @@ -48,56 +69,71 @@ pub async fn run_mutation( custom_expert_rule, ) .await?; - return Ok(()); - } + all_mutants.extend(mutants); + } else { + let files_changed = get_changed_files(pr_number).await?; + let mut files_to_mutate = Vec::new(); + + for file_changed in files_changed { + // Skip certain file types + if file_changed.contains("doc") + || file_changed.contains("fuzz") + || file_changed.contains("bench") + || file_changed.contains("util") + || file_changed.contains("sanitizer_supressions") + || file_changed.contains("test_framework.py") + || file_changed.ends_with(".txt") + { + continue; + } - let files_changed = get_changed_files(pr_number).await?; - let mut files_to_mutate = Vec::new(); - - for file_changed in files_changed { - // Skip certain file types - if file_changed.contains("doc") - || file_changed.contains("fuzz") - || file_changed.contains("bench") - || file_changed.contains("util") - || file_changed.contains("sanitizer_supressions") - || file_changed.contains("test_framework.py") - || file_changed.ends_with(".txt") - { - continue; - } + let lines_touched = get_lines_touched(&file_changed).await?; + let is_unit_test = 
file_changed.contains("test") + && !file_changed.contains(".py") + && !file_changed.contains("util"); - let lines_touched = get_lines_touched(&file_changed).await?; - let is_unit_test = file_changed.contains("test") - && !file_changed.contains(".py") - && !file_changed.contains("util"); + if test_only && !(is_unit_test || file_changed.contains(".py")) { + continue; + } - if test_only && !(is_unit_test || file_changed.contains(".py")) { - continue; + files_to_mutate.push(FileToMutate { + file_path: file_changed, + lines_touched, + is_unit_test, + }); } - files_to_mutate.push(FileToMutate { - file_path: file_changed, - lines_touched, - is_unit_test, - }); + for file_info in files_to_mutate { + let mutants = mutate_file( + &file_info.file_path, + Some(file_info.lines_touched), + pr_number, + one_mutant, + only_security_mutations, + range_lines, + &coverage, + file_info.is_unit_test, + &skip_lines, + enable_ast_filtering, + custom_expert_rule.clone(), + ) + .await?; + all_mutants.extend(mutants); + } } - for file_info in files_to_mutate { - mutate_file( - &file_info.file_path, - Some(file_info.lines_touched), - pr_number, - one_mutant, - only_security_mutations, - range_lines, - &coverage, - file_info.is_unit_test, - &skip_lines, - enable_ast_filtering, - custom_expert_rule.clone(), - ) - .await?; + // Persist mutants to the database in chunks. 
+ if let Some((ref mut db, run_id)) = db_and_run { + let total = all_mutants.len(); + let mut inserted = 0usize; + for chunk in all_mutants.chunks(DB_BATCH_SIZE) { + db.insert_mutant_batch(run_id, chunk)?; + inserted += chunk.len(); + } + println!( + "SQLite: inserted {}/{} mutants for run_id={}", + inserted, total, run_id + ); } Ok(()) @@ -115,7 +151,7 @@ pub async fn mutate_file( skip_lines: &HashMap>, enable_ast_filtering: bool, custom_expert_rule: Option, -) -> Result<()> { +) -> Result> { println!("\n\nGenerating mutants for {}...", file_to_mutate); let source_code = fs::read_to_string(file_to_mutate)?; @@ -210,6 +246,7 @@ pub async fn mutate_file( } let mut mutant_count = 0; + let mut collected: Vec = Vec::new(); if one_mutant { println!("One mutant mode enabled"); @@ -277,6 +314,23 @@ pub async fn mutate_file( range_lines, )?; + // Collect mutant metadata for DB persistence. + let diff = generate_diff( + file_to_mutate, + line_num, + line_before_mutation, + &line_mutated, + ); + let patch_hash = compute_patch_hash(&diff); + let operator_label = + format!("{} ==> {}", operator.pattern.as_str(), operator.replacement); + collected.push(MutantData { + diff, + patch_hash, + file_path: file_to_mutate.to_string(), + operator: operator_label, + }); + if one_mutant { break; // Break only from operator loop, continue to next line } @@ -303,7 +357,7 @@ pub async fn mutate_file( } println!("Generated {} mutants...", mutant_count); - Ok(()) + Ok(collected) } fn should_skip_line(line: &str, file_path: &str, is_unit_test: bool) -> Result { From 8121f4fae55107ba559087187b13add21296cb5e Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 11:00:17 -0300 Subject: [PATCH 2/5] sqlite: persist --range in runs.config_json Add a config_json TEXT column to the runs table. When --range is supplied together with --sqlite, the range bounds are written as {"range":[start,end]} into that column, making the exact scope of every run reproducible and queryable. 
Schema migration: ensure_schema now issues ALTER TABLE runs ADD COLUMN config_json TEXT after the CREATE TABLE statement and silently ignores the "duplicate column name" error so existing databases are upgraded non-destructively without requiring a full schema recreation. Co-Authored-By: Claude Sonnet 4.6 --- src/db.rs | 25 ++++++++++++++++++++----- src/mutation.rs | 15 ++++++++++++++- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/db.rs b/src/db.rs index 04accf9..dd06f9c 100644 --- a/src/db.rs +++ b/src/db.rs @@ -20,7 +20,8 @@ CREATE TABLE IF NOT EXISTS runs ( commit_hash TEXT NOT NULL, pr_number INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tool_version TEXT + tool_version TEXT, + config_json TEXT ); CREATE INDEX IF NOT EXISTS idx_runs_project_created ON runs(project_id, created_at DESC); @@ -74,9 +75,22 @@ impl Database { Ok(Database { conn }) } - /// Create tables and indexes if they do not yet exist. + /// Create tables and indexes if they do not yet exist, and apply any + /// additive migrations needed for older databases. pub fn ensure_schema(&self) -> Result<()> { self.conn.execute_batch(SCHEMA)?; + // Migration: add config_json to runs if the column is missing. + // ALTER TABLE ADD COLUMN fails with "duplicate column name" when the + // column already exists; silence that specific error so the function + // is idempotent on databases created before this column was added. 
+ if let Err(e) = self + .conn + .execute_batch("ALTER TABLE runs ADD COLUMN config_json TEXT;") + { + if !e.to_string().contains("duplicate column name") { + return Err(e.into()); + } + } Ok(()) } @@ -106,11 +120,12 @@ impl Database { commit_hash: &str, tool_version: &str, pr_number: Option, + config_json: Option<&str>, ) -> Result { self.conn.execute( - "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number) - VALUES (?1, ?2, ?3, ?4)", - params![project_id, commit_hash, tool_version, pr_number], + "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number, config_json) + VALUES (?1, ?2, ?3, ?4, ?5)", + params![project_id, commit_hash, tool_version, pr_number, config_json], )?; Ok(self.conn.last_insert_rowid()) } diff --git a/src/mutation.rs b/src/mutation.rs index 8b456a4..fdf7d54 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -22,6 +22,12 @@ pub struct FileToMutate { /// Chunk size for DB batch inserts. const DB_BATCH_SIZE: usize = 100; +/// Serialize execution config options into a JSON string for the runs table. +/// Returns `None` when there is nothing worth recording. 
+fn build_config_json(range_lines: Option<(usize, usize)>) -> Option { + range_lines.map(|(start, end)| format!("{{\"range\":[{},{}]}}", start, end)) +} + pub async fn run_mutation( pr_number: Option, file: Option, @@ -44,7 +50,14 @@ pub async fn run_mutation( let project_id = db.get_bitcoin_core_project_id()?; let commit_hash = get_commit_hash().await.unwrap_or_else(|_| "unknown".to_string()); let tool_version = env!("CARGO_PKG_VERSION"); - let run_id = db.create_run(project_id, &commit_hash, tool_version, pr_number)?; + let config_json = build_config_json(range_lines); + let run_id = db.create_run( + project_id, + &commit_hash, + tool_version, + pr_number, + config_json.as_deref(), + )?; println!("SQLite: created run id={} in {}", run_id, path.display()); db_and_run = Some((db, run_id)); } From eb610dc8b776a687d903f7db74d6b056ad9403a1 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 13:36:58 -0300 Subject: [PATCH 3/5] analyze: fix silent restore failure causing cascading patch errors restore_file discarded the bool returned by run_command, so a failed `git restore` was silently ignored and left the file in a mutated state. All subsequent mutants targeting the same file then failed with "patch does not apply" because the `-` line in their diff no longer matched the actual file content. Two fixes: - restore_file now returns an error when `git restore` exits non-zero. - run_db_analysis resets the file to HEAD at the start of each mutant iteration (soft pre-restore with a warning on failure) so a single bad restore cannot poison the rest of the run. 
Co-Authored-By: Claude Sonnet 4.6 --- src/analyze.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/analyze.rs b/src/analyze.rs index 07da547..8ff07f4 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -78,6 +78,17 @@ async fn run_db_analysis( for (i, mutant) in mutants.iter().enumerate() { println!("[{}/{}] Analyzing mutant id={}", i + 1, total, mutant.id); + // Determine the file path to restore later. + let file_path = mutant.file_path.as_deref().unwrap_or(""); + + // Ensure the file is at HEAD before applying the mutant diff. + // A previous mutant may have been left applied if restore silently failed. + if !file_path.is_empty() { + if let Err(e) = restore_file(file_path).await { + eprintln!(" Warning: pre-restore failed for {}: {}", file_path, e); + } + } + // Update status to 'running' and record the command. db.update_mutant_status(mutant.id, "running", command)?; @@ -89,9 +100,6 @@ async fn run_db_analysis( continue; } - // Determine the file path to restore later. - let file_path = mutant.file_path.as_deref().unwrap_or(""); - // Run the test command. let killed = !run_command(command, timeout_secs).await?; @@ -367,7 +375,13 @@ fn get_command_to_kill(target_file_path: &str, jobs: u32) -> Result { async fn restore_file(target_file_path: &str) -> Result<()> { let restore_command = format!("git restore {}", target_file_path); - run_command(&restore_command, 30).await?; + let success = run_command(&restore_command, 30).await?; + if !success { + return Err(MutationError::Git(format!( + "git restore failed for {}", + target_file_path + ))); + } Ok(()) } From 1aeb25d7e923890c18f1b2a62777b07ce9bd8089 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 15:46:18 -0300 Subject: [PATCH 4/5] sqlite: generate real git diff patches with context lines Replace the hand-crafted minimal diff in generate_diff with an actual `git diff --no-index` invocation against a temp file. 
This produces a proper unified diff with 3 context lines that `git apply` can reliably locate and apply, fixing silent patch failures during DB-based analysis. Co-Authored-By: Claude Sonnet 4.6 --- src/db.rs | 68 ++++++++++++++++++++++++++++++++++++++++--------- src/mutation.rs | 16 +++++++----- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/src/db.rs b/src/db.rs index dd06f9c..c0aea35 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,4 +1,4 @@ -use crate::error::Result; +use crate::error::{MutationError, Result}; use rusqlite::{params, Connection}; use sha2::{Digest, Sha256}; use std::path::Path; @@ -193,15 +193,59 @@ pub fn compute_patch_hash(diff: &str) -> String { format!("{:x}", hasher.finalize()) } -/// Build a minimal unified-diff patch for a single-line substitution. -/// The produced patch is suitable for `git apply`. -pub fn generate_diff( - file_path: &str, - line_num: usize, - original: &str, - mutated: &str, -) -> String { - format!( - "--- a/{file_path}\n+++ b/{file_path}\n@@ -{line_num},1 +{line_num},1 @@\n-{original}\n+{mutated}\n", - ) +/// Generate a proper unified diff by running `git diff --no-index` between the +/// original file on disk and a temp file containing `mutated_content`. +/// The resulting patch includes context lines and is suitable for `git apply`. +pub async fn generate_diff(file_path: &str, mutated_content: &str) -> Result { + use std::io::Write; + use tempfile::NamedTempFile; + use tokio::process::Command; + + let mut tmp = NamedTempFile::new()?; + tmp.write_all(mutated_content.as_bytes())?; + tmp.flush()?; + + let tmp_path = tmp.path().to_string_lossy().to_string(); + + // `git diff --no-index` exits with 1 when differences exist — that is expected. 
+ let output = Command::new("git") + .args(["diff", "--no-index", "--", file_path, &tmp_path]) + .output() + .await + .map_err(|e| MutationError::Git(format!("git diff failed to spawn: {}", e)))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + + if stdout.is_empty() { + return Err(MutationError::Git(format!( + "git diff produced no output for {}", + file_path + ))); + } + + // Fix the temp-file path back to the real file path in the diff headers. + // `git diff --no-index` shows the second argument's path in `+++ b/` and + // `diff --git … b/…`; replace those with `file_path`. + let fixed = stdout + .lines() + .map(|line| { + if line.starts_with("+++ ") { + format!("+++ b/{}", file_path) + } else if line.starts_with("diff --git ") { + format!("diff --git a/{} b/{}", file_path, file_path) + } else { + line.to_string() + } + }) + .collect::>() + .join("\n"); + + // Preserve trailing newline present in git diff output. + let fixed = if stdout.ends_with('\n') { + fixed + "\n" + } else { + fixed + }; + + Ok(fixed) } diff --git a/src/mutation.rs b/src/mutation.rs index fdf7d54..4b0d7d5 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -328,12 +328,16 @@ pub async fn mutate_file( )?; // Collect mutant metadata for DB persistence. 
- let diff = generate_diff( - file_to_mutate, - line_num, - line_before_mutation, - &line_mutated, - ); + let diff = match generate_diff(file_to_mutate, &mutated_content).await { + Ok(d) => d, + Err(e) => { + eprintln!( + " Warning: could not generate diff for mutant at line {}: {}", + line_num, e + ); + continue; + } + }; let patch_hash = compute_patch_hash(&diff); let operator_label = format!("{} ==> {}", operator.pattern.as_str(), operator.replacement); From cb9c6c92f2fa7e76b6a2603995ad40f410846254 Mon Sep 17 00:00:00 2001 From: Bruno Garcia Date: Sat, 28 Feb 2026 16:31:21 -0300 Subject: [PATCH 5/5] analyze: add --file_path filter for DB-based analysis Allows narrowing a run's mutants to a single source file, enabling per-file analysis with different test commands on multi-file PRs. Co-Authored-By: Claude Sonnet 4.6 --- src/analyze.rs | 14 +++++++++---- src/db.rs | 55 ++++++++++++++++++++++++++++++++------------------ src/main.rs | 13 +++++++++++- 3 files changed, 57 insertions(+), 25 deletions(-) diff --git a/src/analyze.rs b/src/analyze.rs index 8ff07f4..66a2a75 100644 --- a/src/analyze.rs +++ b/src/analyze.rs @@ -17,6 +17,7 @@ pub async fn run_analysis( survival_threshold: f64, sqlite_path: Option, run_id: Option, + file_path: Option, ) -> Result<()> { // DB-based analysis mode: read mutants from DB and test them. if let (Some(ref path), Some(rid)) = (sqlite_path.as_ref(), run_id) { @@ -28,7 +29,7 @@ pub async fn run_analysis( let db = Database::open(path)?; db.ensure_schema()?; db.seed_projects()?; - return run_db_analysis(&db, rid, &command, timeout_secs).await; + return run_db_analysis(&db, rid, &command, timeout_secs, file_path.as_deref()).await; } // Folder-based analysis mode (existing behaviour). @@ -53,17 +54,22 @@ pub async fn run_analysis( Ok(()) } -/// Test all pending mutants in `run_id` from the database. +/// Test all pending mutants in `run_id` from the database, optionally filtered by `file_path`. 
async fn run_db_analysis( db: &Database, run_id: i64, command: &str, timeout_secs: u64, + file_path: Option<&str>, ) -> Result<()> { - let mutants = db.get_mutants_for_run(run_id)?; + let mutants = db.get_mutants_for_run(run_id, file_path)?; let total = mutants.len(); - println!("* {} MUTANTS in run_id={} *", total, run_id); + if let Some(fp) = file_path { + println!("* {} MUTANTS in run_id={} (file: {}) *", total, run_id, fp); + } else { + println!("* {} MUTANTS in run_id={} *", total, run_id); + } if total == 0 { return Err(MutationError::InvalidInput(format!( diff --git a/src/db.rs b/src/db.rs index c0aea35..7a40c3f 100644 --- a/src/db.rs +++ b/src/db.rs @@ -154,26 +154,41 @@ impl Database { Ok(()) } - /// Return all mutants belonging to `run_id`. - pub fn get_mutants_for_run(&self, run_id: i64) -> Result> { - let mut stmt = self.conn.prepare( - "SELECT id, diff, file_path - FROM mutants - WHERE run_id = ?1", - )?; - let rows = stmt.query_map(params![run_id], |row| { - Ok(MutantRow { - id: row.get(0)?, - diff: row.get(1)?, - file_path: row.get(2)?, - }) - })?; - - let mut result = Vec::new(); - for row in rows { - result.push(row?); - } - Ok(result) + /// Return mutants belonging to `run_id`, optionally filtered by `file_path`. + pub fn get_mutants_for_run( + &self, + run_id: i64, + file_path: Option<&str>, + ) -> Result> { + let sql = if file_path.is_some() { + "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1 AND file_path = ?2" + } else { + "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1" + }; + + let mut stmt = self.conn.prepare(sql)?; + + let rows: Vec = if let Some(fp) = file_path { + stmt.query_map(params![run_id, fp], |row| { + Ok(MutantRow { + id: row.get(0)?, + diff: row.get(1)?, + file_path: row.get(2)?, + }) + })? + .collect::>()? + } else { + stmt.query_map(params![run_id], |row| { + Ok(MutantRow { + id: row.get(0)?, + diff: row.get(1)?, + file_path: row.get(2)?, + }) + })? + .collect::>()? 
+ }; + + Ok(rows) } /// Update the status and command_to_test for a single mutant. diff --git a/src/main.rs b/src/main.rs index 50b0845..5cdc607 100644 --- a/src/main.rs +++ b/src/main.rs @@ -99,6 +99,10 @@ enum Commands { /// Run ID to analyze from the SQLite database (requires --sqlite) #[arg(long)] run_id: Option, + + /// Only analyze mutants for this file path (requires --run_id) + #[arg(long)] + file_path: Option, }, } @@ -181,6 +185,7 @@ async fn main() -> Result<()> { survival_threshold, sqlite, run_id, + file_path, } => { if run_id.is_some() && sqlite.is_none() { return Err(MutationError::InvalidInput( @@ -188,7 +193,13 @@ async fn main() -> Result<()> { )); } - analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id) + if file_path.is_some() && run_id.is_none() { + return Err(MutationError::InvalidInput( + "--file_path requires --run_id".to_string(), + )); + } + + analyze::run_analysis(folder, command, jobs, timeout, survival_threshold, sqlite, run_id, file_path) .await?; } }