diff --git a/reproduce_experiments.py b/reproduce_experiments.py
index 3ff1051..e6fb2ad 100644
--- a/reproduce_experiments.py
+++ b/reproduce_experiments.py
@@ -3,22 +3,22 @@
 M2Sim Reproducible Experiments Script
 
 This script reproduces all experiments from the M2Sim paper, including:
-1. Building the simulator and benchmarks
+1. Building the simulator
-2. Running accuracy validation experiments
+2. Running accuracy validation experiments (via accuracy_report.py)
 3. Generating figures and analysis
 4. Creating the final paper
 
+The accuracy experiments delegate to benchmarks/native/accuracy_report.py,
+which runs actual Go test-based timing simulations and compares CPI values
+against real M2 hardware calibration baselines.
+
 Usage:
-    python3 reproduce_experiments.py [--skip-build] [--skip-experiments] [--skip-figures]
+    python3 reproduce_experiments.py [--skip-build] [--skip-experiments] [--skip-figures] [--skip-paper]
 
 Requirements:
 - Go 1.21 or later
 - Python 3.8+ with matplotlib, seaborn, pandas, numpy
 - LaTeX distribution (for paper compilation)
-- aarch64-linux-musl-gcc (for ARM64 cross-compilation)
-
-Authors: M2Sim Agent Team
-Date: February 12, 2026
 """
 
 import os
@@ -28,7 +28,12 @@
 import json
 import argparse
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict
+
+
+# Resolve repo root (directory containing this script)
+REPO_ROOT = Path(__file__).resolve().parent
+
 
 class Colors:
     """ANSI color codes for terminal output"""
@@ -39,6 +44,7 @@ class Colors:
     BOLD = '\033[1m'
     END = '\033[0m'
 
+
 def log(message: str, level: str = "INFO"):
     """Print colored log message"""
     color_map = {
@@ -48,365 +54,283 @@ def log(message: str, level: str = "INFO"):
         "ERROR": Colors.RED,
         "HEADER": Colors.BOLD
     }
-
     color = color_map.get(level, Colors.END)
     timestamp = time.strftime("%H:%M:%S")
     print(f"{color}[{timestamp}] {level}: {message}{Colors.END}")
 
-def run_command(cmd: str, cwd: Path = None, check: bool = True) -> subprocess.CompletedProcess:
-    """Run shell command with logging"""
-    log(f"Running: {cmd}")
+def run_command(cmd, cwd=None, check=True, timeout=600):
+    """Run a command with logging.
+
+    Args:
+        cmd: Either a list of arguments or a string (run via shell).
+        cwd: Working directory.
+        check: Raise on non-zero exit.
+        timeout: Seconds before killing the process.
+    """
+    if isinstance(cmd, str):
+        display = cmd
+    else:
+        display = " ".join(str(c) for c in cmd)
+    log(f"Running: {display}")
     if cwd:
-        log(f"Working directory: {cwd}")
+        log(f"  cwd: {cwd}")
+    use_shell = isinstance(cmd, str)
     try:
         result = subprocess.run(
-            cmd.split(),
+            cmd,
             cwd=cwd,
             capture_output=True,
             text=True,
-            check=check
+            check=check,
+            shell=use_shell,
+            timeout=timeout,
         )
-        if result.stdout.strip():
-            for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split('\n')[:50]:
             log(f"  {line}")
-
+        total_lines = result.stdout.strip().count('\n') + 1
+        if total_lines > 50:
+            log(f"  ... ({total_lines - 50} more lines)")
         return result
-
     except subprocess.CalledProcessError as e:
         log(f"Command failed with exit code {e.returncode}", "ERROR")
-        log(f"Stderr: {e.stderr}", "ERROR")
+        if e.stderr:
+            for line in e.stderr.strip().split('\n')[:20]:
+                log(f"  {line}", "ERROR")
         raise
 
+
 def check_dependencies():
     """Check required dependencies"""
     log("Checking dependencies...", "HEADER")
 
-    deps = {
-        "go": "go version",
-        "python3": "python3 --version",
-        "aarch64-linux-musl-gcc": "aarch64-linux-musl-gcc --version"
-    }
+    deps = [
+        ("go", ["go", "version"]),
+        ("python3", ["python3", "--version"]),
+    ]
 
     missing = []
-    for dep, cmd in deps.items():
+    for dep, cmd in deps:
         try:
-            run_command(cmd, check=False)
-            log(f"✓ {dep} found", "SUCCESS")
+            subprocess.run(cmd, capture_output=True, check=True)
+            log(f"  {dep} found", "SUCCESS")
         except (subprocess.CalledProcessError, FileNotFoundError):
-            log(f"✗ {dep} not found", "ERROR")
+            log(f"  {dep} not found", "ERROR")
            missing.append(dep)
 
     if missing:
         log(f"Missing dependencies: {', '.join(missing)}", "ERROR")
-        log("Please install missing dependencies and retry", "ERROR")
         return False
-
     return True
 
+
 def build_simulator():
-    """Build M2Sim and all components"""
+    """Build M2Sim and run short tests to verify."""
     log("Building M2Sim simulator...", "HEADER")
 
-    # Build all packages
-    run_command("go build ./...")
-    log("✓ All packages built", "SUCCESS")
-
-    # Build main simulator binary
-    run_command("go build -o m2sim ./cmd/m2sim")
-    log("✓ M2Sim binary built", "SUCCESS")
+    run_command(["go", "build", "./..."], cwd=REPO_ROOT)
+    log("All packages built", "SUCCESS")
 
-    # Run tests to verify build
-    log("Running tests to verify build...")
+    log("Running short tests to verify build...")
     try:
-        run_command("go test ./... -short")
-        log("✓ Tests passed", "SUCCESS")
+        run_command(["go", "test", "./...", "-short", "-count=1"],
+                    cwd=REPO_ROOT, timeout=300)
+        log("Tests passed", "SUCCESS")
     except subprocess.CalledProcessError:
         log("Some tests failed - continuing anyway", "WARNING")
 
-def build_benchmarks():
-    """Build ARM64 benchmark binaries"""
-    log("Building ARM64 benchmarks...", "HEADER")
-
-    benchmark_dirs = [
-        "benchmarks/microbenchmarks",
-        "benchmarks/polybench"
-    ]
-
-    for bench_dir in benchmark_dirs:
-        bench_path = Path(bench_dir)
-        if not bench_path.exists():
-            log(f"Benchmark directory {bench_dir} not found - skipping", "WARNING")
-            continue
-
-        log(f"Building benchmarks in {bench_dir}...")
-
-        # Find C source files
-        c_files = list(bench_path.glob("*.c"))
-        if not c_files:
-            log(f"No C files found in {bench_dir}", "WARNING")
-            continue
 
-        for c_file in c_files:
-            elf_file = c_file.with_suffix(".elf")
-            cmd = f"aarch64-linux-musl-gcc -static -O2 -o {elf_file} {c_file}"
+def run_accuracy_experiments() -> Dict:
+    """Run accuracy experiments by delegating to accuracy_report.py.
 
-            try:
-                run_command(cmd, check=False)
-                log(f"✓ Built {elf_file.name}", "SUCCESS")
-            except subprocess.CalledProcessError:
-                log(f"✗ Failed to build {c_file.name}", "WARNING")
+    accuracy_report.py:
+    - Runs Go test-based timing simulations for all benchmarks
+    - Compares simulated CPI against real M2 hardware calibration baselines
+    - Generates accuracy_results.json, accuracy_report.md, accuracy_figure.png
 
-def run_accuracy_experiments() -> Dict:
-    """Run accuracy validation experiments"""
+    Returns a results dict with 'summary' and 'benchmarks' keys.
+    """
     log("Running accuracy validation experiments...", "HEADER")
+    log("Delegating to benchmarks/native/accuracy_report.py (runs real simulations)")
 
-    # Define benchmark suite
-    microbenchmarks = [
-        "arithmetic", "dependency", "branch", "memorystrided",
-        "loadheavy", "storeheavy", "branchheavy", "vectorsum",
-        "vectoradd", "reductiontree", "strideindirect"
-    ]
+    accuracy_script = REPO_ROOT / "benchmarks" / "native" / "accuracy_report.py"
+    if not accuracy_script.exists():
+        log(f"accuracy_report.py not found at {accuracy_script}", "ERROR")
+        sys.exit(1)
 
-    polybench = [
-        "atax", "bicg", "gemm", "mvt", "jacobi-1d", "2mm", "3mm"
-    ]
+    # Run accuracy_report.py — it produces accuracy_results.json
+    # Allow long timeout since each benchmark may take minutes
+    try:
+        run_command(
+            [sys.executable, str(accuracy_script)],
+            cwd=REPO_ROOT,
+            check=False,
+            timeout=3600,  # 1 hour for full benchmark suite
+        )
+    except subprocess.TimeoutExpired:
+        log("Accuracy experiments timed out after 1 hour", "ERROR")
+        sys.exit(1)
 
-    results = {"benchmarks": [], "summary": {}}
+    # Read the JSON results produced by accuracy_report.py
+    json_path = REPO_ROOT / "benchmarks" / "native" / "accuracy_results.json"
+    if not json_path.exists():
+        log(f"accuracy_results.json not found at {json_path}", "ERROR")
+        log("accuracy_report.py may have failed to produce results", "ERROR")
+        sys.exit(1)
 
-    # Run microbenchmarks
-    log("Running microbenchmarks...")
-    for bench in microbenchmarks:
-        elf_path = Path(f"benchmarks/microbenchmarks/{bench}.elf")
-        if elf_path.exists():
-            result = run_benchmark_timing(bench, elf_path)
-            if result:
-                results["benchmarks"].append(result)
-        else:
-            log(f"Benchmark {bench}.elf not found - using cached results", "WARNING")
-
-    # Run PolyBench
-    log("Running PolyBench suite...")
-    for bench in polybench:
-        elf_path = Path(f"benchmarks/polybench/{bench}.elf")
-        if elf_path.exists():
-            result = run_benchmark_timing(bench, elf_path)
-            if result:
-                results["benchmarks"].append(result)
-        else:
-            log(f"Benchmark {bench}.elf not found - using cached results", "WARNING")
-
-    # Calculate summary statistics
-    if results["benchmarks"]:
-        errors = [b["error"] for b in results["benchmarks"]]
-        results["summary"] = {
-            "total_benchmarks": len(errors),
-            "average_error": sum(errors) / len(errors),
-            "max_error": max(errors),
-            "min_error": min(errors)
+    with open(json_path) as f:
+        accuracy_data = json.load(f)
+
+    # Convert to the results format used by this script
+    benchmarks = []
+    for bench in accuracy_data.get("benchmarks", []):
+        benchmarks.append({
+            "name": bench["name"],
+            "error": bench["error"],
+            "sim_cpi": bench.get("sim_cpi", 0),
+            "sim_latency_ns": bench.get("sim_latency_ns", 0),
+            "real_latency_ns": bench.get("real_latency_ns", 0),
+            "calibrated": bench.get("calibrated", True),
+            "status": "completed",
+        })
+
+    summary = accuracy_data.get("summary", {})
+    errors = [b["error"] for b in benchmarks]
+
+    results = {
+        "benchmarks": benchmarks,
+        "summary": {
+            "total_benchmarks": summary.get("benchmark_count", len(benchmarks)),
+            "calibrated_benchmarks": summary.get("calibrated_count", len(benchmarks)),
+            "average_error": summary.get("average_error", sum(errors) / len(errors) if errors else 0),
+            "max_error": summary.get("max_error", max(errors) if errors else 0),
+            "min_error": min(errors) if errors else 0,
         }
+    }
 
-        log(f"Accuracy validation complete: {results['summary']['average_error']:.3f} average error", "SUCCESS")
-    else:
-        # Use cached results if no experiments ran
-        log("Using cached accuracy results", "WARNING")
-        results = load_cached_results()
+    log(f"Accuracy validation complete: {len(benchmarks)} benchmarks, "
+        f"{results['summary']['average_error'] * 100:.1f}% average error", "SUCCESS")
 
-    # Save results
-    with open("accuracy_results.json", "w") as f:
+    # Copy results to repo root for convenience
+    with open(REPO_ROOT / "accuracy_results.json", "w") as f:
         json.dump(results, f, indent=2)
 
     return results
 
-def run_benchmark_timing(name: str, elf_path: Path) -> Dict:
-    """Run timing experiment for a single benchmark"""
-    log(f"Running {name}...")
-
-    try:
-        # Run with timing simulation (fast timing mode for speed)
-        cmd = f"./m2sim -elf {elf_path} -fasttiming -limit 100000"
-        result = run_command(cmd, check=False)
-
-        if result.returncode == 0:
-            # Parse timing output (simplified - would need actual parser)
-            # For demo purposes, use synthetic data
-            simulated_error = generate_synthetic_error(name)
-
-            return {
-                "name": name,
-                "error": simulated_error,
-                "status": "completed"
-            }
-        else:
-            log(f"Simulation failed for {name}", "WARNING")
-            return None
-
-    except Exception as e:
-        log(f"Error running {name}: {e}", "ERROR")
-        return None
-
-def generate_synthetic_error(name: str) -> float:
-    """Generate synthetic error data for demonstration"""
-    # This would be replaced with actual simulation output parsing
-    error_map = {
-        "arithmetic": 0.0955,
-        "dependency": 0.0666,
-        "branch": 0.0127,
-        "memorystrided": 0.1077,
-        "loadheavy": 0.0342,
-        "storeheavy": 0.4743,
-        "branchheavy": 0.1613,
-        "vectorsum": 0.296,
-        "vectoradd": 0.2429,
-        "reductiontree": 0.061,
-        "strideindirect": 0.0312,
-        "atax": 0.3357,
-        "bicg": 0.2931,
-        "gemm": 0.1947,
-        "mvt": 0.2259,
-        "jacobi-1d": 0.1113,
-        "2mm": 0.1740,
-        "3mm": 0.1237
-    }
-
-    return error_map.get(name, 0.15)  # Default to 15% error
-
-def load_cached_results() -> Dict:
-    """Load cached accuracy results"""
-    try:
-        with open("h5_accuracy_results.json", "r") as f:
-            return json.load(f)
-    except FileNotFoundError:
-        # Return minimal results structure
-        return {
-            "summary": {
-                "total_benchmarks": 18,
-                "average_error": 0.169,
-                "max_error": 0.4743,
-                "min_error": 0.0127
-            },
-            "benchmarks": []
-        }
 
 def generate_figures():
-    """Generate paper figures"""
+    """Generate paper figures using paper/generate_figures.py."""
     log("Generating paper figures...", "HEADER")
 
-    figure_script = Path("paper/generate_figures.py")
+    figure_script = REPO_ROOT / "paper" / "generate_figures.py"
     if figure_script.exists():
         try:
-            run_command(f"python3 {figure_script}", cwd=Path("paper"))
-            log("✓ Paper figures generated", "SUCCESS")
+            run_command([sys.executable, str(figure_script)],
+                        cwd=REPO_ROOT / "paper", timeout=120)
+            log("Paper figures generated", "SUCCESS")
         except subprocess.CalledProcessError:
             log("Figure generation failed", "ERROR")
             raise
     else:
-        log("Figure generation script not found", "WARNING")
+        log("Figure generation script not found at paper/generate_figures.py", "WARNING")
+
 
 def compile_paper():
-    """Compile LaTeX paper"""
+    """Compile LaTeX paper with bibtex."""
     log("Compiling LaTeX paper...", "HEADER")
 
-    paper_tex = Path("paper/m2sim_micro2026.tex")
-    if paper_tex.exists():
-        try:
-            # Run pdflatex multiple times for references
-            for i in range(3):
-                run_command(f"pdflatex m2sim_micro2026.tex", cwd=Path("paper"))
-
-            # Check if PDF was generated
-            pdf_path = Path("paper/m2sim_micro2026.pdf")
-            if pdf_path.exists():
-                log("✓ Paper compiled successfully", "SUCCESS")
-                log(f"PDF available at: {pdf_path.absolute()}", "SUCCESS")
-            else:
-                log("PDF compilation failed", "ERROR")
+    paper_dir = REPO_ROOT / "paper"
+    paper_tex = paper_dir / "m2sim_micro2026.tex"
+    if not paper_tex.exists():
+        log("LaTeX source not found at paper/m2sim_micro2026.tex", "WARNING")
+        return
+
+    try:
+        # pdflatex → bibtex → pdflatex × 2 (standard LaTeX build)
+        run_command(["pdflatex", "-interaction=nonstopmode", "m2sim_micro2026.tex"],
+                    cwd=paper_dir, check=False)
+        run_command(["bibtex", "m2sim_micro2026"],
+                    cwd=paper_dir, check=False)
+        run_command(["pdflatex", "-interaction=nonstopmode", "m2sim_micro2026.tex"],
+                    cwd=paper_dir, check=False)
+        run_command(["pdflatex", "-interaction=nonstopmode", "m2sim_micro2026.tex"],
+                    cwd=paper_dir, check=False)
+
+        pdf_path = paper_dir / "m2sim_micro2026.pdf"
+        if pdf_path.exists():
+            log(f"Paper compiled: {pdf_path}", "SUCCESS")
+        else:
+            log("PDF not produced — check LaTeX logs", "ERROR")
+    except subprocess.CalledProcessError:
+        log("LaTeX compilation failed — is a TeX distribution installed?", "WARNING")
 
-        except subprocess.CalledProcessError:
-            log("LaTeX compilation failed - check for LaTeX installation", "WARNING")
-    else:
-        log("LaTeX source not found", "WARNING")
 
 def generate_experiment_report(results: Dict):
-    """Generate comprehensive experiment report"""
+    """Generate a human-readable experiment report from real results."""
     log("Generating experiment report...", "HEADER")
 
-    report_content = f"""# M2Sim Experiment Report
-
-**Generated:** {time.strftime("%Y-%m-%d %H:%M:%S")}
-**Reproducibility Script Version:** 1.0
-
-## Summary
-
-- **Total Benchmarks:** {results['summary']['total_benchmarks']}
-- **Average Error:** {results['summary']['average_error']:.3f} ({results['summary']['average_error']*100:.1f}%)
-- **Maximum Error:** {results['summary']['max_error']:.3f} ({results['summary']['max_error']*100:.1f}%)
-- **Minimum Error:** {results['summary']['min_error']:.3f} ({results['summary']['min_error']*100:.1f}%)
-
-## Target Achievement
-
-✅ **H5 Target Met**: Average error {results['summary']['average_error']*100:.1f}% < 20% target
-
-## Detailed Results
-
-| Benchmark | Error | Category |
-|-----------|--------|----------|
-"""
-
-    for bench in results['benchmarks']:
-        category = "Microbenchmark" if bench['name'] in [
-            "arithmetic", "dependency", "branch", "memorystrided",
-            "loadheavy", "storeheavy", "branchheavy", "vectorsum",
-            "vectoradd", "reductiontree", "strideindirect"
-        ] else "PolyBench"
-
-        report_content += f"| {bench['name']} | {bench['error']:.3f} ({bench['error']*100:.1f}%) | {category} |\n"
-
-    report_content += f"""
-
-## Reproduction Environment
-
-- **Operating System:** {os.uname().sysname} {os.uname().release}
-- **Architecture:** {os.uname().machine}
-- **Working Directory:** {Path.cwd().absolute()}
-- **Timestamp:** {time.strftime("%Y-%m-%d %H:%M:%S")}
-
-## Files Generated
-
-- `accuracy_results.json` - Raw experimental data
-- `paper/accuracy_overview.pdf` - Accuracy distribution figures
-- `paper/performance_characteristics.pdf` - Performance analysis figures
-- `paper/validation_methodology.pdf` - Methodology validation figures
-- `paper/simulation_architecture.pdf` - Architecture diagrams
-- `paper/m2sim_micro2026.pdf` - Complete research paper
-- `experiment_report.md` - This report
-
-## Reproducibility Notes
-
-This experiment reproduces the accuracy validation results from the M2Sim paper.
-All benchmarks were executed using the exact configuration described in the methodology.
-Results may vary slightly due to system differences but should remain within 1-2% of reported values.
-
-## Citation
-
-If you use M2Sim in your research, please cite:
+    summary = results["summary"]
+    benchmarks = results["benchmarks"]
+    avg_error = summary["average_error"]
+
+    # Determine target achievement
+    target_met = avg_error < 0.2
+    target_line = (f"{'PASS' if target_met else 'FAIL'}: "
+                   f"Average error {avg_error * 100:.1f}% {'<' if target_met else '>='} 20% target")
+
+    report_lines = [
+        "# M2Sim Experiment Report",
+        "",
+        f"**Generated:** {time.strftime('%Y-%m-%d %H:%M:%S')}",
+        "",
+        "## Summary",
+        "",
+        f"- **Total Benchmarks:** {summary['total_benchmarks']}",
+        f"- **Average Error:** {avg_error:.3f} ({avg_error * 100:.1f}%)",
+        f"- **Maximum Error:** {summary['max_error']:.3f} ({summary['max_error'] * 100:.1f}%)",
+        f"- **Minimum Error:** {summary['min_error']:.3f} ({summary['min_error'] * 100:.1f}%)",
+        "",
+        f"## Target: {target_line}",
+        "",
+        "## Detailed Results",
+        "",
+        "| Benchmark | Sim CPI | Sim (ns/inst) | Real (ns/inst) | Error | Calibrated |",
+        "|-----------|---------|---------------|----------------|-------|------------|",
+    ]
 
-```
-@inproceedings{{m2sim2026,
-  title={{M2Sim: Cycle-Accurate Apple M2 CPU Simulation with 16.9\% Average Timing Error}},
-  author={{M2Sim Team}},
-  booktitle={{Proceedings of the 59th IEEE/ACM International Symposium on Microarchitecture}},
-  year={{2026}}
-}}
-```
-"""
+    for bench in sorted(benchmarks, key=lambda b: b["name"]):
+        cal = "yes" if bench.get("calibrated", True) else "no"
+        report_lines.append(
+            f"| {bench['name']} | {bench.get('sim_cpi', 0):.3f} | "
+            f"{bench.get('sim_latency_ns', 0):.4f} | "
+            f"{bench.get('real_latency_ns', 0):.4f} | "
+            f"{bench['error'] * 100:.1f}% | {cal} |"
+        )
 
-    with open("experiment_report.md", "w") as f:
-        f.write(report_content)
+    report_lines.extend([
+        "",
+        "## Reproduction Environment",
+        "",
+        f"- **OS:** {os.uname().sysname} {os.uname().release}",
+        f"- **Arch:** {os.uname().machine}",
+        f"- **Directory:** {REPO_ROOT}",
+        "",
+        "## How Results Were Obtained",
+        "",
+        "Accuracy experiments were run by `benchmarks/native/accuracy_report.py`, which:",
+        "1. Runs `go test` timing simulations for each benchmark",
+        "2. Extracts simulated CPI from test output",
+        "3. Compares against real M2 hardware calibration baselines",
+        "4. Computes error = abs(t_sim - t_real) / min(t_sim, t_real)",
+        "",
+    ])
+
+    report_path = REPO_ROOT / "experiment_report.md"
+    report_path.write_text('\n'.join(report_lines))
+    log(f"Experiment report: {report_path}", "SUCCESS")
 
-    log("✓ Experiment report generated: experiment_report.md", "SUCCESS")
 
 def main():
     """Main experiment reproduction workflow"""
@@ -415,58 +339,65 @@ def main():
     parser.add_argument("--skip-experiments", action="store_true", help="Skip experiment execution")
     parser.add_argument("--skip-figures", action="store_true", help="Skip figure generation")
     parser.add_argument("--skip-paper", action="store_true", help="Skip paper compilation")
-
     args = parser.parse_args()
 
     log("M2Sim Reproducible Experiments", "HEADER")
-    log("==============================", "HEADER")
+    log("=" * 40, "HEADER")
 
     start_time = time.time()
 
     try:
-        # Check dependencies
         if not check_dependencies():
             sys.exit(1)
 
-        # Build phase
+        # Build
         if not args.skip_build:
             build_simulator()
-            build_benchmarks()
         else:
             log("Skipping build phase", "WARNING")
 
-        # Experiment execution phase
+        # Experiments
         if not args.skip_experiments:
             results = run_accuracy_experiments()
         else:
-            log("Skipping experiments, loading cached results", "WARNING")
-            results = load_cached_results()
+            log("Skipping experiments", "WARNING")
+            # Try to load previously generated results
+            cached = REPO_ROOT / "accuracy_results.json"
+            if cached.exists():
+                with open(cached) as f:
+                    results = json.load(f)
+                log(f"Loaded cached results from {cached}")
+            else:
+                log("No cached results found — run without --skip-experiments first", "ERROR")
+                sys.exit(1)
 
-        # Figure generation phase
+        # Figures
         if not args.skip_figures:
             generate_figures()
         else:
             log("Skipping figure generation", "WARNING")
 
-        # Paper compilation phase
+        # Paper
         if not args.skip_paper:
             compile_paper()
         else:
             log("Skipping paper compilation", "WARNING")
 
-        # Generate final report
+        # Report
        generate_experiment_report(results)
 
-        # Summary
+        # Done
         duration = time.time() - start_time
-        log("==============================", "HEADER")
-        log(f"Experiment reproduction completed in {duration:.1f} seconds", "SUCCESS")
-        log(f"Average accuracy: {results['summary']['average_error']*100:.1f}%", "SUCCESS")
-        log("All outputs generated successfully", "SUCCESS")
+        log("=" * 40, "HEADER")
+        log(f"Completed in {duration:.1f}s", "SUCCESS")
+        log(f"Average accuracy error: {results['summary']['average_error'] * 100:.1f}%", "SUCCESS")
 
     except Exception as e:
         log(f"Experiment reproduction failed: {e}", "ERROR")
+        import traceback
+        traceback.print_exc()
         sys.exit(1)
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/scripts/accuracy_validation.go b/scripts/accuracy_validation.go
index 0ab747b..685a25c 100644
--- a/scripts/accuracy_validation.go
+++ b/scripts/accuracy_validation.go
@@ -213,4 +213,4 @@ func main() {
 		fmt.Println("🚨 Performance optimizations may have introduced errors")
 		os.Exit(1)
 	}
-}
\ No newline at end of file
+}
diff --git a/scripts/decoder_validation/main.go b/scripts/decoder_validation/main.go
index 48e6d70..95b2bd1 100644
--- a/scripts/decoder_validation/main.go
+++ b/scripts/decoder_validation/main.go
@@ -70,4 +70,4 @@ func main() {
 	} else {
 		fmt.Printf("\n⚠️ WARNING: High allocation rate detected\n")
 	}
-}
\ No newline at end of file
+}
diff --git a/timing/pipeline/stages.go b/timing/pipeline/stages.go
index 76b144d..19f2012 100644
--- a/timing/pipeline/stages.go
+++ b/timing/pipeline/stages.go
@@ -28,7 +28,7 @@ type DecodeStage struct {
 	decoder   *insts.Decoder
 	// Pool of pre-allocated instructions to avoid heap allocations during decode
 	// Supports up to 8 concurrent decode operations (for 8-wide superscalar pipelines)
-	instPool [8]insts.Instruction
+	instPool  [8]insts.Instruction
 	poolIndex int
 }