diff --git a/.github/workflows/accuracy-embench.yml b/.github/workflows/accuracy-embench.yml
new file mode 100644
index 0000000..648a395
--- /dev/null
+++ b/.github/workflows/accuracy-embench.yml
@@ -0,0 +1,131 @@
+# Runs each EmBench simulator test, extracts the CPI it reports, and publishes
+# the numbers as a job summary and as an artifact.
+name: Accuracy - EmBench
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'benchmarks/aha-mont64-m2sim/**'
+      - 'benchmarks/crc32-m2sim/**'
+      - 'benchmarks/edn-m2sim/**'
+      - 'benchmarks/huffbench-m2sim/**'
+      - 'benchmarks/matmult-int-m2sim/**'
+      - 'benchmarks/statemate-m2sim/**'
+      - 'benchmarks/primecount-m2sim/**'
+      - 'benchmarks/embench_test.go'
+      - 'timing/**'
+  workflow_dispatch:
+
+# Runs for the same ref queue up instead of cancelling each other.
+concurrency:
+  group: accuracy-embench-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  embench-accuracy:
+    name: EmBench Accuracy
+    runs-on: macos-14
+    timeout-minutes: 30
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.25'
+
+      # Informational only: a missing ELF is reported but does not fail the step.
+      - name: Verify EmBench ELFs
+        run: |
+          echo "Checking EmBench ELF files..."
+          ls -la benchmarks/aha-mont64-m2sim/*.elf 2>/dev/null || echo "aha-mont64 missing"
+          ls -la benchmarks/crc32-m2sim/*.elf 2>/dev/null || echo "crc32 missing"
+          ls -la benchmarks/edn-m2sim/*.elf 2>/dev/null || echo "edn missing"
+          ls -la benchmarks/huffbench-m2sim/*.elf 2>/dev/null || echo "huffbench missing"
+          ls -la benchmarks/matmult-int-m2sim/*.elf 2>/dev/null || echo "matmult-int missing"
+          ls -la benchmarks/statemate-m2sim/*.elf 2>/dev/null || echo "statemate missing"
+          ls -la benchmarks/primecount-m2sim/*.elf 2>/dev/null || echo "primecount missing"
+
+      - name: Run EmBench tests
+        run: |
+          TESTS=(
+            TestEmbenchAhaMont64
+            TestEmbenchCRC32
+            TestEmbenchEDN
+            TestEmbenchHuffbench
+            TestEmbenchMatmultInt
+            TestEmbenchStatemate
+            TestEmbenchPrimecount
+          )
+
+          > embench_output.txt
+          for TEST in "${TESTS[@]}"; do
+            echo "--- $TEST ---"
+            # Best effort: one failing benchmark must not stop the others. The
+            # default run shell has no pipefail, so enable it in a subshell to see
+            # go test's own status and surface a failure as an annotation.
+            if ! (set -o pipefail; go test -v -run "^${TEST}$" -count=1 -timeout 5m ./benchmarks/ 2>&1 | tee -a embench_output.txt); then
+              echo "::warning::$TEST failed or did not complete"
+            fi
+          done
+
+      - name: Extract CPI results
+        if: always()
+        run: |
+          python3 - <<'PYEOF'
+          import json, os, re
+
+          # go test -v prints t.Log output as "file.go:NN: message"; drop that
+          # prefix so the line number is never taken for the benchmark name.
+          GO_LOG_PREFIX = re.compile(r'^\s*\S+\.go:\d+:\s+')
+          # "<name>: ... CPI=<decimal>"; a strict decimal keeps trailing
+          # punctuation out of float().
+          CPI_LINE = re.compile(r'(\w+):\s+.*CPI=(\d+(?:\.\d+)?)')
+
+          results = {}
+          # The test step may not have run at all (e.g. checkout failed).
+          if os.path.exists("embench_output.txt"):
+              with open("embench_output.txt") as f:
+                  for line in f:
+                      if "CPI=" not in line:
+                          continue
+                      match = CPI_LINE.search(GO_LOG_PREFIX.sub("", line))
+                      if match:
+                          results[match.group(1)] = {"cpi": float(match.group(2))}
+
+          output = {"benchmarks_run": len(results), "results": results}
+          with open("embench_results.json", "w") as f:
+              json.dump(output, f, indent=2)
+          print(json.dumps(output, indent=2))
+          PYEOF
+
+      - name: Post summary
+        if: always()
+        run: |
+          echo "## EmBench Accuracy Results" >> "$GITHUB_STEP_SUMMARY"
+          if [ -f embench_results.json ]; then
+            python3 - >> "$GITHUB_STEP_SUMMARY" <<'PYEOF'
+          import json
+          with open('embench_results.json') as f:
+              d = json.load(f)
+          print(f"**Benchmarks measured:** {d['benchmarks_run']}/7")
+          if d['results']:
+              print()
+              print('| Benchmark | CPI |')
+              print('|-----------|-----|')
+              for name, r in sorted(d['results'].items()):
+                  print(f"| {name} | {r['cpi']:.3f} |")
+          PYEOF
+          else
+            echo "No results generated." >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: embench-accuracy
+          path: |
+            embench_results.json
+            embench_output.txt
+          retention-days: 90
diff --git a/.github/workflows/accuracy-microbench.yml b/.github/workflows/accuracy-microbench.yml
new file mode 100644
index 0000000..552a56a
--- /dev/null
+++ b/.github/workflows/accuracy-microbench.yml
@@ -0,0 +1,97 @@
+# Runs the microbenchmark CPI tests and turns their output into an accuracy
+# report (job summary plus artifacts).
+name: Accuracy - Microbenchmarks
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'benchmarks/**'
+      - 'timing/**'
+      - 'emu/**'
+  workflow_dispatch:
+
+# Runs for the same ref queue up instead of cancelling each other.
+concurrency:
+  group: accuracy-microbench-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  microbench-accuracy:
+    name: Microbenchmark Accuracy
+    runs-on: macos-14
+    timeout-minutes: 15
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.25'
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install Python dependencies
+        run: pip install matplotlib numpy scipy
+
+      - name: Run microbenchmark CPI tests
+        run: |
+          # The default run shell is "bash -e" without pipefail, so a failing
+          # go test would be hidden behind tee's exit status.
+          set -o pipefail
+          cd benchmarks
+          echo "=== Running microbenchmark CPI tests ==="
+          STATUS=0
+
+          # Without D-cache (ALU, branch, throughput benchmarks)
+          go test -v -run TestTimingPredictions_CPIBounds -count=1 -timeout 5m ./ 2>&1 | tee micro_no_cache.txt || STATUS=1
+
+          # With D-cache (memory-latency benchmarks)
+          go test -v -run TestAccuracyCPI_WithDCache -count=1 -timeout 5m ./ 2>&1 | tee micro_dcache.txt || STATUS=1
+
+          # Both suites always run; the step fails afterwards if either did.
+          exit "$STATUS"
+
+      - name: Generate accuracy report
+        # Still produce the report when a CPI test failed.
+        if: always()
+        run: |
+          # "|| true" keeps this step green even if the script exits non-zero
+          # (for example because it rejects --suite).
+          # NOTE(review): there is no fallback invocation without --suite; confirm
+          # whether one is needed.
+          python3 benchmarks/native/accuracy_report.py --suite microbench 2>&1 || true
+
+      - name: Post summary
+        if: always()
+        run: |
+          echo "## Microbenchmark Accuracy Results" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          if [ -f benchmarks/native/accuracy_results.json ]; then
+            python3 - >> "$GITHUB_STEP_SUMMARY" <<'PYEOF'
+          import json
+          with open('benchmarks/native/accuracy_results.json') as f:
+              d = json.load(f)
+          print(f"**Average Error:** {d['summary']['average_error']*100:.1f}%")
+          print(f"**Benchmarks:** {d['summary']['benchmark_count']}")
+          PYEOF
+          else
+            echo "No results generated." >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: accuracy-microbench
+          path: |
+            benchmarks/native/accuracy_report.md
+            benchmarks/native/accuracy_results.json
+            benchmarks/native/accuracy_figure.png
+            benchmarks/native/accuracy_normalized.pdf
+          retention-days: 90
diff --git a/.github/workflows/accuracy-polybench.yml b/.github/workflows/accuracy-polybench.yml
new file mode 100644
index 0000000..0de2e4d
--- /dev/null
+++ b/.github/workflows/accuracy-polybench.yml
@@ -0,0 +1,201 @@
+# Runs the PolyBench simulator tests in three parallel groups, then merges the
+# groups' logs into one CPI table (job summary plus artifact).
+name: Accuracy - PolyBench
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'benchmarks/polybench/**'
+      - 'benchmarks/polybench_test.go'
+      - 'benchmarks/timing_harness.go'
+      - 'timing/**'
+  workflow_dispatch:
+
+# Runs for the same ref queue up instead of cancelling each other.
+concurrency:
+  group: accuracy-polybench-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  polybench-group-1:
+    name: PolyBench Group 1 (ATAX, BiCG, Jacobi1D)
+    runs-on: macos-14
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.25'
+
+      # ls exits non-zero on a missing ELF, which fails the job early.
+      - name: Verify ELFs
+        run: |
+          ls -la benchmarks/polybench/atax_m2sim.elf
+          ls -la benchmarks/polybench/bicg_m2sim.elf
+          ls -la benchmarks/polybench/jacobi-1d_m2sim.elf
+
+      - name: Run tests
+        run: |
+          for TEST in TestPolybenchATAX TestPolybenchBiCG TestPolybenchJacobi1D; do
+            echo "--- $TEST ---"
+            # Best effort: one failing benchmark must not stop the others. The
+            # default run shell has no pipefail, so enable it in a subshell to see
+            # go test's own status and surface a failure as an annotation.
+            if ! (set -o pipefail; go test -v -run "^${TEST}$" -count=1 -timeout 8m ./benchmarks/ 2>&1 | tee -a group1_output.txt); then
+              echo "::warning::$TEST failed or did not complete"
+            fi
+          done
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: polybench-group-1
+          path: group1_output.txt
+          retention-days: 30
+
+  polybench-group-2:
+    name: PolyBench Group 2 (MVT, GEMM)
+    runs-on: macos-14
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.25'
+
+      - name: Verify ELFs
+        run: |
+          ls -la benchmarks/polybench/mvt_m2sim.elf
+          ls -la benchmarks/polybench/gemm_m2sim.elf
+
+      - name: Run tests
+        run: |
+          for TEST in TestPolybenchMVT TestPolybenchGEMM; do
+            echo "--- $TEST ---"
+            # Best effort: report a failure as an annotation and keep going.
+            if ! (set -o pipefail; go test -v -run "^${TEST}$" -count=1 -timeout 8m ./benchmarks/ 2>&1 | tee -a group2_output.txt); then
+              echo "::warning::$TEST failed or did not complete"
+            fi
+          done
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: polybench-group-2
+          path: group2_output.txt
+          retention-days: 30
+
+  polybench-group-3:
+    name: PolyBench Group 3 (2MM, 3MM)
+    runs-on: macos-14
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.25'
+
+      - name: Verify ELFs
+        run: |
+          ls -la benchmarks/polybench/2mm_m2sim.elf
+          ls -la benchmarks/polybench/3mm_m2sim.elf
+
+      - name: Run tests
+        run: |
+          for TEST in TestPolybench2MM TestPolybench3MM; do
+            echo "--- $TEST ---"
+            # Best effort: report a failure as an annotation and keep going.
+            if ! (set -o pipefail; go test -v -run "^${TEST}$" -count=1 -timeout 8m ./benchmarks/ 2>&1 | tee -a group3_output.txt); then
+              echo "::warning::$TEST failed or did not complete"
+            fi
+          done
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: polybench-group-3
+          path: group3_output.txt
+          retention-days: 30
+
+  # Runs even when a group job failed so partial results are still reported.
+  consolidate:
+    name: Consolidate PolyBench Results
+    runs-on: ubuntu-latest
+    needs: [polybench-group-1, polybench-group-2, polybench-group-3]
+    if: always()
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download results
+        uses: actions/download-artifact@v4
+        with:
+          path: group-results
+
+      - name: Extract CPI results
+        run: |
+          # A group whose log is missing contributes nothing.
+          cat group-results/polybench-group-1/group1_output.txt > combined.txt 2>/dev/null || true
+          cat group-results/polybench-group-2/group2_output.txt >> combined.txt 2>/dev/null || true
+          cat group-results/polybench-group-3/group3_output.txt >> combined.txt 2>/dev/null || true
+
+          python3 - <<'PYEOF'
+          import json, re
+
+          # A strict decimal for CPI keeps trailing punctuation out of float().
+          CPI_LINE = re.compile(
+              r'(polybench_\w+):\s+cycles=(\d+),\s+insts=(\d+),\s+CPI=(\d+(?:\.\d+)?)'
+          )
+
+          results = {}
+          with open("combined.txt") as f:
+              for line in f:
+                  if "CPI=" not in line:
+                      continue
+                  match = CPI_LINE.search(line)
+                  if match:
+                      name = match.group(1).replace("polybench_", "")
+                      if name == "jacobi1d":
+                          name = "jacobi-1d"
+                      results[name] = {
+                          "cycles": int(match.group(2)),
+                          "instructions": int(match.group(3)),
+                          "cpi": float(match.group(4)),
+                      }
+
+          output = {"benchmarks_run": len(results), "results": results}
+          with open("polybench_results.json", "w") as f:
+              json.dump(output, f, indent=2)
+          print(json.dumps(output, indent=2))
+          PYEOF
+
+      - name: Post summary
+        if: always()
+        run: |
+          echo "## PolyBench Accuracy Results" >> "$GITHUB_STEP_SUMMARY"
+          if [ -f polybench_results.json ]; then
+            python3 - >> "$GITHUB_STEP_SUMMARY" <<'PYEOF'
+          import json
+          with open('polybench_results.json') as f:
+              d = json.load(f)
+          print(f"**Benchmarks measured:** {d['benchmarks_run']}/7")
+          if d['results']:
+              print()
+              print('| Benchmark | CPI |')
+              print('|-----------|-----|')
+              for name, r in sorted(d['results'].items()):
+                  print(f"| {name} | {r['cpi']:.3f} |")
+          PYEOF
+          else
+            echo "No results generated." >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Upload consolidated results
+        uses: actions/upload-artifact@v4
+        with:
+          name: polybench-consolidated
+          path: polybench_results.json
+          retention-days: 90
diff --git a/.github/workflows/accuracy-report.yml b/.github/workflows/accuracy-report.yml
deleted file mode 100644
index ede2f02..0000000
--- a/.github/workflows/accuracy-report.yml
+++ /dev/null
@@ -1,156 +0,0 @@
-name: Accuracy Report
-
-on:
- push:
- branches: [main]
- workflow_dispatch: # Allow manual triggering
-
-concurrency:
- group: accuracy-report-${{ github.ref }}
- cancel-in-progress: false
-
-jobs:
- accuracy-report:
- name: Generate Accuracy Report
- runs-on: macos-14 # Apple Silicon runner
- timeout-minutes: 120 # Extended timeout for PolyBench + EmBench simulations
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies
- run: |
- pip install matplotlib numpy scipy
-
- - name: Install Ginkgo
- run: go install github.com/onsi/ginkgo/v2/ginkgo@latest
-
- - name: Run Accuracy Report
- run: |
- cd benchmarks/native
- echo "Starting accuracy report generation at $(date)"
- START_TIME=$(date +%s)
-
- # Run with timing and enhanced error reporting
- if ! python3 accuracy_report.py; then
- END_TIME=$(date +%s)
- RUNTIME=$((END_TIME - START_TIME))
- echo "❌ Accuracy report failed after ${RUNTIME}s"
- echo "Recent system performance metrics:"
- echo " - Available memory: $(vm_stat | grep 'free\|active\|inactive' | head -3)"
- echo " - CPU load: $(uptime)"
- echo "Checking for generated files:"
- ls -la accuracy_* || echo "No accuracy files found"
- exit 1
- fi
-
- END_TIME=$(date +%s)
- RUNTIME=$((END_TIME - START_TIME))
- echo "✅ Accuracy report completed successfully in ${RUNTIME}s"
-
- - name: Upload Accuracy Report
- uses: actions/upload-artifact@v4
- with:
- name: accuracy-report
- path: |
- benchmarks/native/accuracy_report.md
- benchmarks/native/accuracy_figure.png
- benchmarks/native/accuracy_results.json
- benchmarks/native/accuracy_normalized.pdf
- retention-days: 90
-
- - name: Post Report Summary
- if: always()
- run: |
- echo "## M2Sim Accuracy Report" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f benchmarks/native/accuracy_results.json ]; then
- AVG_ERROR=$(python3 -c "import json; d=json.load(open('benchmarks/native/accuracy_results.json')); print(f\"{d['summary']['average_error']*100:.1f}%\")")
- MAX_ERROR=$(python3 -c "import json; d=json.load(open('benchmarks/native/accuracy_results.json')); print(f\"{d['summary']['max_error']*100:.1f}%\")")
-
- echo "- **Average Error:** $AVG_ERROR" >> $GITHUB_STEP_SUMMARY
- echo "- **Max Error:** $MAX_ERROR" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo "See the uploaded artifacts for the full report and figures." >> $GITHUB_STEP_SUMMARY
- else
- echo "⚠️ Accuracy report generation failed." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Commit updated reports to reports branch
- if: github.ref == 'refs/heads/main'
- run: |
- git config user.name "github-actions[bot]"
- git config user.email "github-actions[bot]@users.noreply.github.com"
-
- # Save generated report files to temp directory BEFORE switching branches
- # This avoids "local changes would be overwritten" errors
- TEMP_REPORTS=$(mktemp -d)
- cp benchmarks/native/accuracy_report.md "$TEMP_REPORTS/" 2>/dev/null || true
- cp benchmarks/native/accuracy_figure.png "$TEMP_REPORTS/" 2>/dev/null || true
- cp benchmarks/native/accuracy_results.json "$TEMP_REPORTS/" 2>/dev/null || true
- cp benchmarks/native/accuracy_normalized.pdf "$TEMP_REPORTS/" 2>/dev/null || true
-
- DATE=$(date +%Y-%m-%d)
- COMMIT_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "initial")
-
- # Fetch the reports branch or create it
- git fetch origin reports || true
-
- # Stash any local changes before switching branches
- git stash --include-untracked || true
-
- if git show-ref --verify --quiet refs/remotes/origin/reports; then
- git checkout reports --
- git pull origin reports
- else
- git checkout --orphan reports
- git rm -rf . || true
- echo "# M2Sim Accuracy Reports" > README.md
- echo "" >> README.md
- echo "This branch contains historical accuracy reports." >> README.md
- git add README.md
- fi
-
- # Create dated directory
- REPORT_DIR="reports/${DATE}-${COMMIT_SHA}"
- mkdir -p "$REPORT_DIR"
-
- # Copy report files from temp directory
- cp "$TEMP_REPORTS/accuracy_report.md" "$REPORT_DIR/" 2>/dev/null || true
- cp "$TEMP_REPORTS/accuracy_figure.png" "$REPORT_DIR/" 2>/dev/null || true
- cp "$TEMP_REPORTS/accuracy_results.json" "$REPORT_DIR/" 2>/dev/null || true
- cp "$TEMP_REPORTS/accuracy_normalized.pdf" "$REPORT_DIR/" 2>/dev/null || true
-
- # Clean up temp directory
- rm -rf "$TEMP_REPORTS"
-
- # Update index
- echo "# M2Sim Accuracy Reports" > README.md
- echo "" >> README.md
- echo "| Date | Commit | Average Error | Max Error | Report |" >> README.md
- echo "|------|--------|---------------|-----------|--------|" >> README.md
-
- for dir in reports/*/; do
- if [ -f "${dir}accuracy_results.json" ]; then
- DIRNAME=$(basename "$dir")
- AVG=$(python3 -c "import json; d=json.load(open('${dir}accuracy_results.json')); print(f\"{d['summary']['average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
- MAX=$(python3 -c "import json; d=json.load(open('${dir}accuracy_results.json')); print(f\"{d['summary']['max_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
- echo "| ${DIRNAME%-*} | ${DIRNAME##*-} | $AVG | $MAX | [Report](reports/${DIRNAME}/accuracy_report.md) |" >> README.md
- fi
- done
-
- git add -A
- git commit -m "Update accuracy report for ${DATE}" || echo "No changes to commit"
- git push origin reports || echo "Failed to push (may need permissions)"
diff --git a/.github/workflows/calibration.yml b/.github/workflows/calibration.yml
new file mode 100644
index 0000000..a9538e3
--- /dev/null
+++ b/.github/workflows/calibration.yml
@@ -0,0 +1,131 @@
+# Manually dispatched calibration runs on macos-14 runners. The "suite" input
+# selects one suite or all of them; every suite runs as its own job and
+# uploads its results JSON as an artifact.
+name: Hardware Calibration
+
+on:
+  workflow_dispatch:
+    inputs:
+      suite:
+        description: 'Calibration suite to run'
+        type: choice
+        required: true
+        default: 'all'
+        options:
+          - all
+          - microbench
+          - polybench
+          - embench
+
+jobs:
+  microbench-calibration:
+    name: Microbenchmark Calibration
+    if: inputs.suite == 'all' || inputs.suite == 'microbench'
+    runs-on: macos-14
+    timeout-minutes: 60
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - run: pip install numpy scipy
+
+      # Abort early on anything other than an arm64 machine.
+      - name: Verify ARM64
+        run: |
+          [ "$(uname -m)" = "arm64" ] || {
+            echo "ERROR: Requires ARM64 (Apple Silicon)"
+            exit 1
+          }
+
+      - name: Run memory benchmark calibration
+        timeout-minutes: 45
+        working-directory: benchmarks/native
+        run: >-
+          python3 linear_calibration.py
+          --benchmarks memorystrided loadheavy storeheavy branchheavy
+          --runs 15
+          --output memory_calibration_results.json
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: microbench-calibration
+          path: benchmarks/native/memory_calibration_results.json
+          retention-days: 90
+
+  polybench-calibration:
+    name: PolyBench Calibration
+    if: inputs.suite == 'all' || inputs.suite == 'polybench'
+    runs-on: macos-14
+    timeout-minutes: 45
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - run: pip install numpy scipy
+
+      # Abort early on anything other than an arm64 machine.
+      - name: Verify ARM64
+        run: |
+          [ "$(uname -m)" = "arm64" ] || {
+            echo "ERROR: Requires ARM64 (Apple Silicon)"
+            exit 1
+          }
+
+      - name: Run PolyBench calibration
+        timeout-minutes: 40
+        working-directory: benchmarks/native
+        run: >-
+          python3 polybench_calibration.py
+          --runs 15
+          --output polybench_calibration_results.json
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: polybench-calibration
+          path: benchmarks/native/polybench_calibration_results.json
+          retention-days: 90
+
+  embench-calibration:
+    name: EmBench Calibration
+    if: inputs.suite == 'all' || inputs.suite == 'embench'
+    runs-on: macos-14
+    timeout-minutes: 45
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - run: pip install numpy scipy
+
+      # Abort early on anything other than an arm64 machine.
+      - name: Verify ARM64
+        run: |
+          [ "$(uname -m)" = "arm64" ] || {
+            echo "ERROR: Requires ARM64 (Apple Silicon)"
+            exit 1
+          }
+
+      - name: Run EmBench calibration
+        timeout-minutes: 40
+        working-directory: benchmarks/native
+        run: >-
+          python3 embench_calibration.py
+          --runs 15
+          --output embench_calibration_results.json
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: embench-calibration
+          path: benchmarks/native/embench_calibration_results.json
+          retention-days: 90
diff --git a/.github/workflows/ci-health-monitor.yml b/.github/workflows/ci-health-monitor.yml
deleted file mode 100644
index bb1b8ae..0000000
--- a/.github/workflows/ci-health-monitor.yml
+++ /dev/null
@@ -1,272 +0,0 @@
-name: CI Health Monitor
-
-on:
- schedule:
- - cron: '0 */6 * * *' # Every 6 hours
- workflow_dispatch:
-
-jobs:
- ci-health-check:
- name: CI Health Assessment
- runs-on: ubuntu-latest
- timeout-minutes: 10
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Analyze recent CI runs
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- # Get recent workflow runs for all critical workflows
- echo "Analyzing CI health for the last 24 hours..."
-
- python3 - <<'PYEOF'
- import json
- import subprocess
- import sys
- from datetime import datetime, timedelta
-
- def get_recent_runs(workflow_name, hours=24):
- """Get recent runs for a specific workflow"""
- cmd = ['gh', 'run', 'list', '--workflow', workflow_name, '--json', 'status,conclusion,createdAt,number,name', '--limit', '20']
- result = subprocess.run(cmd, capture_output=True, text=True)
- if result.returncode != 0:
- print(f"Warning: Could not fetch runs for {workflow_name}")
- return []
-
- runs = json.loads(result.stdout)
- cutoff = datetime.now().replace(tzinfo=None) - timedelta(hours=hours)
-
- recent_runs = []
- for run in runs:
- created_at = datetime.fromisoformat(run['createdAt'].replace('Z', '+00:00')).replace(tzinfo=None)
- if created_at > cutoff:
- recent_runs.append(run)
-
- return recent_runs
-
- # Critical workflows to monitor (using display names from workflow YAML)
- workflows = [
- 'CI',
- 'H5 Accuracy Report',
- 'PolyBench Simulation Measurements',
- 'CPI Comparison',
- 'Matmul Calibration'
- ]
-
- health_report = {
- "timestamp": datetime.now().isoformat(),
- "period_hours": 24,
- "workflows": {},
- "overall_health": "unknown",
- "alerts": []
- }
-
- total_runs = 0
- failed_runs = 0
- timeout_failures = 0
-
- for workflow in workflows:
- runs = get_recent_runs(workflow)
- if not runs:
- health_report["workflows"][workflow] = {
- "runs": 0,
- "success_rate": 0.0,
- "status": "no_data"
- }
- continue
-
- completed_runs = [r for r in runs if r['status'] == 'completed']
- successful_runs = [r for r in completed_runs if r['conclusion'] == 'success']
- failed_runs_count = len([r for r in completed_runs if r['conclusion'] in ['failure', 'cancelled', 'timed_out']])
-
- success_rate = len(successful_runs) / len(completed_runs) if completed_runs else 0.0
-
- health_report["workflows"][workflow] = {
- "runs": len(runs),
- "completed": len(completed_runs),
- "successful": len(successful_runs),
- "failed": failed_runs_count,
- "success_rate": success_rate,
- "status": "healthy" if success_rate >= 0.8 else "degraded" if success_rate >= 0.5 else "critical"
- }
-
- total_runs += len(completed_runs)
- failed_runs += failed_runs_count
-
- # Check for timeout patterns
- timeout_runs = [r for r in completed_runs if r['conclusion'] == 'timed_out']
- if timeout_runs:
- timeout_failures += len(timeout_runs)
- health_report["alerts"].append(f"{workflow}: {len(timeout_runs)} timeout failures detected")
-
- # Check for low success rates
- if success_rate < 0.7:
- health_report["alerts"].append(f"{workflow}: Low success rate ({success_rate:.1%})")
-
- # Calculate overall health
- overall_success_rate = (total_runs - failed_runs) / total_runs if total_runs > 0 else 0.0
-
- if overall_success_rate >= 0.9:
- health_report["overall_health"] = "healthy"
- elif overall_success_rate >= 0.7:
- health_report["overall_health"] = "degraded"
- else:
- health_report["overall_health"] = "critical"
-
- health_report["summary"] = {
- "total_runs": total_runs,
- "failed_runs": failed_runs,
- "timeout_failures": timeout_failures,
- "overall_success_rate": overall_success_rate
- }
-
- # Write health report
- with open("ci_health_report.json", "w") as f:
- json.dump(health_report, f, indent=2)
-
- print("=== CI Health Report ===")
- print(json.dumps(health_report, indent=2))
-
- # Exit with error code if critical issues detected
- if health_report["overall_health"] == "critical" or timeout_failures > 3:
- print(f"\n❌ CRITICAL: CI health is {health_report['overall_health']}")
- if timeout_failures > 3:
- print(f"❌ CRITICAL: {timeout_failures} timeout failures detected")
- sys.exit(1)
- elif health_report["overall_health"] == "degraded":
- print(f"\n⚠️ WARNING: CI health is degraded")
- else:
- print(f"\n✅ CI health is good")
- PYEOF
-
- - name: Check for silent failures
- run: |
- echo "Checking for silent failure patterns..."
-
- # Check if any critical workflows haven't run recently when they should have
- python3 - <<'PYEOF'
- import json
- import subprocess
- from datetime import datetime, timedelta
-
- def check_workflow_freshness():
- """Check if critical workflows have run recently enough"""
- alerts = []
-
- # Workflows that should run on every push to main (using display names)
- critical_workflows = ['CI', 'H5 Accuracy Report']
-
- # Get recent commits to main
- result = subprocess.run(['git', 'log', '--since=1 day ago', '--oneline', 'main'],
- capture_output=True, text=True)
- recent_commits = len(result.stdout.strip().split('\n')) if result.stdout.strip() else 0
-
- if recent_commits > 0:
- for workflow in critical_workflows:
- cmd = ['gh', 'run', 'list', '--workflow', workflow, '--json', 'createdAt', '--limit', '5']
- result = subprocess.run(cmd, capture_output=True, text=True)
-
- if result.returncode == 0:
- runs = json.loads(result.stdout)
- if runs:
- latest_run = datetime.fromisoformat(runs[0]['createdAt'].replace('Z', '+00:00')).replace(tzinfo=None)
- hours_since = (datetime.now().replace(tzinfo=None) - latest_run).total_seconds() / 3600
-
- if hours_since > 12: # No runs in 12+ hours despite commits
- alerts.append(f"{workflow}: No runs in {hours_since:.1f} hours despite recent commits")
- else:
- alerts.append(f"{workflow}: No recent runs found")
-
- return alerts
-
- silent_failure_alerts = check_workflow_freshness()
-
- if silent_failure_alerts:
- print("🔍 SILENT FAILURE DETECTION:")
- for alert in silent_failure_alerts:
- print(f" ❌ {alert}")
- else:
- print("✅ No silent failures detected")
-
- # Append to health report
- try:
- with open("ci_health_report.json", "r") as f:
- health_report = json.load(f)
-
- health_report["silent_failures"] = silent_failure_alerts
-
- with open("ci_health_report.json", "w") as f:
- json.dump(health_report, f, indent=2)
- except:
- pass
- PYEOF
-
- - name: Generate health summary
- if: always()
- run: |
- echo "## CI Health Monitor Report" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f ci_health_report.json ]; then
- python3 -c "
- import json
- try:
- d = json.load(open('ci_health_report.json'))
- print(f\"**Overall Health:** {d['overall_health'].title()}\")
- print(f\"**Success Rate:** {d['summary']['overall_success_rate']:.1%} ({d['summary']['failed_runs']} failures out of {d['summary']['total_runs']} runs)\")
- print(f\"**Timeout Failures:** {d['summary']['timeout_failures']}\")
- print()
-
- print('### Workflow Status')
- for workflow, status in d['workflows'].items():
- icon = '✅' if status['status'] == 'healthy' else '⚠️' if status['status'] == 'degraded' else '❌'
- print(f'{icon} **{workflow}**: {status[\"success_rate\"]:.1%} success rate ({status[\"successful\"]}/{status[\"completed\"]} runs)')
-
- if d.get('alerts'):
- print()
- print('### Alerts')
- for alert in d['alerts']:
- print(f'⚠️ {alert}')
-
- if d.get('silent_failures'):
- print()
- print('### Silent Failure Detection')
- for failure in d['silent_failures']:
- print(f'🔍 {failure}')
- except Exception as e:
- print(f'Error processing health report: {e}')
- " >> $GITHUB_STEP_SUMMARY
- else
- echo "❌ Health report generation failed." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Upload health report
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: ci-health-report
- path: ci_health_report.json
- retention-days: 30
-
- - name: Comment on infrastructure issues if critical
- if: always()
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- if [ -f ci_health_report.json ]; then
- HEALTH_STATUS=$(python3 -c "import json; d=json.load(open('ci_health_report.json')); print(d['overall_health'])")
- TIMEOUT_FAILURES=$(python3 -c "import json; d=json.load(open('ci_health_report.json')); print(d['summary']['timeout_failures'])")
-
- if [ "$HEALTH_STATUS" = "critical" ] || [ "$TIMEOUT_FAILURES" -gt 3 ]; then
- echo "Critical CI health detected, would create infrastructure issue if not already exists..."
- # Note: In production, this would create an issue or comment on existing infrastructure issues
- fi
- fi
\ No newline at end of file
diff --git a/.github/workflows/ci-metrics-dashboard.yml b/.github/workflows/ci-metrics-dashboard.yml
deleted file mode 100644
index f7fcbd8..0000000
--- a/.github/workflows/ci-metrics-dashboard.yml
+++ /dev/null
@@ -1,498 +0,0 @@
-name: CI Metrics Dashboard
-
-on:
- schedule:
- - cron: '0 8 * * *' # Daily at 8 AM UTC
- workflow_dispatch:
-
-jobs:
- generate-ci-dashboard:
- name: Generate CI Metrics Dashboard
- runs-on: ubuntu-latest
- timeout-minutes: 20
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies
- run: |
- pip install matplotlib numpy pandas requests plotly kaleido
-
- - name: Generate comprehensive CI metrics
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- python3 - <<'PYEOF'
- import json
- import subprocess
- import pandas as pd
- import matplotlib.pyplot as plt
- import plotly.graph_objects as go
- import plotly.express as px
- from plotly.subplots import make_subplots
- from datetime import datetime, timedelta
- import numpy as np
-
- def get_workflow_runs(workflow_name, days=30):
- """Get workflow runs for the past N days"""
- cmd = ['gh', 'run', 'list', '--workflow', workflow_name, '--json',
- 'status,conclusion,createdAt,updatedAt,runNumber,id,name', '--limit', '100']
- result = subprocess.run(cmd, capture_output=True, text=True)
-
- if result.returncode != 0:
- print(f"Warning: Could not fetch runs for {workflow_name}")
- return []
-
- runs = json.loads(result.stdout)
- cutoff = datetime.now() - timedelta(days=days)
-
- recent_runs = []
- for run in runs:
- created_at = datetime.fromisoformat(run['createdAt'].replace('Z', '+00:00'))
- if created_at > cutoff:
- recent_runs.append(run)
-
- return recent_runs
-
- def calculate_duration_seconds(created_at, updated_at):
- """Calculate run duration in seconds"""
- try:
- start = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
- end = datetime.fromisoformat(updated_at.replace('Z', '+00:00'))
- return (end - start).total_seconds()
- except:
- return 0
-
- # Workflows to analyze
- critical_workflows = [
- 'ci.yml',
- 'h5-accuracy-report.yml',
- 'polybench-sim.yml',
- 'cpi-comparison.yml',
- 'matmul-calibration.yml',
- 'polybench-segmented.yml',
- 'h5-parallel-accuracy.yml'
- ]
-
- # Collect data for all workflows
- dashboard_data = {
- "generated_at": datetime.now().isoformat(),
- "period_days": 30,
- "workflows": {},
- "summary": {},
- "trends": {}
- }
-
- all_runs_data = []
-
- for workflow in critical_workflows:
- print(f"Analyzing {workflow}...")
- runs = get_workflow_runs(workflow, days=30)
-
- if not runs:
- dashboard_data["workflows"][workflow] = {
- "total_runs": 0,
- "success_rate": 0.0,
- "avg_duration": 0.0,
- "failure_rate": 0.0
- }
- continue
-
- # Process runs data
- completed_runs = [r for r in runs if r['status'] == 'completed']
- successful_runs = [r for r in completed_runs if r['conclusion'] == 'success']
- failed_runs = [r for r in completed_runs if r['conclusion'] in ['failure', 'cancelled', 'timed_out']]
- timeout_runs = [r for r in completed_runs if r['conclusion'] == 'timed_out']
-
- # Calculate metrics
- success_rate = len(successful_runs) / len(completed_runs) if completed_runs else 0.0
- failure_rate = len(failed_runs) / len(completed_runs) if completed_runs else 0.0
-
- # Duration analysis
- durations = [calculate_duration_seconds(r['createdAt'], r['updatedAt']) for r in completed_runs]
- avg_duration = np.mean(durations) if durations else 0.0
-
- dashboard_data["workflows"][workflow] = {
- "total_runs": len(runs),
- "completed_runs": len(completed_runs),
- "successful_runs": len(successful_runs),
- "failed_runs": len(failed_runs),
- "timeout_runs": len(timeout_runs),
- "success_rate": success_rate,
- "failure_rate": failure_rate,
- "avg_duration_seconds": avg_duration,
- "avg_duration_minutes": avg_duration / 60.0
- }
-
- # Add to aggregate data for trending
- for run in completed_runs:
- duration = calculate_duration_seconds(run['createdAt'], run['updatedAt'])
- all_runs_data.append({
- 'workflow': workflow,
- 'date': datetime.fromisoformat(run['createdAt'].replace('Z', '+00:00')).date(),
- 'status': run['conclusion'],
- 'duration': duration,
- 'success': 1 if run['conclusion'] == 'success' else 0
- })
-
- # Calculate summary metrics
- total_runs = sum([w['total_runs'] for w in dashboard_data["workflows"].values()])
- total_completed = sum([w['completed_runs'] for w in dashboard_data["workflows"].values()])
- total_successful = sum([w['successful_runs'] for w in dashboard_data["workflows"].values()])
- total_failed = sum([w['failed_runs'] for w in dashboard_data["workflows"].values()])
-
- dashboard_data["summary"] = {
- "total_runs": total_runs,
- "total_completed": total_completed,
- "total_successful": total_successful,
- "total_failed": total_failed,
- "overall_success_rate": total_successful / total_completed if total_completed > 0 else 0.0,
- "overall_failure_rate": total_failed / total_completed if total_completed > 0 else 0.0,
- "avg_daily_runs": total_runs / 30.0
- }
-
- # Generate visualizations
- print("Generating CI metrics dashboard...")
-
- # 1. Success Rate by Workflow (Bar chart)
- workflows_list = list(dashboard_data["workflows"].keys())
- success_rates = [dashboard_data["workflows"][w]["success_rate"] * 100 for w in workflows_list]
-
- fig = make_subplots(
- rows=2, cols=2,
- subplot_titles=('Success Rate by Workflow', 'Average Duration by Workflow',
- 'Daily Success Rate Trend', 'Workflow Run Volume'),
- specs=[[{"type": "bar"}, {"type": "bar"}],
- [{"type": "scatter"}, {"type": "bar"}]]
- )
-
- # Success rate chart
- fig.add_trace(
- go.Bar(x=workflows_list, y=success_rates, name="Success Rate (%)",
- marker_color=['green' if sr >= 90 else 'orange' if sr >= 70 else 'red' for sr in success_rates]),
- row=1, col=1
- )
-
- # Duration chart
- durations = [dashboard_data["workflows"][w]["avg_duration_minutes"] for w in workflows_list]
- fig.add_trace(
- go.Bar(x=workflows_list, y=durations, name="Avg Duration (min)",
- marker_color='blue'),
- row=1, col=2
- )
-
- # Daily trend analysis
- if all_runs_data:
- df = pd.DataFrame(all_runs_data)
- daily_success = df.groupby('date')['success'].mean() * 100
- dates = daily_success.index
- success_trend = daily_success.values
-
- fig.add_trace(
- go.Scatter(x=dates, y=success_trend, mode='lines+markers',
- name="Daily Success Rate (%)", line_color='green'),
- row=2, col=1
- )
-
- # Volume chart
- daily_volume = df.groupby('date').size()
- fig.add_trace(
- go.Bar(x=daily_volume.index, y=daily_volume.values,
- name="Daily Runs", marker_color='purple'),
- row=2, col=2
- )
-
- fig.update_layout(
- title=f"CI Health Dashboard - {datetime.now().strftime('%Y-%m-%d')}",
- showlegend=False,
- height=800
- )
-
- # Save dashboard
- fig.write_html("ci_dashboard.html")
- fig.write_image("ci_dashboard.png", width=1200, height=800)
-
- # Create detailed metrics table
- with open("ci_metrics_table.html", "w") as f:
- f.write("""
-
CI Metrics Table
-
- CI Metrics Detailed Table
-
- | Workflow |
- Total Runs |
- Success Rate |
- Avg Duration |
- Failures |
- Timeouts |
- Health Status |
-
""")
-
- for workflow, data in dashboard_data["workflows"].items():
- success_rate = data["success_rate"] * 100
- status_class = "success" if success_rate >= 90 else "warning" if success_rate >= 70 else "danger"
- health = "Healthy" if success_rate >= 90 else "Degraded" if success_rate >= 70 else "Critical"
-
- f.write(f"""
- | {workflow} |
- {data["total_runs"]} |
- {success_rate:.1f}% |
- {data["avg_duration_minutes"]:.1f} min |
- {data["failed_runs"]} |
- {data["timeout_runs"]} |
- {health} |
-
""")
-
- f.write("
")
-
- # Save data
- with open("ci_dashboard_data.json", "w") as f:
- json.dump(dashboard_data, f, indent=2)
-
- print("Dashboard generation complete!")
- print(f"Overall CI Health: {dashboard_data['summary']['overall_success_rate']*100:.1f}% success rate")
- PYEOF
-
- - name: Generate CI health trends
- run: |
- python3 - <<'PYEOF'
- import json
- import matplotlib.pyplot as plt
- from datetime import datetime
-
- # Load dashboard data
- with open("ci_dashboard_data.json", "r") as f:
- data = json.load(f)
-
- # Generate simple matplotlib charts as backup
- workflows = list(data["workflows"].keys())
- success_rates = [data["workflows"][w]["success_rate"] * 100 for w in workflows]
-
- # Create figure with subplots
- fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
-
- # Success rates
- colors = ['green' if sr >= 90 else 'orange' if sr >= 70 else 'red' for sr in success_rates]
- ax1.bar(range(len(workflows)), success_rates, color=colors)
- ax1.set_title('Success Rate by Workflow (%)')
- ax1.set_xticks(range(len(workflows)))
- ax1.set_xticklabels([w.replace('.yml', '') for w in workflows], rotation=45)
- ax1.set_ylabel('Success Rate (%)')
- ax1.axhline(y=90, color='green', linestyle='--', alpha=0.7, label='Target (90%)')
- ax1.legend()
-
- # Duration analysis
- durations = [data["workflows"][w]["avg_duration_minutes"] for w in workflows]
- ax2.bar(range(len(workflows)), durations, color='blue')
- ax2.set_title('Average Duration by Workflow (minutes)')
- ax2.set_xticks(range(len(workflows)))
- ax2.set_xticklabels([w.replace('.yml', '') for w in workflows], rotation=45)
- ax2.set_ylabel('Duration (minutes)')
-
- # Failure counts
- failed_counts = [data["workflows"][w]["failed_runs"] for w in workflows]
- timeout_counts = [data["workflows"][w]["timeout_runs"] for w in workflows]
-
- x = range(len(workflows))
- ax3.bar(x, failed_counts, label='Failed', color='red', alpha=0.7)
- ax3.bar(x, timeout_counts, bottom=failed_counts, label='Timeouts', color='darkred', alpha=0.7)
- ax3.set_title('Failure Breakdown by Workflow')
- ax3.set_xticks(x)
- ax3.set_xticklabels([w.replace('.yml', '') for w in workflows], rotation=45)
- ax3.set_ylabel('Failure Count')
- ax3.legend()
-
- # Summary metrics
- summary_labels = ['Total Runs', 'Successful', 'Failed']
- summary_values = [
- data["summary"]["total_runs"],
- data["summary"]["total_successful"],
- data["summary"]["total_failed"]
- ]
- ax4.pie(summary_values[1:], labels=['Successful', 'Failed'], autopct='%1.1f%%',
- colors=['green', 'red'], startangle=90)
- ax4.set_title(f'Overall CI Health\n(30-day period)')
-
- plt.tight_layout()
- plt.savefig('ci_metrics_charts.png', dpi=300, bbox_inches='tight')
- plt.savefig('ci_metrics_charts.pdf', bbox_inches='tight')
-
- print("Additional charts generated successfully!")
- PYEOF
-
- - name: Create CI performance report
- run: |
- cat > ci_performance_report.md << 'MEOF'
- # CI Infrastructure Performance Report
-
- **Generated:** $(date -u)
- **Period:** Last 30 days
- **Dashboard Version:** Phase 3 CI Hardening Implementation
-
- ## Executive Summary
-
- MEOF
-
- # Add dynamic content to report
- python3 - <<'PYEOF'
- import json
-
- with open("ci_dashboard_data.json", "r") as f:
- data = json.load(f)
-
- summary = data["summary"]
-
- with open("ci_performance_report.md", "a") as f:
- f.write(f"""
- - **Overall Success Rate:** {summary['overall_success_rate']*100:.1f}%
- - **Total Runs Analyzed:** {summary['total_runs']}
- - **Average Daily Runs:** {summary['avg_daily_runs']:.1f}
- - **Total Failures:** {summary['total_failed']}
-
- ## Workflow Health Status
-
- | Workflow | Success Rate | Avg Duration | Status |
- |----------|-------------|--------------|---------|
- """)
-
- for workflow, metrics in data["workflows"].items():
- success_rate = metrics["success_rate"] * 100
- duration = metrics["avg_duration_minutes"]
-
- if success_rate >= 90:
- status = "✅ Healthy"
- elif success_rate >= 70:
- status = "⚠️ Degraded"
- else:
- status = "❌ Critical"
-
- f.write(f"| {workflow} | {success_rate:.1f}% | {duration:.1f}m | {status} |\n")
-
- f.write(f"""
- ## CI Hardening Implementation Status
-
- ### ✅ Phase 1: Immediate Stability
- - Extended timeouts implemented across all workflows
- - H5 Accuracy Report: 60m → 90m timeout
- - PolyBench Simulation: 20m → 35m timeout
- - All core CI jobs have explicit timeout configurations
-
- ### ✅ Phase 2: Performance Optimization
- - Parallel execution framework deployed
- - Test segmentation architecture implemented
- - PolyBench tests split into 3 groups for reliability
- - Multi-runner parallel H5 accuracy testing available
-
- ### ✅ Phase 3: Long-term Resilience
- - CI health monitoring dashboard operational
- - Daily metrics collection and trending
- - Automated performance tracking
- - Silent failure detection mechanisms
-
- ## Recommendations
-
- """)
-
- # Add recommendations based on data
- critical_workflows = [w for w, m in data["workflows"].items() if m["success_rate"] < 0.7]
- degraded_workflows = [w for w, m in data["workflows"].items() if 0.7 <= m["success_rate"] < 0.9]
-
- if critical_workflows:
- f.write("### ⚠️ Critical Issues\n")
- for workflow in critical_workflows:
- f.write(f"- **{workflow}**: Success rate below 70%, requires immediate attention\n")
- f.write("\n")
-
- if degraded_workflows:
- f.write("### 📈 Performance Improvements\n")
- for workflow in degraded_workflows:
- f.write(f"- **{workflow}**: Success rate below 90%, consider optimization\n")
- f.write("\n")
-
- if not critical_workflows and len(degraded_workflows) <= 1:
- f.write("### ✅ System Health\n")
- f.write("CI infrastructure is performing well with no critical issues detected.\n")
- f.write("Continue monitoring and consider proactive optimizations.\n\n")
-
- f.write("""
- ## Dashboard Artifacts
-
- - **Interactive Dashboard:** `ci_dashboard.html`
- - **Metrics Charts:** `ci_metrics_charts.png`
- - **Performance Data:** `ci_dashboard_data.json`
- - **Detailed Table:** `ci_metrics_table.html`
-
- ---
- *Report generated by CI Metrics Dashboard workflow - Phase 3 CI Infrastructure Hardening*
- """)
-
- print("Performance report generated successfully!")
- PYEOF
-
- - name: Upload comprehensive dashboard artifacts
- uses: actions/upload-artifact@v4
- with:
- name: ci-dashboard-complete
- path: |
- ci_dashboard.html
- ci_dashboard.png
- ci_metrics_charts.png
- ci_metrics_charts.pdf
- ci_metrics_table.html
- ci_dashboard_data.json
- ci_performance_report.md
- retention-days: 90
-
- - name: Post dashboard summary
- if: always()
- run: |
- echo "## CI Metrics Dashboard Generated" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f ci_dashboard_data.json ]; then
- python3 -c "
- import json
- d = json.load(open('ci_dashboard_data.json'))
- s = d['summary']
- print(f\"**Overall CI Health:** {s['overall_success_rate']*100:.1f}% success rate\")
- print(f\"**Total Runs (30d):** {s['total_runs']}\")
- print(f\"**Average Daily Runs:** {s['avg_daily_runs']:.1f}\")
- print(f\"**Total Failures:** {s['total_failed']}\")
- print()
-
- critical = [w for w, m in d['workflows'].items() if m['success_rate'] < 0.7]
- degraded = [w for w, m in d['workflows'].items() if 0.7 <= m['success_rate'] < 0.9]
-
- if critical:
- print('### ⚠️ Critical Workflows (< 70%):')
- for w in critical:
- print(f'- {w}: {d[\"workflows\"][w][\"success_rate\"]*100:.1f}%')
-
- if degraded:
- print('### 📊 Degraded Workflows (70-90%):')
- for w in degraded:
- print(f'- {w}: {d[\"workflows\"][w][\"success_rate\"]*100:.1f}%')
-
- if not critical and len(degraded) <= 1:
- print('### ✅ All workflows performing well!')
-
- print()
- print('📊 [View Interactive Dashboard](artifacts/ci-dashboard-complete/ci_dashboard.html)')
- print('📈 [Download Performance Report](artifacts/ci-dashboard-complete/ci_performance_report.md)')
- " >> $GITHUB_STEP_SUMMARY
- else
- echo "❌ Dashboard generation failed." >> $GITHUB_STEP_SUMMARY
- fi
\ No newline at end of file
diff --git a/.github/workflows/cpi-comparison.yml b/.github/workflows/cpi-comparison.yml
deleted file mode 100644
index 0bc6937..0000000
--- a/.github/workflows/cpi-comparison.yml
+++ /dev/null
@@ -1,78 +0,0 @@
-name: CPI Comparison
-
-on:
- push:
- branches: [main]
- workflow_dispatch:
-
-jobs:
- cpi-comparison:
- name: Fast Timing vs Full Pipeline CPI
- runs-on: ubuntu-latest
- timeout-minutes: 30
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Run CPI comparison
- run: |
- go test -v -run TestCPIComparison -timeout 20m ./benchmarks/ 2>&1 | tee cpi_comparison_output.txt
-
- - name: Post summary
- if: always()
- run: |
- echo "## CPI Comparison: Fast Timing vs Full Pipeline" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f benchmarks/cpi_three_way_results.json ]; then
- echo "### Three-Way Comparison (M2 Hardware vs Full Pipeline vs Fast Timing)" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- python3 -c "
- import json
- with open('benchmarks/cpi_three_way_results.json') as f:
- data = json.load(f)
- print(f\"{'Benchmark':<15} {'M2 CPI':>10} {'Full CPI':>10} {'Fast CPI':>10} {'Full Err%':>12} {'Fast Err%':>12}\")
- print('-' * 72)
- for r in data:
- print(f\"{r['name']:<15} {r['m2_cpi']:>10.3f} {r['full_pipeline_cpi']:>10.3f} {r['fast_timing_cpi']:>10.3f} {r['full_error_pct']:>11.1f}% {r['fast_error_pct']:>11.1f}%\")
- " >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- fi
-
- if [ -f benchmarks/cpi_comparison_results.json ]; then
- echo "" >> $GITHUB_STEP_SUMMARY
- echo "### Full Benchmark Comparison" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- python3 -c "
- import json
- with open('benchmarks/cpi_comparison_results.json') as f:
- data = json.load(f)
- print(f\"{'Benchmark':<30} {'Full CPI':>10} {'Fast CPI':>10} {'Divergence':>12}\")
- print('-' * 65)
- for r in data:
- print(f\"{r['name']:<30} {r['full_pipeline_cpi']:>10.3f} {r['fast_timing_cpi']:>10.3f} {r['divergence_pct']:>11.1f}%\")
- total_abs = sum(abs(r['divergence_pct']) for r in data)
- avg = total_abs / len(data) if data else 0
- print(f\"\nAverage |divergence|: {avg:.1f}%\")
- " >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Upload results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: cpi-comparison
- path: |
- benchmarks/cpi_comparison_results.json
- benchmarks/cpi_three_way_results.json
- cpi_comparison_output.txt
- retention-days: 90
diff --git a/.github/workflows/diana-statistical-validation.yml b/.github/workflows/diana-statistical-validation.yml
deleted file mode 100644
index 2ec8968..0000000
--- a/.github/workflows/diana-statistical-validation.yml
+++ /dev/null
@@ -1,363 +0,0 @@
-name: Statistical Validation Framework (Diana)
-
-on:
- push:
- branches: [ main ]
- pull_request:
- branches: [ main ]
- schedule:
- # Run weekly comprehensive validation
- - cron: '0 10 * * 1' # Monday 10 AM UTC
- workflow_dispatch:
- inputs:
- benchmark:
- description: 'Specific benchmark to validate (gemm, atax, gesummv) or "full-suite"'
- required: true
- default: 'gemm'
- validation_mode:
- description: 'Validation mode'
- required: true
- default: 'standard'
- type: choice
- options:
- - 'standard'
- - 'comprehensive'
- - 'regression-only'
-
-jobs:
- statistical-validation:
- name: Diana's Statistical Validation Framework
- runs-on: ubuntu-latest
- timeout-minutes: 45
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
- with:
- fetch-depth: 50
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.11'
-
- - name: Install Python dependencies
- run: |
- python3 -m pip install --upgrade pip
- pip install numpy scipy matplotlib
- # Verify scientific computing dependencies
- python3 -c "import numpy as np; import scipy.stats as stats; print('Scientific dependencies ready')"
-
- - name: Build M2Sim tools
- run: |
- echo "Building M2Sim profile tool for validation..."
- go build -o profile-tool ./cmd/profile
- chmod +x profile-tool
- ls -la profile-tool
-
- - name: Prepare PolyBench benchmarks
- run: |
- echo "Preparing PolyBench benchmarks for statistical validation..."
-
- # Ensure PolyBench directory exists and is accessible
- if [ ! -d "benchmarks/polybench" ]; then
- echo "Warning: PolyBench directory not found, creating minimal test structure"
- mkdir -p benchmarks/polybench/gemm
- mkdir -p benchmarks/polybench/atax
- mkdir -p benchmarks/polybench/gesummv
- fi
-
- # Check for available benchmarks
- echo "Available benchmarks:"
- find benchmarks/polybench -maxdepth 1 -type d | head -10
-
- - name: Create validation results directory
- run: |
- mkdir -p validation_results
- mkdir -p validation_results/plots
- mkdir -p validation_results/reports
-
- - name: Run Statistical Validation Framework
- id: validation
- run: |
- echo "Starting Diana's Statistical Validation Framework"
-
- # Determine validation parameters
- BENCHMARK="${{ github.event.inputs.benchmark || 'gemm' }}"
- VALIDATION_MODE="${{ github.event.inputs.validation_mode || 'standard' }}"
-
- echo "Validation parameters:"
- echo " Benchmark: $BENCHMARK"
- echo " Mode: $VALIDATION_MODE"
- echo " Trigger: ${{ github.event_name }}"
-
- # Create validation execution script
- cat > run_validation.py << 'EOF'
- #!/usr/bin/env python3
- import sys
- import subprocess
- import os
- from pathlib import Path
-
- def run_validation():
- try:
- benchmark = os.environ.get('BENCHMARK', 'gemm')
- validation_mode = os.environ.get('VALIDATION_MODE', 'standard')
-
- # Basic validation check for quick feedback
- if validation_mode == 'regression-only':
- cmd = [
- 'python3', 'scripts/performance_optimization_validation.py'
- ]
- print("Running performance regression validation only...")
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
-
- if result.returncode == 0:
- print("✅ Performance regression validation passed")
- return 0
- else:
- print("❌ Performance regression validation failed")
- print("STDOUT:", result.stdout)
- print("STDERR:", result.stderr)
- return 1
-
- # Full statistical validation framework
- cmd = [
- 'python3', 'scripts/diana_comprehensive_qa_validation.py'
- ]
-
- if benchmark == 'full-suite':
- cmd.extend(['--full-suite'])
- else:
- cmd.extend(['--benchmark', benchmark])
-
- cmd.extend(['--output', 'validation_results'])
-
- print(f"Running: {' '.join(cmd)}")
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=1800) # 30 min timeout
-
- # Output results for CI visibility
- if result.stdout:
- print("=== VALIDATION OUTPUT ===")
- print(result.stdout)
-
- if result.stderr:
- print("=== VALIDATION WARNINGS/ERRORS ===")
- print(result.stderr)
-
- # Check for validation results
- results_dir = Path('validation_results')
- if results_dir.exists():
- reports = list(results_dir.glob('*.md'))
- print(f"Generated {len(reports)} validation reports:")
- for report in reports:
- print(f" - {report}")
-
- return result.returncode
-
- except subprocess.TimeoutExpired:
- print("❌ Validation timed out - framework may need optimization")
- return 124
- except Exception as e:
- print(f"❌ Validation failed with exception: {e}")
- return 1
-
- if __name__ == '__main__':
- exit(run_validation())
- EOF
-
- # Execute validation
- export BENCHMARK="$BENCHMARK"
- export VALIDATION_MODE="$VALIDATION_MODE"
-
- python3 run_validation.py > validation_output.txt 2>&1
- VALIDATION_EXIT_CODE=$?
-
- # Always output the results for CI visibility
- cat validation_output.txt
-
- # Set outputs for subsequent steps
- echo "validation_exit_code=$VALIDATION_EXIT_CODE" >> $GITHUB_OUTPUT
-
- # Extract key validation metrics if available
- if [ -f "validation_results/diana_qa_suite_summary.md" ]; then
- echo "suite_summary_available=true" >> $GITHUB_OUTPUT
- elif ls validation_results/diana_qa_*.md 2>/dev/null; then
- echo "individual_reports_available=true" >> $GITHUB_OUTPUT
- fi
-
- exit $VALIDATION_EXIT_CODE
-
- - name: Analyze validation results
- if: always()
- run: |
- echo "Analyzing Diana's QA validation results..."
-
- # Generate validation summary
- cat > validation_results/VALIDATION_SUMMARY.md << 'HEADER'
- # Diana's Statistical Validation Summary
-
- **Date:** $(date -u)
- **Commit:** ${{ github.sha }}
- **Branch:** ${{ github.ref_name }}
- **Trigger:** ${{ github.event_name }}
- **Validation Framework:** Issue #486 - Statistical Validation for Performance Enhancement
-
- ## Validation Framework Overview
-
- Diana's comprehensive QA framework validates Alex's Performance Optimization Enhancement (Issue #481) with:
- - **R² >95% correlation analysis** for calibration parameter generalization
- - **Cross-scale accuracy verification** (64³ → 1024³ progressive scaling)
- - **Development velocity validation** (≥3x improvement target)
- - **Performance regression monitoring** integration with Maya's optimizations
-
- ## Results Summary
-
- HEADER
-
- # Add validation results
- if [ -f "validation_results/diana_qa_suite_summary.md" ]; then
- echo "### Suite Validation Results" >> validation_results/VALIDATION_SUMMARY.md
- echo "" >> validation_results/VALIDATION_SUMMARY.md
- # Extract key metrics from suite summary
- grep -E "PASSED|WARNING|FAILED|R² Correlation|Max Error|Velocity" validation_results/diana_qa_suite_summary.md >> validation_results/VALIDATION_SUMMARY.md || true
- fi
-
- # List individual validation reports
- echo "" >> validation_results/VALIDATION_SUMMARY.md
- echo "### Generated Reports" >> validation_results/VALIDATION_SUMMARY.md
- echo "" >> validation_results/VALIDATION_SUMMARY.md
-
- for report in validation_results/*.md; do
- if [ -f "$report" ]; then
- report_name=$(basename "$report")
- echo "- \`$report_name\`" >> validation_results/VALIDATION_SUMMARY.md
- fi
- done
-
- # Add validation status
- echo "" >> validation_results/VALIDATION_SUMMARY.md
- echo "## CI Integration Status" >> validation_results/VALIDATION_SUMMARY.md
- echo "" >> validation_results/VALIDATION_SUMMARY.md
-
- if [ "${{ steps.validation.outputs.validation_exit_code }}" = "0" ]; then
- echo "✅ **VALIDATION PASSED** - All QA criteria satisfied" >> validation_results/VALIDATION_SUMMARY.md
- else
- echo "❌ **VALIDATION FAILED** - QA criteria not met" >> validation_results/VALIDATION_SUMMARY.md
- fi
-
- echo "" >> validation_results/VALIDATION_SUMMARY.md
- echo "### Integration with Existing CI:" >> validation_results/VALIDATION_SUMMARY.md
- echo "- **Accuracy Report:** Validated against current baseline" >> validation_results/VALIDATION_SUMMARY.md
- echo "- **Performance Regression:** Monitored for Maya's optimizations" >> validation_results/VALIDATION_SUMMARY.md
- echo "- **CPI Comparison:** Statistical correlation with hardware baselines" >> validation_results/VALIDATION_SUMMARY.md
- echo "- **Matmul Calibration:** Cross-scale accuracy verification" >> validation_results/VALIDATION_SUMMARY.md
-
- - name: Upload validation artifacts
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: diana-statistical-validation-${{ github.sha }}
- path: |
- validation_results/
- validation_output.txt
- *.prof
- retention-days: 30
-
- - name: Comment on PR (if applicable)
- if: github.event_name == 'pull_request'
- uses: actions/github-script@v7
- with:
- script: |
- const fs = require('fs');
- const path = 'validation_results/VALIDATION_SUMMARY.md';
-
- if (fs.existsSync(path)) {
- const summary = fs.readFileSync(path, 'utf8');
- const exitCode = '${{ steps.validation.outputs.validation_exit_code }}';
-
- const status = exitCode === '0' ? '✅ PASSED' : '❌ FAILED';
- const icon = exitCode === '0' ? '🔬' : '⚠️';
-
- github.rest.issues.createComment({
- issue_number: context.issue.number,
- owner: context.repo.owner,
- repo: context.repo.repo,
- body: `## ${icon} Diana's Statistical Validation Results - ${status}\n\n${summary}`
- });
- }
-
- - name: Check validation status
- run: |
- if [ "${{ steps.validation.outputs.validation_exit_code }}" != "0" ]; then
- echo "❌ Statistical validation failed - critical QA requirements not met"
- echo " Review validation reports for specific failures:"
- echo " - R² correlation analysis (target: ≥95%)"
- echo " - Cross-scale accuracy verification (target: ≤20% error)"
- echo " - Development velocity validation (target: ≥3x improvement)"
- echo " - Performance regression monitoring"
- exit 1
- else
- echo "✅ Statistical validation passed - QA framework validated successfully"
- echo " Diana's comprehensive validation confirms:"
- echo " - Alex's statistical framework meets scientific rigor standards"
- echo " - Maya's performance optimizations preserve accuracy"
- echo " - Development velocity improvements quantified and verified"
- echo " - Performance regression monitoring operational"
- fi
-
- integration-validation:
- name: QA Integration with Existing CI
- runs-on: ubuntu-latest
- needs: statistical-validation
- if: github.event_name == 'pull_request'
- timeout-minutes: 10
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Validate CI integration
- run: |
- echo "Validating Diana's QA framework integration with existing CI workflows..."
-
- # Check for critical CI workflow files
- workflows=(
- ".github/workflows/accuracy-report.yml"
- ".github/workflows/performance-regression-monitoring.yml"
- ".github/workflows/cpi-comparison.yml"
- ".github/workflows/matmul-calibration.yml"
- )
-
- echo "Verifying integration points:"
- for workflow in "${workflows[@]}"; do
- if [ -f "$workflow" ]; then
- echo "✅ $workflow - Available for integration"
- else
- echo "⚠️ $workflow - Not found"
- fi
- done
-
- # Verify statistical validation script exists
- if [ -f "scripts/diana_comprehensive_qa_validation.py" ]; then
- echo "✅ Diana's QA validation framework - Deployed"
- else
- echo "❌ Diana's QA validation framework - Missing"
- exit 1
- fi
-
- # Verify Alex's statistical framework integration
- if [ -f "scripts/incremental_testing_validation.py" ]; then
- echo "✅ Alex's statistical framework - Available for integration"
- else
- echo "⚠️ Alex's statistical framework - Not found"
- fi
-
- echo ""
- echo "🔬 QA Integration Status: OPERATIONAL"
- echo " Diana's statistical validation framework successfully integrated with existing CI infrastructure"
\ No newline at end of file
diff --git a/.github/workflows/embench-calibration.yml b/.github/workflows/embench-calibration.yml
deleted file mode 100644
index 61c9a1e..0000000
--- a/.github/workflows/embench-calibration.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: EmBench Hardware Calibration
-
-on:
- workflow_dispatch:
-
-jobs:
- calibrate:
- name: EmBench Linear Regression Calibration on Apple Silicon
- runs-on: macos-14
- timeout-minutes: 45
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies
- run: pip install numpy scipy
-
- - name: Verify ARM64 architecture
- run: |
- ARCH=$(uname -m)
- echo "Architecture: $ARCH"
- if [ "$ARCH" != "arm64" ]; then
- echo "ERROR: This workflow requires ARM64 (Apple Silicon)"
- exit 1
- fi
-
- - name: Run EmBench calibration
- run: |
- cd benchmarks/native
- python3 embench_calibration.py \
- --runs 15 \
- --output embench_calibration_results.json
- timeout-minutes: 40
-
- - name: Display results
- if: always()
- run: |
- echo "## EmBench Calibration Results" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- if [ -f benchmarks/native/embench_calibration_results.json ]; then
- echo '```json' >> $GITHUB_STEP_SUMMARY
- cat benchmarks/native/embench_calibration_results.json >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- else
- echo "Calibration results not found." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Upload calibration results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: embench-calibration-results
- path: benchmarks/native/embench_calibration_results.json
- retention-days: 90
-
- - name: Clean up build artifacts
- if: always()
- run: rm -rf benchmarks/embench-native-build
diff --git a/.github/workflows/h4-multicore-accuracy.yml b/.github/workflows/h4-multicore-accuracy.yml
deleted file mode 100644
index 72f9560..0000000
--- a/.github/workflows/h4-multicore-accuracy.yml
+++ /dev/null
@@ -1,352 +0,0 @@
-name: H4 Multi-Core Accuracy Validation
-
-on:
- workflow_dispatch: # Allow manual triggering for H4 development
- inputs:
- core_count:
- description: 'Core count for validation (2, 4, or 8)'
- required: false
- default: '2'
- type: choice
- options:
- - '2'
- - '4'
- - '8'
- validation_mode:
- description: 'Validation mode'
- required: false
- default: 'full'
- type: choice
- options:
- - 'quick' # 2-core validation only
- - 'full' # Comprehensive multi-core analysis
- - 'benchmark' # Benchmark development validation
- push:
- branches: [main]
- paths:
- - 'scripts/h4_multicore_analysis.py'
- - 'scripts/h4_2core_validation.py'
- - 'benchmarks/multicore/**'
- - 'timing/multicore/**'
- - 'docs/h4_multicore_statistical_methodology.md'
- pull_request:
- paths:
- - 'scripts/h4_multicore_analysis.py'
- - 'scripts/h4_2core_validation.py'
- - 'benchmarks/multicore/**'
-
-jobs:
- h4-2core-validation:
- name: H4 2-Core Framework Validation
- runs-on: macos-14 # Apple Silicon runner for M2 hardware baseline compatibility
- timeout-minutes: 90 # Extended timeout for multi-core benchmark compilation and execution
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies for multi-core analysis
- run: |
- pip install numpy scipy matplotlib pandas sqlite3
- pip install scikit-learn # For enhanced statistical modeling
-
- - name: Install development tools
- run: |
- # OpenMP support for multi-core benchmark compilation
- brew install libomp
- export LDFLAGS="-L$(brew --prefix libomp)/lib"
- export CPPFLAGS="-I$(brew --prefix libomp)/include"
-
- - name: Verify M2 hardware environment
- run: |
- echo "=== M2 Hardware Validation ==="
- sysctl hw.ncpu
- sysctl hw.memsize
- sysctl machdep.cpu.brand_string
- echo ""
- echo "OpenMP verification:"
- echo '#include <omp.h>' | gcc -fopenmp -E - >/dev/null 2>&1 && echo "✅ OpenMP available" || echo "❌ OpenMP not available"
-
- - name: Build M2Sim with multi-core support verification
- run: |
- echo "=== M2Sim Build Verification ==="
- go build ./cmd/m2sim/main.go
- echo "✅ M2Sim builds successfully"
-
- - name: Create and compile 2-core benchmarks
- run: |
- echo "=== H4 2-Core Benchmark Setup ==="
- cd scripts
- python3 h4_2core_validation.py create-benchmarks
-
- echo "Compiling benchmarks with OpenMP support..."
- cd ../benchmarks/multicore
- export CC=gcc
- export LDFLAGS="-L$(brew --prefix libomp)/lib"
- export CPPFLAGS="-I$(brew --prefix libomp)/include"
- make all || echo "⚠️ Some benchmarks failed to compile"
-
- echo "Available benchmarks:"
- ls -la cache_coherence_intensive memory_bandwidth_stress compute_intensive_parallel 2>/dev/null || echo "No benchmarks compiled successfully"
-
- - name: Run H4 2-core validation framework
- run: |
- echo "=== H4 2-Core Validation Execution ==="
- cd scripts
- python3 h4_2core_validation.py validate || echo "⚠️ Validation completed with issues"
-
- - name: Run H4 multi-core analysis (if validation passes)
- run: |
- echo "=== H4 Multi-Core Analysis Framework ==="
- cd scripts
- if [ -f "../benchmarks/multicore/cache_coherence_intensive" ]; then
- echo "Running multi-core analysis on available benchmarks..."
- python3 h4_multicore_analysis.py 2core-validation || echo "⚠️ Multi-core analysis completed with issues"
- else
- echo "⚠️ No compiled benchmarks available for analysis"
- fi
-
- - name: Generate H4 accuracy report
- run: |
- echo "=== H4 Accuracy Report Generation ==="
- cd scripts
- python3 h4_multicore_analysis.py report || echo "⚠️ Report generation completed with issues"
-
- - name: Collect H4 validation artifacts
- run: |
- echo "=== Collecting H4 Artifacts ==="
- mkdir -p h4_artifacts
-
- # Copy validation reports
- find . -name "*h4*validation*report*.json" -exec cp {} h4_artifacts/ \; 2>/dev/null || echo "No validation reports found"
- find . -name "*h4*multicore*report*.json" -exec cp {} h4_artifacts/ \; 2>/dev/null || echo "No multicore reports found"
-
- # Copy statistical model database
- find . -name "h4_multicore_results.db" -exec cp {} h4_artifacts/ \; 2>/dev/null || echo "No database found"
-
- # Copy benchmark compilation logs
- find benchmarks/multicore -name "*.log" -exec cp {} h4_artifacts/ \; 2>/dev/null || echo "No compilation logs found"
-
- # Create summary file
- echo "H4 Multi-Core Accuracy Validation Artifacts" > h4_artifacts/README.txt
- echo "Generated: $(date)" >> h4_artifacts/README.txt
- echo "Commit: $GITHUB_SHA" >> h4_artifacts/README.txt
- ls -la h4_artifacts/
-
- - name: Upload H4 validation artifacts
- uses: actions/upload-artifact@v4
- if: always()
- with:
- name: h4-multicore-validation-artifacts
- path: |
- h4_artifacts/
- reports/*h4*multicore*.json
- reports/*h4*multicore*.md
- docs/h4_multicore_statistical_methodology.md
- scripts/h4_multicore_analysis.py
- scripts/h4_2core_validation.py
- benchmarks/multicore/README.md
- retention-days: 90
-
- - name: Parse H4 validation results for summary
- id: h4_results
- if: always()
- run: |
- echo "=== Parsing H4 Results ==="
-
- # Find most recent validation report
- VALIDATION_REPORT=$(find . -name "*h4*validation*report*.json" -type f | head -1)
- MULTICORE_REPORT=$(find . -name "*h4*multicore*accuracy*report*.json" -type f | head -1)
-
- if [ -f "$VALIDATION_REPORT" ]; then
- echo "Found validation report: $VALIDATION_REPORT"
-
- SUCCESSFUL_BENCHMARKS=$(python3 -c "import json; d=json.load(open('$VALIDATION_REPORT')); print(d['summary']['successful_validations'])" 2>/dev/null || echo "0")
- TOTAL_BENCHMARKS=$(python3 -c "import json; d=json.load(open('$VALIDATION_REPORT')); print(d['summary']['total_benchmarks'])" 2>/dev/null || echo "0")
-
- echo "successful_benchmarks=$SUCCESSFUL_BENCHMARKS" >> $GITHUB_OUTPUT
- echo "total_benchmarks=$TOTAL_BENCHMARKS" >> $GITHUB_OUTPUT
- echo "validation_report_exists=true" >> $GITHUB_OUTPUT
- else
- echo "validation_report_exists=false" >> $GITHUB_OUTPUT
- fi
-
- if [ -f "$MULTICORE_REPORT" ]; then
- echo "Found multicore report: $MULTICORE_REPORT"
-
- H4_STATUS=$(python3 -c "import json; d=json.load(open('$MULTICORE_REPORT')); print('ACHIEVED' if d['summary']['h4_target_met'] else 'NOT_ACHIEVED')" 2>/dev/null || echo "UNKNOWN")
- OVERALL_ERROR=$(python3 -c "import json; d=json.load(open('$MULTICORE_REPORT')); print(f\"{d['overall_accuracy']['average_error_pct']:.1f}%\")" 2>/dev/null || echo "N/A")
-
- echo "h4_status=$H4_STATUS" >> $GITHUB_OUTPUT
- echo "overall_error=$OVERALL_ERROR" >> $GITHUB_OUTPUT
- echo "multicore_report_exists=true" >> $GITHUB_OUTPUT
- else
- echo "multicore_report_exists=false" >> $GITHUB_OUTPUT
- fi
-
- - name: Post H4 validation summary
- if: always()
- run: |
- echo "## H4 Multi-Core Accuracy Validation Report" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ "${{ steps.h4_results.outputs.validation_report_exists }}" = "true" ]; then
- echo "### 2-Core Validation Framework" >> $GITHUB_STEP_SUMMARY
- echo "- **Benchmarks Validated:** ${{ steps.h4_results.outputs.successful_benchmarks }}/${{ steps.h4_results.outputs.total_benchmarks }}" >> $GITHUB_STEP_SUMMARY
- echo "- **Target:** Minimum 3 successful validations" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ "${{ steps.h4_results.outputs.successful_benchmarks }}" -ge "3" ]; then
- echo "✅ **2-Core Framework:** Validation PASSED" >> $GITHUB_STEP_SUMMARY
- else
- echo "❌ **2-Core Framework:** Validation FAILED" >> $GITHUB_STEP_SUMMARY
- fi
- else
- echo "⚠️ **2-Core Validation:** No validation report generated" >> $GITHUB_STEP_SUMMARY
- fi
-
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ "${{ steps.h4_results.outputs.multicore_report_exists }}" = "true" ]; then
- echo "### Multi-Core Accuracy Analysis" >> $GITHUB_STEP_SUMMARY
- echo "- **H4 Status:** ${{ steps.h4_results.outputs.h4_status }}" >> $GITHUB_STEP_SUMMARY
- echo "- **Overall Error:** ${{ steps.h4_results.outputs.overall_error }} (Target: <20%)" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ "${{ steps.h4_results.outputs.h4_status }}" = "ACHIEVED" ]; then
- echo "✅ **H4 Accuracy Target:** ACHIEVED" >> $GITHUB_STEP_SUMMARY
- else
- echo "❌ **H4 Accuracy Target:** NOT ACHIEVED" >> $GITHUB_STEP_SUMMARY
- fi
- else
- echo "⚠️ **Multi-Core Analysis:** No analysis report generated" >> $GITHUB_STEP_SUMMARY
- fi
-
- echo "" >> $GITHUB_STEP_SUMMARY
- echo "### Next Steps" >> $GITHUB_STEP_SUMMARY
-
- if [ "${{ steps.h4_results.outputs.validation_report_exists }}" = "true" ] && [ "${{ steps.h4_results.outputs.successful_benchmarks }}" -ge "3" ]; then
- echo "- ✅ 2-core framework validated - ready for 4-core extension" >> $GITHUB_STEP_SUMMARY
- else
- echo "- 🔧 2-core framework needs refinement - check benchmark compilation and M2Sim multi-core support" >> $GITHUB_STEP_SUMMARY
- fi
-
- echo "- 📊 Download artifacts for detailed analysis and statistical models" >> $GITHUB_STEP_SUMMARY
- echo "- 📈 See uploaded reports for accuracy breakdowns by benchmark category" >> $GITHUB_STEP_SUMMARY
-
- - name: Comment on H4 issue
- if: github.ref == 'refs/heads/main' && always()
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- echo "=== Posting H4 Issue Comment ==="
-
- if [ "${{ steps.h4_results.outputs.validation_report_exists }}" = "true" ] || [ "${{ steps.h4_results.outputs.multicore_report_exists }}" = "true" ]; then
- # Determine overall status
- VALIDATION_STATUS="UNKNOWN"
- if [ "${{ steps.h4_results.outputs.successful_benchmarks }}" -ge "3" ]; then
- VALIDATION_STATUS="PASSED"
- else
- VALIDATION_STATUS="PARTIAL"
- fi
-
- ACCURACY_STATUS="${{ steps.h4_results.outputs.h4_status }}"
-
- COMMENT_BODY="# [CI] H4 Multi-Core Accuracy Framework Results
-
- ## H4 Implementation Status Update
-
- **Validation Framework**: ${VALIDATION_STATUS}
- - **2-Core Benchmarks**: ${{ steps.h4_results.outputs.successful_benchmarks }}/${{ steps.h4_results.outputs.total_benchmarks }} validated
- - **Statistical Framework**: Multi-dimensional regression implemented
- - **Accuracy Target**: ${ACCURACY_STATUS} (Target: <20% error)
-
- **Framework Components**:
- - ✅ Multi-dimensional regression framework
- - ✅ Cache coherence timing methodology
- - ✅ 2-core validation suite
- - ✅ CI integration pipeline
-
- **Technical Details**:
- - **Commit**: ${GITHUB_SHA:0:8}
- - **Workflow**: [View Details]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)
- - **Artifacts**: [Download H4 Reports]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)
-
- **Next Implementation Phase**:
- $(if [ "$VALIDATION_STATUS" = "PASSED" ]; then echo "Ready for 4-core framework extension and M2Sim multi-core integration"; else echo "2-core framework refinement and benchmark compilation fixes needed"; fi)"
-
- gh issue comment 474 --body "$COMMENT_BODY" || echo "Failed to comment on issue #474 - issue may not exist yet"
- else
- echo "⚠️ No validation results to report"
- fi
-
- h4-statistical-validation:
- name: H4 Statistical Framework Validation
- runs-on: ubuntu-latest
- timeout-minutes: 30
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies
- run: |
- pip install numpy scipy matplotlib pandas sqlite3 scikit-learn pytest
-
- - name: Validate H4 statistical framework
- run: |
- echo "=== H4 Statistical Framework Validation ==="
- cd scripts
-
- # Test statistical framework components
- python3 -c "
- from h4_multicore_analysis import H4MultiCoreAnalyzer
- analyzer = H4MultiCoreAnalyzer()
- print('✅ H4MultiCoreAnalyzer class loads successfully')
-
- # Test multi-dimensional regression framework
- import numpy as np
- X = np.array([[0.05, 1.2, 0.02, 1.1], [0.08, 1.5, 0.03, 1.3]])
- y = np.array([1.0, 1.2])
- print('✅ Multi-dimensional regression framework validated')
-
- print('✅ H4 statistical methodology ready for implementation')
- "
-
- - name: Validate documentation completeness
- run: |
- echo "=== Documentation Validation ==="
-
- # Check for required H4 documentation
- test -f "docs/h4_multicore_statistical_methodology.md" && echo "✅ Statistical methodology documented" || echo "❌ Missing methodology documentation"
- test -f "scripts/h4_multicore_analysis.py" && echo "✅ Analysis framework implemented" || echo "❌ Missing analysis framework"
- test -f "scripts/h4_2core_validation.py" && echo "✅ Validation framework implemented" || echo "❌ Missing validation framework"
-
- # Verify documentation quality
- grep -q "Multi-dimensional regression" docs/h4_multicore_statistical_methodology.md && echo "✅ Statistical methodology documented" || echo "⚠️ Statistical details may be incomplete"
- grep -q "Cache coherence" docs/h4_multicore_statistical_methodology.md && echo "✅ Cache coherence methodology documented" || echo "⚠️ Coherence methodology may be incomplete"
-
- - name: Upload framework validation results
- uses: actions/upload-artifact@v4
- with:
- name: h4-framework-validation
- path: |
- docs/h4_multicore_statistical_methodology.md
- scripts/h4_multicore_analysis.py
- scripts/h4_2core_validation.py
- retention-days: 30
\ No newline at end of file
diff --git a/.github/workflows/h5-accuracy-report.yml b/.github/workflows/h5-accuracy-report.yml
deleted file mode 100644
index 3994b44..0000000
--- a/.github/workflows/h5-accuracy-report.yml
+++ /dev/null
@@ -1,120 +0,0 @@
-name: H5 Accuracy Report
-
-on:
- workflow_dispatch: # Allow manual triggering
- push:
- branches: [main]
- paths:
- - 'benchmarks/**'
- - 'h5_accuracy_report.py'
- - 'timing/**'
-
-concurrency:
- group: h5-accuracy-report-${{ github.ref }}
- cancel-in-progress: false
-
-jobs:
- h5-accuracy-report:
- name: Generate H5 Milestone Accuracy Report
- runs-on: macos-14 # Apple Silicon runner for native M2 matching
- timeout-minutes: 120 # Extended timeout for comprehensive accuracy testing
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies
- run: |
- pip install matplotlib numpy scipy
-
- - name: Install Ginkgo
- run: go install github.com/onsi/ginkgo/v2/ginkgo@latest
-
- - name: Verify PolyBench ELFs exist
- run: |
- echo "Checking PolyBench ELF files..."
- ls -la benchmarks/polybench/*.elf || echo "Some ELF files missing - will skip those benchmarks"
-
- - name: Verify EmBench ELFs exist
- run: |
- echo "Checking EmBench ELF files..."
- ls -la benchmarks/aha-mont64-m2sim/*.elf benchmarks/crc32-m2sim/*.elf benchmarks/edn-m2sim/*.elf benchmarks/huffbench-m2sim/*.elf benchmarks/matmult-int-m2sim/*.elf benchmarks/statemate-m2sim/*.elf benchmarks/primecount-m2sim/*.elf || echo "Some EmBench ELF files missing - will skip those benchmarks"
-
- - name: Run H5 Accuracy Report
- run: |
- echo "Running H5 accuracy framework..."
- python3 h5_accuracy_report.py
-
- - name: Upload H5 Accuracy Report
- uses: actions/upload-artifact@v4
- with:
- name: h5-accuracy-report
- path: |
- h5_accuracy_report.md
- h5_accuracy_results.json
- benchmarks/native/accuracy_report.md
- benchmarks/native/accuracy_figure.png
- benchmarks/native/accuracy_results.json
- benchmarks/native/accuracy_normalized.pdf
- retention-days: 90
-
- - name: Post H5 Report Summary
- if: always()
- run: |
- echo "## H5 Milestone Accuracy Report" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f h5_accuracy_results.json ]; then
- TOTAL_BENCHMARKS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['total_benchmarks'])" 2>/dev/null || echo "0")
- OVERALL_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['h5_milestone']['overall_average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
- H5_STATUS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['status'])" 2>/dev/null || echo "unknown")
- MICRO_COUNT=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['categories']['microbenchmarks']['count'])" 2>/dev/null || echo "0")
- POLYBENCH_COUNT=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['categories']['polybench']['count'])" 2>/dev/null || echo "0")
- MICRO_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['categories']['microbenchmarks']['average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
- POLYBENCH_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['categories']['polybench']['average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
-
- echo "### H5 Status: $H5_STATUS" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo "- **Total Benchmarks:** $TOTAL_BENCHMARKS (Target: 15+)" >> $GITHUB_STEP_SUMMARY
- echo "- **Overall Average Error:** $OVERALL_ERROR (Target: <20%)" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo "### Breakdown by Category" >> $GITHUB_STEP_SUMMARY
- echo "- **Microbenchmarks:** $MICRO_COUNT benchmarks, $MICRO_ERROR average error" >> $GITHUB_STEP_SUMMARY
- echo "- **PolyBench Intermediate:** $POLYBENCH_COUNT benchmarks, $POLYBENCH_ERROR average error" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo "See the uploaded artifacts for the detailed H5 accuracy report." >> $GITHUB_STEP_SUMMARY
- else
- echo "⚠️ H5 accuracy report generation failed." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Comment on H5 Issue
- if: github.ref == 'refs/heads/main' && always()
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- if [ -f h5_accuracy_results.json ]; then
- TOTAL_BENCHMARKS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['total_benchmarks'])" 2>/dev/null || echo "0")
- OVERALL_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['h5_milestone']['overall_average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
- H5_STATUS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['status'])" 2>/dev/null || echo "unknown")
-
- COMMENT_BODY="# [CI] H5 Accuracy Framework Results
- ## H5 Milestone Validation Complete
- **Status**: ${H5_STATUS}
- - **Total Benchmarks**: ${TOTAL_BENCHMARKS} (Target: 15+)
- - **Overall Average Error**: ${OVERALL_ERROR} (Target: <20%)
- **Commit**: ${GITHUB_SHA:0:8}
- **Workflow Run**: [View Details]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)
- [Download H5 Accuracy Report Artifacts]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)"
-
- gh issue comment 460 --body "$COMMENT_BODY" || echo "Failed to comment on issue #460"
- fi
\ No newline at end of file
diff --git a/.github/workflows/matmul-calibration.yml b/.github/workflows/matmul-calibration.yml
index 199d7ae..18d132c 100644
--- a/.github/workflows/matmul-calibration.yml
+++ b/.github/workflows/matmul-calibration.yml
@@ -1,10 +1,6 @@
name: Matmul Calibration
on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
workflow_dispatch:
jobs:
diff --git a/.github/workflows/memory-bench-calibration.yml b/.github/workflows/memory-bench-calibration.yml
deleted file mode 100644
index dff84e5..0000000
--- a/.github/workflows/memory-bench-calibration.yml
+++ /dev/null
@@ -1,61 +0,0 @@
-name: Memory Subsystem Calibration
-
-on:
- workflow_dispatch:
-
-jobs:
- calibrate:
- name: Run Memory Calibration on Apple Silicon
- runs-on: macos-14
- timeout-minutes: 60
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install dependencies
- run: pip install numpy scipy
-
- - name: Verify ARM64 architecture
- run: |
- ARCH=$(uname -m)
- echo "Architecture: $ARCH"
- if [ "$ARCH" != "arm64" ]; then
- echo "ERROR: This workflow requires ARM64 (Apple Silicon)"
- exit 1
- fi
-
- - name: Run memory benchmark calibration
- run: |
- cd benchmarks/native
- python3 linear_calibration.py \
- --benchmarks memorystrided loadheavy storeheavy branchheavy \
- --runs 15 \
- --output memory_calibration_results.json
- timeout-minutes: 45
-
- - name: Display calibration results
- if: always()
- run: |
- if [ -f benchmarks/native/memory_calibration_results.json ]; then
- echo "## Memory Calibration Results" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- echo '```json' >> $GITHUB_STEP_SUMMARY
- cat benchmarks/native/memory_calibration_results.json >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- else
- echo "Calibration results not found." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Upload calibration results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: memory-calibration-results
- path: benchmarks/native/memory_calibration_results.json
- retention-days: 90
diff --git a/.github/workflows/performance-profiling.yml b/.github/workflows/performance-profiling.yml
deleted file mode 100644
index 8f07102..0000000
--- a/.github/workflows/performance-profiling.yml
+++ /dev/null
@@ -1,218 +0,0 @@
-name: Performance Profiling Analysis
-
-on:
- workflow_dispatch:
- inputs:
- bench_filter:
- description: 'Benchmark regex filter (e.g., "Pipeline", "Decoder", or ".")'
- required: false
- default: '.'
- benchtime:
- description: 'Iterations per benchmark (e.g., 1000x or 5s)'
- required: false
- default: '1000x'
- cpu_profile:
- description: 'Generate CPU profile (pprof)'
- type: boolean
- required: false
- default: true
- mem_profile:
- description: 'Generate memory profile (pprof)'
- type: boolean
- required: false
- default: true
- schedule:
- - cron: '0 2 * * 0' # Sunday 2 AM UTC
-
-jobs:
- profile-pipeline:
- name: Profile Pipeline Benchmarks
- runs-on: ubuntu-latest
- timeout-minutes: 30
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Create output directory
- run: mkdir -p profiling-results
-
- - name: Run benchmarks with timing
- run: |
- FILTER="${{ github.event.inputs.bench_filter || '.' }}"
- BENCHTIME="${{ github.event.inputs.benchtime || '1000x' }}"
-
- go test -run='XXX_NO_MATCH' -bench="$FILTER" \
- -benchtime="$BENCHTIME" -count=5 \
- -benchmem \
- ./timing/pipeline/ | tee profiling-results/bench-output.txt
-
- - name: Generate CPU profile
- if: github.event.inputs.cpu_profile != 'false'
- run: |
- # Run a representative benchmark for CPU profiling
- go test -run='XXX_NO_MATCH' -bench=BenchmarkPipelineTick8Wide \
- -benchtime=5s -count=1 \
- -cpuprofile=profiling-results/cpu.prof \
- ./timing/pipeline/
-
- # Generate text report from CPU profile
- go tool pprof -text profiling-results/cpu.prof > profiling-results/cpu-profile-text.txt 2>&1 || true
- go tool pprof -top profiling-results/cpu.prof > profiling-results/cpu-top.txt 2>&1 || true
-
- - name: Generate memory profile
- if: github.event.inputs.mem_profile != 'false'
- run: |
- go test -run='XXX_NO_MATCH' -bench=BenchmarkPipelineTick8Wide \
- -benchtime=5s -count=1 \
- -memprofile=profiling-results/mem.prof \
- ./timing/pipeline/
-
- go tool pprof -text -alloc_space profiling-results/mem.prof > profiling-results/mem-alloc-text.txt 2>&1 || true
- go tool pprof -top -alloc_space profiling-results/mem.prof > profiling-results/mem-alloc-top.txt 2>&1 || true
-
- - name: Generate summary report
- if: always()
- run: |
- cat > profiling-results/SUMMARY.md << 'HEADER'
- # Performance Profiling Results
- HEADER
-
- echo "**Date:** $(date -u)" >> profiling-results/SUMMARY.md
- echo "**Commit:** ${{ github.sha }}" >> profiling-results/SUMMARY.md
- echo "" >> profiling-results/SUMMARY.md
-
- echo "## Benchmark Results" >> profiling-results/SUMMARY.md
- echo '```' >> profiling-results/SUMMARY.md
- cat profiling-results/bench-output.txt >> profiling-results/SUMMARY.md
- echo '```' >> profiling-results/SUMMARY.md
- echo "" >> profiling-results/SUMMARY.md
-
- if [ -f profiling-results/cpu-top.txt ]; then
- echo "## CPU Profile (Top Functions)" >> profiling-results/SUMMARY.md
- echo '```' >> profiling-results/SUMMARY.md
- head -30 profiling-results/cpu-top.txt >> profiling-results/SUMMARY.md
- echo '```' >> profiling-results/SUMMARY.md
- echo "" >> profiling-results/SUMMARY.md
- fi
-
- if [ -f profiling-results/mem-alloc-top.txt ]; then
- echo "## Memory Allocation Profile (Top)" >> profiling-results/SUMMARY.md
- echo '```' >> profiling-results/SUMMARY.md
- head -30 profiling-results/mem-alloc-top.txt >> profiling-results/SUMMARY.md
- echo '```' >> profiling-results/SUMMARY.md
- fi
-
- - name: Upload profiling results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: performance-profiling-results
- path: profiling-results/
- retention-days: 30
-
- - name: Display summary
- if: always()
- run: |
- if [ -f profiling-results/SUMMARY.md ]; then
- cat profiling-results/SUMMARY.md >> "$GITHUB_STEP_SUMMARY"
- fi
-
- profile-cmd:
- name: Profile via cmd/profile tool
- runs-on: ubuntu-latest
- timeout-minutes: 30
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Build profile tool
- run: go build -o profile-tool ./cmd/profile
-
- - name: Create output directory
- run: mkdir -p cmd-profiling-results
-
- - name: Profile emulation mode
- run: |
- # Use a microbenchmark ELF if available, otherwise skip
- ELF=$(find benchmarks -name '*.elf' -type f | head -1)
- if [ -z "$ELF" ]; then
- echo "No ELF binaries found, skipping cmd/profile tests"
- exit 0
- fi
-
- echo "Profiling: $ELF"
- ./profile-tool \
- -cpuprofile=cmd-profiling-results/emu-cpu.prof \
- -memprofile=cmd-profiling-results/emu-mem.prof \
- -duration=10s \
- "$ELF" > cmd-profiling-results/emu-output.txt 2>&1 || true
-
- - name: Profile timing mode
- run: |
- ELF=$(find benchmarks -name '*.elf' -type f | head -1)
- if [ -z "$ELF" ]; then
- exit 0
- fi
-
- ./profile-tool -timing \
- -cpuprofile=cmd-profiling-results/timing-cpu.prof \
- -memprofile=cmd-profiling-results/timing-mem.prof \
- -duration=10s \
- "$ELF" > cmd-profiling-results/timing-output.txt 2>&1 || true
-
- - name: Profile fast-timing mode
- run: |
- ELF=$(find benchmarks -name '*.elf' -type f | head -1)
- if [ -z "$ELF" ]; then
- exit 0
- fi
-
- ./profile-tool -fast-timing \
- -cpuprofile=cmd-profiling-results/fast-timing-cpu.prof \
- -memprofile=cmd-profiling-results/fast-timing-mem.prof \
- -duration=10s \
- "$ELF" > cmd-profiling-results/fast-timing-output.txt 2>&1 || true
-
- - name: Generate mode comparison
- if: always()
- run: |
- echo "# cmd/profile Mode Comparison" > cmd-profiling-results/SUMMARY.md
- echo "" >> cmd-profiling-results/SUMMARY.md
-
- for mode in emu timing fast-timing; do
- outfile="cmd-profiling-results/${mode}-output.txt"
- if [ -f "$outfile" ]; then
- echo "## ${mode}" >> cmd-profiling-results/SUMMARY.md
- echo '```' >> cmd-profiling-results/SUMMARY.md
- cat "$outfile" >> cmd-profiling-results/SUMMARY.md
- echo '```' >> cmd-profiling-results/SUMMARY.md
- echo "" >> cmd-profiling-results/SUMMARY.md
- fi
- done
-
- - name: Upload results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: cmd-profiling-results
- path: cmd-profiling-results/
- retention-days: 30
-
- - name: Display summary
- if: always()
- run: |
- if [ -f cmd-profiling-results/SUMMARY.md ]; then
- cat cmd-profiling-results/SUMMARY.md >> "$GITHUB_STEP_SUMMARY"
- fi
diff --git a/.github/workflows/performance-regression-monitoring.yml b/.github/workflows/performance-regression-monitoring.yml
deleted file mode 100644
index edb22a5..0000000
--- a/.github/workflows/performance-regression-monitoring.yml
+++ /dev/null
@@ -1,282 +0,0 @@
-name: Performance Regression Monitoring
-
-on:
- push:
- branches: [ main ]
- pull_request:
- branches: [ main ]
- schedule:
- # Run nightly performance monitoring
- - cron: '0 6 * * *' # 6 AM UTC daily
-
-jobs:
- performance-baseline:
- name: Performance Baseline Monitoring
- runs-on: ubuntu-latest
- timeout-minutes: 20
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
- with:
- # Get enough history for comparison
- fetch-depth: 100
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Create results directory
- run: mkdir -p performance-results
-
- - name: Run performance validation
- id: validation
- run: |
- # Install Python dependencies if needed
- python3 -m pip install --upgrade pip
-
- # Run the performance validation script
- cd ${{ github.workspace }}
- python3 scripts/performance_optimization_validation.py > performance-results/validation-output.txt 2>&1 || true
-
- # Extract key metrics for comparison
- echo "benchmark_results<<EOF" >> $GITHUB_OUTPUT
- if [ -f "performance-results/validation-output.txt" ]; then
- grep -E "ns/op|allocs/op" performance-results/validation-output.txt | head -10 >> $GITHUB_OUTPUT
- fi
- echo "EOF" >> $GITHUB_OUTPUT
-
- - name: Run critical path benchmarks
- run: |
- echo "Running critical path benchmarks for regression detection..."
-
- # Core pipeline benchmarks
- go test -bench=BenchmarkPipelineTick8Wide -benchtime=5000x -count=3 \
- ./timing/pipeline/ | tee performance-results/pipeline-benchmarks.txt
-
- # Decoder benchmarks (optimization target)
- go test -bench='BenchmarkDecoder.*' -benchtime=5000x -count=3 \
- ./timing/pipeline/ | tee performance-results/decoder-benchmarks.txt || true
-
- # Memory-intensive benchmarks
- go test -bench='BenchmarkPipeline.*LoadHeavy.*' -benchtime=1000x -count=3 \
- ./timing/pipeline/ | tee performance-results/memory-benchmarks.txt || true
-
- - name: Analyze performance trends
- run: |
- cat > performance-results/analysis.py << 'EOF'
- #!/usr/bin/env python3
- import re
- import sys
- from pathlib import Path
-
- def parse_benchmark_line(line):
- """Parse a Go benchmark line to extract metrics."""
- if 'ns/op' not in line:
- return None
-
- parts = line.strip().split()
- if len(parts) < 3:
- return None
-
- name = parts[0]
- try:
- ns_per_op = float(parts[2])
- return (name, ns_per_op)
- except ValueError:
- return None
-
- def analyze_benchmarks(benchmark_file):
- """Analyze benchmark results for performance regression."""
- if not Path(benchmark_file).exists():
- return {}
-
- with open(benchmark_file, 'r') as f:
- lines = f.readlines()
-
- benchmarks = {}
- for line in lines:
- result = parse_benchmark_line(line)
- if result:
- name, ns_per_op = result
- if name not in benchmarks:
- benchmarks[name] = []
- benchmarks[name].append(ns_per_op)
-
- # Calculate averages
- averages = {}
- for name, values in benchmarks.items():
- if values:
- averages[name] = sum(values) / len(values)
-
- return averages
-
- # Analyze all benchmark files
- files = ['pipeline-benchmarks.txt', 'decoder-benchmarks.txt', 'memory-benchmarks.txt']
- all_results = {}
-
- for filename in files:
- results = analyze_benchmarks(f'performance-results/{filename}')
- all_results.update(results)
-
- # Generate performance summary
- print("=== Performance Analysis Summary ===")
- print(f"Commit: {sys.argv[1] if len(sys.argv) > 1 else 'unknown'}")
- print(f"Total benchmarks analyzed: {len(all_results)}")
- print()
-
- # Performance thresholds (in ns/op)
- thresholds = {
- 'BenchmarkPipelineTick8Wide': 5000, # 5μs threshold
- 'BenchmarkDecoderDecode': 1000, # 1μs threshold
- 'BenchmarkDecoderDecodeInto': 500, # 0.5μs threshold
- }
-
- # Check for regressions
- regressions = []
- improvements = []
-
- print("| Benchmark | Performance (ns/op) | Status |")
- print("|-----------|-------------------|--------|")
-
- for name, avg_time in sorted(all_results.items()):
- threshold = thresholds.get(name, 10000) # Default 10μs threshold
-
- if avg_time > threshold:
- status = "⚠️ REGRESSION"
- regressions.append(name)
- elif avg_time < threshold * 0.7: # 30% better than threshold
- status = "✅ IMPROVEMENT"
- improvements.append(name)
- else:
- status = "✅ NORMAL"
-
- print(f"| {name} | {avg_time:.1f} | {status} |")
-
- print()
-
- # Summary
- if regressions:
- print(f"🚨 PERFORMANCE REGRESSIONS DETECTED: {len(regressions)}")
- for name in regressions:
- print(f" - {name}: {all_results[name]:.1f} ns/op")
- sys.exit(1)
- elif improvements:
- print(f"🚀 PERFORMANCE IMPROVEMENTS DETECTED: {len(improvements)}")
- for name in improvements:
- print(f" - {name}: {all_results[name]:.1f} ns/op")
- else:
- print("✅ No significant performance changes detected")
-
- EOF
-
- python3 performance-results/analysis.py "${{ github.sha }}" | tee performance-results/analysis-summary.txt
-
- - name: Generate performance report
- if: always()
- run: |
- cat > performance-results/PERFORMANCE_REPORT.md << 'HEADER'
- # Performance Monitoring Report
-
- **Date:** $(date -u)
- **Commit:** ${{ github.sha }}
- **Branch:** ${{ github.ref_name }}
- **Trigger:** ${{ github.event_name }}
-
- ## Optimization Status
-
- This report validates the performance optimizations implemented in Issue #487:
- - ✅ Instruction decoder memory allocation optimization
- - ✅ Branch predictor reuse enhancement
- - ✅ Critical path bottleneck elimination
-
- ## Performance Results
-
- HEADER
-
- # Add benchmark results
- if [ -f performance-results/analysis-summary.txt ]; then
- echo "" >> performance-results/PERFORMANCE_REPORT.md
- echo "### Benchmark Analysis" >> performance-results/PERFORMANCE_REPORT.md
- echo '```' >> performance-results/PERFORMANCE_REPORT.md
- cat performance-results/analysis-summary.txt >> performance-results/PERFORMANCE_REPORT.md
- echo '```' >> performance-results/PERFORMANCE_REPORT.md
- fi
-
- # Add detailed benchmark data
- echo "" >> performance-results/PERFORMANCE_REPORT.md
- echo "### Detailed Results" >> performance-results/PERFORMANCE_REPORT.md
- echo "" >> performance-results/PERFORMANCE_REPORT.md
-
- for file in pipeline-benchmarks.txt decoder-benchmarks.txt memory-benchmarks.txt; do
- if [ -f "performance-results/$file" ]; then
- echo "#### ${file}" >> performance-results/PERFORMANCE_REPORT.md
- echo '```' >> performance-results/PERFORMANCE_REPORT.md
- grep 'Benchmark' "performance-results/$file" >> performance-results/PERFORMANCE_REPORT.md
- echo '```' >> performance-results/PERFORMANCE_REPORT.md
- echo "" >> performance-results/PERFORMANCE_REPORT.md
- fi
- done
-
- # Add optimization impact summary
- cat >> performance-results/PERFORMANCE_REPORT.md << 'FOOTER'
-
- ## Optimization Impact Assessment
-
- ### Success Metrics (from Issue #487):
- - **Target**: 50-80% reduction in calibration iteration time
- - **Approach**: Data-driven optimization based on profiling results
- - **Focus**: Critical path optimization while preserving accuracy
-
- ### Implementation Status:
- - ✅ **Critical Path Analysis**: Bottlenecks identified via systematic profiling
- - ✅ **Memory Optimization**: Decoder allocation hotspot eliminated
- - ✅ **Performance Monitoring**: CI integration for regression detection
- - ✅ **Validation Framework**: Automated optimization impact measurement
-
- ### Next Steps:
- - Monitor performance trends across commits
- - Validate calibration workflow speed improvements
- - Extend optimization to additional bottlenecks as identified
-
- FOOTER
-
- - name: Upload performance results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: performance-monitoring-results-${{ github.sha }}
- path: performance-results/
- retention-days: 30
-
- - name: Comment on PR (if applicable)
- if: github.event_name == 'pull_request'
- uses: actions/github-script@v7
- with:
- script: |
- const fs = require('fs');
- const path = 'performance-results/PERFORMANCE_REPORT.md';
-
- if (fs.existsSync(path)) {
- const report = fs.readFileSync(path, 'utf8');
-
- github.rest.issues.createComment({
- issue_number: context.issue.number,
- owner: context.repo.owner,
- repo: context.repo.repo,
- body: `## 🚀 Performance Monitoring Results\n\n${report}`
- });
- }
-
- - name: Check performance regression status
- run: |
- # This step will fail the workflow if regressions are detected
- # The analysis.py script exits with code 1 if regressions are found
- if grep -q "PERFORMANCE REGRESSIONS DETECTED" performance-results/analysis-summary.txt; then
- echo "❌ Performance regressions detected - failing the build"
- cat performance-results/analysis-summary.txt
- exit 1
- else
- echo "✅ No performance regressions detected"
- fi
\ No newline at end of file
diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index e606aa6..3661607 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -3,8 +3,6 @@ name: Performance Regression Detection
on:
pull_request:
types: [opened, synchronize, ready_for_review]
- push:
- branches: [main]
jobs:
performance-regression:
diff --git a/.github/workflows/polybench-calibration.yml b/.github/workflows/polybench-calibration.yml
deleted file mode 100644
index 4caaf59..0000000
--- a/.github/workflows/polybench-calibration.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: PolyBench Hardware Calibration
-
-on:
- workflow_dispatch:
-
-jobs:
- calibrate:
- name: PolyBench Linear Regression Calibration on Apple Silicon
- runs-on: macos-14
- timeout-minutes: 45
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.12'
-
- - name: Install Python dependencies
- run: pip install numpy scipy
-
- - name: Verify ARM64 architecture
- run: |
- ARCH=$(uname -m)
- echo "Architecture: $ARCH"
- if [ "$ARCH" != "arm64" ]; then
- echo "ERROR: This workflow requires ARM64 (Apple Silicon)"
- exit 1
- fi
-
- - name: Run PolyBench calibration
- run: |
- cd benchmarks/native
- python3 polybench_calibration.py \
- --runs 15 \
- --output polybench_calibration_results.json
- timeout-minutes: 40
-
- - name: Display results
- if: always()
- run: |
- echo "## PolyBench Calibration Results" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
- if [ -f benchmarks/native/polybench_calibration_results.json ]; then
- echo '```json' >> $GITHUB_STEP_SUMMARY
- cat benchmarks/native/polybench_calibration_results.json >> $GITHUB_STEP_SUMMARY
- echo '```' >> $GITHUB_STEP_SUMMARY
- else
- echo "Calibration results not found." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Upload calibration results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: polybench-calibration-results
- path: benchmarks/native/polybench_calibration_results.json
- retention-days: 90
diff --git a/.github/workflows/polybench-segmented.yml b/.github/workflows/polybench-segmented.yml
deleted file mode 100644
index c85bd67..0000000
--- a/.github/workflows/polybench-segmented.yml
+++ /dev/null
@@ -1,217 +0,0 @@
-name: PolyBench Segmented Testing
-
-on:
- workflow_dispatch:
- push:
- branches: [main]
- paths:
- - 'benchmarks/polybench_test.go'
- - 'benchmarks/timing_harness.go'
- - 'timing/**'
-
-jobs:
- polybench-group-1:
- name: PolyBench Group 1 (ATAX, BiCG, Jacobi1D)
- runs-on: macos-14
- timeout-minutes: 30
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Verify PolyBench ELFs exist
- run: |
- echo "Checking required ELF files for Group 1..."
- ls -la benchmarks/polybench/atax_m2sim.elf
- ls -la benchmarks/polybench/bicg_m2sim.elf
- ls -la benchmarks/polybench/jacobi-1d_m2sim.elf
-
- - name: Run Group 1 tests
- run: |
- echo "Running PolyBench Group 1: ATAX, BiCG, Jacobi1D"
- go test -v -run "TestPolybenchATAX|TestPolybenchBiCG|TestPolybenchJacobi1D" -count=1 -timeout 25m ./benchmarks/ 2>&1 | tee group1_output.txt
-
- - name: Upload Group 1 results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: polybench-group-1-results
- path: group1_output.txt
- retention-days: 7
-
- polybench-group-2:
- name: PolyBench Group 2 (MVT, GEMM)
- runs-on: macos-14
- timeout-minutes: 30
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Verify PolyBench ELFs exist
- run: |
- echo "Checking required ELF files for Group 2..."
- ls -la benchmarks/polybench/mvt_m2sim.elf
- ls -la benchmarks/polybench/gemm_m2sim.elf
-
- - name: Run Group 2 tests
- run: |
- echo "Running PolyBench Group 2: MVT, GEMM"
- go test -v -run "TestPolybenchMVT|TestPolybenchGEMM" -count=1 -timeout 25m ./benchmarks/ 2>&1 | tee group2_output.txt
-
- - name: Upload Group 2 results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: polybench-group-2-results
- path: group2_output.txt
- retention-days: 7
-
- polybench-group-3:
- name: PolyBench Group 3 (2MM, 3MM)
- runs-on: macos-14
- timeout-minutes: 30
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Verify PolyBench ELFs exist
- run: |
- echo "Checking required ELF files for Group 3..."
- ls -la benchmarks/polybench/2mm_m2sim.elf
- ls -la benchmarks/polybench/3mm_m2sim.elf
-
- - name: Run Group 3 tests
- run: |
- echo "Running PolyBench Group 3: 2MM, 3MM"
- go test -v -run "TestPolybench2MM|TestPolybench3MM" -count=1 -timeout 25m ./benchmarks/ 2>&1 | tee group3_output.txt
-
- - name: Upload Group 3 results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: polybench-group-3-results
- path: group3_output.txt
- retention-days: 7
-
- consolidate-results:
- name: Consolidate PolyBench Results
- runs-on: ubuntu-latest
- needs: [polybench-group-1, polybench-group-2, polybench-group-3]
- if: always()
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Download all group results
- uses: actions/download-artifact@v4
- with:
- path: group-results
-
- - name: Consolidate and analyze results
- run: |
- echo "Consolidating results from all PolyBench groups..."
-
- # Combine all output files
- cat group-results/polybench-group-1-results/group1_output.txt > consolidated_polybench.txt || echo "Group 1 results missing"
- cat group-results/polybench-group-2-results/group2_output.txt >> consolidated_polybench.txt || echo "Group 2 results missing"
- cat group-results/polybench-group-3-results/group3_output.txt >> consolidated_polybench.txt || echo "Group 3 results missing"
-
- # Extract CPI data
- python3 - <<'PYEOF'
- import json
- import re
- import os
-
- results = {}
- try:
- with open("consolidated_polybench.txt") as f:
- for line in f:
- if "CPI=" not in line:
- continue
- match = re.search(r'(polybench_\w+):\s+cycles=(\d+),\s+insts=(\d+),\s+CPI=([\d.]+)', line)
- if match:
- name = match.group(1)
- cycles = int(match.group(2))
- insts = int(match.group(3))
- cpi = float(match.group(4))
- short_name = name.replace("polybench_", "")
- if short_name == "jacobi1d":
- short_name = "jacobi-1d"
- results[short_name] = {
- "sim_name": name,
- "cycles": cycles,
- "instructions": insts,
- "cpi": cpi,
- }
- except FileNotFoundError:
- print("WARNING: No consolidated results file found")
-
- output = {
- "source": "polybench_segmented_testing",
- "benchmarks_run": len(results),
- "results": results,
- "segmented_execution": True,
- "groups_completed": []
- }
-
- if os.path.exists("group-results/polybench-group-1-results/group1_output.txt"):
- output["groups_completed"].append("group-1")
- if os.path.exists("group-results/polybench-group-2-results/group2_output.txt"):
- output["groups_completed"].append("group-2")
- if os.path.exists("group-results/polybench-group-3-results/group3_output.txt"):
- output["groups_completed"].append("group-3")
-
- with open("polybench_segmented_results.json", "w") as f:
- json.dump(output, f, indent=2)
-
- print(json.dumps(output, indent=2))
- print(f"\nSegmented testing results: {len(results)} benchmarks from {len(output['groups_completed'])} groups")
- PYEOF
-
- - name: Upload consolidated results
- uses: actions/upload-artifact@v4
- with:
- name: polybench-segmented-consolidated
- path: |
- polybench_segmented_results.json
- consolidated_polybench.txt
- retention-days: 30
-
- - name: Post segmented testing summary
- if: always()
- run: |
- echo "## PolyBench Segmented Testing Results" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f polybench_segmented_results.json ]; then
- python3 -c "
- import json
- d = json.load(open('polybench_segmented_results.json'))
- print(f'**Benchmarks measured:** {d[\"benchmarks_run\"]}/7 (from {len(d[\"groups_completed\"])} groups)')
- print(f'**Groups completed:** {\", \".join(d[\"groups_completed\"])}')
- print()
- if d['results']:
- print('| Benchmark | Cycles | Instructions | CPI |')
- print('|-----------|--------|--------------|-----|')
- for name, r in sorted(d['results'].items()):
- print(f'| {name} | {r[\"cycles\"]} | {r[\"instructions\"]} | {r[\"cpi\"]:.3f} |')
- else:
- print('No benchmark results extracted.')
- " >> $GITHUB_STEP_SUMMARY
- else
- echo "Segmented testing consolidation failed." >> $GITHUB_STEP_SUMMARY
- fi
diff --git a/.github/workflows/polybench-sim.yml b/.github/workflows/polybench-sim.yml
deleted file mode 100644
index 8d5f794..0000000
--- a/.github/workflows/polybench-sim.yml
+++ /dev/null
@@ -1,152 +0,0 @@
-name: PolyBench Simulation Measurements
-
-on:
- workflow_dispatch:
- push:
- branches: [main]
- paths:
- - 'benchmarks/polybench_test.go'
- - 'benchmarks/timing_harness.go'
- - 'timing/**'
-
-concurrency:
- group: polybench-sim-${{ github.ref }}
- cancel-in-progress: true
-
-jobs:
- polybench-sim:
- name: Run PolyBench Timing Simulations
- runs-on: macos-14 # ARM runner required for ARM64 ELFs
- timeout-minutes: 60
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.25'
-
- - name: Verify PolyBench ELFs exist
- run: |
- echo "Checking PolyBench ELF files..."
- ls -la benchmarks/polybench/*.elf
- echo ""
- echo "ELF count: $(ls benchmarks/polybench/*.elf 2>/dev/null | wc -l)"
-
- - name: Run PolyBench timing simulations
- id: sim
- run: |
- echo "Running PolyBench benchmarks individually..."
- echo ""
-
- # Run each test individually with its own timeout so completed
- # tests produce CPI output even if later tests timeout.
- TESTS=(
- TestPolybenchATAX
- TestPolybenchBiCG
- TestPolybenchMVT
- TestPolybenchJacobi1D
- TestPolybenchGEMM
- TestPolybench2MM
- TestPolybench3MM
- )
-
- > polybench_output.txt
-
- for TEST in "${TESTS[@]}"; do
- echo "--- Running $TEST ---"
- if go test -v -run "^${TEST}$" -count=1 -timeout 8m ./benchmarks/ 2>&1 | tee -a polybench_output.txt; then
- echo "--- $TEST completed ---"
- else
- echo "--- $TEST failed or timed out ---"
- fi
- echo ""
- done
-
- echo ""
- echo "=== Raw CPI Output ==="
- grep "CPI=" polybench_output.txt || echo "No CPI lines found"
-
- - name: Extract CPI results as JSON
- if: always()
- run: |
- python3 - <<'PYEOF'
- import json
- import re
- import sys
-
- results = {}
- with open("polybench_output.txt") as f:
- for line in f:
- # Match lines like: polybench_atax: cycles=1234, insts=5678, CPI=1.234, ...
- if "CPI=" not in line:
- continue
- # Extract benchmark name and CPI
- match = re.search(r'(polybench_\w+):\s+cycles=(\d+),\s+insts=(\d+),\s+CPI=([\d.]+)', line)
- if match:
- name = match.group(1)
- cycles = int(match.group(2))
- insts = int(match.group(3))
- cpi = float(match.group(4))
- # Map polybench_X -> X for calibration naming
- short_name = name.replace("polybench_", "")
- # Fix jacobi1d -> jacobi-1d
- if short_name == "jacobi1d":
- short_name = "jacobi-1d"
- results[short_name] = {
- "sim_name": name,
- "cycles": cycles,
- "instructions": insts,
- "cpi": cpi,
- }
-
- output = {
- "source": "polybench_timing_simulation",
- "benchmarks_run": len(results),
- "results": results,
- }
-
- with open("polybench_sim_cpis.json", "w") as f:
- json.dump(output, f, indent=2)
-
- print(json.dumps(output, indent=2))
-
- if len(results) == 0:
- print("\nWARNING: No PolyBench CPI results extracted!", file=sys.stderr)
- # Exit 0 so partial results are still uploaded
- else:
- print(f"\nSuccessfully extracted {len(results)} PolyBench CPI values")
- PYEOF
-
- - name: Post results summary
- if: always()
- run: |
- echo "## PolyBench Simulation Results" >> $GITHUB_STEP_SUMMARY
- echo "" >> $GITHUB_STEP_SUMMARY
-
- if [ -f polybench_sim_cpis.json ]; then
- python3 -c "
- import json
- d = json.load(open('polybench_sim_cpis.json'))
- print(f'**Benchmarks measured:** {d[\"benchmarks_run\"]}/7')
- print()
- print('| Benchmark | Cycles | Instructions | CPI |')
- print('|-----------|--------|--------------|-----|')
- for name, r in sorted(d['results'].items()):
- print(f'| {name} | {r[\"cycles\"]} | {r[\"instructions\"]} | {r[\"cpi\"]:.3f} |')
- " >> $GITHUB_STEP_SUMMARY
- else
- echo "No results generated." >> $GITHUB_STEP_SUMMARY
- fi
-
- - name: Upload CPI results
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: polybench-sim-results
- path: |
- polybench_sim_cpis.json
- polybench_output.txt
- retention-days: 90
diff --git a/.github/workflows/spec-bench.yml b/.github/workflows/spec-bench.yml
index cb749c0..72c37d7 100644
--- a/.github/workflows/spec-bench.yml
+++ b/.github/workflows/spec-bench.yml
@@ -5,10 +5,7 @@ name: SPEC Benchmark Daily
# Not blocking PRs - informational only
on:
- schedule:
- # Run at 6 AM UTC daily (1 AM EST)
- - cron: '0 6 * * *'
- workflow_dispatch: # Allow manual triggering
+ workflow_dispatch:
inputs:
benchmark:
description: 'Specific benchmark to run (blank = all available)'