luccabb · luccabb · Jan 20, 2026 · Jan 22, 2026 · Jan 24, 2026 · Jan 26, 2026
@@ -0,0 +1,336 @@
+name: Stockfish Benchmark
+
+# Benchmarks are expensive, so they are triggered only by pull requests that
+# touch the engine, its opening book, or its dependency manifests.
+on:
+  pull_request:
+    paths:
+      - "moonfish/**"
+      - "opening_book/**"
+      - "pyproject.toml"
+      - "requirements.txt"
+
+# The workflow token reads the repository and writes to the pull request
+# (the jobs below post a results comment and add/remove reactions).
+permissions:
+  pull-requests: write
+  contents: read
+
+env:
+  # Keep git from downloading LFS objects; the opening book is fetched
+  # explicitly by the benchmark job instead.
+  GIT_LFS_SKIP_SMUDGE: "1"
+  # Absolute path of the opening book inside the checked-out workspace.
+  MOONFISH_OPENING_BOOK: ${{ github.workspace }}/opening_book/cerebellum.bin
+
+jobs:
+  react-start:
+    # Adds an "eyes" reaction to the pull request before the benchmark job
+    # (which depends on this one) starts.
+    if: github.event_name == 'pull_request'
+    runs-on: ubuntu-latest
+    steps:
+    - name: Add eyes reaction to PR
+      env:
+        GH_TOKEN: ${{ github.token }}
+        REACTIONS_URL: repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions
+      run: |
+        # Best effort: a failed reaction call must not fail the job.
+        gh api "$REACTIONS_URL" -f content='eyes' --silent || true
+
+  benchmark:
+    needs: react-start
+    runs-on: ubuntu-latest
+    env:
+      UV_SYSTEM_PYTHON: 1
+    strategy:
+      # Let every chunk/skill combination run to completion even when a
+      # sibling combination fails.
+      fail-fast: false
+      matrix:
+        # 5 parallel jobs, 20 rounds each = 100 total per skill level
+        chunk:
+          - 0
+          - 1
+          - 2
+          - 3
+          - 4
+        # Test against multiple skill levels
+        skill_level:
+          - 3
+          - 4
+          - 5
+
+    steps:
+    # LFS objects are not pulled here; the opening book is handled by the
+    # "Ensure opening book" step.
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        lfs: false
+
+    # Guarantees that opening_book/cerebellum.bin is the real book rather than
+    # a git-lfs pointer file, downloading it from the release when necessary.
+    - name: Ensure opening book
+      run: |
+        set -euo pipefail
+
+        BOOK=opening_book/cerebellum.bin
+        BOOK_URL="https://github.com/luccabb/moonfish/releases/download/v1.0.0/cerebellum.bin"
+
+        if [ -f "$BOOK" ]; then
+          # A file whose first line mentions "git-lfs" is treated as an LFS
+          # pointer and replaced by the downloaded book.
+          if head -1 "$BOOK" | grep -q "git-lfs"; then
+            echo "LFS pointer detected; downloading opening book..."
+            rm -f "$BOOK"
+          else
+            echo "Opening book already present."
+            exit 0
+          fi
+        fi
+
+        echo "Downloading full opening book from release..."
+        # The target directory may be absent if the book was never checked in.
+        mkdir -p "$(dirname "$BOOK")"
+        # --fail: an HTTP error aborts this step instead of saving the error
+        #         page as the "book" and failing later in a different step.
+        # --retry: ride out transient network failures.
+        curl --fail --location --retry 3 --output "$BOOK" "$BOOK_URL"
+
+    # Sanity check on the file size: anything under 10 MB is rejected as a
+    # probable LFS pointer rather than the real book.
+    - name: Verify opening book
+      run: |
+        ls -lh opening_book/cerebellum.bin
+        python - <<'PY'
+        import os
+        import sys
+
+        MIN_BOOK_BYTES = 10_000_000
+
+        book_bytes = os.path.getsize("opening_book/cerebellum.bin")
+        print(f"opening book size: {book_bytes} bytes")
+        if book_bytes < MIN_BOOK_BYTES:
+            # sys.exit(str) writes the message to stderr and exits with status 1.
+            sys.exit("opening book too small; likely an LFS pointer")
+        PY
+
+    # uv installer with its cache keyed on requirements.txt.
+    - name: Install uv
+      uses: astral-sh/setup-uv@v5
+      with:
+        cache-dependency-glob: 'requirements.txt'
+        enable-cache: true
+
+    # The version is quoted so YAML does not read it as the float 3.1.
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+
+    # Project dependencies are installed through the repository Makefile.
+    - name: Install dependencies
+      run: make install
+
+    # Opens the book with python-chess and looks up the starting position;
+    # any exception raised by the script fails the step.
+    - name: Validate opening book with python-chess
+      run: |
+        python - <<'PY'
+        import chess.polyglot
+
+        start_position = chess.Board()
+        with chess.polyglot.MemoryMappedReader("opening_book/cerebellum.bin") as book:
+            first_entry = book.find(start_position)
+        print(f"book entry: {first_entry.move.uci()}")
+        PY
+
+    # Opponent engine, installed from the distribution package.
+    - name: Install Stockfish
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y stockfish
+
+    # Build tooling and Qt5 development packages for cutechess-cli.
+    - name: Install cutechess-cli dependencies
+      run: |
+        sudo apt-get install -y \
+          cmake \
+          qt5-qmake \
+          qtbase5-dev \
+          qtbase5-dev-tools \
+          libqt5svg5-dev
+
+    # Restores a previously built cutechess-cli binary so the source build
+    # below can be skipped on a cache hit.
+    # NOTE(review): the key has no OS/image component, so a binary built on an
+    # older runner image would be reused after ubuntu-latest moves - confirm
+    # this is acceptable.
+    # NOTE(review): the cached path is under /usr/local/bin while the build
+    # step needs sudo to write there - verify that cache restore can write to
+    # that location as the runner user.
+    - name: Cache cutechess-cli
+      id: cache-cutechess
+      uses: actions/cache@v4
+      with:
+        path: /usr/local/bin/cutechess-cli
+        key: cutechess-cli-1.4.0
+
+    # Cache miss only: shallow-clone the v1.4.0 tag, build it with CMake in
+    # /tmp/cutechess/build, and copy the CLI binary into /usr/local/bin.
+    - name: Build cutechess-cli
+      if: steps.cache-cutechess.outputs.cache-hit != 'true'
+      run: |
+        git clone --depth 1 --branch v1.4.0 https://github.com/cutechess/cutechess.git /tmp/cutechess
+        cd /tmp/cutechess
+        mkdir build && cd build
+        cmake ..
+        make -j$(nproc)
+        sudo cp cutechess-cli /usr/local/bin/
+
+    # Builds the engine through the Makefile target; presumably this produces
+    # ./dist/moonfish, the binary the benchmark step launches - TODO confirm.
+    - name: Build moonfish binary
+      run: make build-lichess
+
+    # Plays this matrix cell's share of games (moonfish vs. Stockfish at the
+    # given skill level) with cutechess-cli, writing a PGN and a log file named
+    # after the skill level and chunk.
+    - name: Run Stockfish benchmark
+      run: |
+        # The default step shell does not enable pipefail, so without this the
+        # pipe into tee would report tee's status and hide a cutechess-cli failure.
+        set -o pipefail
+
+        CHUNK=${{ matrix.chunk }}
+        SKILL=${{ matrix.skill_level }}
+        ROUNDS_PER_CHUNK=20
+        # Single source of truth for the value that is both logged and passed
+        # to cutechess-cli (the log previously claimed $(nproc) while 20 was used).
+        CONCURRENCY=20
+        SEED=$((CHUNK * 1000 + SKILL * 100 + 42))  # Different seed per chunk/skill for opening variety
+        OUT="benchmark-skill$SKILL-chunk$CHUNK"
+
+        echo "Running moonfish vs Stockfish benchmark (chunk $CHUNK, skill $SKILL)..."
+        echo "Stockfish skill level: $SKILL"
+        echo "Moonfish: 60s per move, Stockfish: 60+5 time control"
+        echo "Rounds: $ROUNDS_PER_CHUNK, Concurrency: $CONCURRENCY (runner cores: $(nproc)), Seed: $SEED"
+        echo ""
+
+        cutechess-cli \
+          -engine name=moonfish cmd=./dist/moonfish dir=. proto=uci tc=inf st=60 timemargin=10000 \
+          -engine name=stockfish cmd=stockfish proto=uci option.Skill\ Level=$SKILL option.Threads=1 tc=60+5 timemargin=10000 \
+          -rounds $ROUNDS_PER_CHUNK \
+          -repeat \
+          -concurrency $CONCURRENCY \
+          -pgnout "$OUT.pgn" \
+          -srand $SEED \
+          -recover \
+          2>&1 | tee "$OUT.log"
+
+        echo ""
+        echo "=== Benchmark Results (Skill $SKILL, Chunk $CHUNK) ==="
+        tail -20 "$OUT.log"
+
+    # Turns this chunk's log and PGN into a KEY=VALUE results file
+    # (results-skill<S>-chunk<C>.txt) that the aggregate job sums up.
+    - name: Parse results
+      run: |
+        CHUNK=${{ matrix.chunk }}
+        SKILL=${{ matrix.skill_level }}
+        BASE="benchmark-skill$SKILL-chunk$CHUNK"
+        PGN="$BASE.pgn"
+        LOG="$BASE.log"
+
+        # Extract the last score line from the log; it carries the totals as
+        # "<wins> - <losses> - <draws>" after the colon.
+        SCORE=$(grep "Score of moonfish vs stockfish:" "$LOG" | tail -1 || true)
+        SCORE_RE=': ([0-9]+) - ([0-9]+) - ([0-9]+)'
+        if [[ "$SCORE" =~ $SCORE_RE ]]; then
+          WINS=${BASH_REMATCH[1]}
+          LOSSES=${BASH_REMATCH[2]}
+          DRAWS=${BASH_REMATCH[3]}
+        else
+          # Never write empty values: they would break the arithmetic in the
+          # aggregate job. Record zeros and flag the problem instead.
+          echo "::warning::No score line found in $LOG; recording 0-0-0 for skill $SKILL chunk $CHUNK"
+          WINS=0
+          LOSSES=0
+          DRAWS=0
+        fi
+
+        # count_games RESULT COLOR
+        #   Prints how many games have the PGN tag Result "RESULT" with moonfish
+        #   named in the COLOR ("White"/"Black") tag. Relies on that tag
+        #   appearing within the five lines preceding the Result tag.
+        #   grep -c already prints 0 when nothing matches but exits 1, so the
+        #   fallback must be "|| true"; "|| echo 0" would print a second "0".
+        count_games() {
+          grep -B5 "Result \"$1\"" "$PGN" | grep -c "$2 \"moonfish\"" || true
+        }
+
+        # Moonfish as White: wins/losses/draws
+        WHITE_WINS=$(count_games "1-0" White)
+        WHITE_LOSSES=$(count_games "0-1" White)
+        WHITE_DRAWS=$(count_games "1/2-1/2" White)
+
+        # Moonfish as Black: wins/losses/draws
+        BLACK_WINS=$(count_games "0-1" Black)
+        BLACK_LOSSES=$(count_games "1-0" Black)
+        BLACK_DRAWS=$(count_games "1/2-1/2" Black)
+
+        # Save detailed results
+        cat > "results-skill$SKILL-chunk$CHUNK.txt" << EOF
+        SKILL=$SKILL
+        WINS=$WINS
+        LOSSES=$LOSSES
+        DRAWS=$DRAWS
+        WHITE_WINS=$WHITE_WINS
+        WHITE_LOSSES=$WHITE_LOSSES
+        WHITE_DRAWS=$WHITE_DRAWS
+        BLACK_WINS=$BLACK_WINS
+        BLACK_LOSSES=$BLACK_LOSSES
+        BLACK_DRAWS=$BLACK_DRAWS
+        EOF
+
+        echo "Skill $SKILL, Chunk $CHUNK: W=$WINS L=$LOSSES D=$DRAWS (White: $WHITE_WINS-$WHITE_LOSSES-$WHITE_DRAWS, Black: $BLACK_WINS-$BLACK_LOSSES-$BLACK_DRAWS)"
+
+    # Runs even after a failed step (always()) so that whatever log, PGN or
+    # results file exists is still published for this matrix cell.
+    - name: Upload chunk results
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: benchmark-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}
+        path: |
+          benchmark-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}.pgn
+          benchmark-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}.log
+          results-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}.txt
+
+  aggregate:
+    runs-on: ubuntu-latest
+    needs: benchmark
+    # needs.benchmark.result is a single string for the whole matrix, and it
+    # is 'failure' as soon as any one chunk fails. Requiring 'success' would
+    # therefore discard every finished chunk because of one bad cell.
+    # Aggregate whenever the benchmark actually ran and the run was not
+    # cancelled; the aggregation step skips result files that are missing.
+    if: ${{ !cancelled() && needs.benchmark.result != 'skipped' }}
+    steps:
+    - uses: actions/checkout@v4
+
+    # Collect the artifacts of all matrix cells into the workspace root.
+    - name: Download all chunk results
+      uses: actions/download-artifact@v4
+      with:
+        pattern: benchmark-skill*-chunk*
+        merge-multiple: true
+
+    # Concatenates the per-chunk PGNs of each skill level into one
+    # benchmark-skill<S>-all.pgn file.
+    - name: Merge all PGN files
+      run: |
+        for SKILL in 3 4 5; do
+          MERGED="benchmark-skill$SKILL-all.pgn"
+          if ! cat benchmark-skill$SKILL-chunk*.pgn > "$MERGED" 2>/dev/null; then
+            echo "No PGN files for skill $SKILL"
+          fi
+        done
+
+    # Sums the per-chunk results files for each skill level and renders a
+    # Markdown report into pr-comment.md and the job's step summary.
+    - name: Aggregate results
+      run: |
+        echo "Aggregating results from all chunks..."
+
+        # field FILE KEY
+        #   Prints the value of a "KEY=<digits>" line in FILE, or 0 when the
+        #   key is missing or its value is not a plain non-negative integer.
+        #   Used instead of eval so artifact content is never executed and an
+        #   empty value can never break the arithmetic below.
+        field() {
+          local value
+          value=$(sed -nE "s/^[[:space:]]*$2=([0-9]+)[[:space:]]*\$/\1/p" "$1" | head -1)
+          echo "${value:-0}"
+        }
+
+        # pct WON PLAYED
+        #   Prints WON as a percentage of PLAYED with one decimal place and a
+        #   leading zero (e.g. 0.3, 50.0). Callers guarantee PLAYED > 0.
+        pct() {
+          awk -v won="$1" -v played="$2" 'BEGIN { printf "%.1f", won * 100 / played }'
+        }
+
+        # Build comment body
+        {
+          echo "## 🔬 Stockfish Benchmark Results"
+          echo ""
+
+          for SKILL in 3 4 5; do
+            # Initialize counters
+            TOTAL_WINS=0 TOTAL_LOSSES=0 TOTAL_DRAWS=0
+            TOTAL_WHITE_WINS=0 TOTAL_WHITE_LOSSES=0 TOTAL_WHITE_DRAWS=0
+            TOTAL_BLACK_WINS=0 TOTAL_BLACK_LOSSES=0 TOTAL_BLACK_DRAWS=0
+
+            for f in results-skill$SKILL-chunk*.txt; do
+              # An unmatched glob stays literal; skip it (and any non-file).
+              [ -f "$f" ] || continue
+              TOTAL_WINS=$((TOTAL_WINS + $(field "$f" WINS)))
+              TOTAL_LOSSES=$((TOTAL_LOSSES + $(field "$f" LOSSES)))
+              TOTAL_DRAWS=$((TOTAL_DRAWS + $(field "$f" DRAWS)))
+              TOTAL_WHITE_WINS=$((TOTAL_WHITE_WINS + $(field "$f" WHITE_WINS)))
+              TOTAL_WHITE_LOSSES=$((TOTAL_WHITE_LOSSES + $(field "$f" WHITE_LOSSES)))
+              TOTAL_WHITE_DRAWS=$((TOTAL_WHITE_DRAWS + $(field "$f" WHITE_DRAWS)))
+              TOTAL_BLACK_WINS=$((TOTAL_BLACK_WINS + $(field "$f" BLACK_WINS)))
+              TOTAL_BLACK_LOSSES=$((TOTAL_BLACK_LOSSES + $(field "$f" BLACK_LOSSES)))
+              TOTAL_BLACK_DRAWS=$((TOTAL_BLACK_DRAWS + $(field "$f" BLACK_DRAWS)))
+            done
+
+            TOTAL=$((TOTAL_WINS + TOTAL_LOSSES + TOTAL_DRAWS))
+            WHITE_TOTAL=$((TOTAL_WHITE_WINS + TOTAL_WHITE_LOSSES + TOTAL_WHITE_DRAWS))
+            BLACK_TOTAL=$((TOTAL_BLACK_WINS + TOTAL_BLACK_LOSSES + TOTAL_BLACK_DRAWS))
+
+            echo "### vs Stockfish Skill Level $SKILL"
+            echo ""
+            echo "| Metric | Wins | Losses | Draws | Total | Win % |"
+            echo "|--------|------|--------|-------|-------|-------|"
+
+            # Rows are emitted only for non-empty totals, which also keeps
+            # pct from dividing by zero.
+            if [ "$TOTAL" -gt 0 ]; then
+              WIN_RATE=$(pct "$TOTAL_WINS" "$TOTAL")
+              echo "| **Overall** | $TOTAL_WINS | $TOTAL_LOSSES | $TOTAL_DRAWS | $TOTAL | ${WIN_RATE}% |"
+            fi
+            if [ "$WHITE_TOTAL" -gt 0 ]; then
+              WHITE_WIN_RATE=$(pct "$TOTAL_WHITE_WINS" "$WHITE_TOTAL")
+              echo "| As White | $TOTAL_WHITE_WINS | $TOTAL_WHITE_LOSSES | $TOTAL_WHITE_DRAWS | $WHITE_TOTAL | ${WHITE_WIN_RATE}% |"
+            fi
+            if [ "$BLACK_TOTAL" -gt 0 ]; then
+              BLACK_WIN_RATE=$(pct "$TOTAL_BLACK_WINS" "$BLACK_TOTAL")
+              echo "| As Black | $TOTAL_BLACK_WINS | $TOTAL_BLACK_LOSSES | $TOTAL_BLACK_DRAWS | $BLACK_TOTAL | ${BLACK_WIN_RATE}% |"
+            fi
+
+            # Parse game endings (excluding checkmates, which are covered by win/loss stats)
+            PGN="benchmark-skill$SKILL-all.pgn"
+            if [ -f "$PGN" ]; then
+              # Take the text between the last ", " and the closing "}" of each
+              # PGN comment, drop the ones mentioning "mates", and tally the rest.
+              ENDINGS=$(grep -oE ', [^}]+\}' "$PGN" | sed 's/, //; s/}//' | grep -v 'mates' | sort | uniq -c | sort -rn)
+              if [ -n "$ENDINGS" ]; then
+                echo ""
+                echo "**Non-checkmate endings:**"
+                echo "$ENDINGS" | while read -r count ending; do
+                  echo "- $ending: $count"
+                done
+              fi
+            fi
+            echo ""
+          done
+
+          echo "<details><summary>Configuration</summary>"
+          echo ""
+          echo "- 5 chunks × 20 rounds × 3 skill levels = 300 total games"
+          echo "- Each opening played with colors reversed (-repeat) for fairness"
+          echo "- Moonfish: 60s per move"
+          echo "- Stockfish: 60+5 time control"
+          echo ""
+          echo "</details>"
+        } > pr-comment.md
+
+        # Also write to step summary
+        cat pr-comment.md >> "$GITHUB_STEP_SUMMARY"
+
+    # Posts the rendered report (pr-comment.md) as a new pull request comment.
+    - name: Comment on PR
+      if: github.event_name == 'pull_request'
+      env:
+        GH_TOKEN: ${{ github.token }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+      run: gh pr comment "$PR_NUMBER" --body-file pr-comment.md
+
+    # Swaps the workflow's "eyes" reaction on the pull request for a thumbs
+    # up. Every API call is best effort and never fails the job.
+    - name: Update PR reaction (eyes -> thumbs up)
+      if: github.event_name == 'pull_request'
+      env:
+        GH_TOKEN: ${{ github.token }}
+        REACTIONS_URL: repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions
+      run: |
+        # Remove eyes reaction - only the one(s) created with the workflow
+        # token (user github-actions[bot]); reactions left by people stay.
+        REACTIONS=$(gh api "$REACTIONS_URL" --jq '.[] | select(.content == "eyes" and .user.login == "github-actions[bot]") | .id' || true)
+        for ID in $REACTIONS; do
+          gh api -X DELETE "$REACTIONS_URL/$ID" --silent || true
+        done
+        # Add thumbs up
+        gh api "$REACTIONS_URL" -f content='+1' --silent || true
+
+    # Publishes the merged per-skill PGNs together with every per-chunk
+    # results file as a single artifact.
+    - name: Upload aggregated results
+      uses: actions/upload-artifact@v4
+      with:
+        path: |
+          benchmark-skill*-all.pgn
+          results-*.txt
+        name: benchmark-aggregated
@@ -4,6 +4,9 @@ on:
   pull_request:
     branches: [ master ]
 
+env:
+  GIT_LFS_SKIP_SMUDGE: 1
+
 jobs:
   test:
     runs-on: ${{ matrix.os }}
@@ -16,6 +19,8 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+      with:
+        lfs: false
 
     - name: Install uv
       uses: astral-sh/setup-uv@v5
@@ -55,6 +60,8 @@ jobs:
       UV_SYSTEM_PYTHON: 1
     steps:
       - uses: actions/checkout@v4
+        with:
+          lfs: false
 
       - name: Install uv
         uses: astral-sh/setup-uv@v5
@@ -81,6 +88,8 @@ jobs:
       UV_SYSTEM_PYTHON: 1
     steps:
       - uses: actions/checkout@v4
+        with:
+          lfs: false
 
       - name: Install uv
         uses: astral-sh/setup-uv@v5
@@ -107,6 +116,8 @@ jobs:
       UV_SYSTEM_PYTHON: 1
     steps:
       - uses: actions/checkout@v4
+        with:
+          lfs: false
 
       - name: Install uv
         uses: astral-sh/setup-uv@v5