simonw · simonw · Oct 20, 2025 · Oct 21, 2025
diff --git a/build.sh b/build.sh
@@ -8,12 +8,17 @@ echo "=== Building tools.simonwillison.net for Cloudflare Pages ==="
 
 # Install Python dependencies
 echo "Installing Python dependencies..."
-pip install --quiet markdown
+pip install --quiet markdown shot-scraper
+# shot-scraper drives a Playwright-managed browser that pip does not download
+shot-scraper install
 
 # Run Python build scripts (but NOT write_docs.py which generates LLM descriptions)
 echo "Gathering links and metadata..."
 python gather_links.py
 
+echo "Generating screenshots..."
+python generate_screenshots.py
+
 echo "Building colophon page..."
 python build_colophon.py
 

diff --git a/build_colophon.py b/build_colophon.py
@@ -5,6 +5,36 @@
 import html
 from pathlib import Path
 import markdown
+import hashlib
+
+
+def get_file_hash(file_path):
+    """Return the hex SHA-256 digest of the bytes stored at file_path."""
+    digest = hashlib.sha256()
+    with open(file_path, "rb") as stream:
+        while chunk := stream.read(4096):  # 4 KiB at a time until EOF
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def get_screenshot_path(page_name):
+    """Return the relative URL of the screenshot for an HTML page, or None.
+
+    The image is looked up at screenshots/<stem>.<sha256 of the page>.jpeg, so
+    a screenshot only matches the exact page content it was named after.
+    None is returned when the page or that screenshot file does not exist.
+    """
+    html_path = Path(page_name)
+    if not html_path.exists():
+        return None
+    # The content hash is part of the expected screenshot filename
+    file_hash = get_file_hash(html_path)
+    screenshot_path = Path("screenshots") / f"{html_path.stem}.{file_hash}.jpeg"
+    if not screenshot_path.exists():
+        return None
+    # Callers embed this in HTML, so emit forward slashes on every platform;
+    # str() would produce backslashes on Windows.
+    return screenshot_path.as_posix()
 
 
 def format_commit_message(message):
@@ -99,6 +129,15 @@ def get_most_recent_date(page_data):
             margin-bottom: 2rem;
             border-bottom: 1px solid #f0f0f0;
             padding-bottom: 1rem;
+            overflow: auto;
+        }
+        .tool-screenshot {
+            float: right;
+            max-width: 50%;
+            margin-left: 1rem;
+            margin-bottom: 1rem;
+            border: 1px solid #ddd;
+            border-radius: 4px;
         }
         .tool-name {
             font-weight: bold;
@@ -210,9 +249,16 @@ def get_most_recent_date(page_data):
         commits = list(reversed(commits))
         commit_count = len(commits)
 
+        # Get screenshot path if available
+        screenshot_path = get_screenshot_path(page_name)
+        screenshot_html = ""
+        if screenshot_path:
+            screenshot_html = f'<img src="{screenshot_path}" alt="Screenshot of {page_name.replace(".html", "")}" class="tool-screenshot">'
+
         # Modified tool heading with the new structure
         html_content += f"""
     <div class="tool" id="{page_name}">
+        {screenshot_html}
         <div class="tool-name">
             <h2 class="heading">
                 <span class="hash-text"><a class="hashref" href="#{page_name}">#</a></span>

diff --git a/generate_screenshots.py b/generate_screenshots.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Generate screenshots of all HTML tools using shot-scraper."""
+import hashlib
+import subprocess
+import sys
+from pathlib import Path
+
+
+def get_file_hash(file_path):
+    """Return the hex SHA-256 digest of the bytes stored at file_path."""
+    digest = hashlib.sha256()
+    with open(file_path, "rb") as stream:
+        while chunk := stream.read(4096):  # 4 KiB at a time until EOF
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def generate_screenshots():
+    """Screenshot each HTML tool in the current directory into screenshots/.
+
+    Each image is named <stem>.<sha256 of the HTML file>.jpeg, and a page is
+    skipped when a file with that name already exists. Exits with status 1
+    if the shot-scraper CLI cannot be run; a failure on an individual page
+    is printed and the loop moves on to the next page.
+    """
+    screenshots_dir = Path("screenshots")
+    screenshots_dir.mkdir(exist_ok=True)
+
+    # Every *.html file here except index.html and colophon.html
+    html_files = [
+        f for f in Path(".").glob("*.html")
+        if f.name not in ("index.html", "colophon.html")
+    ]
+    print(f"Found {len(html_files)} HTML files to screenshot")
+
+    # Fail fast if the shot-scraper CLI is missing or exits with an error
+    try:
+        subprocess.run(
+            ["shot-scraper", "--version"], capture_output=True, check=True
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        print("Error: shot-scraper is not installed or not available")
+        print("Install it with: pip install shot-scraper")
+        sys.exit(1)
+
+    for html_file in sorted(html_files):
+        # The content hash in the name gives an edited page a new filename
+        file_hash = get_file_hash(html_file)
+        screenshot_name = f"{html_file.stem}.{file_hash}.jpeg"
+        screenshot_path = screenshots_dir / screenshot_name
+
+        # A file with this exact hash-based name is already current
+        if screenshot_path.exists():
+            print(f"Screenshot already exists: {screenshot_name}")
+            continue
+
+        print(f"Generating screenshot for {html_file.name}...")
+
+        command = [
+            "shot-scraper",
+            str(html_file),
+            "--width", "1024",
+            "--height", "768",
+            "--quality", "90",
+            "--output", str(screenshot_path),
+        ]
+        try:
+            subprocess.run(command, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            # Best effort: report the failure and carry on with the next page.
+            # errors="replace" keeps undecodable tool output from raising a
+            # UnicodeDecodeError inside this handler and aborting the run.
+            print(f"  Error generating screenshot for {html_file.name}: {e}")
+            print(f"  stdout: {e.stdout.decode(errors='replace')}")
+            print(f"  stderr: {e.stderr.decode(errors='replace')}")
+        else:
+            print(f"  Created: {screenshot_name}")
+
+    print("\nScreenshot generation complete!")
+
+
+if __name__ == "__main__":
+    generate_screenshots()
diff --git a/lib/README.md b/lib/README.md
@@ -0,0 +1,78 @@
+# Third-Party Libraries and Assets
+
+This directory contains third-party software components used in the SLOCCount web application.
+
+## WebPerl (v0.09-beta)
+
+**WebPerl** is a WebAssembly port of Perl 5, allowing Perl code to run in web browsers.
+
+- **Author**: Hauke Dämpfling (haukex@zero-g.net)
+- **Institution**: Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB), Berlin, Germany
+- **Copyright**: © 2018 Hauke Dämpfling
+- **Source**: https://github.com/haukex/webperl
+- **Website**: http://webperl.zero-g.net
+- **Version**: v0.09-beta (prebuilt release from March 3, 2019)
+
+### License
+
+WebPerl is dual-licensed under the same terms as Perl 5 itself:
+
+- **GNU General Public License** (GPL) version 1 or later, OR
+- **Artistic License** (which comes with Perl 5)
+
+You may choose either license. See `webperl/LICENSE_gpl.txt` and `webperl/LICENSE_artistic.txt` for full license texts.
+
+### Files Included
+
+- `webperl.js` - Main WebPerl loader
+- `emperl.js` - Emscripten-compiled Perl JavaScript
+- `emperl.wasm` - WebAssembly Perl binary
+- `emperl.data` - Perl runtime data files
+- `LICENSE_artistic.txt` - Artistic License text
+- `LICENSE_gpl.txt` - GNU GPL text
+
+---
+
+## SLOCCount
+
+**SLOCCount** (Source Lines of Code Count) is a suite of programs for counting physical source lines of code (SLOC) in large software systems.
+
+- **Original Author**: David A. Wheeler (dwheeler@dwheeler.com)
+- **Maintainer**: Jeff Licquia
+- **Copyright**: © 2001-2004 David A. Wheeler
+- **Source**: https://github.com/licquia/sloccount
+- **Commit Used**: `7220ff627334a8f646617fe0fa542d401fb5287e`
+
+### License
+
+SLOCCount is licensed under the **GNU General Public License version 2** (GPL-2.0).
+
+### Files Included
+
+- `sloccount-perl.zip` - Archive containing the Perl scripts from SLOCCount that implement the actual line counting algorithms
+
+The zip file includes various language-specific counting scripts (e.g., `python_count`, `c_count`, `perl_count`) and supporting utilities.
+
+---
+
+## Attribution
+
+This SLOCCount web application makes use of:
+
+1. **WebPerl** for running Perl in the browser via WebAssembly
+2. **SLOCCount Perl scripts** for accurate source line counting with comment filtering
+
+Both projects are used in accordance with their respective open-source licenses. No modifications have been made to the WebPerl runtime or SLOCCount scripts themselves.
+
+## Compliance Notes
+
+- WebPerl runtime files are distributed as-is from the official prebuilt v0.09-beta release
+- SLOCCount scripts bundled in `sloccount-perl.zip` come from the official repository at commit `7220ff6` (full hash listed under "SLOCCount" above)
+- License files are preserved in their original locations
+- This application is a web interface that uses these tools, not a modification of them
+
+## More Information
+
+- **WebPerl Documentation**: http://webperl.zero-g.net/using.html
+- **SLOCCount Documentation**: https://github.com/licquia/sloccount/blob/master/README
+- **COCOMO Cost Model**: The cost estimation feature uses the Basic COCOMO model as implemented in the original SLOCCount tool
diff --git a/lib/sloccount-perl.zip b/lib/sloccount-perl.zip