Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ echo "=== Building tools.simonwillison.net for Cloudflare Pages ==="

# Install Python dependencies
echo "Installing Python dependencies..."
pip install --quiet markdown
pip install --quiet markdown shot-scraper

# Run Python build scripts (but NOT write_docs.py which generates LLM descriptions)
echo "Gathering links and metadata..."
python gather_links.py

echo "Generating screenshots..."
python generate_screenshots.py

echo "Building colophon page..."
python build_colophon.py

Expand Down
46 changes: 46 additions & 0 deletions build_colophon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,36 @@
import html
from pathlib import Path
import markdown
import hashlib


def get_file_hash(file_path):
    """Return the hex-encoded SHA-256 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 4096-byte reads, so
    its contents are never held in memory all at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        while True:
            chunk = handle.read(4096)
            if chunk == b"":
                # An empty read signals end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()


def get_screenshot_path(page_name):
    """Return the relative path of the screenshot for an HTML page, or None.

    The expected screenshot file is ``screenshots/<stem>.<sha256>.jpeg``,
    where ``<stem>`` is the HTML file name without its extension and
    ``<sha256>`` is the hash of the HTML file's current contents. A
    screenshot made from different contents therefore never matches.

    Args:
        page_name: Path of the HTML file, resolved against the current
            working directory.

    Returns:
        The screenshot path as a forward-slash string, or None when
        ``page_name`` is not a regular file or no matching screenshot
        exists.
    """
    html_path = Path(page_name)
    # is_file() rather than exists(): a directory would pass exists() and
    # then make the hashing step fail when it tries to open it.
    if not html_path.is_file():
        return None

    # The content hash is part of the file name, so it must be recomputed.
    file_hash = get_file_hash(html_path)
    screenshot_path = Path("screenshots") / f"{html_path.stem}.{file_hash}.jpeg"

    if not screenshot_path.is_file():
        return None

    # The result is embedded in an <img src="...">, so it must use forward
    # slashes on every platform; str() would yield backslashes on Windows.
    return screenshot_path.as_posix()


def format_commit_message(message):
Expand Down Expand Up @@ -99,6 +129,15 @@ def get_most_recent_date(page_data):
margin-bottom: 2rem;
border-bottom: 1px solid #f0f0f0;
padding-bottom: 1rem;
overflow: auto;
}
.tool-screenshot {
float: right;
max-width: 50%;
margin-left: 1rem;
margin-bottom: 1rem;
border: 1px solid #ddd;
border-radius: 4px;
}
.tool-name {
font-weight: bold;
Expand Down Expand Up @@ -210,9 +249,16 @@ def get_most_recent_date(page_data):
commits = list(reversed(commits))
commit_count = len(commits)

# Get screenshot path if available
screenshot_path = get_screenshot_path(page_name)
screenshot_html = ""
if screenshot_path:
screenshot_html = f'<img src="{screenshot_path}" alt="Screenshot of {page_name.replace(".html", "")}" class="tool-screenshot">'

# Modified tool heading with the new structure
html_content += f"""
<div class="tool" id="{page_name}">
{screenshot_html}
<div class="tool-name">
<h2 class="heading">
<span class="hash-text"><a class="hashref" href="#{page_name}">#</a></span>
Expand Down
83 changes: 83 additions & 0 deletions generate_screenshots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Generate screenshots of all HTML tools using shot-scraper."""
import hashlib
import subprocess
import sys
from pathlib import Path


def get_file_hash(file_path):
    """Compute the SHA-256 hex digest of a file's bytes.

    Reads the file in binary mode, 4096 bytes at a time, feeding each
    piece into the hash until a read comes back empty.
    """
    hasher = hashlib.sha256()
    with open(file_path, "rb") as stream:
        piece = stream.read(4096)
        while piece:
            hasher.update(piece)
            piece = stream.read(4096)
    return hasher.hexdigest()


def generate_screenshots():
    """Generate a JPEG screenshot for every HTML tool in the current directory.

    Each ``*.html`` file other than ``index.html`` and ``colophon.html`` is
    rendered with the ``shot-scraper`` command-line tool into
    ``screenshots/<stem>.<sha256>.jpeg``, where the hash is that of the HTML
    file's contents. A file whose screenshot already exists is skipped, so
    only new or changed pages are rendered.

    A failure to render one page is reported and the loop moves on to the
    next page; it does not abort the run.

    Raises:
        SystemExit: With status 1 when ``shot-scraper --version`` cannot be
            run successfully.
    """
    # Create screenshots directory if it doesn't exist
    screenshots_dir = Path("screenshots")
    screenshots_dir.mkdir(exist_ok=True)

    # Find all HTML files except index.html and colophon.html
    excluded_names = {"index.html", "colophon.html"}
    html_files = [
        f for f in Path(".").glob("*.html")
        if f.name not in excluded_names
    ]

    print(f"Found {len(html_files)} HTML files to screenshot")

    # Check if shot-scraper is available before attempting any page
    try:
        subprocess.run(
            ["shot-scraper", "--version"],
            capture_output=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: shot-scraper is not installed or not available")
        print("Install it with: pip install shot-scraper")
        sys.exit(1)

    # Sorted so the log output is in a stable order from run to run
    for html_file in sorted(html_files):
        # The content hash is part of the file name, so an edited page
        # gets a new screenshot name.
        file_hash = get_file_hash(html_file)
        screenshot_name = f"{html_file.stem}.{file_hash}.jpeg"
        screenshot_path = screenshots_dir / screenshot_name

        # Skip if screenshot already exists
        if screenshot_path.exists():
            print(f"Screenshot already exists: {screenshot_name}")
            continue

        print(f"Generating screenshot for {html_file.name}...")

        # Generate the screenshot using shot-scraper
        try:
            subprocess.run(
                [
                    "shot-scraper",
                    str(html_file),
                    "--width", "1024",
                    "--height", "768",
                    "--quality", "90",
                    "--output", str(screenshot_path),
                ],
                check=True,
                capture_output=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"  Error generating screenshot for {html_file.name}: {e}")
            # errors="replace": the captured output is arbitrary bytes, and a
            # strict decode raising here would abort the remaining pages.
            print(f"  stdout: {e.stdout.decode(errors='replace')}")
            print(f"  stderr: {e.stderr.decode(errors='replace')}")
        else:
            print(f"  Created: {screenshot_name}")

    print("\nScreenshot generation complete!")


# Run the screenshot generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    generate_screenshots()
78 changes: 78 additions & 0 deletions lib/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Third-Party Libraries and Assets

This directory contains third-party software components used in the SLOCCount web application.

## WebPerl (v0.09-beta)

**WebPerl** is a WebAssembly port of Perl 5, allowing Perl code to run in web browsers.

- **Author**: Hauke Dämpfling (haukex@zero-g.net)
- **Institution**: Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB), Berlin, Germany
- **Copyright**: © 2018 Hauke Dämpfling
- **Source**: https://github.com/haukex/webperl
- **Website**: http://webperl.zero-g.net
- **Version**: v0.09-beta (prebuilt release from March 3, 2019)

### License

WebPerl is dual-licensed under the same terms as Perl 5 itself:

- **GNU General Public License** (GPL) version 1 or later, OR
- **Artistic License** (which comes with Perl 5)

You may choose either license. See `webperl/LICENSE_gpl.txt` and `webperl/LICENSE_artistic.txt` for full license texts.

### Files Included

- `webperl.js` - Main WebPerl loader
- `emperl.js` - Emscripten-compiled Perl JavaScript
- `emperl.wasm` - WebAssembly Perl binary
- `emperl.data` - Perl runtime data files
- `LICENSE_artistic.txt` - Artistic License text
- `LICENSE_gpl.txt` - GNU GPL text

---

## SLOCCount

**SLOCCount** (Source Lines of Code Count) is a suite of programs for counting physical source lines of code (SLOC) in large software systems.

- **Original Author**: David A. Wheeler (dwheeler@dwheeler.com)
- **Maintainer**: Jeff Licquia
- **Copyright**: © 2001-2004 David A. Wheeler
- **Source**: https://github.com/licquia/sloccount
- **Commit Used**: `7220ff627334a8f646617fe0fa542d401fb5287e`

### License

SLOCCount is licensed under the **GNU General Public License version 2** (GPL-2.0).

### Files Included

- `sloccount-perl.zip` - Archive containing the Perl scripts from SLOCCount that implement the actual line counting algorithms

The zip file includes various language-specific counting scripts (e.g., `python_count`, `c_count`, `perl_count`) and supporting utilities.

---

## Attribution

This SLOCCount web application makes use of:

1. **WebPerl** for running Perl in the browser via WebAssembly
2. **SLOCCount Perl scripts** for accurate source line counting with comment filtering

Both projects are used in accordance with their respective open-source licenses. No modifications have been made to the WebPerl runtime or SLOCCount scripts themselves.

## Compliance Notes

- WebPerl runtime files are distributed as-is from the official prebuilt v0.09-beta release
- SLOCCount scripts are taken unmodified from commit `7220ff6` of https://github.com/licquia/sloccount and bundled in `sloccount-perl.zip`
- License files are preserved in their original locations
- This application is a web interface that uses these tools, not a modification of them

## More Information

- **WebPerl Documentation**: http://webperl.zero-g.net/using.html
- **SLOCCount Documentation**: https://github.com/licquia/sloccount/blob/master/README
- **COCOMO Cost Model**: The cost estimation feature uses the Basic COCOMO model as implemented in the original SLOCCount tool
Binary file added lib/sloccount-perl.zip
Binary file not shown.
Loading