Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 91 additions & 16 deletions .github/workflows/cleanup-ghcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Cleanup GHCR Images
on:
workflow_dispatch: # Allows manual triggering
schedule:
- cron: '0 2 * * 0' # Runs every Sunday at 2:00 AM UTC
- cron: '0 0 * * 0'

jobs:
cleanup-images:
Expand Down Expand Up @@ -51,37 +51,112 @@ jobs:
env:
GH_TOKEN: ${{ secrets.PACKAGES_BOT_PAT }}

- name: Delete untagged images
- name: Delete untagged images (safe for multi-arch)
run: |
set -euo pipefail

packages="${{ steps.get-packages.outputs.packages }}"
if [ -z "$packages" ]; then
echo "No packages found to clean up"
exit 0
fi

# Login to GHCR so we can inspect manifest lists for protected tags
GH_USERNAME=$(gh api user --jq .login)
echo "$GH_TOKEN" | docker login ghcr.io -u "$GH_USERNAME" --password-stdin

# Tags we NEVER want to break by deleting their referenced manifests.
# - *-SNAPSHOT (e.g. 1.0-SNAPSHOT)
# - semver-flavored flink tags, optionally prefixed with "v" (e.g. 0.9.3-flink-2.2 or v0.9.3-flink-2.2)
PROTECTED_TAG_REGEX='(-SNAPSHOT$)|(^v?[0-9]+\.[0-9]+\.[0-9]+-flink-[0-9]+(\.[0-9]+)*$)'

for pkg in $packages; do
echo "--- Processing package: $pkg ---"

# Try both organization and repository endpoints
versions_json=""
chosen_endpoint=""

# Pick the first endpoint that successfully returns versions
for endpoint in "/orgs/DataSQRL" "/repos/DataSQRL/flink-sql-runner"; do
echo "Trying endpoint: $endpoint"
if versions_json=$(gh api --paginate "$endpoint/packages/container/$pkg/versions" 2>/dev/null | jq -s 'add'); then
chosen_endpoint="$endpoint"
break
fi
done

if [ -z "$chosen_endpoint" ]; then
echo "Could not fetch versions for $pkg from any endpoint; skipping"
echo "--- Finished processing $pkg ---"
continue
fi

echo "Using endpoint: $chosen_endpoint"

# Find protected tags present on this package
protected_tags=$(echo "$versions_json" | jq -r --arg re "$PROTECTED_TAG_REGEX" '
.[] | .metadata.container.tags[]? | select(test($re))
' | sort -u)

# Compute all manifest digests referenced by protected tags (includes per-arch manifests)
protected_digests=""
if [ -n "$protected_tags" ]; then
echo "Protected tags for $pkg:"
echo "$protected_tags"

inspect_failed=false
for tag in $protected_tags; do
image_ref="ghcr.io/datasqrl/$pkg:$tag"

# Delete untagged images
echo "Fetching untagged images for $pkg..."
untagged_ids=$(gh api --paginate "$endpoint/packages/container/$pkg/versions" | jq '.[] | select(.metadata.container.tags | length == 0) | .id' 2>/dev/null || echo "")
if [ -n "$untagged_ids" ]; then
echo "Deleting untagged images for $pkg:"
echo "$untagged_ids" | xargs -I {} gh api --method DELETE "$endpoint/packages/container/$pkg/versions/{}" --silent || echo "Could not delete some untagged images for $pkg. This might be due to download counts or other restrictions."
else
echo "No untagged images found for $pkg."
# Top-level digest for the tag (manifest list digest for multi-arch)
top_digest=$(docker buildx imagetools inspect "$image_ref" --format '{{.Digest}}' 2>/dev/null || true)

# If multi-arch, collect child manifest digests
child_digests=$(docker buildx imagetools inspect "$image_ref" --raw 2>/dev/null | jq -r '.manifests[]?.digest' 2>/dev/null || true)

if [ -z "$top_digest" ] && [ -z "$child_digests" ]; then
echo "WARNING: failed to inspect protected tag $image_ref; skipping deletions for $pkg to avoid breaking protected tags"
inspect_failed=true
break
fi

protected_digests=$(printf "%s\n%s\n%s\n" "$protected_digests" "$top_digest" "$child_digests" | sed '/^$/d' | sort -u)
done

if [ "$inspect_failed" = "true" ]; then
echo "--- Finished processing $pkg ---"
continue
fi
fi

# If we found versions, break out of the endpoint loop
if [ -n "$untagged_ids" ]; then
break
# Consider deleting ONLY untagged versions, but skip any whose digest is referenced by protected tags
# In versions_json, each version's "name" field is typically its manifest digest (e.g. "sha256:...")
candidates=$(echo "$versions_json" | jq -r '
.[] | select((.metadata.container.tags | length) == 0) | "\(.id)\t\(.name)"
')

if [ -z "$candidates" ]; then
echo "No untagged images found for $pkg."
echo "--- Finished processing $pkg ---"
continue
fi

echo "Evaluating untagged versions for deletion..."
while IFS=$'\t' read -r version_id version_digest; do
if [ -z "$version_id" ] || [ -z "$version_digest" ]; then
continue
fi
done

if [ -n "$protected_digests" ] && echo "$protected_digests" | grep -Fxq "$version_digest"; then
echo "Keeping untagged version $version_id ($version_digest) because it is referenced by a protected tag"
continue
fi

echo "Deleting untagged version $version_id ($version_digest)"
gh api --method DELETE "$chosen_endpoint/packages/container/$pkg/versions/$version_id" --silent || \
echo "Could not delete version $version_id for $pkg (may be restricted)"
done <<< "$candidates"

echo "--- Finished processing $pkg ---"
done
env:
GH_TOKEN: ${{ secrets.PACKAGES_BOT_PAT }}
4 changes: 2 additions & 2 deletions .github/workflows/cleanup-packages.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: Cleanup Package Versions

on:
workflow_dispatch: # Allow manual triggering
schedule:
# Run every Sunday at 00:00 UTC
- cron: '0 2 * * 1'
workflow_dispatch: # Allow manual triggering
- cron: '0 0 * * 0'

jobs:
discover-and-cleanup-maven-packages:
Expand Down