diff --git a/.github/workflows/cleanup-ghcr.yml b/.github/workflows/cleanup-ghcr.yml
index f757076d..f5d15230 100644
--- a/.github/workflows/cleanup-ghcr.yml
+++ b/.github/workflows/cleanup-ghcr.yml
@@ -3,7 +3,7 @@ name: Cleanup GHCR Images
 on:
   workflow_dispatch: # Allows manual triggering
   schedule:
-    - cron: '0 2 * * 0' # Runs every Sunday at 2:00 AM UTC
+    - cron: '0 0 * * 0' # Runs every Sunday at 00:00 UTC
 
 jobs:
   cleanup-images:
@@ -51,37 +51,117 @@ jobs:
         env:
           GH_TOKEN: ${{ secrets.PACKAGES_BOT_PAT }}
 
-      - name: Delete untagged images
+      - name: Delete untagged images (safe for multi-arch)
         run: |
+          set -euo pipefail
+
           packages="${{ steps.get-packages.outputs.packages }}"
           if [ -z "$packages" ]; then
             echo "No packages found to clean up"
             exit 0
           fi
 
+          # Login to GHCR so we can inspect manifest lists for protected tags
+          GH_USERNAME=$(gh api user --jq .login)
+          echo "$GH_TOKEN" | docker login ghcr.io -u "$GH_USERNAME" --password-stdin
+
+          # Tags we NEVER want to break by deleting their referenced manifests.
+          # - *-SNAPSHOT (e.g. 1.0-SNAPSHOT)
+          # - semver-flavored flink tags (e.g. 0.9.3-flink-2.2)
+          PROTECTED_TAG_REGEX='(-SNAPSHOT$)|(^v?[0-9]+\.[0-9]+\.[0-9]+-flink-[0-9]+(\.[0-9]+)*$)'
+
           for pkg in $packages; do
             echo "--- Processing package: $pkg ---"
 
-            # Try both organization and repository endpoints
+            versions_json=""
+            chosen_endpoint=""
+
+            # Pick the first endpoint that successfully returns versions ("// []" keeps an empty listing iterable)
             for endpoint in "/orgs/DataSQRL" "/repos/DataSQRL/flink-sql-runner"; do
               echo "Trying endpoint: $endpoint"
+              if versions_json=$(gh api --paginate "$endpoint/packages/container/$pkg/versions" 2>/dev/null | jq -s 'add // []'); then
+                chosen_endpoint="$endpoint"
+                break
+              fi
+            done
+
+            if [ -z "$chosen_endpoint" ]; then
+              echo "Could not fetch versions for $pkg from any endpoint; skipping"
+              echo "--- Finished processing $pkg ---"
+              continue
+            fi
+
+            echo "Using endpoint: $chosen_endpoint"
+
+            # Find protected tags present on this package
+            protected_tags=$(echo "$versions_json" | jq -r --arg re "$PROTECTED_TAG_REGEX" '
+              .[] | .metadata.container.tags[]? | select(test($re))
+            ' | sort -u)
+
+            # Compute all manifest digests referenced by protected tags (includes per-arch manifests)
+            protected_digests=""
+            if [ -n "$protected_tags" ]; then
+              echo "Protected tags for $pkg:"
+              echo "$protected_tags"
+
+              inspect_failed=false
+              for tag in $protected_tags; do
+                image_ref="ghcr.io/datasqrl/$pkg:$tag"
 
-              # Delete untagged images
-              echo "Fetching untagged images for $pkg..."
-              untagged_ids=$(gh api --paginate "$endpoint/packages/container/$pkg/versions" | jq '.[] | select(.metadata.container.tags | length == 0) | .id' 2>/dev/null || echo "")
-              if [ -n "$untagged_ids" ]; then
-                echo "Deleting untagged images for $pkg:"
-                echo "$untagged_ids" | xargs -I {} gh api --method DELETE "$endpoint/packages/container/$pkg/versions/{}" --silent || echo "Could not delete some untagged images for $pkg. This might be due to download counts or other restrictions."
-              else
-                echo "No untagged images found for $pkg."
+                # Inspect once: the manifest descriptor carries the top-level digest and,
+                # for multi-arch images, the list of per-platform child manifests
+                manifest_json=$(docker buildx imagetools inspect "$image_ref" --format '{{json .Manifest}}' 2>/dev/null || true)
+                top_digest=$(printf '%s' "$manifest_json" | jq -r '.digest // empty' 2>/dev/null || true)
+
+                # Child manifest digests; legitimately empty for single-arch images
+                child_digests=$(printf '%s' "$manifest_json" | jq -r '.manifests[]?.digest' 2>/dev/null || true)
+
+                # Fail closed: no top-level digest means the inspect did not succeed
+                if [ -z "$top_digest" ]; then
+                  echo "WARNING: failed to inspect protected tag $image_ref; skipping deletions for $pkg to avoid breaking protected tags"
+                  inspect_failed=true
+                  break
+                fi
+
+                protected_digests=$(printf "%s\n%s\n%s\n" "$protected_digests" "$top_digest" "$child_digests" | sed '/^$/d' | sort -u)
+              done
+
+              if [ "$inspect_failed" = "true" ]; then
+                echo "--- Finished processing $pkg ---"
+                continue
               fi
+            fi
 
-              # If we found versions, break out of the endpoint loop
-              if [ -n "$untagged_ids" ]; then
-                break
+            # Consider deleting ONLY untagged versions, but skip any whose digest is referenced by protected tags
+            # versions_json "name" is typically the digest like "sha256:..."
+            candidates=$(echo "$versions_json" | jq -r '
+              .[] | select((.metadata.container.tags | length) == 0) | "\(.id)\t\(.name)"
+            ')
+
+            if [ -z "$candidates" ]; then
+              echo "No untagged images found for $pkg."
+              echo "--- Finished processing $pkg ---"
+              continue
+            fi
+
+            echo "Evaluating untagged versions for deletion..."
+            while IFS=$'\t' read -r version_id version_digest; do
+              if [ -z "$version_id" ] || [ -z "$version_digest" ]; then
+                continue
               fi
-            done
+
+              # Here-string instead of a pipe: with pipefail, grep -q exiting early could SIGPIPE echo and read as "no match"
+              if [ -n "$protected_digests" ] && grep -Fxq -- "$version_digest" <<< "$protected_digests"; then
+                echo "Keeping untagged version $version_id ($version_digest) because it is referenced by a protected tag"
+                continue
+              fi
+
+              echo "Deleting untagged version $version_id ($version_digest)"
+              gh api --method DELETE "$chosen_endpoint/packages/container/$pkg/versions/$version_id" --silent || \
+                echo "Could not delete version $version_id for $pkg (may be restricted)"
+            done <<< "$candidates"
+
             echo "--- Finished processing $pkg ---"
           done
         env:
           GH_TOKEN: ${{ secrets.PACKAGES_BOT_PAT }}
diff --git a/.github/workflows/cleanup-packages.yml b/.github/workflows/cleanup-packages.yml
index 70ad6724..be0a0ef1 100644
--- a/.github/workflows/cleanup-packages.yml
+++ b/.github/workflows/cleanup-packages.yml
@@ -1,10 +1,10 @@
 name: Cleanup Package Versions
 
 on:
+  workflow_dispatch: # Allow manual triggering
   schedule:
-    # Run every Monday at 2 AM UTC
-    - cron: '0 2 * * 1'
-  workflow_dispatch: # Allow manual triggering
+    # Run every Sunday at 00:00 UTC
+    - cron: '0 0 * * 0'
 
 jobs:
   discover-and-cleanup-maven-packages: