From bb6c828850a3137475eb9a5f661ecba2cca57d05 Mon Sep 17 00:00:00 2001 From: Matthew Westphall Date: Wed, 30 Jul 2025 09:42:17 -0500 Subject: [PATCH 1/4] Restore changes to GHA to support builds into iris-hep repo --- .github/workflows/build-containers.yml | 202 ++++++++----------------- .gitmodules | 0 iris-hep/kuantifier/build-config.json | 12 ++ requirements.txt | 1 + scripts/build-job-matrix.py | 32 +++- scripts/detect-changed-images.py | 85 +++++++++++ 6 files changed, 190 insertions(+), 142 deletions(-) create mode 100644 .gitmodules create mode 100644 iris-hep/kuantifier/build-config.json create mode 100644 requirements.txt create mode 100644 scripts/detect-changed-images.py diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index 7809dfe2..efdeffdd 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -28,77 +28,29 @@ jobs: with: fetch-depth: 0 - - id: image-list - run: | - ORG_DIR=opensciencegrid - # Get the list of files changed based on the type of event - # kicking off the GHA: - # 1. For the main branch, diff the previous state of main vs - # the current commit - # 2. For other branches (i.e., on someone's fork), diff main - # vs the current commit - # 3. For PRs, diff the base ref vs the current commit - # 4. For everything else (e.g., dispatches), build all images - if [[ $GITHUB_EVENT_NAME == 'pull_request' ]] || - [[ $GITHUB_EVENT_NAME == 'push' ]]; then - if [[ $GITHUB_EVENT_NAME == 'pull_request' ]]; then - BASE=$(git merge-base origin/$GITHUB_BASE_REF HEAD) - elif [[ $GITHUB_REF == 'refs/heads/main' ]]; then - BASE=${{github.event.before}} - else - BASE=origin/main - fi - # List image root dirs where files have changed and the - # root dir exists. Example value: - # "opensciencegrid/vo-frontend opensciencegrid/ospool-cm" - images=$(git diff --name-only \ - "$BASE" \ - "$GITHUB_SHA" | - egrep "^$ORG_DIR/" | - cut -d/ -f -2 | - sort | - uniq | - xargs -I {} find . -type d \ - -wholename ./{} \ - -printf "%P\n") - else - # List all image root dirs. Example value: - # "opensciencegrid/vo-frontend opensciencegrid/ospool-cm" - images=$(find $ORG_DIR -mindepth 1 \ - -maxdepth 1 \ - -type d \ - -printf "$ORG_DIR/%P\n") - fi + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: 'pip' # caching pip dependencies + - run: pip install -r requirements.txt - image_json=$(echo -n "${images:-dummy}" | jq -Rcs '.|split("\n") | map(select(. != ""))') - echo "$image_json" > image_list.json - echo "images=$(echo $images | tr '\n' ' ')" >> $GITHUB_OUTPUT - echo "image_list=$image_json" >> $GITHUB_OUTPUT + - id: image-list + run: python3 scripts/detect-changed-images.py --before ${{github.event.before}} - name: Display image list run: cat image_list.json - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - name: Set matrix output id: set-matrix - run: | - # Run Python script and capture JSON output - matrix_json=$(python scripts/build-job-matrix.py ${{ steps.image-list.outputs.images }} | tail -n +2) - # Use jq to extract the 'include' part of the JSON - matrix=$(echo "$matrix_json" | jq -c '.include') - echo "::set-output name=matrix::$matrix" - + run: python3 scripts/build-job-matrix.py ${{ steps.image-list.outputs.images }} - name: Verify matrix content run: | echo "Content of matrix:" echo "${{ steps.set-matrix.outputs.matrix }}" - build: + build-push-image: runs-on: ubuntu-latest needs: build-image-list if: ${{ needs.build-image-list.outputs.image_list != '["dummy"]' }} @@ -111,105 +63,85 @@ jobs: include: ${{ fromJson(needs.build-image-list.outputs.matrix) }} steps: - uses: actions/checkout@v3 - # Example of a matrix configuration string: - # {el9-23-development-True-False} - name: Print raw matrix configuration run: | - echo "Raw matrix configuration: ${{ matrix.config }}" - - - name: Set environment variables - id: set-env-vars - run: | - CONFIG="${{ matrix.config }}" - BASE_OS=$(echo $CONFIG | awk -F'-' '{print $1}') - OSG_SERIES=$(echo $CONFIG | awk -F'-' '{print $2}') - BASE_REPO=$(echo $CONFIG | awk -F'-' '{print $3}') - CONTEXT="opensciencegrid/${{ matrix.name }}" - echo "BASE_OS=${BASE_OS}" >> $GITHUB_ENV - echo "OSG_SERIES=${OSG_SERIES}" >> $GITHUB_ENV - echo "BASE_REPO=${BASE_REPO}" >> $GITHUB_ENV - echo "CONTEXT=${CONTEXT}" >> $GITHUB_ENV + echo "Raw matrix configuration: ${{ matrix }}" - - name: Validate Environment Variables + - name: Validate Build Matrix run: | echo "Validating environment variables:" - if [ -z "$BASE_OS" ] || [ -z "$OSG_SERIES" ] || [ -z "$BASE_REPO" ] || [ -z "$CONTEXT" ]; then - echo "Error: One or more environment variables are not set." + if [ -z "${{ matrix.base_os }}" ] || [ -z "${{ matrix.osg_series }}" ] || [ -z "${{ matrix.base_repo }}" ] || [ -z "${{ matrix.context }}" ]; then + echo "Error: One or more build matrix variables are not set." exit 1 else - echo "All required environment variables are set." + echo "All required build matrix variables are set." fi - - name: Build Image - continue-on-error: ${{ matrix.yum_repo == 'development' }} - uses: opensciencegrid/build-container-action@v0.6.0 - with: - osg_series: ${{ env.OSG_SERIES }} - osg_repo: ${{ env.BASE_REPO }} - context: ${{ env.CONTEXT }} - base_os: ${{ env.BASE_OS }} - - push: - runs-on: ubuntu-latest - if: >- - github.ref == 'refs/heads/main' && - github.event_name != 'pull_request' && - github.repository_owner == 'opensciencegrid' && - needs.build-image-list.outputs.image_list != '["dummy"]' - needs: [make-date-tag, build-image-list, build] - strategy: - fail-fast: false - matrix: - include: ${{ fromJson(needs.build-image-list.outputs.matrix) }} - steps: - - uses: actions/checkout@v3 - # Example of a matrix configuration string: - # {el9-23-development-True-False} - - name: Print raw matrix configuration - run: | - echo "Raw matrix configuration: ${{ matrix.config }}" - - - name: Set environment variables - id: set-env-vars + - name: Set Image Name run: | - CONFIG="${{ matrix.config }}" - BASE_OS=$(echo $CONFIG | awk -F'-' '{print $1}') - OSG_SERIES=$(echo $CONFIG | awk -F'-' '{print $2}') - BASE_REPO=$(echo $CONFIG | awk -F'-' '{print $3}') - CONTEXT="opensciencegrid/${{ matrix.name }}" - echo "BASE_OS=${BASE_OS}" >> $GITHUB_ENV - echo "OSG_SERIES=${OSG_SERIES}" >> $GITHUB_ENV - echo "BASE_REPO=${BASE_REPO}" >> $GITHUB_ENV - echo "CONTEXT=${CONTEXT}" >> $GITHUB_ENV + BASE_OS=${{ matrix.base_os }} + OSG_SERIES=${{ matrix.osg_series }} + BASE_REPO=${{ matrix.base_repo }} + ORGANIZATION=$(echo ${{ matrix.context }} | cut -d'/' -f1) - - name: Validate Environment Variables - run: | - echo "Validating environment variables:" - if [ -z "$BASE_OS" ] || [ -z "$OSG_SERIES" ] || [ -z "$BASE_REPO" ] || [ -z "$CONTEXT" ]; then - echo "Error: One or more environment variables are not set." - exit 1 + if [ -n "${{ matrix.tag_override }}" ] ; then + echo "IMAGE_NAME=${ORGANIZATION}/${{ matrix.name }}:${{ matrix.tag_override }}" >> $GITHUB_ENV else - echo "All required environment variables are set." + echo "IMAGE_NAME=${ORGANIZATION}/${{ matrix.name }}:${OSG_SERIES}-${BASE_OS}-${BASE_REPO}" >> $GITHUB_ENV fi + + - name: Pull External Repo + if: ${{ matrix.upstream }} + run: | + mkdir -p ${{ matrix.context }} + git clone ${{ matrix.upstream }} ${{ matrix.context }} + (cd ${{matrix.context}} && git fetch && git reset --hard ${{ matrix.upstream_ref }}) + + + - name: Build Image + continue-on-error: ${{ matrix.yum_repo == 'development' }} + uses: opensciencegrid/build-container-action@v0.7.1 + with: + clean_before_build: false + osg_series: ${{ matrix.osg_series }} + osg_repo: ${{ matrix.base_repo }} + context: ${{ matrix.context }} + base_os: ${{ matrix.base_os }} + output_image: ${{ env.IMAGE_NAME }} - name: Push to Harbor (${OSG_SERIES}-${BASE_REPO}) - uses: opensciencegrid/push-container-action@main + if: >- + github.ref == 'refs/heads/main' && + github.event_name != 'pull_request' && + github.repository_owner == 'opensciencegrid' && + needs.build-image-list.outputs.image_list != '["dummy"]' + uses: opensciencegrid/push-container-action@v0.8.1 with: - repo: ${{ env.BASE_REPO }} - osg_series: ${{ env.OSG_SERIES }} - context: ${{ env.CONTEXT }} - base_os: ${{ env.BASE_OS }} + clean_before_build: false + repo: ${{ matrix.base_repo }} + osg_series: ${{ matrix.osg_series }} + context: ${{ matrix.context }} + base_os: ${{ matrix.base_os }} + image_name: ${{ env.IMAGE_NAME }} registry_url: hub.opensciencegrid.org registry_user: ${{ secrets.OSG_HARBOR_ROBOT_USER }} registry_pass: ${{ secrets.OSG_HARBOR_ROBOT_PASSWORD }} - name: Push to Docker Hub (${OSG_SERIES}-${BASE_REPO}) - uses: opensciencegrid/push-container-action@main + if: >- + startsWith(env.IMAGE_NAME, 'opensciencegrid') && + github.ref == 'refs/heads/main' && + github.event_name != 'pull_request' && + github.repository_owner == 'opensciencegrid' && + needs.build-image-list.outputs.image_list != '["dummy"]' + uses: opensciencegrid/push-container-action@v0.8.1 with: - repo: ${{ env.BASE_REPO }} - osg_series: ${{ env.OSG_SERIES }} - context: ${{ env.CONTEXT }} - base_os: ${{ env.BASE_OS }} + clean_before_build: false + repo: ${{ matrix.base_repo }} + osg_series: ${{ matrix.osg_series }} + context: ${{ matrix.context }} + base_os: ${{ matrix.base_os }} + image_name: ${{ env.IMAGE_NAME }} registry_url: docker.io registry_user: ${{ secrets.DOCKER_USERNAME }} registry_pass: ${{ secrets.DOCKER_PASSWORD }} diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..e69de29b diff --git a/iris-hep/kuantifier/build-config.json b/iris-hep/kuantifier/build-config.json new file mode 100644 index 00000000..22d0a487 --- /dev/null +++ b/iris-hep/kuantifier/build-config.json @@ -0,0 +1,12 @@ +{ + "standard_build": true, + "repo_build": false, + "base_os": ["el9"], + "osg_series": ["24"], + "base_repo": ["release"], + "context": "kuantifier", + + "tag": "1.0.2", + "upstream": "https://github.com/rptaylor/kuantifier.git", + "upstream_ref": "v1.3.2" +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..64b1adae --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +GitPython diff --git a/scripts/build-job-matrix.py b/scripts/build-job-matrix.py index a8b86388..a920f1c0 100644 --- a/scripts/build-job-matrix.py +++ b/scripts/build-job-matrix.py @@ -4,6 +4,7 @@ from itertools import product DEFAULT_CONFIG_PATH = 'opensciencegrid/default-build-config.json' +GITHUB_OUTPUT = os.environ['GITHUB_OUTPUT'] def load_config(config_path, default_config=None): @@ -41,6 +42,11 @@ def main(image_dirs): osg_series_list = config['osg_series'] base_repo_list = config['base_repo'] + build_context = config.get('context', '') + tag_override = config.get('tag', '') + upstream_repo = config.get('upstream', '') + upstream_ref = config.get('upstream_ref', '') + combinations = product( base_os_list, osg_series_list, @@ -55,17 +61,29 @@ def main(image_dirs): # structures in the matrix construction and it offers: # 1. Simplicity: Using a single string to represent configurations is straightforward and easy to understand. # 2. Integration: A single string is easily passed to external tools and systems that manage builds. - configuration_string = f"{base_os}-{osg_series}-{base_repo}-{config['standard_build']}-{config['repo_build']}" - include_list.append({"name": image_name, "config": configuration_string}) + include_list.append({ + "name": image_name, + "base_os": base_os, + "osg_series": osg_series, + "base_repo": base_repo, + "tag_override": tag_override, + "standard_build": config['standard_build'], + "repo_build": config['repo_build'], + "context": os.path.join(image_dir, build_context), + "upstream": upstream_repo, + "upstream_ref": upstream_ref + }) sys.stdout.flush() - json_output = json.dumps({"include": include_list}, indent=4) - print(json_output) + # Write the include list to GITHUB_OUTPUT + with open(GITHUB_OUTPUT, 'a') as github_output: + github_output.writelines([ + f"matrix={json.dumps(include_list)}\n" + ]) + print(include_list) if __name__ == "__main__": - if len(sys.argv) < 2: - sys.exit(f"Usage: {sys.argv[0]} ") - + # If run with zero arguments, output an empty list image_dirs = sys.argv[1:] main(image_dirs) diff --git a/scripts/detect-changed-images.py b/scripts/detect-changed-images.py new file mode 100644 index 00000000..c20ce0da --- /dev/null +++ b/scripts/detect-changed-images.py @@ -0,0 +1,85 @@ +""" +Util to output the list of image directories that have changed between origin/main +and the current commit into the `images` and `image_list` gha output vars +""" +import git +from pathlib import Path +import os +import argparse +import json + + +ORG_DIRS = ['opensciencegrid', 'iris-hep'] + +parser = argparse.ArgumentParser() +parser.add_argument('--before', nargs='?', help='SHA of the previous commit to compare against') +args = parser.parse_args() + + +GITHUB_EVENT_NAME = os.environ['GITHUB_EVENT_NAME'] +GITHUB_SHA = os.environ['GITHUB_SHA'] +GITHUB_REF = os.environ['GITHUB_REF'] +GITHUB_BASE_REF = os.environ['GITHUB_BASE_REF'] +GITHUB_OUTPUT = os.environ['GITHUB_OUTPUT'] + + +def _image_from_path(path_str: str): + """ + Extract the image name (eg. osg-htc/frontier-squid) from a file that changed in a subdirectory + (eg. osg-htc/frontier-squid/build/build.sh) + """ + return Path(*Path(path_str).parts[:2]) + +def get_updated_images(): + """ + Get the list of files changed based on the type of event + kicking off the GHA: + 1. For the main branch, diff the previous state of main vs + the current commit + 2. For other branches (i.e., on someone's fork), diff main + vs the current commit + 3. For PRs, diff the base ref vs the current commit + 4. For everything else (e.g., dispatches), build all images + """ + + updated_images: list[str] = [] + repo = git.Repo('.') + for org_dir in ORG_DIRS: + if GITHUB_EVENT_NAME in ['pull_request', 'push']: + base : str = 'origin/main' + if GITHUB_EVENT_NAME == 'pull_request': + base = repo.merge_base(f'origin/{GITHUB_BASE_REF}', 'HEAD')[0].hexsha + elif GITHUB_REF == 'refs/heads/main': + base = args.before + current_commit = repo.commit(GITHUB_SHA) + + diff_paths = {f"{_image_from_path(d.a_path)}" for d in current_commit.diff(base) if d.a_path.startswith(org_dir)} + # Only interested in the top two path entries + updated_images += diff_paths + + else: + # List all image root dirs. Example value: + # "opensciencegrid/vo-frontend opensciencegrid/ospool-cm" + updated_images += [f"{p}" for p in Path(org_dir).iterdir() if p.is_dir()] + + return updated_images + + +def set_image_list_output(updated_images: list[str]): + """ + Write the list of updated images, in both JSON and space-separated + """ + with open(GITHUB_OUTPUT, 'a') as github_output: + github_output.writelines([ + f"images={' '.join(updated_images)}\n", + f"image_list={json.dumps(updated_images)}\n", + ]) + + # Leave a file containing the image list as debug output + with open('image_list.json', 'w') as image_list: + image_list.write(json.dumps(updated_images)) + + +if __name__ == '__main__': + updated_images = get_updated_images() + set_image_list_output(updated_images) From 17983f7939794673405afa95ed0e7ec1fa45e1e4 Mon Sep 17 00:00:00 2001 From: Matthew Westphall Date: Wed, 30 Jul 2025 13:39:38 -0500 Subject: [PATCH 2/4] Fix issue with empty build matrix --- scripts/build-job-matrix.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/build-job-matrix.py b/scripts/build-job-matrix.py index a920f1c0..7cc6bb5e 100644 --- a/scripts/build-job-matrix.py +++ b/scripts/build-job-matrix.py @@ -75,6 +75,11 @@ def main(image_dirs): }) sys.stdout.flush() + + if not include_list: + # Add a dummy value to the matrix so later GHA steps don't auto-error + include_list.append('dummy') + # Write the include list to GITHUB_OUTPUT with open(GITHUB_OUTPUT, 'a') as github_output: github_output.writelines([ From df203f2c49da7b2cba8b4b7741eb0f6959828589 Mon Sep 17 00:00:00 2001 From: Matthew Westphall Date: Mon, 4 Aug 2025 16:15:40 -0500 Subject: [PATCH 3/4] Use just build-container-action instead of both build and push --- .github/workflows/build-containers.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index efdeffdd..8eed8a87 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -87,7 +87,7 @@ jobs: if [ -n "${{ matrix.tag_override }}" ] ; then echo "IMAGE_NAME=${ORGANIZATION}/${{ matrix.name }}:${{ matrix.tag_override }}" >> $GITHUB_ENV else - echo "IMAGE_NAME=${ORGANIZATION}/${{ matrix.name }}:${OSG_SERIES}-${BASE_OS}-${BASE_REPO}" >> $GITHUB_ENV + echo "IMAGE_NAME=${ORGANIZATION}/${{ matrix.name }}:${OSG_SERIES}-${BASE_REPO}" >> $GITHUB_ENV fi - name: Pull External Repo @@ -115,14 +115,15 @@ jobs: github.event_name != 'pull_request' && github.repository_owner == 'opensciencegrid' && needs.build-image-list.outputs.image_list != '["dummy"]' - uses: opensciencegrid/push-container-action@v0.8.1 + uses: opensciencegrid/build-container-action@v0.7.1 with: clean_before_build: false - repo: ${{ matrix.base_repo }} + push_image: true osg_series: ${{ matrix.osg_series }} + osg_repo: ${{ matrix.base_repo }} context: ${{ matrix.context }} base_os: ${{ matrix.base_os }} - image_name: ${{ env.IMAGE_NAME }} + output_image: ${{ env.IMAGE_NAME }} registry_url: hub.opensciencegrid.org registry_user: ${{ secrets.OSG_HARBOR_ROBOT_USER }} registry_pass: ${{ secrets.OSG_HARBOR_ROBOT_PASSWORD }} @@ -134,14 +135,15 @@ jobs: github.event_name != 'pull_request' && github.repository_owner == 'opensciencegrid' && needs.build-image-list.outputs.image_list != '["dummy"]' - uses: opensciencegrid/push-container-action@v0.8.1 + uses: opensciencegrid/build-container-action@v0.7.1 with: clean_before_build: false - repo: ${{ matrix.base_repo }} + push_image: true osg_series: ${{ matrix.osg_series }} + osg_repo: ${{ matrix.base_repo }} context: ${{ matrix.context }} base_os: ${{ matrix.base_os }} - image_name: ${{ env.IMAGE_NAME }} + output_image: ${{ env.IMAGE_NAME }} registry_url: docker.io registry_user: ${{ secrets.DOCKER_USERNAME }} registry_pass: ${{ secrets.DOCKER_PASSWORD }} From 5100966938367b36033715a7666044d05c187217 Mon Sep 17 00:00:00 2001 From: Matthew Westphall Date: Mon, 4 Aug 2025 16:47:18 -0500 Subject: [PATCH 4/4] DEBUG: Attempt to push even though we're not in the main repo --- .github/workflows/build-containers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index 8eed8a87..3e374368 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -113,8 +113,8 @@ jobs: if: >- github.ref == 'refs/heads/main' && github.event_name != 'pull_request' && - github.repository_owner == 'opensciencegrid' && needs.build-image-list.outputs.image_list != '["dummy"]' + #github.repository_owner == 'opensciencegrid' && uses: opensciencegrid/build-container-action@v0.7.1 with: clean_before_build: false @@ -133,8 +133,8 @@ jobs: startsWith(env.IMAGE_NAME, 'opensciencegrid') && github.ref == 'refs/heads/main' && github.event_name != 'pull_request' && - github.repository_owner == 'opensciencegrid' && needs.build-image-list.outputs.image_list != '["dummy"]' + #github.repository_owner == 'opensciencegrid' && uses: opensciencegrid/build-container-action@v0.7.1 with: clean_before_build: false