From 517a44a8a5b4125ec89748846bf85e157cf2eaaf Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Sat, 1 Jun 2024 10:38:13 -0400 Subject: [PATCH 1/8] Refactor GitHub actions into standalone scripts --- .../swell-tier1_application_discover.yml | 84 +------------------ scripts/install-swell.sh | 13 +++ scripts/run-swell-3dvar.sh | 25 ++++++ scripts/run-swell-hofx.sh | 25 ++++++ scripts/run-swell-ufo_testing.sh | 27 ++++++ 5 files changed, 94 insertions(+), 80 deletions(-) create mode 100644 scripts/install-swell.sh create mode 100644 scripts/run-swell-3dvar.sh create mode 100644 scripts/run-swell-hofx.sh create mode 100644 scripts/run-swell-ufo_testing.sh diff --git a/.github/workflows/swell-tier1_application_discover.yml b/.github/workflows/swell-tier1_application_discover.yml index dd3cdf9..8899a25 100644 --- a/.github/workflows/swell-tier1_application_discover.yml +++ b/.github/workflows/swell-tier1_application_discover.yml @@ -24,14 +24,7 @@ jobs: uses: actions/checkout@v3 - name: install-swell - run: | - # Make experiment directory - mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} - # Copy and source modules - cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/ - source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - pip install --prefix=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} - # Remove source code (needed to ensure nothing relies on the source) + run: ./scripts/install-swell.sh # Run ufo_testing workflow # ------------------------ @@ -44,30 +37,7 @@ jobs: steps: - name: run-swell-ufo_testing - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} - SUITE_NAME=ufo_testing - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} + run: ./scripts/run-swell-ufo_testing.sh # Move experiment directory on failure swell-tier_1-ufo_testing-failure: @@ -96,30 +66,7 @@ jobs: steps: - name: run-swell-hofx - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} - SUITE_NAME=hofx - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} + run: ./scripts/run-swell-hofx.sh # Move experiment directory on failure swell-tier_1-hofx-failure: @@ -147,30 +94,7 @@ jobs: steps: - name: run-swell-3dvar - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} - SUITE_NAME=3dvar - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} + run: ./scripts/run-swell-3dvar.sh # Move experiment directory on failure swell-tier_1-3dvar-failure: diff --git a/scripts/install-swell.sh b/scripts/install-swell.sh new file mode 100644 index 0000000..d3bb6a8 --- /dev/null +++ b/scripts/install-swell.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +# Make experiment directory +mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} + +# Copy and source modules +cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/ +source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules +pip install --prefix=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} + +# Remove source code (needed to ensure nothing relies on the source) diff --git a/scripts/run-swell-3dvar.sh b/scripts/run-swell-3dvar.sh new file mode 100644 index 0000000..10d7d2b --- /dev/null +++ b/scripts/run-swell-3dvar.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} +SUITE_NAME=3dvar +CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} +EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} + +mkdir -p $CI_WORKSPACE_JOB + +source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules + +# Get python version +PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` + +export PATH=$CI_WORKSPACE/swell/bin:$PATH +export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages + +echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml +echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + +rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite + +cd $CI_WORKSPACE_JOB +swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml +swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} diff --git a/scripts/run-swell-hofx.sh b/scripts/run-swell-hofx.sh new file mode 100644 index 0000000..e37cc0f --- /dev/null +++ b/scripts/run-swell-hofx.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} +SUITE_NAME=hofx +CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} +EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} + +mkdir -p $CI_WORKSPACE_JOB + +source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules + +# Get python version +PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` + +export PATH=$CI_WORKSPACE/swell/bin:$PATH +export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages + +echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml +echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + +rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite + +cd $CI_WORKSPACE_JOB +swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml +swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} diff --git a/scripts/run-swell-ufo_testing.sh b/scripts/run-swell-ufo_testing.sh new file mode 100644 index 0000000..e6c7c83 --- /dev/null +++ b/scripts/run-swell-ufo_testing.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} +SUITE_NAME=ufo_testing +CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} +EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} + +mkdir -p $CI_WORKSPACE_JOB + +source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules + +# Get python version +PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` + +export PATH=$CI_WORKSPACE/swell/bin:$PATH +export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages + +echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml +echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + +rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite + +cd $CI_WORKSPACE_JOB +swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml +swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} From b5a7252791f43a527d54d11f19b02125a424dd12 Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Sun, 2 Jun 2024 15:30:17 -0400 Subject: [PATCH 2/8] Add utils script for setting GH variables --- scripts/install-swell.sh | 3 +++ scripts/run-swell-3dvar.sh | 5 +++++ scripts/run-swell-hofx.sh | 5 +++++ scripts/run-swell-ufo_testing.sh | 3 +++ scripts/utils.sh | 18 ++++++++++++++++++ 5 files changed, 34 insertions(+) create mode 100644 scripts/utils.sh diff --git a/scripts/install-swell.sh b/scripts/install-swell.sh index d3bb6a8..776a0f8 100644 --- a/scripts/install-swell.sh +++ b/scripts/install-swell.sh @@ -2,6 +2,9 @@ set -euxo pipefail +source scripts/utils.sh +github_variables + # Make experiment directory mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} diff --git a/scripts/run-swell-3dvar.sh b/scripts/run-swell-3dvar.sh index 10d7d2b..ba34f12 100644 --- a/scripts/run-swell-3dvar.sh +++ b/scripts/run-swell-3dvar.sh @@ -1,5 +1,10 @@ #!/usr/bin/env bash +set -euxo pipefail + +source scripts/utils.sh +github_variables + CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} SUITE_NAME=3dvar CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} diff --git a/scripts/run-swell-hofx.sh b/scripts/run-swell-hofx.sh index e37cc0f..1c93f4d 100644 --- a/scripts/run-swell-hofx.sh +++ b/scripts/run-swell-hofx.sh @@ -1,5 +1,10 @@ #!/usr/bin/env bash +set -euxo pipefail + +source scripts/utils.sh +github_variables + CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} SUITE_NAME=hofx CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} diff --git a/scripts/run-swell-ufo_testing.sh b/scripts/run-swell-ufo_testing.sh index e6c7c83..a622068 100644 --- a/scripts/run-swell-ufo_testing.sh +++ b/scripts/run-swell-ufo_testing.sh @@ -2,6 +2,9 @@ set -euxo pipefail +source scripts/utils.sh +github_variables + CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} SUITE_NAME=ufo_testing CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} diff --git a/scripts/utils.sh b/scripts/utils.sh new file mode 100644 index 0000000..da125df --- /dev/null +++ b/scripts/utils.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +github_run_id() { + # Set default value to `LOCAL_$(uuidgen) if unset` + : "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" + export $GITHUB_RUN_ID +} + +github_workspace() { + # Set default value to current working directory + : "${GITHUB_WORKSPACE:=$PWD}" + export $GITHUB_WORKSPACE +} + +github_variables() { + github_run_id + github_workspace +} From 6e5b7b6da4a10c710a0a14a8747036413ae2fa8f Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Sun, 2 Jun 2024 15:36:07 -0400 Subject: [PATCH 3/8] Call all SWELL suites via `run-swell-suite` script --- .../swell-tier1_application_discover.yml | 8 ++--- scripts/run-swell-hofx.sh | 30 ------------------- ...{run-swell-3dvar.sh => run-swell-suite.sh} | 16 +++++++++- scripts/run-swell-ufo_testing.sh | 30 ------------------- 4 files changed, 19 insertions(+), 65 deletions(-) delete mode 100644 scripts/run-swell-hofx.sh rename scripts/{run-swell-3dvar.sh => run-swell-suite.sh} (76%) delete mode 100644 scripts/run-swell-ufo_testing.sh diff --git a/.github/workflows/swell-tier1_application_discover.yml b/.github/workflows/swell-tier1_application_discover.yml index 8899a25..a293c79 100644 --- a/.github/workflows/swell-tier1_application_discover.yml +++ b/.github/workflows/swell-tier1_application_discover.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@v3 - name: install-swell - run: ./scripts/install-swell.sh + run: bash scripts/install-swell.sh # Run ufo_testing workflow # ------------------------ @@ -37,7 +37,7 @@ jobs: steps: - name: run-swell-ufo_testing - run: ./scripts/run-swell-ufo_testing.sh + run: bash scripts/run-swell-suite.sh ufo_testing # Move experiment directory on failure swell-tier_1-ufo_testing-failure: @@ -66,7 +66,7 @@ jobs: steps: - name: run-swell-hofx - run: ./scripts/run-swell-hofx.sh + run: scripts/run-swell-suite.sh hofx # Move experiment directory on failure swell-tier_1-hofx-failure: @@ -94,7 +94,7 @@ jobs: steps: - name: run-swell-3dvar - run: ./scripts/run-swell-3dvar.sh + run: bash scripts/run-swell-suite.sh 3dvar # Move experiment directory on failure swell-tier_1-3dvar-failure: diff --git a/scripts/run-swell-hofx.sh b/scripts/run-swell-hofx.sh deleted file mode 100644 index 1c93f4d..0000000 --- a/scripts/run-swell-hofx.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -source scripts/utils.sh -github_variables - -CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} -SUITE_NAME=hofx -CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} -EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - -mkdir -p $CI_WORKSPACE_JOB - -source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - -# Get python version -PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - -export PATH=$CI_WORKSPACE/swell/bin:$PATH -export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - -echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml -echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - -rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - -cd $CI_WORKSPACE_JOB -swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml -swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} diff --git a/scripts/run-swell-3dvar.sh b/scripts/run-swell-suite.sh similarity index 76% rename from scripts/run-swell-3dvar.sh rename to scripts/run-swell-suite.sh index ba34f12..5d05da3 100644 --- a/scripts/run-swell-3dvar.sh +++ b/scripts/run-swell-suite.sh @@ -2,14 +2,26 @@ set -euxo pipefail +SUITE_NAME="$1" + +if [[ -z "$SUITE_NAME" ]]; then + echo "Variable SUITE_NAME is unset." + exit 1 +fi + source scripts/utils.sh github_variables CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} -SUITE_NAME=3dvar CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} +echo "----------------------------------------" +echo "CI_WORKSPACE=${CI_WORKSPACE}" +echo "CI_WORKSPACE_JOB=${CI_WORKSPACE}" +echo "EXPERIMENT_ID=${CI_WORKSPACE}" +echo "----------------------------------------" + mkdir -p $CI_WORKSPACE_JOB source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules @@ -20,6 +32,8 @@ PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` export PATH=$CI_WORKSPACE/swell/bin:$PATH export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages +echo "PYTHONPATH=${PYTHONPATH}" + echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml diff --git a/scripts/run-swell-ufo_testing.sh b/scripts/run-swell-ufo_testing.sh deleted file mode 100644 index a622068..0000000 --- a/scripts/run-swell-ufo_testing.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -source scripts/utils.sh -github_variables - -CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} -SUITE_NAME=ufo_testing -CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} -EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - -mkdir -p $CI_WORKSPACE_JOB - -source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - -# Get python version -PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - -export PATH=$CI_WORKSPACE/swell/bin:$PATH -export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - -echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml -echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - -rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - -cd $CI_WORKSPACE_JOB -swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml -swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} From dd2f6114f7893482569f953e2a6f6dbab4069e1d Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Tue, 4 Jun 2024 13:47:47 -0400 Subject: [PATCH 4/8] Bugfix export statements --- scripts/utils.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/utils.sh b/scripts/utils.sh index da125df..89df6e0 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -3,13 +3,13 @@ github_run_id() { # Set default value to `LOCAL_$(uuidgen) if unset` : "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" - export $GITHUB_RUN_ID + export GITHUB_RUN_ID } github_workspace() { # Set default value to current working directory : "${GITHUB_WORKSPACE:=$PWD}" - export $GITHUB_WORKSPACE + export GITHUB_WORKSPACE } github_variables() { From dcd41738b37e215684ea4652bb871076cad044b5 Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Wed, 5 Jun 2024 09:21:33 -0400 Subject: [PATCH 5/8] Refactor to use reusable workflows + matrix --- .github/workflows/run-swell-suite.yml | 27 +++++ .github/workflows/setup-swell.yml | 20 ++++ .../swell-tier1_application_discover.yml | 110 ++---------------- 3 files changed, 58 insertions(+), 99 deletions(-) create mode 100644 .github/workflows/run-swell-suite.yml create mode 100644 .github/workflows/setup-swell.yml diff --git a/.github/workflows/run-swell-suite.yml b/.github/workflows/run-swell-suite.yml new file mode 100644 index 0000000..0a56408 --- /dev/null +++ b/.github/workflows/run-swell-suite.yml @@ -0,0 +1,27 @@ +name: Run a Swell suite + +on: + workflow_call: + inputs: + suite: + required: true + type: string + +defaults: + run: + shell: bash + +jobs: + run-swell-suite: + runs-on: nccs-discover + timeout-minutes: 600 + steps: + - name: run-swell-${{ inputs.suite }} + run: bash scripts/run-swell-suite.sh ${{ inputs.suite }} + + - name: Fail hold for ${{ inputs.suite }} + if: failure() + run: | + SUITE_NAME=${{ inputs.suite }} + CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} + mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED diff --git a/.github/workflows/setup-swell.yml b/.github/workflows/setup-swell.yml new file mode 100644 index 0000000..87e78ba --- /dev/null +++ b/.github/workflows/setup-swell.yml @@ -0,0 +1,20 @@ +name: Set up Swell + +on: workflow_call + +jobs: + swell-tier_1-setup: + + runs-on: nccs-discover + timeout-minutes: 30 + + steps: + - name: validate-workflow + run: | + /home/jardizzo/bin/nams_check.py ${{ github.triggering_actor }} swell + + - name: acquire-swell + uses: actions/checkout@v3 + + - name: install-swell + run: bash scripts/install-swell.sh diff --git a/.github/workflows/swell-tier1_application_discover.yml b/.github/workflows/swell-tier1_application_discover.yml index a293c79..b81f742 100644 --- a/.github/workflows/swell-tier1_application_discover.yml +++ b/.github/workflows/swell-tier1_application_discover.yml @@ -11,105 +11,17 @@ jobs: # Initialization needed by all the workflows # ------------------------------------------ swell-tier_1-setup: + uses: ./.github/workflows/setup-swell.yml - runs-on: nccs-discover - timeout-minutes: 30 - - steps: - - name: validate-workflow - run: | - /home/jardizzo/bin/nams_check.py ${{ github.triggering_actor }} swell - - - name: acquire-swell - uses: actions/checkout@v3 - - - name: install-swell - run: bash scripts/install-swell.sh - - # Run ufo_testing workflow - # ------------------------ - swell-tier_1-ufo_testing: - - runs-on: nccs-discover - timeout-minutes: 600 - needs: swell-tier_1-setup - + tier_1_matrix: + strategy: + matrix: + suite: ["ufo_testing" "hofx" "3dvar"] steps: - - - name: run-swell-ufo_testing - run: bash scripts/run-swell-suite.sh ufo_testing - - # Move experiment directory on failure - swell-tier_1-ufo_testing-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_1-ufo_testing - if: failure() - - steps: - - name: Fail hold for ufo_testing - run: | - SUITE_NAME=ufo_testing - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED - - name: Copy cylc Logs - - # Run hofx workflow - # ----------------- - swell-tier_1-hofx: - - runs-on: nccs-discover - timeout-minutes: 600 - needs: swell-tier_1-setup - - steps: - - - name: run-swell-hofx - run: scripts/run-swell-suite.sh hofx - - # Move experiment directory on failure - swell-tier_1-hofx-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_1-hofx - if: failure() - - steps: - - name: Fail hold for hofx - run: | - SUITE_NAME=hofx - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED - - # Run 3dvar workflow - # ----------------- - swell-tier_1-3dvar: - - runs-on: nccs-discover - timeout-minutes: 600 - needs: swell-tier_1-setup - - steps: - - - name: run-swell-3dvar - run: bash scripts/run-swell-suite.sh 3dvar - - # Move experiment directory on failure - swell-tier_1-3dvar-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_1-3dvar - if: failure() - - steps: - - name: Fail hold for 3dvar - run: | - SUITE_NAME=3dvar - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED + - uses: ./.github/workflows/run-swell-suite.yml + needs: swell-tier_1-setup + with: + suite: ${{ matrix.suite }} # Perform all the clean up # ------------------------ @@ -118,7 +30,7 @@ jobs: runs-on: nccs-discover timeout-minutes: 30 - needs: [swell-tier_1-ufo_testing, swell-tier_1-hofx] + needs: tier_1_matrix steps: @@ -130,7 +42,7 @@ jobs: runs-on: nccs-discover timeout-minutes: 30 - needs: [swell-tier_1-ufo_testing, swell-tier_1-hofx] + needs: tier_1_matrix if: always() # Always run the clean up, even if failed or cancelled steps: From 058bf53e98e9ebfd44bad73c6af735c735981c1d Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Wed, 5 Jun 2024 09:52:12 -0400 Subject: [PATCH 6/8] Remove utils.sh script Hard to manage paths. --- scripts/install-swell.sh | 9 +++++++-- scripts/run-swell-suite.sh | 9 +++++++-- scripts/utils.sh | 18 ------------------ 3 files changed, 14 insertions(+), 22 deletions(-) delete mode 100644 scripts/utils.sh diff --git a/scripts/install-swell.sh b/scripts/install-swell.sh index 776a0f8..3d83c74 100644 --- a/scripts/install-swell.sh +++ b/scripts/install-swell.sh @@ -2,8 +2,13 @@ set -euxo pipefail -source scripts/utils.sh -github_variables +# Set default value to `LOCAL_$(RANDOM) if unset` +: "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" +export GITHUB_RUN_ID + +# Set default value to current working directory +: "${GITHUB_WORKSPACE:=$PWD}" +export GITHUB_WORKSPACE # Make experiment directory mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} diff --git a/scripts/run-swell-suite.sh b/scripts/run-swell-suite.sh index 5d05da3..132f55e 100644 --- a/scripts/run-swell-suite.sh +++ b/scripts/run-swell-suite.sh @@ -9,8 +9,13 @@ if [[ -z "$SUITE_NAME" ]]; then exit 1 fi -source scripts/utils.sh -github_variables +# Set default value to `LOCAL_$(RANDOM) if unset` +: "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" +export GITHUB_RUN_ID + +# Set default value to current working directory +: "${GITHUB_WORKSPACE:=$PWD}" +export GITHUB_WORKSPACE CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} diff --git a/scripts/utils.sh b/scripts/utils.sh deleted file mode 100644 index 89df6e0..0000000 --- a/scripts/utils.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -github_run_id() { - # Set default value to `LOCAL_$(uuidgen) if unset` - : "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" - export GITHUB_RUN_ID -} - -github_workspace() { - # Set default value to current working directory - : "${GITHUB_WORKSPACE:=$PWD}" - export GITHUB_WORKSPACE -} - -github_variables() { - github_run_id - github_workspace -} From 4dd2ae66719f4bd5f63e1a0e6fa0594dcee6ba07 Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Wed, 5 Jun 2024 10:00:04 -0400 Subject: [PATCH 7/8] Put scripts back into workflow YAML --- .github/workflows/run-swell-suite.yml | 26 +++++++++++++- .github/workflows/setup-swell.yml | 9 ++++- scripts/install-swell.sh | 21 ------------ scripts/run-swell-suite.sh | 49 --------------------------- 4 files changed, 33 insertions(+), 72 deletions(-) delete mode 100644 scripts/install-swell.sh delete mode 100644 scripts/run-swell-suite.sh diff --git a/.github/workflows/run-swell-suite.yml b/.github/workflows/run-swell-suite.yml index 0a56408..8c89f23 100644 --- a/.github/workflows/run-swell-suite.yml +++ b/.github/workflows/run-swell-suite.yml @@ -17,7 +17,31 @@ jobs: timeout-minutes: 600 steps: - name: run-swell-${{ inputs.suite }} - run: bash scripts/run-swell-suite.sh ${{ inputs.suite }} + run: | + SUITE_NAME=${{ inputs.suite }} + CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} + CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} + EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} + mkdir -p $CI_WORKSPACE_JOB + + source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules + + # Get python version + PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` + + export PATH=$CI_WORKSPACE/swell/bin:$PATH + export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages + + echo "PYTHONPATH=${PYTHONPATH}" + + echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + + rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite + + cd $CI_WORKSPACE_JOB + swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} - name: Fail hold for ${{ inputs.suite }} if: failure() diff --git a/.github/workflows/setup-swell.yml b/.github/workflows/setup-swell.yml index 87e78ba..9ce4236 100644 --- a/.github/workflows/setup-swell.yml +++ b/.github/workflows/setup-swell.yml @@ -17,4 +17,11 @@ jobs: uses: actions/checkout@v3 - name: install-swell - run: bash scripts/install-swell.sh + run: | + # Make experiment directory + mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} + + # Copy and source modules + cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/ + source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules + pip install --prefix=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} diff --git a/scripts/install-swell.sh b/scripts/install-swell.sh deleted file mode 100644 index 3d83c74..0000000 --- a/scripts/install-swell.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -# Set default value to `LOCAL_$(RANDOM) if unset` -: "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" -export GITHUB_RUN_ID - -# Set default value to current working directory -: "${GITHUB_WORKSPACE:=$PWD}" -export GITHUB_WORKSPACE - -# Make experiment directory -mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} - -# Copy and source modules -cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/ -source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules -pip install --prefix=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} - -# Remove source code (needed to ensure nothing relies on the source) diff --git a/scripts/run-swell-suite.sh b/scripts/run-swell-suite.sh deleted file mode 100644 index 132f55e..0000000 --- a/scripts/run-swell-suite.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -SUITE_NAME="$1" - -if [[ -z "$SUITE_NAME" ]]; then - echo "Variable SUITE_NAME is unset." - exit 1 -fi - -# Set default value to `LOCAL_$(RANDOM) if unset` -: "${GITHUB_RUN_ID:=LOCAL_$RANDOM}" -export GITHUB_RUN_ID - -# Set default value to current working directory -: "${GITHUB_WORKSPACE:=$PWD}" -export GITHUB_WORKSPACE - -CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} -CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} -EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - -echo "----------------------------------------" -echo "CI_WORKSPACE=${CI_WORKSPACE}" -echo "CI_WORKSPACE_JOB=${CI_WORKSPACE}" -echo "EXPERIMENT_ID=${CI_WORKSPACE}" -echo "----------------------------------------" - -mkdir -p $CI_WORKSPACE_JOB - -source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - -# Get python version -PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - -export PATH=$CI_WORKSPACE/swell/bin:$PATH -export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - -echo "PYTHONPATH=${PYTHONPATH}" - -echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml -echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - -rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - -cd $CI_WORKSPACE_JOB -swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml -swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} From b03d569b23ee2a5333b37ddf3fe0ecc6ea72ed25 Mon Sep 17 00:00:00 2001 From: Alexey Shiklomanov Date: Wed, 5 Jun 2024 11:01:14 -0400 Subject: [PATCH 8/8] Refactor Tier 2 tests to use reusable workflows --- .github/workflows/run-swell-suite.yml | 26 +- .github/workflows/setup-swell.yml | 23 +- .../swell-tier1_application_discover.yml | 3 + .../swell-tier2_application_discover.yml | 248 ++---------------- 4 files changed, 62 insertions(+), 238 deletions(-) diff --git a/.github/workflows/run-swell-suite.yml b/.github/workflows/run-swell-suite.yml index 8c89f23..7f361f2 100644 --- a/.github/workflows/run-swell-suite.yml +++ b/.github/workflows/run-swell-suite.yml @@ -4,6 +4,11 @@ on: workflow_call: inputs: suite: + description: 'SWELL suite to run (e.g., 3dvar, hofx, ufo_testing)' + required: true + type: string + tier: + description: 'Test tier (e.g., "tier1", "tier2")' required: true type: string @@ -19,12 +24,13 @@ jobs: - name: run-swell-${{ inputs.suite }} run: | SUITE_NAME=${{ inputs.suite }} - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} + CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID} + CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/${SUITE_NAME} EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} + mkdir -p $CI_WORKSPACE_JOB - source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules + source /discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/modules # Get python version PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` @@ -36,6 +42,13 @@ jobs: echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + + # Point to the active build + JEDI_BUNDLE_DIR=/discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/build_jedi/jedi_bundle + if [[ ${{ inputs.tier }} == "tier2" && -d "${JEDI_BUNDLE_DIR}" ]]; then + echo "existing_jedi_source_directory: ${JEDI_BUNDLE_DIR}/source" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + echo "existing_jedi_build_directory: ${JEDI_BUNDLE_DIR}/build" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml + fi rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite @@ -43,9 +56,14 @@ jobs: swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} + if [[ ${{ inputs.tier }} == "tier2" && ${{ inputs.suite }} == "build_jedi" ]]; then + # Create symbolic link to build that does not involve $GITHUB_RUN_ID + ln -s ${CI_WORKSPACE_JOB}/${EXPERIMENT_ID}/jedi_bundle ${CI_WORKSPACE_JOB}/jedi_bundle + fi + - name: Fail hold for ${{ inputs.suite }} if: failure() run: | SUITE_NAME=${{ inputs.suite }} - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME} + CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/${SUITE_NAME} mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED diff --git a/.github/workflows/setup-swell.yml b/.github/workflows/setup-swell.yml index 9ce4236..312a079 100644 --- a/.github/workflows/setup-swell.yml +++ b/.github/workflows/setup-swell.yml @@ -1,6 +1,12 @@ name: Set up Swell -on: workflow_call +on: + workflow_call: + inputs: + tier: + description: 'Test tier (e.g., tier1, tier2)' + required: true + type: string jobs: swell-tier_1-setup: @@ -13,15 +19,22 @@ jobs: run: | /home/jardizzo/bin/nams_check.py ${{ github.triggering_actor }} swell + # Only one tier 2 run is allowed at a given time + - name: establish-workflow-status + if: ${{ inputs.tier == 'tier2' }} + run: | + if [ -f "/discover/nobackup/gmao_ci/swell/tier2/__running__" ]; then echo "Tier 2 is already running. Abort"; exit 1; fi + touch /discover/nobackup/gmao_ci/swell/tier2/__running__ + - name: acquire-swell uses: actions/checkout@v3 - name: install-swell run: | # Make experiment directory - mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID} + mkdir /discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID} # Copy and source modules - cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/ - source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules - pip install --prefix=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} + cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/ + source /discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/modules + pip install --prefix=/discover/nobackup/gmao_ci/swell/${{ inputs.tier }}/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} diff --git a/.github/workflows/swell-tier1_application_discover.yml b/.github/workflows/swell-tier1_application_discover.yml index b81f742..4b2e3ee 100644 --- a/.github/workflows/swell-tier1_application_discover.yml +++ b/.github/workflows/swell-tier1_application_discover.yml @@ -12,6 +12,8 @@ jobs: # ------------------------------------------ swell-tier_1-setup: uses: ./.github/workflows/setup-swell.yml + with: + tier: "tier1" tier_1_matrix: strategy: @@ -21,6 +23,7 @@ jobs: - uses: ./.github/workflows/run-swell-suite.yml needs: swell-tier_1-setup with: + tier: "tier1" suite: ${{ matrix.suite }} # Perform all the clean up diff --git a/.github/workflows/swell-tier2_application_discover.yml b/.github/workflows/swell-tier2_application_discover.yml index e019b97..5263010 100644 --- a/.github/workflows/swell-tier2_application_discover.yml +++ b/.github/workflows/swell-tier2_application_discover.yml @@ -11,90 +11,19 @@ jobs: # Initialization needed by all the workflows # ------------------------------------------ swell-tier_2-setup: - - runs-on: nccs-discover - timeout-minutes: 30 - - steps: - - name: validate-workflow - run: | - /home/jardizzo/bin/nams_check.py ${{ github.triggering_actor }} swell - - # Only one tier 2 run is allowed at a given time - - name: establish-workflow-status - run: | - if [ -f "/discover/nobackup/gmao_ci/swell/tier2/__running__" ]; then echo "Tier 2 is already running. Abort"; exit 1; fi - touch /discover/nobackup/gmao_ci/swell/tier2/__running__ - - - name: acquire-swell - uses: actions/checkout@v3 - - - name: install-swell - run: | - # Make experiment directory - mkdir -p /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID} - # Copy and source modules - cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/ - source /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/modules - pip install --prefix=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE} - # Remove source code (needed to ensure nothing relies on the source) - + uses: ./.github/workflows/setup-swell.yml + with: + tier: "tier2" # -------------------------------------------- # STEP1: BUILD JEDI CODE FROM DEVELOP BRANCHES # -------------------------------------------- swell-tier_2-build_jedi: - - runs-on: nccs-discover - timeout-minutes: 600 needs: swell-tier_2-setup - - steps: - - - name: run-swell-build_jedi - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID} - SUITE_NAME=build_jedi - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} - - # Create symbolic link to build that does not involve $GITHUB_RUN_ID - ln -s $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/jedi_bundle $CI_WORKSPACE_JOB/jedi_bundle - - # Move experiment directory on failure - swell-tier_2-build_jedi-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_2-build_jedi - if: failure() - - steps: - - name: Fail hold for build_jedi - run: | - SUITE_NAME=build_jedi - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED - + uses: ./.github/workflows/run-swell-suite.yml + with: + tier: "tier2" + suite: "build_jedi" # ---------------------------------------- # STEP2: RUN TESTING SUITES WITH NEW BUILD @@ -102,166 +31,27 @@ jobs: # Run ncdiag convesion suite swell-tier_2-convert_ncdiags: - - runs-on: nccs-discover - timeout-minutes: 600 needs: swell-tier_2-build_jedi - - steps: - - - name: run-swell-convert_ncdiags - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID} - SUITE_NAME=convert_ncdiags - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - # Point to the active build - echo "existing_jedi_source_directory: /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/build_jedi/jedi_bundle/source" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "existing_jedi_build_directory: /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/build_jedi/jedi_bundle/build" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} - - # Move experiment directory on failure - swell-tier_2-convert_ncdiags-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_2-convert_ncdiags - if: failure() - - steps: - - name: Fail hold for convert_ncdiags - run: | - SUITE_NAME=convert_ncdiags - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED - + uses: ./.github/workflows/run-swell-suite.yml + with: + suite: "convert_ncdiags" + tier: "tier2" # Run ufo_testing suite swell-tier_2-ufo_testing: - - runs-on: nccs-discover - timeout-minutes: 600 needs: swell-tier_2-build_jedi - - steps: - - - name: run-swell-ufo_testing - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID} - SUITE_NAME=ufo_testing - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - # Point to the active build - echo "existing_jedi_source_directory: /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/build_jedi/jedi_bundle/source" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "existing_jedi_build_directory: /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/build_jedi/jedi_bundle/build" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} - - # Move experiment directory on failure - swell-tier_2-ufo_testing-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_2-ufo_testing - if: failure() - - steps: - - name: Fail hold for ufo_testing - run: | - SUITE_NAME=ufo_testing - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED + uses: ./.github/workflows/run-swell-suite.yml + with: + suite: "ufo_testing" + tier: "tier2" # Run hofx suite swell-tier_2-hofx: - - runs-on: nccs-discover - timeout-minutes: 600 needs: swell-tier_2-build_jedi - - steps: - - - name: run-swell-hofx - run: | - CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID} - SUITE_NAME=hofx - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID} - - mkdir -p $CI_WORKSPACE_JOB - - source /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/modules - - # Get python version - PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'` - - export PATH=$CI_WORKSPACE/swell/bin:$PATH - export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages - - echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - # Point to the active build - echo "existing_jedi_source_directory: /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/build_jedi/jedi_bundle/source" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - echo "existing_jedi_build_directory: /discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/build_jedi/jedi_bundle/build" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - - rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite - - cd $CI_WORKSPACE_JOB - swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml - swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID} - - # Move experiment directory on failure - swell-tier_2-hofx-failure: - - runs-on: nccs-discover - timeout-minutes: 30 - needs: swell-tier_2-hofx - if: failure() - - steps: - - name: Fail hold for hofx - run: | - SUITE_NAME=hofx - CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier2/${GITHUB_RUN_ID}/${SUITE_NAME} - mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED + uses: ./.github/workflows/run-swell-suite.yml + with: + suite: "hofx" + tier: "tier2" # ------------------------------------------------------------- # STEP3: PERFORM UPDATES OF STABLE NIGHTLY POINTER AND CLEAN UP