From a2f97978b498b6eb3b25e2ca56041fdccdef4f48 Mon Sep 17 00:00:00 2001
From: kithumma <kiran.thumma@amd.com>
Date: Wed, 21 Jan 2026 04:11:28 +0000
Subject: [PATCH 1/5] workflow enhancement

---
 .github/workflows/enroot-tests.yml | 62 +++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/enroot-tests.yml b/.github/workflows/enroot-tests.yml
index 4ee2c96..2b1ae9e 100644
--- a/.github/workflows/enroot-tests.yml
+++ b/.github/workflows/enroot-tests.yml
@@ -1,6 +1,9 @@
 name: Enroot Tests
 
 on:
+  push:
+    branches:
+      - main
   workflow_dispatch:
     inputs:
       test_name:
@@ -10,6 +13,7 @@ on:
         options:
           - test_single_node_pytorch
           - test_multi_node_distributed_pytorch
+          - test_multi_node_rccl
       no_install:
         description: 'Skip installation (--no-install)'
         required: false
@@ -26,41 +30,89 @@ on:
         type: string
         default: ''
       testbed_file:
-        description: 'Path to testbed file (e.g. tests/enroot/testbeds/mi325.yaml)'
+        description: 'Path to testbed file (e.g. tests/enroot/testbeds/mi325.yaml) - defaults to secrets.TESTBED_FILE'
         required: false
         type: string
-        default: 'testbed/enroot_tb.yml'
+        default: ''
 
 
 jobs:
   run-enroot-tests:
     runs-on: enroot-runners
     timeout-minutes: 120
+    strategy:
+      matrix:
+        test_name: 
+          - test_single_node_pytorch
+          - test_multi_node_distributed_pytorch
+          - test_multi_node_rccl
+      max-parallel: 1  # Run tests sequentially
     
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
       
       - name: Set up Python
         uses: actions/setup-python@v5
+        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
         with:
           python-version: '3.8'
       
       - name: Install dependencies
+        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
         run: |
           python3 -m pip install --upgrade pip
           pip install -r tests/enroot/requirements.txt
       
       - name: Run enroot tests
+        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
         working-directory: tests/enroot
         run: |
-          python3 run_test.py "${{ inputs.test_name }}" "${{ inputs.docker_image }}" "${{ inputs.no_install }}" "${{ inputs.no_uninstall }}" "${{ inputs.testbed_file }}"
+          # Use matrix test_name for the test to run
+          TEST_NAME="${{ matrix.test_name }}"
+          
+          # Determine testbed file based on test type
+          if [ "${{ github.event_name }}" = "push" ]; then
+            # For push events: use test-type-specific secrets
+            if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
+              DEFAULT_TESTBED="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
+            else
+              DEFAULT_TESTBED="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
+            fi
+            DOCKER_IMAGE=""
+            NO_INSTALL="false"
+            NO_UNINSTALL="false"
+            TESTBED_FILE="$DEFAULT_TESTBED"
+          else
+            # For workflow_dispatch: allow input override, otherwise use test-type-specific secrets
+            if [ -n "${{ inputs.testbed_file }}" ]; then
+              TESTBED_FILE="${{ inputs.testbed_file }}"
+            else
+              if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
+                TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
+              else
+                # Both multi-node tests use MULTI_NODE_TESTBED_FILE
+                TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
+              fi
+            fi
+            DOCKER_IMAGE="${{ inputs.docker_image }}"
+            NO_INSTALL="${{ inputs.no_install }}"
+            NO_UNINSTALL="${{ inputs.no_uninstall }}"
+          fi
+          
+          # Run RCCL test differently (pytest directly with hardcoded flags)
+          if [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
+            python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE" -k test_multi_node_rccl --no-install --no-uninstall
+          else
+            python3 run_test.py "$TEST_NAME" "$DOCKER_IMAGE" "$NO_INSTALL" "$NO_UNINSTALL" "$TESTBED_FILE"
+          fi
       
       - name: Upload test results
-        if: always()
+        if: ${{ always() && ((github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name)) }}
         uses: actions/upload-artifact@v4
         with:
-          name: test-results-${{ inputs.test_name }}-${{ github.run_number }}
+          name: test-results-${{ matrix.test_name }}-${{ github.run_number }}
           path: tests/enroot/results/
           if-no-files-found: warn
           retention-days: 30

From a951bf35ac05b6fd9a98f6a7a82cf7f0d1c7a627 Mon Sep 17 00:00:00 2001
From: kithumma <kiran.thumma@amd.com>
Date: Thu, 22 Jan 2026 02:12:44 +0000
Subject: [PATCH 2/5] update PYTHONPATH

---
 .github/workflows/enroot-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/enroot-tests.yml b/.github/workflows/enroot-tests.yml
index 2b1ae9e..52b0e93 100644
--- a/.github/workflows/enroot-tests.yml
+++ b/.github/workflows/enroot-tests.yml
@@ -103,6 +103,9 @@ jobs:
           
           # Run RCCL test differently (pytest directly with hardcoded flags)
           if [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
+            # Set PYTHONPATH and cd to testsuites directory for pytest
+            export PYTHONPATH=$(pwd):$PYTHONPATH
+            cd testsuites
             python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE" -k test_multi_node_rccl --no-install --no-uninstall
           else
             python3 run_test.py "$TEST_NAME" "$DOCKER_IMAGE" "$NO_INSTALL" "$NO_UNINSTALL" "$TESTBED_FILE"

From bba9f004e632577ad6e70818c01fcde50d47dced Mon Sep 17 00:00:00 2001
From: kithumma <kiran.thumma@amd.com>
Date: Thu, 22 Jan 2026 03:30:25 +0000
Subject: [PATCH 3/5] update all three tests and BASE_IMAGE options

---
 .github/workflows/enroot-tests.yml | 130 +++++++++++++++++++++++------
 1 file changed, 103 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/enroot-tests.yml b/.github/workflows/enroot-tests.yml
index 52b0e93..f620c33 100644
--- a/.github/workflows/enroot-tests.yml
+++ b/.github/workflows/enroot-tests.yml
@@ -6,14 +6,36 @@ on:
       - main
   workflow_dispatch:
     inputs:
-      test_name:
-        description: 'Select test to run'
-        required: true
-        type: choice
-        options:
-          - test_single_node_pytorch
-          - test_multi_node_distributed_pytorch
-          - test_multi_node_rccl
+      run_single_node_test:
+        description: 'Run single-node PyTorch test'
+        required: false
+        type: boolean
+        default: true
+      run_multi_node_test:
+        description: 'Run multi-node distributed PyTorch test'
+        required: false
+        type: boolean
+        default: true
+      run_rccl_test:
+        description: 'Run multi-node RCCL test'
+        required: false
+        type: boolean
+        default: true
+      base_image_single_node:
+        description: 'Docker image for single-node test (default: rocm/pytorch:latest from batch script)'
+        required: false
+        type: string
+        default: ''
+      base_image_multi_node:
+        description: 'Docker image for multi-node test (default: docker://rocm/pytorch:rocm7.0.2_ubuntu22.04_py3.10_pytorch_release_2.7.1 from batch script)'
+        required: false
+        type: string
+        default: ''
+      base_image_rccl:
+        description: 'Docker image for RCCL test (default: docker://rocm/roce-workload:ubuntu24_rocm-7.0.2_rccl-7.0.2_anp-v1.2.0_ainic-1.117.5-a-56 from batch script)'
+        required: false
+        type: string
+        default: ''
       no_install:
         description: 'Skip installation (--no-install)'
         required: false
@@ -24,11 +46,6 @@ on:
         required: false
         type: boolean
         default: false
-      docker_image:
-        description: 'Docker image to use (default: rocm/pytorch:latest for single-node, docker://rocm/pytorch:rocm7.0.2_ubuntu22.04_py3.10_pytorch_release_2.7.1 for multi-node)'
-        required: false
-        type: string
-        default: ''
       testbed_file:
         description: 'Path to testbed file (e.g. tests/enroot/testbeds/mi325.yaml) - defaults to secrets.TESTBED_FILE'
         required: false
@@ -51,68 +68,127 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
+        if: |
+          ${{
+            github.event_name == 'push' ||
+            (github.event_name == 'workflow_dispatch' && (
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+            ))
+          }}
       
       - name: Set up Python
         uses: actions/setup-python@v5
-        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
+        if: |
+          ${{
+            github.event_name == 'push' ||
+            (github.event_name == 'workflow_dispatch' && (
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+            ))
+          }}
         with:
           python-version: '3.8'
       
       - name: Install dependencies
-        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
+        if: |
+          ${{
+            github.event_name == 'push' ||
+            (github.event_name == 'workflow_dispatch' && (
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+            ))
+          }}
         run: |
           python3 -m pip install --upgrade pip
           pip install -r tests/enroot/requirements.txt
       
       - name: Run enroot tests
-        if: ${{ (github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name) }}
+        if: |
+          ${{
+            github.event_name == 'push' ||
+            (github.event_name == 'workflow_dispatch' && (
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+            ))
+          }}
         working-directory: tests/enroot
         run: |
           # Use matrix test_name for the test to run
           TEST_NAME="${{ matrix.test_name }}"
           
-          # Determine testbed file based on test type
+          # Determine testbed file and docker image based on test type and event
           if [ "${{ github.event_name }}" = "push" ]; then
-            # For push events: use test-type-specific secrets
+            # For push events: use test-type-specific secrets and default images from batch scripts
             if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
-              DEFAULT_TESTBED="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
+              TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
             else
-              DEFAULT_TESTBED="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
+              TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
             fi
             DOCKER_IMAGE=""
             NO_INSTALL="false"
             NO_UNINSTALL="false"
-            TESTBED_FILE="$DEFAULT_TESTBED"
           else
-            # For workflow_dispatch: allow input override, otherwise use test-type-specific secrets
+            # For workflow_dispatch: use inputs
             if [ -n "${{ inputs.testbed_file }}" ]; then
               TESTBED_FILE="${{ inputs.testbed_file }}"
             else
               if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
                 TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
               else
-                # Both multi-node tests use MULTI_NODE_TESTBED_FILE
                 TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
               fi
             fi
-            DOCKER_IMAGE="${{ inputs.docker_image }}"
             NO_INSTALL="${{ inputs.no_install }}"
             NO_UNINSTALL="${{ inputs.no_uninstall }}"
+            
+            # Set DOCKER_IMAGE based on test type
+            if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
+              DOCKER_IMAGE="${{ inputs.base_image_single_node }}"
+            elif [ "$TEST_NAME" = "test_multi_node_distributed_pytorch" ]; then
+              DOCKER_IMAGE="${{ inputs.base_image_multi_node }}"
+            elif [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
+              DOCKER_IMAGE="${{ inputs.base_image_rccl }}"
+            fi
           fi
           
-          # Run RCCL test differently (pytest directly with hardcoded flags)
+          # Run RCCL test differently (pytest directly)
           if [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
+            # For RCCL test: extract version tag from docker image if provided
+            if [ -n "$DOCKER_IMAGE" ]; then
+              # Extract version tag from full docker image path
+              # Example: docker://rocm/roce-workload:ubuntu24_rocm-7.0.2_rccl-7.0.2_anp-v1.2.0_ainic-1.117.5-a-56
+              # Result: ubuntu24_rocm-7.0.2_rccl-7.0.2_anp-v1.2.0_ainic-1.117.5-a-56
+              DOCKER_IMAGE_VERSION=$(echo "$DOCKER_IMAGE" | sed 's/.*://')
+              export DOCKER_IMAGE_VERSION
+              echo "Using RCCL Docker image version: $DOCKER_IMAGE_VERSION"
+            fi
+            
             # Set PYTHONPATH and cd to testsuites directory for pytest
             export PYTHONPATH=$(pwd):$PYTHONPATH
             cd testsuites
             python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE" -k test_multi_node_rccl --no-install --no-uninstall
           else
+            # For other tests: use run_test.py
             python3 run_test.py "$TEST_NAME" "$DOCKER_IMAGE" "$NO_INSTALL" "$NO_UNINSTALL" "$TESTBED_FILE"
           fi
       
       - name: Upload test results
-        if: ${{ always() && ((github.event_name == 'push' && matrix.test_name != 'test_multi_node_rccl') || (github.event_name == 'workflow_dispatch' && inputs.test_name == matrix.test_name)) }}
+        if: |
+          ${{
+            always() && (
+              github.event_name == 'push' ||
+              (github.event_name == 'workflow_dispatch' && (
+                (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
+                (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
+                (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+              ))
+            )
+          }}
         uses: actions/upload-artifact@v4
         with:
           name: test-results-${{ matrix.test_name }}-${{ github.run_number }}

From 7c30e6fa9042541e6647bf6151e824e6db5554b0 Mon Sep 17 00:00:00 2001
From: kithumma <kiran.thumma@amd.com>
Date: Thu, 22 Jan 2026 03:57:43 +0000
Subject: [PATCH 4/5] update test files

---
 .github/workflows/enroot-tests-local.yml | 240 +++++++++++++++++++++++
 .github/workflows/enroot-tests.yml       |  30 +--
 2 files changed, 255 insertions(+), 15 deletions(-)
 create mode 100644 .github/workflows/enroot-tests-local.yml

diff --git a/.github/workflows/enroot-tests-local.yml b/.github/workflows/enroot-tests-local.yml
new file mode 100644
index 0000000..08e2975
--- /dev/null
+++ b/.github/workflows/enroot-tests-local.yml
@@ -0,0 +1,240 @@
+# Local workflow for testing with `act`
+# Usage: See act commands at the bottom of this file
+name: Enroot Tests (Local)
+
+on:
+  workflow_dispatch:
+    inputs:
+      run_single_node_test:
+        description: 'Run single-node PyTorch test'
+        required: false
+        type: boolean
+        default: true
+      run_multi_node_test:
+        description: 'Run multi-node distributed PyTorch test'
+        required: false
+        type: boolean
+        default: true
+      run_rccl_test:
+        description: 'Run multi-node RCCL test'
+        required: false
+        type: boolean
+        default: true
+      base_image_single_node:
+        description: 'Docker image for single-node test'
+        required: false
+        type: string
+        default: ''
+      base_image_multi_node:
+        description: 'Docker image for multi-node test'
+        required: false
+        type: string
+        default: ''
+      base_image_rccl:
+        description: 'Docker image for RCCL test'
+        required: false
+        type: string
+        default: ''
+      no_install:
+        description: 'Skip installation (--no-install)'
+        required: false
+        type: boolean
+        default: false
+      no_uninstall:
+        description: 'Skip uninstallation (--no-uninstall)'
+        required: false
+        type: boolean
+        default: false
+      testbed_file:
+        description: 'Path to testbed file'
+        required: false
+        type: string
+        default: ''
+
+jobs:
+  run-enroot-tests:
+    # Use ubuntu-latest for act compatibility (or map enroot-runners with -P flag)
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    strategy:
+      matrix:
+        test_name: 
+          - test_single_node_pytorch
+          - test_multi_node_distributed_pytorch
+          - test_multi_node_rccl
+      max-parallel: 1
+    
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        if: |-  # '|-' strips the final newline; with plain '|' the trailing newline makes the result a non-empty string, which is always truthy
+          ${{
+            (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
+          }}
+      
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        if: |-  # '|-' strips the final newline; with plain '|' the trailing newline makes the result a non-empty string, which is always truthy
+          ${{
+            (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
+          }}
+        with:
+          python-version: '3.8'
+      
+      - name: Install dependencies
+        if: |-  # '|-' strips the final newline; with plain '|' the trailing newline makes the result a non-empty string, which is always truthy
+          ${{
+            (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
+          }}
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install -r tests/enroot/requirements.txt  # same interpreter as the upgrade above
+      
+      - name: Run enroot tests
+        if: |-  # '|-' strips the final newline; with plain '|' the trailing newline makes the result a non-empty string, which is always truthy
+          ${{
+            (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+            (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
+          }}
+        working-directory: tests/enroot
+        env:
+          # Inputs and secrets reach the script as environment variables, never as spliced-in script text
+          INPUT_TESTBED_FILE: ${{ inputs.testbed_file }}
+          SINGLE_NODE_TESTBED: ${{ secrets.SINGLE_NODE_TESTBED_FILE }}
+          MULTI_NODE_TESTBED: ${{ secrets.MULTI_NODE_TESTBED_FILE }}
+          INPUT_NO_INSTALL: ${{ inputs.no_install }}
+          INPUT_NO_UNINSTALL: ${{ inputs.no_uninstall }}
+          INPUT_IMAGE_SINGLE_NODE: ${{ inputs.base_image_single_node }}
+          INPUT_IMAGE_MULTI_NODE: ${{ inputs.base_image_multi_node }}
+          INPUT_IMAGE_RCCL: ${{ inputs.base_image_rccl }}
+        run: |
+          TEST_NAME="${{ matrix.test_name }}"
+          # An explicit testbed_file input wins; otherwise use the secret for this test type
+          if [ -n "$INPUT_TESTBED_FILE" ]; then
+            TESTBED_FILE="$INPUT_TESTBED_FILE"
+          elif [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
+            TESTBED_FILE="$SINGLE_NODE_TESTBED"
+          else
+            TESTBED_FILE="$MULTI_NODE_TESTBED"
+          fi
+
+          case "$TEST_NAME" in
+            test_single_node_pytorch) DOCKER_IMAGE="$INPUT_IMAGE_SINGLE_NODE" ;;
+            test_multi_node_distributed_pytorch) DOCKER_IMAGE="$INPUT_IMAGE_MULTI_NODE" ;;
+            test_multi_node_rccl) DOCKER_IMAGE="$INPUT_IMAGE_RCCL" ;;
+            *) DOCKER_IMAGE="" ;;
+          esac
+
+          echo "=== Test Configuration ==="
+          echo "TEST_NAME: $TEST_NAME"
+          echo "TESTBED_FILE: $TESTBED_FILE"
+          echo "DOCKER_IMAGE: $DOCKER_IMAGE"
+          echo "NO_INSTALL: $INPUT_NO_INSTALL"
+          echo "NO_UNINSTALL: $INPUT_NO_UNINSTALL"
+          echo "=========================="
+
+          # Run RCCL test differently (pytest directly)
+          if [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
+            if [ -n "$DOCKER_IMAGE" ]; then
+              export DOCKER_IMAGE_VERSION="${DOCKER_IMAGE##*:}"  # tag only: the text after the last ':'
+              echo "Using RCCL Docker image version: $DOCKER_IMAGE_VERSION"
+            fi
+            export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"  # no trailing ':' when PYTHONPATH was empty
+            cd testsuites
+            python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE" -k test_multi_node_rccl --no-install --no-uninstall
+          else
+            python3 run_test.py "$TEST_NAME" "$DOCKER_IMAGE" "$INPUT_NO_INSTALL" "$INPUT_NO_UNINSTALL" "$TESTBED_FILE"
+          fi
+      
+      - name: Upload test results
+        if: |-  # '|-' strips the final newline; with plain '|' the trailing newline makes the result a non-empty string, which is always truthy
+          ${{
+            always() && (
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
+            )
+          }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-results-${{ matrix.test_name }}-${{ github.run_number }}
+          path: tests/enroot/results/
+          if-no-files-found: warn
+          retention-days: 30
+
+# =============================================================================
+# ACT COMMANDS TO RUN THIS WORKFLOW LOCALLY
+# =============================================================================
+#
+# Prerequisites:
+#   1. Install act: https://github.com/nektos/act
+#      - macOS: brew install act
+#      - Linux: curl -s https://raw.githubusercontent.com/nektos/act/master/install.sh | sudo bash
+#
+#   2. Create a secrets file at .secrets (in repo root):
+#      SINGLE_NODE_TESTBED_FILE=/path/to/your/single_node_testbed.yml
+#      MULTI_NODE_TESTBED_FILE=/path/to/your/multi_node_testbed.yml
+#
+# Run all three tests with defaults:
+#   act workflow_dispatch \
+#     -W .github/workflows/enroot-tests-local.yml \
+#     --secret-file .secrets \
+#     --input run_single_node_test=true \
+#     --input run_multi_node_test=true \
+#     --input run_rccl_test=true \
+#     --input no_install=false \
+#     --input no_uninstall=false
+#
+# Run only single-node test:
+#   act workflow_dispatch \
+#     -W .github/workflows/enroot-tests-local.yml \
+#     --secret-file .secrets \
+#     --input run_single_node_test=true \
+#     --input run_multi_node_test=false \
+#     --input run_rccl_test=false
+#
+# Run with custom testbed file:
+#   act workflow_dispatch \
+#     -W .github/workflows/enroot-tests-local.yml \
+#     --secret-file .secrets \
+#     --input run_single_node_test=true \
+#     --input run_multi_node_test=true \
+#     --input run_rccl_test=true \
+#     --input testbed_file=/path/to/testbed.yml
+#
+# Run with custom Docker images:
+#   act workflow_dispatch \
+#     -W .github/workflows/enroot-tests-local.yml \
+#     --secret-file .secrets \
+#     --input run_single_node_test=true \
+#     --input run_multi_node_test=true \
+#     --input run_rccl_test=true \
+#     --input base_image_single_node=rocm/pytorch:latest \
+#     --input base_image_multi_node=docker://rocm/pytorch:rocm7.0.2_ubuntu22.04_py3.10_pytorch_release_2.7.1 \
+#     --input base_image_rccl=docker://rocm/roce-workload:ubuntu24_rocm-7.0.2_rccl-7.0.2_anp-v1.2.0_ainic-1.117.5-a-56
+#
+# Run with --no-install and --no-uninstall flags:
+#   act workflow_dispatch \
+#     -W .github/workflows/enroot-tests-local.yml \
+#     --secret-file .secrets \
+#     --input run_single_node_test=true \
+#     --input run_multi_node_test=true \
+#     --input run_rccl_test=true \
+#     --input no_install=true \
+#     --input no_uninstall=true
+#
+# Additional act options:
+#   -v                    # Verbose output
+#   --container-architecture linux/amd64  # Specify architecture
+#   -P ubuntu-latest=catthehacker/ubuntu:act-latest  # Use different runner image
+#   --bind                # Bind working directory instead of copy
+#   -n                    # Dry run (don't actually run)
+#
+# =============================================================================
diff --git a/.github/workflows/enroot-tests.yml b/.github/workflows/enroot-tests.yml
index f620c33..57c0630 100644
--- a/.github/workflows/enroot-tests.yml
+++ b/.github/workflows/enroot-tests.yml
@@ -72,9 +72,9 @@ jobs:
           ${{
             github.event_name == 'push' ||
             (github.event_name == 'workflow_dispatch' && (
-              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
-              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
-              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
             ))
           }}
       
@@ -84,9 +84,9 @@ jobs:
           ${{
             github.event_name == 'push' ||
             (github.event_name == 'workflow_dispatch' && (
-              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
-              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
-              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
             ))
           }}
         with:
@@ -97,9 +97,9 @@ jobs:
           ${{
             github.event_name == 'push' ||
             (github.event_name == 'workflow_dispatch' && (
-              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
-              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
-              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
             ))
           }}
         run: |
@@ -111,9 +111,9 @@ jobs:
           ${{
             github.event_name == 'push' ||
             (github.event_name == 'workflow_dispatch' && (
-              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
-              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
-              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
             ))
           }}
         working-directory: tests/enroot
@@ -183,9 +183,9 @@ jobs:
             always() && (
               github.event_name == 'push' ||
               (github.event_name == 'workflow_dispatch' && (
-                (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test) ||
-                (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test) ||
-                (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test)
+                (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+                (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+                (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
               ))
             )
           }}

From ea2103b2895c408bbe980d3db7fc96451f215be2 Mon Sep 17 00:00:00 2001
From: kithumma <kiran.thumma@amd.com>
Date: Thu, 22 Jan 2026 05:14:24 +0000
Subject: [PATCH 5/5] update workflow

---
 .github/workflows/enroot-tests.yml | 68 ++++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/enroot-tests.yml b/.github/workflows/enroot-tests.yml
index 57c0630..aac5746 100644
--- a/.github/workflows/enroot-tests.yml
+++ b/.github/workflows/enroot-tests.yml
@@ -47,7 +47,7 @@ on:
         type: boolean
         default: false
       testbed_file:
-        description: 'Path to testbed file (e.g. tests/enroot/testbeds/mi325.yaml) - defaults to secrets.TESTBED_FILE'
+        description: 'Path to testbed file (overrides secret-based testbed). If not provided, uses SINGLE_NODE_TESTBED_FILE or MULTI_NODE_TESTBED_FILE secrets (which should contain YAML content).'
         required: false
         type: string
         default: ''
@@ -106,6 +106,44 @@ jobs:
           python3 -m pip install --upgrade pip
           pip install -r tests/enroot/requirements.txt
       
+      - name: Create testbed file from secret
+        if: |-  # "-" chomps the trailing newline, which would make the condition always true
+          ${{
+            github.event_name == 'push' ||
+            (github.event_name == 'workflow_dispatch' && (
+              (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
+              (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
+            ))
+          }}
+        working-directory: tests/enroot
+        env:
+          SINGLE_NODE_TESTBED: ${{ secrets.SINGLE_NODE_TESTBED_FILE }}
+          MULTI_NODE_TESTBED: ${{ secrets.MULTI_NODE_TESTBED_FILE }}
+        run: |
+          # Create testbed files from secrets (secrets contain YAML content).
+          # umask 077 keeps the secret-derived files readable by the runner user only.
+          umask 077 && mkdir -p testbed
+          # Write single-node testbed if secret exists
+          if [ -n "$SINGLE_NODE_TESTBED" ]; then
+            printf '%s\n' "$SINGLE_NODE_TESTBED" > testbed/single_node_tb.yml
+            echo "Created testbed/single_node_tb.yml from secret"
+          else
+            echo "[WARNING] SINGLE_NODE_TESTBED_FILE secret is not set"
+          fi
+          
+          # Write multi-node testbed if secret exists  
+          if [ -n "$MULTI_NODE_TESTBED" ]; then
+            printf '%s\n' "$MULTI_NODE_TESTBED" > testbed/multi_node_tb.yml
+            echo "Created testbed/multi_node_tb.yml from secret"
+          else
+            echo "[WARNING] MULTI_NODE_TESTBED_FILE secret is not set"
+          fi
+          
+          # List created testbed files for debugging
+          echo "Testbed files created:"
+          ls -la testbed/ || echo "No testbed directory"
+      
       - name: Run enroot tests
-        if: |
+        if: |-  # "-" chomps the trailing newline, which would make the condition always true
           ${{
@@ -123,11 +161,11 @@ jobs:
           
           # Determine testbed file and docker image based on test type and event
           if [ "${{ github.event_name }}" = "push" ]; then
-            # For push events: use test-type-specific secrets and default images from batch scripts
+            # For push events: use test-type-specific testbed files and default images from batch scripts
             if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
-              TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
+              TESTBED_FILE="testbed/single_node_tb.yml"
             else
-              TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
+              TESTBED_FILE="testbed/multi_node_tb.yml"
             fi
             DOCKER_IMAGE=""
             NO_INSTALL="false"
@@ -138,9 +176,9 @@ jobs:
               TESTBED_FILE="${{ inputs.testbed_file }}"
             else
               if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
-                TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
+                TESTBED_FILE="testbed/single_node_tb.yml"
               else
-                TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
+                TESTBED_FILE="testbed/multi_node_tb.yml"
               fi
             fi
             NO_INSTALL="${{ inputs.no_install }}"
@@ -156,6 +194,17 @@ jobs:
             fi
           fi
           
+          # Validate testbed file exists
+          if [ ! -f "$TESTBED_FILE" ]; then
+            echo "[ERROR] Testbed file not found: $TESTBED_FILE"
+            echo "Please ensure the appropriate secret is set:"
+            echo "  - SINGLE_NODE_TESTBED_FILE for single-node tests"
+            echo "  - MULTI_NODE_TESTBED_FILE for multi-node tests"
+            echo "Or provide a custom testbed_file input via workflow_dispatch."
+            exit 1
+          fi
+          echo "Using testbed file: $TESTBED_FILE"
+          
           # Run RCCL test differently (pytest directly)
           if [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
             # For RCCL test: extract version tag from docker image if provided
@@ -168,10 +217,13 @@ jobs:
               echo "Using RCCL Docker image version: $DOCKER_IMAGE_VERSION"
             fi
             
+            # Resolve the testbed file to an absolute path before changing directory.
+            # realpath, unlike "$(pwd)/...", also accepts an already-absolute testbed_file input.
+            TESTBED_FILE_ABS="$(realpath "$TESTBED_FILE")"
             # Set PYTHONPATH and cd to testsuites directory for pytest
             export PYTHONPATH=$(pwd):$PYTHONPATH
             cd testsuites
-            python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE" -k test_multi_node_rccl --no-install --no-uninstall
+            python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE_ABS" -k test_multi_node_rccl --no-install --no-uninstall
           else
             # For other tests: use run_test.py
             python3 run_test.py "$TEST_NAME" "$DOCKER_IMAGE" "$NO_INSTALL" "$NO_UNINSTALL" "$TESTBED_FILE"