From a642b152b89386462e3f4cca7bc56660ce8c89c9 Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Wed, 4 Feb 2026 10:20:19 +0100 Subject: [PATCH 1/4] Add TNA arbiter topology support to kcli deployment + general improvements --- deploy/Makefile | 32 ++++- deploy/README.md | 4 +- deploy/aws-hypervisor/scripts/create.sh | 45 ++++++- deploy/openshift-clusters/README-kcli.md | 25 +++- deploy/openshift-clusters/kcli-install.yml | 2 +- .../files/config_arbiter_example.sh | 3 +- .../files/config_fencing_example.sh | 6 +- .../roles/kcli/kcli-install/README.md | 13 +- .../kcli/kcli-install/tasks/validate.yml | 7 + .../kcli-install/templates/kcli-params.yml.j2 | 16 ++- .../roles/kcli/kcli-install/vars/main.yml | 4 +- .../scripts/deploy-arbiter-agent.sh | 49 ------- .../scripts/deploy-arbiter-ipi.sh | 48 ------- .../scripts/deploy-cluster.sh | 125 ++++++++++++++++++ .../scripts/deploy-fencing-agent.sh | 48 ------- .../scripts/deploy-fencing-ipi.sh | 48 ------- .../openshift-clusters/vars/kcli.yml.template | 9 +- 17 files changed, 262 insertions(+), 222 deletions(-) delete mode 100755 deploy/openshift-clusters/scripts/deploy-arbiter-agent.sh delete mode 100755 deploy/openshift-clusters/scripts/deploy-arbiter-ipi.sh create mode 100755 deploy/openshift-clusters/scripts/deploy-cluster.sh delete mode 100755 deploy/openshift-clusters/scripts/deploy-fencing-agent.sh delete mode 100755 deploy/openshift-clusters/scripts/deploy-fencing-ipi.sh diff --git a/deploy/Makefile b/deploy/Makefile index a6fd992..548bb4c 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -1,3 +1,18 @@ +# Valid cluster types for 'make deploy ' +VALID_CLUSTER_TYPES := fencing-ipi fencing-agent arbiter-ipi arbiter-agent arbiter-kcli fencing-kcli + +# Handle 'make deploy ' pattern +# When 'deploy' is first, validate any following arguments are valid cluster types +ifeq (deploy,$(firstword $(MAKECMDGOALS))) + DEPLOY_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS)) + ifneq ($(DEPLOY_ARGS),) + INVALID_ARGS := $(filter-out $(VALID_CLUSTER_TYPES),$(DEPLOY_ARGS)) + ifneq ($(INVALID_ARGS),) + $(error Unknown cluster type: '$(INVALID_ARGS)'. Run 'make help' for more information.) + endif + endif +endif + create: @./aws-hypervisor/scripts/create.sh @@ -44,16 +59,22 @@ inventory: @./aws-hypervisor/scripts/inventory.sh fencing-ipi: - @./openshift-clusters/scripts/deploy-fencing-ipi.sh + @./openshift-clusters/scripts/deploy-cluster.sh --topology fencing --method ipi fencing-agent: - @./openshift-clusters/scripts/deploy-fencing-agent.sh + @./openshift-clusters/scripts/deploy-cluster.sh --topology fencing --method agent arbiter-ipi: - @./openshift-clusters/scripts/deploy-arbiter-ipi.sh + @./openshift-clusters/scripts/deploy-cluster.sh --topology arbiter --method ipi arbiter-agent: - @./openshift-clusters/scripts/deploy-arbiter-agent.sh + @./openshift-clusters/scripts/deploy-cluster.sh --topology arbiter --method agent + +arbiter-kcli: + @./openshift-clusters/scripts/deploy-cluster.sh --topology arbiter --method kcli + +fencing-kcli: + @./openshift-clusters/scripts/deploy-cluster.sh --topology fencing --method kcli patch-nodes: @./openshift-clusters/scripts/patch-nodes.sh @@ -65,6 +86,7 @@ help: @echo "" @echo "Instance Lifecycle Management:" @echo " deploy - Create, initialize, and update inventory for new EC2 instance" + @echo " deploy - Deploy EC2 instance AND deploy the specified cluster type" @echo " create - Create new EC2 instance" @echo " init - Initialize deployed instance" @echo " start - Start stopped EC2 instance" @@ -82,6 +104,8 @@ help: @echo " fencing-agent - Deploy fencing Agent cluster (non-interactive) (WIP Experimental)" @echo " arbiter-ipi - Deploy arbiter IPI cluster (non-interactive)" @echo " arbiter-agent - Deploy arbiter Agent cluster (non-interactive)" + @echo " arbiter-kcli - Deploy arbiter cluster using kcli (non-interactive)" + @echo " fencing-kcli - Deploy fencing cluster using kcli (non-interactive)" @echo " redeploy-cluster - Redeploy OpenShift cluster using dev-scripts make redeploy" @echo " shutdown-cluster - Shutdown OpenShift cluster VMs in orderly fashion" @echo " startup-cluster - Start up OpenShift cluster VMs and proxy container" diff --git a/deploy/README.md b/deploy/README.md index f910805..bc915de 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -111,7 +111,7 @@ $ make destroy When running OpenShift clusters on the instance (using dev-scripts), you have several options for managing cluster lifecycle: **Quick deployment commands:** -- `make fencing-ipi`, `make arbiter-ipi`, and `make arbiter-agent` provide non-interactive deployment for specific topologies +- `make fencing-ipi`, `make arbiter-ipi`, `make arbiter-agent`, `make fencing-kcli`, `make arbiter-kcli` provide non-interactive deployment for specific topologies - These commands automatically call the underlying setup.yml playbook with the appropriate configuration - Useful for automation and when you know exactly which topology you want to deploy @@ -169,8 +169,10 @@ $ make start # Quick deployment over clean server $ make fencing-ipi # Deploy fencing topology (IPI method) $ make fencing-agent # Deploy fencing topology (Agent method) (WIP Experimental) +$ make fencing-kcli # Deploy fencing topology (kcli method) $ make arbiter-ipi # Deploy arbiter topology (IPI method) $ make arbiter-agent # Deploy arbiter topology (Agent method) +$ make arbiter-kcli # Deploy arbiter topology (kcli method) ``` #### Option 4: Graceful Cluster Shutdown/Startup (Not recommended due to speed and consistency) diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index 99d5737..c32cd10 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -36,6 +36,34 @@ if [[ -z "${RHEL_HOST_AMI}" ]]; then exit 1 fi +# Check if stack already exists +if aws --region "$REGION" cloudformation describe-stacks --stack-name "${STACK_NAME}" &>/dev/null; then + echo "" + echo "WARNING: CloudFormation stack '${STACK_NAME}' already exists." + echo "" + echo "Options:" + echo " 1) Create new stack with random suffix (${STACK_NAME}-XXXX)" + echo " 2) Abort" + echo "" + read -r -p "Choose an option [1/2]: " choice + + case "$choice" in + 1) + RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 4) + STACK_NAME="${STACK_NAME}-${RANDOM_SUFFIX}" + echo "Using new stack name: ${STACK_NAME}" + ;; + 2) + echo "Aborted." + exit 0 + ;; + *) + echo "Invalid option. Aborted." + exit 1 + ;; + esac +fi + echo "ec2-user" > "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" echo -e "AMI ID: $RHEL_HOST_AMI" @@ -388,11 +416,22 @@ echo "${HOST_PRIVATE_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/private_address" echo "Waiting up to 10 mins for RHEL host to be up." timeout 10m aws ec2 wait instance-status-ok --instance-id "${INSTANCE_ID}" --no-cli-pager -sleep 15 - # Add the host key to known_hosts to avoid prompts while maintaining security echo "Adding host key for $HOST_PUBLIC_IP to known_hosts..." -ssh-keyscan -H "$HOST_PUBLIC_IP" >> ~/.ssh/known_hosts 2>/dev/null +max_attempts=5 +retry_delay=5 +for ((attempt=1; attempt<=max_attempts; attempt++)); do + if ssh-keyscan -H "$HOST_PUBLIC_IP" >> ~/.ssh/known_hosts 2>/dev/null; then + echo "Host key added successfully" + break + fi + if ((attempt < max_attempts)); then + echo "SSH not ready (attempt $attempt/$max_attempts), retrying in ${retry_delay}s..." + sleep "$retry_delay" + else + echo "Warning: Could not retrieve host key after $max_attempts attempts" + fi +done echo "updating sshconfig for aws-hypervisor" (cd "${SCRIPT_DIR}/.." && go run main.go -k aws-hypervisor -h "$HOST_PUBLIC_IP") diff --git a/deploy/openshift-clusters/README-kcli.md b/deploy/openshift-clusters/README-kcli.md index 10cb1a5..1208441 100644 --- a/deploy/openshift-clusters/README-kcli.md +++ b/deploy/openshift-clusters/README-kcli.md @@ -4,7 +4,7 @@ This guide covers deploying OpenShift two-node clusters using the kcli virtualiz ## Overview -The kcli deployment method automates OpenShift two-node cluster creation using **fencing topology** by default. Arbiter topology support will be available for future releases. +The kcli deployment method automates OpenShift two-node cluster creation supporting both **fencing** and **arbiter** topologies. Fencing topology is the default. ## 1. Machine Requirements @@ -138,12 +138,22 @@ ansible-playbook kcli-install.yml \ **Fencing Topology:** ```yaml topology: "fencing" +platform: "baremetal" bmc_user: "admin" bmc_password: "admin123" -bmc_driver: "redfish" +bmc_driver: "redfish" ksushy_port: 9000 ``` +**Arbiter Topology (TNA):** +```yaml +topology: "arbiter" +platform: "none" +arbiter_memory: 8192 +arbiter_numcpus: 2 +arbiter_disk_size: 30 +``` + ## 5. Deployment The deployment uses a **fencing topology** by default and runs non-interactively for consistent automation: @@ -163,6 +173,17 @@ ansible-playbook kcli-install.yml -i inventory.ini ansible-playbook kcli-install.yml -i inventory.ini \ -e "test_cluster_name=prod-edge-cluster" +# Deploy arbiter cluster +ansible-playbook kcli-install.yml -i inventory.ini \ + -e "topology=arbiter" \ + -e "interactive_mode=false" + +# Deploy arbiter cluster with custom configuration +ansible-playbook kcli-install.yml -i inventory.ini \ + -e "topology=arbiter" \ + -e "platform=none" \ + -e "arbiter_memory=16384" \ + -e "interactive_mode=false" ``` To redeploy a cluster, check the [redeployment](#9-redeployment) section diff --git a/deploy/openshift-clusters/kcli-install.yml b/deploy/openshift-clusters/kcli-install.yml index f5ca7da..9af9ec1 100644 --- a/deploy/openshift-clusters/kcli-install.yml +++ b/deploy/openshift-clusters/kcli-install.yml @@ -18,7 +18,7 @@ # Set interactive_mode: true to enable prompts for manual execution interactive_mode: false - # Default topology is fencing (can be overridden to 'arbiter' if needed, not supported at the moment) + # Default topology is fencing (can be overridden to 'arbiter' for TNA clusters) # This default ensures no prompt is triggered when interactive_mode is false topology: fencing diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh index 172cdf7..3224f98 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh @@ -17,6 +17,7 @@ export NUM_MASTERS=2 ## BEGIN Agent Specific Install Config Variables export AGENT_E2E_TEST_SCENARIO="TNA_IPV4" +#export AGENT_PLATFORM_TYPE=none ## END Agent Specific Install Config Variables #### @@ -31,6 +32,6 @@ export OPENSHIFT_CI="true" # You can find the latest public images in https://quay.io/repository/openshift-release-dev/ocp-release?tab=tags # and select your preferred version. Public sources can be found at https://mirror.openshift.com/pub/openshift-v4/ -export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.20.0-ec.4-x86_64 +export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.21.0-x86_64 # Unless you need to override the installer image, this is not needed # export OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="" diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh index dfdde43..5f951da 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh @@ -20,10 +20,6 @@ export CI_TOKEN="sha256~" # You can find the latest public images in https://quay.io/repository/openshift-release-dev/ocp-release?tab=tags # and select your preferred version. Public sources can be found at https://mirror.openshift.com/pub/openshift-v4/ -export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.19.5-multi +export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.21.0-multi # Unless you need to override the installer image, this is not needed # export OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="" - - - - diff --git a/deploy/openshift-clusters/roles/kcli/kcli-install/README.md b/deploy/openshift-clusters/roles/kcli/kcli-install/README.md index e8c06b8..5c4a9cd 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-install/README.md +++ b/deploy/openshift-clusters/roles/kcli/kcli-install/README.md @@ -16,7 +16,7 @@ But adds comprehensive validation and error checking. **Consistent with install-dev role**: This role follows the same patterns as the existing `install-dev` role, using identical variable names (`test_cluster_name`, `topology`) and state management for seamless integration. Key features: -- Automated two-node OpenShift deployment with fencing or arbiter (future release) +- Automated two-node OpenShift deployment with fencing or arbiter topology - Configurable VM specifications and networking - Integration with kcli's BMC/Redfish use of libvirt and sushytools for fencing (ksushy) - Support for both interactive and non-interactive deployment @@ -65,8 +65,8 @@ This role follows the same authentication file conventions as the dev-scripts ro ### Cluster Configuration - `topology`: Deployment topology (required) - - "fencing": Two-node cluster with automatic fencing - - "arbiter": Two-node cluster with arbiter node (not supported yet) + - "fencing": Two-node cluster with automatic fencing + - "arbiter": Two-node cluster with arbiter node (TNA) - `ctlplanes`: Number of control plane nodes (default: 2, required for two-node) - `workers`: Number of worker nodes (default: 0 for two-node configuration) - `cluster_network_type`: OpenShift network type (default: "OVNKubernetes") @@ -109,8 +109,11 @@ If you're installing a specific openshift release image, you will need to set th ### Arbiter Configuration (when topology="arbiter") -- `enable_arbiter`: Automatically set to "true" for arbiter topology -- `arbiter_memory`: Memory for arbiter node in MB (default: 16384) +- `platform`: Platform type - "none" or "baremetal" (required for arbiter) +- `arbiters`: Number of arbiter nodes (default: 1) +- `arbiter_memory`: Memory for arbiter node in MB (default: 8192) +- `arbiter_numcpus`: CPU cores for arbiter node (default: 2) +- `arbiter_disk_size`: Disk size for arbiter node in GB (default: 30) ### Deployment Options diff --git a/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml b/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml index 1166602..8210c1c 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml +++ b/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml @@ -11,6 +11,13 @@ msg: "topology must be either 'fencing' or 'arbiter'" when: topology is not defined or topology not in ['fencing', 'arbiter'] +- name: Validate platform value for arbiter topology + fail: + msg: "platform must be 'none' or 'baremetal' for arbiter topology (got '{{ platform }}')" + when: + - topology == 'arbiter' + - platform not in ['none', 'baremetal'] + - name: Validate domain fail: msg: "domain must be defined and not empty" diff --git a/deploy/openshift-clusters/roles/kcli/kcli-install/templates/kcli-params.yml.j2 b/deploy/openshift-clusters/roles/kcli/kcli-install/templates/kcli-params.yml.j2 index 73f7584..34d3023 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-install/templates/kcli-params.yml.j2 +++ b/deploy/openshift-clusters/roles/kcli/kcli-install/templates/kcli-params.yml.j2 @@ -7,6 +7,9 @@ domain: {{ domain }} topology: {{ topology }} network_type: {{ cluster_network_type }} +# Platform configuration +platform: {{ platform }} + # OpenShift version version: {{ ocp_version }} tag: '{{ ocp_tag }}' @@ -14,7 +17,7 @@ tag: '{{ ocp_tag }}' openshift_release_image: {{ openshift_release_image }} {% endif %} -# Two-node cluster configuration with fencing +# Two-node cluster configuration ctlplanes: {{ ctlplanes }} workers: {{ workers }} @@ -37,16 +40,21 @@ pull_secret: {{ ansible_user_dir }}/pull-secret.json keys: - {{ ansible_user_dir }}/.ssh/id_ed25519.pub -# BMC/Redfish configuration +{% if topology == "fencing" %} +# BMC/Redfish configuration (fencing topology only) bmc_user: {{ bmc_user }} bmc_password: {{ bmc_password }} bmc_driver: {{ bmc_driver }} ksushy_ip: {{ ksushy_ip }} ksushy_port: {{ ksushy_port }} +{% endif %} -# Topology-specific configuration {% if topology == "arbiter" %} -enable_arbiter: {{ enable_arbiter }} +# Arbiter node configuration (TNA topology) +arbiters: {{ arbiters }} +arbiter_numcpus: {{ arbiter_numcpus }} +arbiter_memory: {{ arbiter_memory }} +arbiter_disk_size: {{ arbiter_disk_size }} {% endif %} # Deployment settings diff --git a/deploy/openshift-clusters/roles/kcli/kcli-install/vars/main.yml b/deploy/openshift-clusters/roles/kcli/kcli-install/vars/main.yml index 993f7c0..6f4a61c 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-install/vars/main.yml +++ b/deploy/openshift-clusters/roles/kcli/kcli-install/vars/main.yml @@ -13,4 +13,6 @@ kubeadmin_password_path: "{{ ansible_user_dir }}/.kcli/clusters/{{ test_cluster_ # Topology-based computed values feature_set: "{{ 'TechPreviewNoUpgrade' if topology == 'arbiter' else 'DevPreviewNoUpgrade' }}" -enable_arbiter: "{{ 'true' if topology == 'arbiter' else 'false' }}" \ No newline at end of file + +# Arbiter count - default to 1 for arbiter topology, else 0 +arbiters: "{{ 1 if topology == 'arbiter' else 0 }}" \ No newline at end of file diff --git a/deploy/openshift-clusters/scripts/deploy-arbiter-agent.sh b/deploy/openshift-clusters/scripts/deploy-arbiter-agent.sh deleted file mode 100755 index 65b8474..0000000 --- a/deploy/openshift-clusters/scripts/deploy-arbiter-agent.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Get the directory where this script is located -SCRIPT_DIR=$(dirname "$0") -# Get the deploy directory (two levels up from scripts) -DEPLOY_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -set -o nounset -set -o errexit -set -o pipefail - -# Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then - echo "Error: No instance found. Please run 'make deploy' first." - exit 1 -fi - -echo "Deploying arbiter agent cluster..." - -# Check if inventory.ini exists in the openshift-clusters directory -if [[ ! -f "${DEPLOY_DIR}/openshift-clusters/inventory.ini" ]]; then - echo "Error: inventory.ini not found in ${DEPLOY_DIR}/openshift-clusters/" - echo "Please ensure the inventory file is properly configured." - echo "You can run 'make inventory' to update it with current instance information." - exit 1 -fi - -# Navigate to the openshift-clusters directory and run the setup playbook -echo "Running Ansible setup playbook with arbiter topology in non-interactive mode..." -cd "${DEPLOY_DIR}/openshift-clusters" - -# Run the setup playbook with arbiter topology and non-interactive mode - -if ansible-playbook setup.yml -e "topology=arbiter" -e "interactive_mode=false" -e "method=agent" -i inventory.ini; -then - echo "" - echo "✓ OpenShift arbiter cluster deployment completed successfully!" - echo "" - echo "Next steps:" - echo "1. Source the proxy environment from anywhere:" - echo " source ${DEPLOY_DIR}/openshift-clusters/proxy.env" - echo " (or from openshift-clusters directory: source proxy.env)" - echo "2. Verify cluster access: oc get nodes" - echo "3. Access the cluster console if needed" -else - echo "Error: OpenShift cluster deployment failed!" - echo "Check the Ansible logs for more details." - exit 1 -fi \ No newline at end of file diff --git a/deploy/openshift-clusters/scripts/deploy-arbiter-ipi.sh b/deploy/openshift-clusters/scripts/deploy-arbiter-ipi.sh deleted file mode 100755 index 50b9fe2..0000000 --- a/deploy/openshift-clusters/scripts/deploy-arbiter-ipi.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# Get the directory where this script is located -SCRIPT_DIR=$(dirname "$0") -# Get the deploy directory (two levels up from scripts) -DEPLOY_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -set -o nounset -set -o errexit -set -o pipefail - -# Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then - echo "Error: No instance found. Please run 'make deploy' first." - exit 1 -fi - -echo "Deploying arbiter IPI cluster..." - -# Check if inventory.ini exists in the openshift-clusters directory -if [[ ! -f "${DEPLOY_DIR}/openshift-clusters/inventory.ini" ]]; then - echo "Error: inventory.ini not found in ${DEPLOY_DIR}/openshift-clusters/" - echo "Please ensure the inventory file is properly configured." - echo "You can run 'make inventory' to update it with current instance information." - exit 1 -fi - -# Navigate to the openshift-clusters directory and run the setup playbook -echo "Running Ansible setup playbook with arbiter topology in non-interactive mode..." -cd "${DEPLOY_DIR}/openshift-clusters" - -# Run the setup playbook with arbiter topology and non-interactive mode -if ansible-playbook setup.yml -e "topology=arbiter" -e "interactive_mode=false" -i inventory.ini; -then - echo "" - echo "✓ OpenShift arbiter cluster deployment completed successfully!" - echo "" - echo "Next steps:" - echo "1. Source the proxy environment from anywhere:" - echo " source ${DEPLOY_DIR}/openshift-clusters/proxy.env" - echo " (or from openshift-clusters directory: source proxy.env)" - echo "2. Verify cluster access: oc get nodes" - echo "3. Access the cluster console if needed" -else - echo "Error: OpenShift cluster deployment failed!" - echo "Check the Ansible logs for more details." - exit 1 -fi \ No newline at end of file diff --git a/deploy/openshift-clusters/scripts/deploy-cluster.sh b/deploy/openshift-clusters/scripts/deploy-cluster.sh new file mode 100755 index 0000000..e516253 --- /dev/null +++ b/deploy/openshift-clusters/scripts/deploy-cluster.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# +# Unified cluster deployment script +# Usage: deploy-cluster.sh --topology --method +# + +# Get the directory where this script is located +SCRIPT_DIR=$(dirname "$0") +# Get the deploy directory (two levels up from scripts) +DEPLOY_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +set -o nounset +set -o errexit +set -o pipefail + +# Default values +TOPOLOGY="" +METHOD="" + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --topology) + TOPOLOGY="$2" + shift 2 + ;; + --method) + METHOD="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 --topology --method " + echo "" + echo "Options:" + echo " --topology Cluster topology: arbiter or fencing" + echo " --method Deployment method: ipi, agent, or kcli" + exit 0 + ;; + *) + echo "Error: Unknown option: $1" + echo "Run '$0 --help' for usage information." + exit 1 + ;; + esac +done + +# Validate required arguments +if [[ -z "${TOPOLOGY}" ]]; then + echo "Error: --topology is required (arbiter or fencing)" + exit 1 +fi + +if [[ -z "${METHOD}" ]]; then + echo "Error: --method is required (ipi, agent, or kcli)" + exit 1 +fi + +# Validate topology value +if [[ "${TOPOLOGY}" != "arbiter" && "${TOPOLOGY}" != "fencing" ]]; then + echo "Error: Invalid topology '${TOPOLOGY}'. Must be 'arbiter' or 'fencing'." + exit 1 +fi + +# Validate method value +if [[ "${METHOD}" != "ipi" && "${METHOD}" != "agent" && "${METHOD}" != "kcli" ]]; then + echo "Error: Invalid method '${METHOD}'. Must be 'ipi', 'agent', or 'kcli'." + exit 1 +fi + +# Check if instance data exists +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then + echo "Error: No instance found. Please run 'make deploy' first." + exit 1 +fi + +# Check if inventory.ini exists in the openshift-clusters directory +if [[ ! -f "${DEPLOY_DIR}/openshift-clusters/inventory.ini" ]]; then + echo "Error: inventory.ini not found in ${DEPLOY_DIR}/openshift-clusters/" + echo "Please ensure the inventory file is properly configured." + echo "You can run 'make inventory' to update it with current instance information." + exit 1 +fi + +# Determine playbook and extra variables based on method +case "${METHOD}" in + ipi) + PLAYBOOK="setup.yml" + EXTRA_VARS="-e topology=${TOPOLOGY} -e interactive_mode=false" + METHOD_DISPLAY="IPI" + ;; + agent) + PLAYBOOK="setup.yml" + EXTRA_VARS="-e topology=${TOPOLOGY} -e interactive_mode=false -e method=agent" + METHOD_DISPLAY="agent" + ;; + kcli) + PLAYBOOK="kcli-install.yml" + EXTRA_VARS="-e topology=${TOPOLOGY} -e interactive_mode=false" + METHOD_DISPLAY="kcli" + ;; +esac + +echo "Deploying ${TOPOLOGY} cluster using ${METHOD_DISPLAY} method..." + +# Navigate to the openshift-clusters directory +cd "${DEPLOY_DIR}/openshift-clusters" + +echo "Running Ansible ${PLAYBOOK} playbook with ${TOPOLOGY} topology in non-interactive mode..." + +# Run the playbook +if ansible-playbook "${PLAYBOOK}" ${EXTRA_VARS} -i inventory.ini; then + echo "" + echo "OpenShift ${TOPOLOGY} cluster deployment (${METHOD_DISPLAY}) completed successfully!" + echo "" + echo "Next steps:" + echo "1. Source the proxy environment from anywhere:" + echo " source ${DEPLOY_DIR}/openshift-clusters/proxy.env" + echo " (or from openshift-clusters directory: source proxy.env)" + echo "2. Verify cluster access: oc get nodes" + echo "3. Access the cluster console if needed" +else + echo "Error: OpenShift cluster deployment failed!" + echo "Check the Ansible logs for more details." + exit 1 +fi diff --git a/deploy/openshift-clusters/scripts/deploy-fencing-agent.sh b/deploy/openshift-clusters/scripts/deploy-fencing-agent.sh deleted file mode 100755 index bb60257..0000000 --- a/deploy/openshift-clusters/scripts/deploy-fencing-agent.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# Get the directory where this script is located -SCRIPT_DIR=$(dirname "$0") -# Get the deploy directory (two levels up from scripts) -DEPLOY_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -set -o nounset -set -o errexit -set -o pipefail - -# Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then - echo "Error: No instance found. Please run 'make deploy' first." - exit 1 -fi - -echo "Deploying fencing agent cluster..." - -# Check if inventory.ini exists in the openshift-clusters directory -if [[ ! -f "${DEPLOY_DIR}/openshift-clusters/inventory.ini" ]]; then - echo "Error: inventory.ini not found in ${DEPLOY_DIR}/openshift-clusters/" - echo "Please ensure the inventory file is properly configured." - echo "You can run 'make inventory' to update it with current instance information." - exit 1 -fi - -# Navigate to the openshift-clusters directory and run the setup playbook -echo "Running Ansible setup playbook with fencing topology in non-interactive mode..." -cd "${DEPLOY_DIR}/openshift-clusters" - -# Run the setup playbook with fencing topology and non-interactive mode -if ansible-playbook setup.yml -e "topology=fencing" -e "interactive_mode=false" -e "method=agent" -i inventory.ini; -then - echo "" - echo "✓ OpenShift fencing cluster deployment completed successfully!" - echo "" - echo "Next steps:" - echo "1. Source the proxy environment from anywhere:" - echo " source ${DEPLOY_DIR}/openshift-clusters/proxy.env" - echo " (or from openshift-clusters directory: source proxy.env)" - echo "2. Verify cluster access: oc get nodes" - echo "3. Access the cluster console if needed" -else - echo "Error: OpenShift cluster deployment failed!" - echo "Check the Ansible logs for more details." - exit 1 -fi \ No newline at end of file diff --git a/deploy/openshift-clusters/scripts/deploy-fencing-ipi.sh b/deploy/openshift-clusters/scripts/deploy-fencing-ipi.sh deleted file mode 100755 index 967da8c..0000000 --- a/deploy/openshift-clusters/scripts/deploy-fencing-ipi.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# Get the directory where this script is located -SCRIPT_DIR=$(dirname "$0") -# Get the deploy directory (two levels up from scripts) -DEPLOY_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -set -o nounset -set -o errexit -set -o pipefail - -# Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then - echo "Error: No instance found. Please run 'make deploy' first." - exit 1 -fi - -echo "Deploying fencing IPI cluster..." - -# Check if inventory.ini exists in the openshift-clusters directory -if [[ ! -f "${DEPLOY_DIR}/openshift-clusters/inventory.ini" ]]; then - echo "Error: inventory.ini not found in ${DEPLOY_DIR}/openshift-clusters/" - echo "Please ensure the inventory file is properly configured." - echo "You can run 'make inventory' to update it with current instance information." - exit 1 -fi - -# Navigate to the openshift-clusters directory and run the setup playbook -echo "Running Ansible setup playbook with fencing topology in non-interactive mode..." -cd "${DEPLOY_DIR}/openshift-clusters" - -# Run the setup playbook with fencing topology and non-interactive mode -if ansible-playbook setup.yml -e "topology=fencing" -e "interactive_mode=false" -i inventory.ini; -then - echo "" - echo "✓ OpenShift fencing cluster deployment completed successfully!" - echo "" - echo "Next steps:" - echo "1. Source the proxy environment from anywhere:" - echo " source ${DEPLOY_DIR}/openshift-clusters/proxy.env" - echo " (or from openshift-clusters directory: source proxy.env)" - echo "2. Verify cluster access: oc get nodes" - echo "3. Access the cluster console if needed" -else - echo "Error: OpenShift cluster deployment failed!" - echo "Check the Ansible logs for more details." - exit 1 -fi \ No newline at end of file diff --git a/deploy/openshift-clusters/vars/kcli.yml.template b/deploy/openshift-clusters/vars/kcli.yml.template index 6c23d9a..8df07ad 100644 --- a/deploy/openshift-clusters/vars/kcli.yml.template +++ b/deploy/openshift-clusters/vars/kcli.yml.template @@ -38,8 +38,13 @@ bmc_password: admin123 bmc_driver: redfish # "redfish" or "ipmi" ksushy_port: 9000 -# Arbiter-specific configuration (only used when topology == "arbiter") -arbiter_memory: 16384 # Memory for arbiter node +# Platform configuration: "baremetal" or "none" +platform: baremetal + +# Arbiter-specific configuration (only used when topology == "arbiter") +arbiter_memory: 8192 +arbiter_numcpus: 2 +arbiter_disk_size: 30 # State management cluster_state_dir: "../aws-hypervisor/instance-data" From c6297effeada8390335fbaa9e2b2bfb001629b2e Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Wed, 4 Feb 2026 10:57:59 +0100 Subject: [PATCH 2/4] fixed abort return code --- deploy/aws-hypervisor/scripts/create.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index c32cd10..37dc935 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -55,7 +55,7 @@ if aws --region "$REGION" cloudformation describe-stacks --stack-name "${STACK_N ;; 2) echo "Aborted." - exit 0 + exit 1 ;; *) echo "Invalid option. Aborted." From 544f0e6129b9d2409ea8eab6e9d7cc1261475f4f Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Wed, 4 Feb 2026 12:53:41 +0100 Subject: [PATCH 3/4] minor fixes --- deploy/openshift-clusters/scripts/deploy-cluster.sh | 8 ++++---- deploy/openshift-clusters/vars/kcli.yml.template | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/openshift-clusters/scripts/deploy-cluster.sh b/deploy/openshift-clusters/scripts/deploy-cluster.sh index e516253..72536c1 100755 --- a/deploy/openshift-clusters/scripts/deploy-cluster.sh +++ b/deploy/openshift-clusters/scripts/deploy-cluster.sh @@ -85,17 +85,17 @@ fi case "${METHOD}" in ipi) PLAYBOOK="setup.yml" - EXTRA_VARS="-e topology=${TOPOLOGY} -e interactive_mode=false" + EXTRA_VARS=(-e "topology=${TOPOLOGY}" -e "interactive_mode=false") METHOD_DISPLAY="IPI" ;; agent) PLAYBOOK="setup.yml" - EXTRA_VARS="-e topology=${TOPOLOGY} -e interactive_mode=false -e method=agent" + EXTRA_VARS=(-e "topology=${TOPOLOGY}" -e "interactive_mode=false" -e "method=agent") METHOD_DISPLAY="agent" ;; kcli) PLAYBOOK="kcli-install.yml" - EXTRA_VARS="-e topology=${TOPOLOGY} -e interactive_mode=false" + EXTRA_VARS=(-e "topology=${TOPOLOGY}" -e "interactive_mode=false") METHOD_DISPLAY="kcli" ;; esac @@ -108,7 +108,7 @@ cd "${DEPLOY_DIR}/openshift-clusters" echo "Running Ansible ${PLAYBOOK} playbook with ${TOPOLOGY} topology in non-interactive mode..." # Run the playbook -if ansible-playbook "${PLAYBOOK}" ${EXTRA_VARS} -i inventory.ini; then +if ansible-playbook "${PLAYBOOK}" "${EXTRA_VARS[@]}" -i inventory.ini; then echo "" echo "OpenShift ${TOPOLOGY} cluster deployment (${METHOD_DISPLAY}) completed successfully!" echo "" diff --git a/deploy/openshift-clusters/vars/kcli.yml.template b/deploy/openshift-clusters/vars/kcli.yml.template index 8df07ad..b1b1faf 100644 --- a/deploy/openshift-clusters/vars/kcli.yml.template +++ b/deploy/openshift-clusters/vars/kcli.yml.template @@ -14,8 +14,8 @@ workers: 0 # OpenShift version configuration ocp_version: candidate -ocp_tag: '4.20' -openshift_release_image: "quay.io/openshift-release-dev/ocp-release:4.20.0-ec.6-multi" # Optional: override with specific release image +ocp_tag: '4.21' +openshift_release_image: "quay.io/openshift-release-dev/ocp-release:4.21.0-multi" # Optional: override with specific release image openshift_ci: false # Set to true to avoid using CI_TOKEN (has side effects) # Virtual machine specifications From 9ba2f37d6ea9455b8291fa78da51e7e5b2a4844e Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Thu, 5 Feb 2026 15:33:01 +0100 Subject: [PATCH 4/4] Addressed PR comments --- deploy/Makefile | 5 +++- deploy/README.md | 2 +- deploy/aws-hypervisor/scripts/create.sh | 28 ------------------- .../files/config_arbiter_example.sh | 3 ++ .../roles/kcli/kcli-install/README.md | 2 +- .../kcli/kcli-install/tasks/validate.yml | 2 +- 6 files changed, 10 insertions(+), 32 deletions(-) diff --git a/deploy/Makefile b/deploy/Makefile index 548bb4c..1e8e8ec 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -87,6 +87,7 @@ help: @echo "Instance Lifecycle Management:" @echo " deploy - Create, initialize, and update inventory for new EC2 instance" @echo " deploy - Deploy EC2 instance AND deploy the specified cluster type" + @echo " Valid types: $(VALID_CLUSTER_TYPES)" @echo " create - Create new EC2 instance" @echo " init - Initialize deployed instance" @echo " start - Start stopped EC2 instance" @@ -99,13 +100,15 @@ help: @echo " info - Display instance information" @echo " inventory - Update inventory.ini with current instance IP" @echo "" - @echo "OpenShift Cluster Management:" + @echo "OpenShift Cluster Deployment:" @echo " fencing-ipi - Deploy fencing IPI cluster (non-interactive)" @echo " fencing-agent - Deploy fencing Agent cluster (non-interactive) (WIP Experimental)" @echo " arbiter-ipi - Deploy arbiter IPI cluster (non-interactive)" @echo " arbiter-agent - Deploy arbiter Agent cluster (non-interactive)" @echo " arbiter-kcli - Deploy arbiter cluster using kcli (non-interactive)" @echo " fencing-kcli - Deploy fencing cluster using kcli (non-interactive)" + @echo "" + @echo "OpenShift Cluster Management:" @echo " redeploy-cluster - Redeploy OpenShift cluster using dev-scripts make redeploy" @echo " shutdown-cluster - Shutdown OpenShift cluster VMs in orderly fashion" @echo " startup-cluster - Start up OpenShift cluster VMs and proxy container" diff --git a/deploy/README.md b/deploy/README.md index bc915de..4ac814e 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -111,7 +111,7 @@ $ make destroy When running OpenShift clusters on the instance (using dev-scripts), you have several options for managing cluster lifecycle: **Quick deployment commands:** -- `make fencing-ipi`, `make arbiter-ipi`, `make arbiter-agent`, `make fencing-kcli`, `make arbiter-kcli` provide non-interactive deployment for specific topologies +- `make fencing-ipi`, `make fencing-agent`, `make arbiter-ipi`, `make arbiter-agent`, `make fencing-kcli`, `make arbiter-kcli` provide non-interactive deployment for specific topologies - These commands automatically call the underlying setup.yml playbook with the appropriate configuration - Useful for automation and when you know exactly which topology you want to deploy diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index 37dc935..97bdd5d 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -36,34 +36,6 @@ if [[ -z "${RHEL_HOST_AMI}" ]]; then exit 1 fi -# Check if stack already exists -if aws --region "$REGION" cloudformation describe-stacks --stack-name "${STACK_NAME}" &>/dev/null; then - echo "" - echo "WARNING: CloudFormation stack '${STACK_NAME}' already exists." - echo "" - echo "Options:" - echo " 1) Create new stack with random suffix (${STACK_NAME}-XXXX)" - echo " 2) Abort" - echo "" - read -r -p "Choose an option [1/2]: " choice - - case "$choice" in - 1) - RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 4) - STACK_NAME="${STACK_NAME}-${RANDOM_SUFFIX}" - echo "Using new stack name: ${STACK_NAME}" - ;; - 2) - echo "Aborted." - exit 1 - ;; - *) - echo "Invalid option. Aborted." - exit 1 - ;; - esac -fi - echo "ec2-user" > "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" echo -e "AMI ID: $RHEL_HOST_AMI" diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh index 3224f98..2a887f4 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh @@ -17,6 +17,9 @@ export NUM_MASTERS=2 ## BEGIN Agent Specific Install Config Variables export AGENT_E2E_TEST_SCENARIO="TNA_IPV4" +# Sets the install-config.yaml's platform type. +# The default is 'baremetal'. +# See https://github.com/openshift-metal3/dev-scripts/blob/master/config_example.sh for more details on this variable and its effects. #export AGENT_PLATFORM_TYPE=none ## END Agent Specific Install Config Variables #### diff --git a/deploy/openshift-clusters/roles/kcli/kcli-install/README.md b/deploy/openshift-clusters/roles/kcli/kcli-install/README.md index 5c4a9cd..5176285 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-install/README.md +++ b/deploy/openshift-clusters/roles/kcli/kcli-install/README.md @@ -109,7 +109,7 @@ If you're installing a specific openshift release image, you will need to set th ### Arbiter Configuration (when topology="arbiter") -- `platform`: Platform type - "none" or "baremetal" (required for arbiter) +- `platform`: Platform type - "none" , "baremetal" or "external" (required for arbiter) - `arbiters`: Number of arbiter nodes (default: 1) - `arbiter_memory`: Memory for arbiter node in MB (default: 8192) - `arbiter_numcpus`: CPU cores for arbiter node (default: 2) diff --git a/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml b/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml index 8210c1c..c443603 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml +++ b/deploy/openshift-clusters/roles/kcli/kcli-install/tasks/validate.yml @@ -16,7 +16,7 @@ msg: "platform must be 'none' or 'baremetal' for arbiter topology (got '{{ platform }}')" when: - topology == 'arbiter' - - platform not in ['none', 'baremetal'] + - platform not in ['none', 'baremetal' , 'external' ] - name: Validate domain fail: