diff --git a/Jenkinsfile b/Jenkinsfile
index 756892b901d..7730f2a0d93 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -5,7 +5,7 @@
 /* groovylint-disable ParameterName, VariableName */
 /* Copyright 2019-2024 Intel Corporation
 /* Copyright 2025 Google LLC
- * Copyright 2025 Hewlett Packard Enterprise Development LP
+ * Copyright 2025-2026 Hewlett Packard Enterprise Development LP
  * All rights reserved.
  *
  * This file is part of the DAOS Project. It is subject to the license terms
@@ -403,6 +403,9 @@ pipeline {
         string(name: 'FUNCTIONAL_HARDWARE_LARGE_LABEL',
                defaultValue: 'ci_nvme9',
                description: 'Label to use for 9 node Functional Hardware Large (MD on SSD) stages')
+        string(name: 'FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION',
+               defaultValue: 'el8.8',
+               description: 'Version of OS used in all Functional Hardware Medium stages')
         string(name: 'CI_STORAGE_PREP_LABEL',
                defaultValue: '',
                description: 'Label for cluster to do a DAOS Storage Preparation')
@@ -628,6 +631,7 @@ pipeline {
                     }
                 }
             }
+/*
             stage('Build on Leap 15.5') {
                 when {
                     beforeAgent true
@@ -718,6 +722,7 @@ pipeline {
                     }
                 }
             }
+*/
         }
     }
     stage('Unit Tests') {
@@ -1147,6 +1152,7 @@ pipeline {
                        stage_tags: 'hw,medium,-provider',
                        default_tags: startedByTimer() ? 'pr daily_regression' : 'pr',
                        nvme: 'auto',
+                       image_version: params.FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION,
                        run_if_pr: false,
                        run_if_landing: false,
                        job_status: job_status_internal
@@ -1159,6 +1165,7 @@ pipeline {
                        stage_tags: 'hw,medium,-provider',
                        default_tags: startedByTimer() ? 'pr daily_regression' : 'pr',
                        nvme: 'auto_md_on_ssd',
+                       image_version: params.FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION,
                        run_if_pr: true,
                        run_if_landing: false,
                        job_status: job_status_internal
@@ -1172,6 +1179,7 @@ pipeline {
                        /* groovylint-disable-next-line UnnecessaryGetter */
                        default_tags: startedByTimer() ? 'pr daily_regression' : 'pr',
                        nvme: 'auto',
+                       image_version: params.FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION,
                        run_if_pr: false,
                        run_if_landing: false,
                        job_status: job_status_internal
@@ -1185,6 +1193,7 @@ pipeline {
                        default_tags: startedByTimer() ? 'pr daily_regression' : 'pr',
                        default_nvme: 'auto',
                        provider: 'ofi+verbs;ofi_rxm',
+                       image_version: params.FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION,
                        run_if_pr: false,
                        run_if_landing: false,
                        job_status: job_status_internal
@@ -1198,6 +1207,7 @@ pipeline {
                        default_tags: startedByTimer() ? 'pr daily_regression' : 'pr',
                        default_nvme: 'auto_md_on_ssd',
                        provider: 'ofi+verbs;ofi_rxm',
+                       image_version: params.FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION,
                        run_if_pr: true,
                        run_if_landing: false,
                        job_status: job_status_internal
@@ -1211,6 +1221,7 @@ pipeline {
                        default_tags: startedByTimer() ? 'pr daily_regression' : 'pr',
                        default_nvme: 'auto',
                        provider: cachedCommitPragma('Test-provider-ucx', 'ucx+ud_x'),
+                       image_version: params.FUNCTIONAL_HARDWARE_MEDIUM_IMAGE_VERSION,
                        run_if_pr: false,
                        run_if_landing: false,
                        job_status: job_status_internal
diff --git a/ci/provisioning/post_provision_config_nodes.sh b/ci/provisioning/post_provision_config_nodes.sh
index 37ac6f23aaa..bd3f52d8cae 100644
--- a/ci/provisioning/post_provision_config_nodes.sh
+++ b/ci/provisioning/post_provision_config_nodes.sh
@@ -1,11 +1,11 @@
 #!/bin/bash
 #
 # Copyright 2020-2023 Intel Corporation.
-# Copyright 2025 Hewlett Packard Enterprise Development LP
+# Copyright 2025-2026 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
-set -eux
+set -euxo pipefail
 
 env > /root/last_run-env.txt
 
@@ -78,79 +78,272 @@ if lspci | grep -i nvme; then
     daos_server nvme reset && rmmod vfio_pci && modprobe vfio_pci
 fi
 
-# FOR now limit to 2 devices per CPU NUMA node
-: "${DAOS_CI_NVME_NUMA_LIMIT:=2}"
+# This workaround ensures that the NVMe configuration remains consistent across
+# all cluster nodes.
+# This prevents situations where the binding between NVMe devices and PCIe
+# addresses varies from restart to restart, resulting in error messages such as
+# "Failed to initialize SSD: [xxxx:xx:xx.x]" when DAOS engines are started.
+SPDK_SETUP_CMD="/usr/share/daos/spdk/scripts/setup.sh"
+
+check_spdk_setup_cmd () {
+    if [ ! -d "$(dirname "$SPDK_SETUP_CMD")" ] || [ ! -f "$SPDK_SETUP_CMD" ]; then
+        echo -n "Required SPDK scripts directory $(dirname "$SPDK_SETUP_CMD")"
+        echo " or setup.sh not found!"
+        return 1
+    fi
+    return 0
+}
+
+get_nvme_count_devices () {
+    lspci -D | grep -c -E "Non-Volatile memory controller" || true
+}
+
+declare -A MOUNTED_PCI_DEVICES
+declare -A PCI_DEVICES_WITH_DATA
+pci_device_create_cache () {
+    MOUNTED_PCI_DEVICES=()
+    PCI_DEVICES_WITH_DATA=()
+    if check_spdk_setup_cmd; then
+        local status_output line pci_device_address
+        status_output="$($SPDK_SETUP_CMD status 2>&1)"
+        while read -r line; do
+            pci_device_address="${line%% *}"
+            if [[ "$pci_device_address" =~ ^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9]$ ]]; then
+                [[ "$line" == *"Active devices: mount@"* ]] && MOUNTED_PCI_DEVICES["$pci_device_address"]=1
+                [[ "$line" == *"Active devices: data@"* ]] && PCI_DEVICES_WITH_DATA["$pci_device_address"]=1
+            fi
+        done <<< "$status_output"
+    fi
+    echo "Cached ${#MOUNTED_PCI_DEVICES[@]} mounted PCI devices"
+    echo "Cached ${#PCI_DEVICES_WITH_DATA[@]} PCI devices with data"
+}
+
+pci_device_is_mounted() {
+    local pci_device_address="${1:?Usage: pci_device_is_mounted <pci_device_address>}"
+    [[ -v MOUNTED_PCI_DEVICES[$pci_device_address] ]]
+}
+
+pci_device_has_data() {
+    local pci_device_address="${1:?Usage: pci_device_has_data <pci_device_address>}"
+    [[ -v PCI_DEVICES_WITH_DATA[$pci_device_address] ]]
+}
+
+pci_device_get_numa () {
+    local pci_device="${1:?Usage: pci_device_get_numa <pci_device_address>}"
+    local pci_device_numa_path="/sys/bus/pci/devices/${pci_device}/numa_node"
+    cat "${pci_device_numa_path}"
+}
+
+nvme_dev_get_first_by_pcie_addr () {
+    local pci_device_address="${1:?Usage: nvme_dev_get_first_by_pcie_addr <pci_device_address>}"
+    local nvme_dir="/sys/bus/pci/devices/$pci_device_address/nvme"
+    local nvme_device symlink
+    if [ -d "$nvme_dir" ]; then
+        for symlink in "$nvme_dir"/*; do
+            [ -e "$symlink" ] || continue
+            nvme_device=$(basename "$symlink")
+            echo -n "${nvme_device}"
+            return
+        done
+    else
+        echo "ERROR: nvme_dev_get_first_by_pcie_addr can not find nvme for $pci_device_address"
+        exit 1
+    fi
+}
+
+# Calculates --nsze for a device so the namespace spans the full usable capacity
+nvme_calc_full_nsze () {
+    local nvme_device="${1:?Usage: nvme_calc_full_nsze <nvme_device>}"
+    # Query the NVMe device info for total logical blocks and LBA size
+    # Prefer tnvmcap, fallback to unvmcap if tnvmcap not found
+    local nvmcap_bytes
+    nvmcap_bytes=$(nvme id-ctrl "$nvme_device" 2>/dev/null | \
+        awk -F: '
+            /tnvmcap/ {gsub(/[^0-9]/,"",$2); print $2; found=1; exit}
+            /unvmcap/ && !found {gsub(/[^0-9]/,"",$2); val=$2}
+            END{if(!found && val) print val}
+        ')
+
+    if [[ -z "$nvmcap_bytes" || "$nvmcap_bytes" -eq 0 ]]; then
+        echo "ERROR: Could not find tnvmcap or unvmcap in nvme id-ctrl output" >&2
+        return 1
+    fi
+
+    # Extract the size of a logical block (LBA size), usually from nvme id-ns or id-ctrl
+    local lbads="" id_ns="" lba_bytes="" lba_count=""
+    id_ns=$(nvme id-ns "${nvme_device}n1" 2>/dev/null || true)
+    if [[ -n "$id_ns" ]]; then
+        # Look for "lbads" line in id-ns output
+        lbads=$(echo "$id_ns" | awk -F: '/lbads/ {gsub(/[^0-9]/,"",$2); print $2; exit}')
+    fi
+    if [[ -z "$lbads" ]]; then
+        # fallback: Try to get LBA (logical block addressing) from id-ctrl if possible, else default to 4K
+        lbads=12  # Default for 4096 bytes (2^12 = 4096)
+    fi
+    lba_bytes=$((2 ** lbads))
-function mount_nvme_drive {
-    local drive="$1"
-    file_system=$(file -sL "/dev/$drive")
-    if [[ "$file_system" != *"ext4 filesystem"* ]]; then
-        yes | mkfs -t ext4 "/dev/$drive"
+    # Calculate number of logical blocks
+    lba_count=$(( nvmcap_bytes / lba_bytes ))
+
+    # Output as hexadecimal format for nvme-cli
+    printf -- "0x%x\n" "$lba_count"
+}
+
+nvme_recreate_namespace () {
+# lbaf 0 : ms:0 lbads:9 rp:0x1 (in use) → 512B blocks
+# lbaf 1 : ms:0 lbads:12 rp:0 → 4096B blocks (4K)
+# lbaf 2 : ms:8 lbads:9 rp:0x3 → 512B + 8B metadata
+# lbaf 3 : ms:8 lbads:12 rp:0x2 → 4K + 8B metadata
+# lbaf 4 : ms:64 lbads:12 rp:0x3 → 4K + 64B metadata
+# lbads = log2(block size).
+#   9 → 2⁹ = 512 bytes
+#   12 → 2¹² = 4096 bytes (4K)
+# ms = metadata size per block (0, 8, or 64 bytes).
+# rp = relative performance hint.
+
+    local nvme_device="${1:?Usage: nvme_recreate_namespace <nvme_device> [skip_delete:true|false]}"
+    local skip_delete="${2:-false}"  # true to skip, default false (delete enabled)
+    local nvme_device_path="/dev/${nvme_device}"
+    local nvme_device_ns_path="${nvme_device_path}n1"
+    local nvme_nsze nvme_cntlid
+    # Optionally skip delete step
+    if [[ "$skip_delete" != "true" ]]; then
+        nvme delete-ns "$nvme_device_path" -n 1 || \
+            { echo "ERROR: delete the ${nvme_device_path} namespace failed"; exit 1; }
+        nvme reset "$nvme_device_path" || \
+            { echo "ERROR: reset the ${nvme_device_path} device failed"; exit 1; }
+    else
+        echo "INFO: Skipping namespace delete on $nvme_device_path"
+    fi
+    nvme reset "$nvme_device_path" || \
+        { echo "ERROR: reset the ${nvme_device_path} device failed"; exit 1; }
+
+    nvme_nsze=$(nvme_calc_full_nsze "${nvme_device_path}")
+    nvme create-ns "$nvme_device_path" "--nsze=${nvme_nsze}" "--ncap=${nvme_nsze}" --flbas=0 || \
+        { echo "ERROR: create the ${nvme_device_path} namespace failed"; exit 1; }
+    nvme_cntlid=$(nvme id-ctrl "$nvme_device_path" | grep -iw cntlid | cut -d: -f2 | tr -d ' ')
+    nvme attach-ns "$nvme_device_path" -n 1 -c "$nvme_cntlid" || \
+        { echo "ERROR: attach the ${nvme_device_path} namespace failed"; exit 1; }
+    # Wait (up to 5 retries with increasing sleep) for the device node to appear
+    for i in {1..5}; do
+        if [ -b "$nvme_device_ns_path" ]; then
+            break
         fi
-    mkdir -p "/mnt/$drive"
-    mount "/dev/$drive" "/mnt/$drive"
+        sleep "$i"
+    done
-b "$nvme_device_ns_path" ]; then + echo "ERROR: Namespace $nvme_device_ns_path did not appear after attach" + exit 1 + fi + # selects LBA format index 0 (512B) and no secure erase, just format + nvme format "$nvme_device_ns_path" --lbaf=0 --ses=0 --force || \ + { echo "ERROR: format the ${nvme_device_ns_path} namespace failed"; exit 1; } + nvme reset "$nvme_device_path" || \ + { echo "ERROR: reset the ${nvme_device_path} namespace failed"; exit 1; } + nvme id-ns "$nvme_device_ns_path" |grep -E "lbaf|nvmcap|nsze|ncap|nuse" } +# Format ext4 on each element of array after "daos_reserved" is reached. +mkfs_on_nvme_over_limit () { + local daos_nvme_numa_limit="${1:?Usage: mkfs_on_nvme_over_limit }" + shift + local nvme_pci_address_array=("$@") + local count=0 + local nvme_pci_address nvme_device nvme_device_ns_path + for nvme_pci_address in "${nvme_pci_address_array[@]}"; do + nvme_device=$(nvme_dev_get_first_by_pcie_addr "$nvme_pci_address") + nvme_device_ns_path="/dev/${nvme_device}n1" + # always recreate namespace if it does not exist + if [ ! -e "$nvme_device_ns_path" ]; then + echo "INFO recreate namespace 1 on /dev/${nvme_device} ${nvme_pci_address}" + nvme_recreate_namespace "$nvme_device" true + fi + if [ "$count" -ge "$daos_nvme_numa_limit" ]; then + if ! blkid -t TYPE=ext4 "$nvme_device_ns_path" >/dev/null 2>&1; then + echo "INFO mkfs on $nvme_device_ns_path" + sudo mkfs.ext4 -F "$nvme_device_ns_path" > /dev/null + else + echo "SKIP mkfs on $nvme_device_ns_path" + fi + else + if pci_device_has_data "$nvme_pci_address"; then + echo "INFO clean /dev/${nvme_device} ${nvme_pci_address}" + nvme_recreate_namespace "$nvme_device" + else + echo "SKIP clean /dev/${nvme_device} ${nvme_pci_address}" + fi + fi + ((count++)) || true + done +} -nvme_class="/sys/class/nvme/" -function nvme_limit { - set +x - if [ ! -d "${nvme_class}" ] || [ -z "$(ls -A "${nvme_class}")" ]; then - echo "No NVMe devices found" - return +nvme_setup (){ + local daos_nvme_numa_limit="${1:-?Usage: nvme_setup }" + local numa0_pci_devices=() + local numa1_pci_devices=() + local all_numas_pci_devices + local nvme_count nvme_pcie_address_all nvme_pci_address numa_node + + nvme_count=$(get_nvme_count_devices) + if [ "$nvme_count" -le 1 ]; then # Expect at least 2 NVMe devices for proper setup + return 0 + fi + + if ! 
+        exit 1
+    fi
+
+    set +x
+    pci_device_create_cache
+    set -x
+
+    nvme_pcie_address_all=$(lspci -D | awk '/Non-Volatile memory controller/{print $1}' | sort)
+
+    for nvme_pci_address in $nvme_pcie_address_all; do
+        # Skip already mounted namespace
+        if pci_device_is_mounted "$nvme_pci_address"; then
+            echo "Skip already mounted namespace $nvme_pci_address"
+            continue
         fi
-    local numa0_devices=()
-    local numa1_devices=()
-    for nvme_path in "$nvme_class"*; do
-        nvme="$(basename "$nvme_path")n1"
-        numa_node="$(cat "${nvme_path}/numa_node")"
-        if mount | grep "$nvme"; then
-            continue
-        fi
-        if [ "$numa_node" -eq 0 ]; then
-            numa0_devices+=("$nvme")
-        else
-            numa1_devices+=("$nvme")
-        fi
-    done
-    echo numa0 "${numa0_devices[@]}"
-    echo numa1 "${numa1_devices[@]}"
-    if [ "${#numa0_devices[@]}" -gt 0 ] && [ "${#numa1_devices[@]}" -gt 0 ]; then
-        echo "balanced NVMe configuration possible"
-        nvme_count=0
-        for nvme in "${numa0_devices[@]}"; do
-            if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then
-                mount_nvme_drive "$nvme"
-            else
-                ((nvme_count++)) || true
-            fi
-        done
-        nvme_count=0
-        for nvme in "${numa1_devices[@]}"; do
-            if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then
-                mount_nvme_drive "$nvme"
-            else
-                ((nvme_count++)) || true
-            fi
-        done
+        numa_node="$(pci_device_get_numa "$nvme_pci_address")"
+        if [ "$numa_node" -eq 0 ]; then
+            numa0_pci_devices+=("$nvme_pci_address")
         else
-        echo "balanced NVMe configuration not possible"
-        for nvme in "${numa0_devices[@]}" "${numa1_devices[@]}"; do
-            ((needed = "$DAOS_CI_NVME_NUMA_LIMIT" + 1)) || true
-            nvme_count=0
-            if [ "$nvme_count" -ge "$needed" ]; then
-                mount_nvme_drive "$nvme"
-            else
-                ((nvme_count++)) || true
-            fi
-        done
+            numa1_pci_devices+=("$nvme_pci_address")
+        fi
+    done
+    echo NUMA0 PCIe devices: "${numa0_pci_devices[@]}"
+    echo NUMA1 PCIe devices: "${numa1_pci_devices[@]}"
+    if [ "${#numa0_pci_devices[@]}" -ge "$daos_nvme_numa_limit" ] && \
+       [ "${#numa1_pci_devices[@]}" -ge "$daos_nvme_numa_limit" ]; then
+        echo "balanced NVMe configuration possible"
+        mkfs_on_nvme_over_limit "$daos_nvme_numa_limit" "${numa0_pci_devices[@]}"
+        mkfs_on_nvme_over_limit "$daos_nvme_numa_limit" "${numa1_pci_devices[@]}"
+    else
+        daos_nvme_numa_limit=$((daos_nvme_numa_limit + daos_nvme_numa_limit))
+        all_numas_pci_devices=( "${numa0_pci_devices[@]}" "${numa1_pci_devices[@]}" )
+        echo "balanced NVMe configuration not possible"
+        mkfs_on_nvme_over_limit "$daos_nvme_numa_limit" "${all_numas_pci_devices[@]}"
+    fi
+}
+
+function spdk_setup_status {
+    set +e
+    if check_spdk_setup_cmd; then
+        "$SPDK_SETUP_CMD" status
     fi
-    set -x
+    set -e
 }
 
-# Force only the desired number of NVMe devices to be seen by DAOS tests
-# by mounting the extra ones.
-nvme_limit
+# For now limit to 2 devices per CPU NUMA node
+: "${DAOS_CI_NVME_NUMA_LIMIT:=2}"
+
+spdk_setup_status
+nvme_setup "$DAOS_CI_NVME_NUMA_LIMIT"
+spdk_setup_status
+if command -v daos_server >/dev/null 2>&1; then
+    daos_server nvme scan
+fi
 
 systemctl enable nfs-server.service
 systemctl start nfs-server.service
diff --git a/ci/provisioning/post_provision_config_nodes_EL.sh b/ci/provisioning/post_provision_config_nodes_EL.sh
index 75e1d7934e3..505d445fca3 100644
--- a/ci/provisioning/post_provision_config_nodes_EL.sh
+++ b/ci/provisioning/post_provision_config_nodes_EL.sh
@@ -18,6 +18,9 @@ set +e
     set -e
     # Seems to be needed to fix some issues.
    dnf -y reinstall sssd-common
+    # Seems to be required until https://daosio.atlassian.net/browse/DAOS-18358
+    # is fixed.
+    dnf -y remove clamav-lib
 }
 
 group_repo_post() {
diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py
index cd8bf8eeed2..ff6d090f194 100644
--- a/src/tests/ftest/util/launch_utils.py
+++ b/src/tests/ftest/util/launch_utils.py
@@ -1,6 +1,6 @@
 """
   (C) Copyright 2022-2024 Intel Corporation.
-  (C) Copyright 2025 Hewlett Packard Enterprise Development LP
+  (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -1257,6 +1257,7 @@ def _setup_application_directory(self, logger, result):
             run_local(logger, f"ls -al '{self._test_env.app_dir}'")
         return 0
 
+    # pylint: disable=too-many-locals
     def run_tests(self, logger, result, repeat, slurm_setup, sparse, fail_fast, stop_daos,
                   archive, rename, jenkins_log, core_files, threshold, user_create,
                   code_coverage, job_results_dir, logdir, clear_mounts, cleanup_files):
@@ -1287,6 +1288,7 @@ def run_tests(self, logger, result, repeat, slurm_setup, sparse, fail_fast, stop
             int: status code indicating any issues running tests
         """
         return_code = 0
+        return_execute_code = 0
         runner = TestRunner(self._avocado, result, len(self.tests), repeat, self.tag_filters)
 
         # Display the location of the avocado logs
@@ -1296,6 +1298,10 @@ def run_tests(self, logger, result, repeat, slurm_setup, sparse, fail_fast, stop
         if not code_coverage.setup(logger, result.tests[0]):
             return_code |= 128
 
+        # ignore return code from coverage setup as tests can still run until first failure
+        if repeat > 1:
+            return_code = 0
+
         self._details["tests"] = []
 
         # Run each test for as many repetitions as requested
@@ -1332,7 +1338,9 @@ def run_tests(self, logger, result, repeat, slurm_setup, sparse, fail_fast, stop
                     continue
 
                 # Run the test with avocado
-                return_code |= runner.execute(logger, test, loop, sequence + 1, sparse, fail_fast)
+                return_execute_code = runner.execute(logger, test, loop, sequence + 1, sparse,
+                                                     fail_fast)
+                return_code |= return_execute_code
 
                 # Archive the test results
                 return_code |= runner.process(
@@ -1344,6 +1352,15 @@ def run_tests(self, logger, result, repeat, slurm_setup, sparse, fail_fast, stop
 
                 # Stop logging to the test log file
                 logger.removeHandler(test_file_handler)
+                if repeat > 1 and return_execute_code != 0:
+                    logger.info("Failure at test repetition %s/%s/%d: %d. ", loop, repeat,
+                                sequence + 1, return_execute_code)
+                    break
+
+            if repeat > 1 and return_execute_code != 0:
+                logger.info("Failure at test repetition %s/%s: %d. ", loop, repeat,
+                            return_execute_code)
+                break
 
         # Cleanup any specified files at the end of testing
         for file, info in cleanup_files.items():
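Reviewer note: as a quick sanity check of the namespace-size math used by `nvme_calc_full_nsze` above, here is a minimal standalone sketch. The capacity and `lbads` values below are made up for illustration; the patch itself reads `tnvmcap`/`unvmcap` and `lbads` from `nvme id-ctrl` / `nvme id-ns` output.

```bash
#!/bin/bash
# Standalone illustration only -- sample numbers are invented, not taken from the patch.
set -euo pipefail

tnvmcap_bytes=960197124096   # example total NVM capacity, as reported by 'nvme id-ctrl'
lbads=12                     # log2(logical block size); 12 -> 4096-byte blocks

lba_bytes=$((2 ** lbads))
lba_count=$((tnvmcap_bytes / lba_bytes))

# nvme create-ns takes --nsze/--ncap as a block count; hex is the conventional form
printf -- 'lba_bytes=%d lba_count=%d nsze=0x%x\n' "$lba_bytes" "$lba_count" "$lba_count"
```

The resulting block count is passed to `nvme create-ns --nsze/--ncap`, and `--flbas=0` then selects LBA format 0 (512-byte blocks) from the table in the `nvme_recreate_namespace` comment.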