Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/tests/ftest/recovery/dlck_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

Check failure on line 2 in src/tests/ftest/recovery/dlck_basic.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from apricot import TestWithServers
from dlck_utils import DlckCommand


class DlckBasicTest(TestWithServers):
"""Test class for dlck command line utility.

:avocado: recursive
"""
def test_dlck_basic_test(self):
"""Basic Test: Run 'dlck' command

:avocado: tags=all,daily_regression
:avocado: tags=hw,medium
:avocado: tags=recovery
:avocado: tags=DlckBasicTest,dlck_cmd,test_dlck_basic
"""
errors = []
dmg = self.get_dmg_command()
self.add_pool()
pool_uuids = dmg.get_pool_list_uuids(no_query=True)
scm_mount = self.server_managers[0].get_config_value("scm_mount")
if self.server_managers[0].manager.job.using_control_metadata:
log_dir = os.path.dirname(self.server_managers[0].get_config_value("log_file"))
control_metadata_dir = os.path.join(log_dir, "control_metadata")
nvme_conf=os.path.join(control_metadata_dir, "daos_nvme.conf")

Check failure on line 33 in src/tests/ftest/recovery/dlck_basic.py

View workflow job for this annotation

GitHub Actions / Flake8 check

E225 missing whitespace around operator
dmg.system_stop()
host = self.server_managers[0].hosts[0:1]
if self.server_managers[0].manager.job.using_control_metadata:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf,
storage_mount=scm_mount)

Check failure on line 38 in src/tests/ftest/recovery/dlck_basic.py

View workflow job for this annotation

GitHub Actions / Flake8 check

E128 continuation line under-indented for visual indent
else:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount)
result = dlck_cmd.run()
if not result.passed:
errors.append(f"dlck failed on {result.failed_hosts}")
self.log.info("dlck basic test output:\n%s", result)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.log.info("dlck basic test output:\n%s", result)
self.log.info(f"dlck basic test output:\n{result}")

dmg.system_start()
if errors:
self.fail("Errors detected:\n{}".format("\n".join(errors)))
self.log.info("dlck basic test passed with no errors")
22 changes: 22 additions & 0 deletions src/tests/ftest/recovery/dlck_basic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
hosts:
test_servers: 1
test_clients: 1

timeout: 500

server_config:
name: daos_server
engines_per_host: 1
engines:
0:
targets: 4
storage: auto
1:
targets: 4
storage: auto
Comment on lines +14 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same here. A single engine should be enough.


setup:
start_servers_once: False

pool:
size: 50%
94 changes: 94 additions & 0 deletions src/tests/ftest/recovery/dlck_basic_fault.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

Check failure on line 2 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from apricot import TestWithServers
from dlck_utils import DlckCommand
from fault_config_utils import FaultInjection
from file_utils import distribute_files
from run_utils import run_remote


class DlckBasicFaultTest(TestWithServers):
"""Test class for dlck command line utility.

:avocado: recursive
"""
def test_dlck_basic_fault_test(self):
"""Basic Fault Test: Run 'dlck' injecting basic faults.

:avocado: tags=all,full_regression
:avocado: tags=hw,medium
:avocado: tags=recovery
:avocado: tags=DlckBasicFaultTest,dlck_cmd,test_dlck_basic_fault
"""
errors = []
faults_dict = {}
faults_object = FaultInjection()
faults_dict = faults_object.get_faults_dict()
fault_list = self.params.get("fault_list", '/run/dlck_test_additional_faults/*')
dmg = self.get_dmg_command()
self.add_pool()
pool_uuids = dmg.get_pool_list_uuids(no_query=True)
scm_mount = self.server_managers[0].get_config_value("scm_mount")
if self.server_managers[0].manager.job.using_control_metadata:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick. A few comments would be nice. Otherwise it is really bad to read.

Suggested change
if self.server_managers[0].manager.job.using_control_metadata:
# build nvme_conf path
if self.server_managers[0].manager.job.using_control_metadata:

log_dir = os.path.dirname(self.server_managers[0].get_config_value("log_file"))
control_metadata_dir = os.path.join(log_dir, "control_metadata")
nvme_conf=os.path.join(control_metadata_dir, "daos_nvme.conf")

Check failure on line 40 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

E225 missing whitespace around operator
fault_inject_file = os.getenv("D_FI_CONFIG", "None set for now")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
fault_inject_file = os.getenv("D_FI_CONFIG", "None set for now")
# check if the D_FI_CONFIG variable is present in the environment
fault_inject_file = os.getenv("D_FI_CONFIG", "None set for now")

if fault_inject_file == "None set for now":
self.fail("D_FI_CONFIG environment variable not set, cannot run fault injection test")
env_str = "D_FI_CONFIG={} ".format(fault_inject_file)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If D_FI_CONFIG is already in the environment why we have to add it again?

self.log.info("Fault injection file contents")
cmd = "cat {}".format(fault_inject_file)

Check warning on line 46 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

W291 trailing whitespace

Check warning on line 46 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Pylint check

trailing-whitespace, Trailing whitespace
host = self.server_managers[0].hosts[0:1]
run_remote(self.log, self.hostlist_clients[0], cmd, timeout=30)
dmg.system_stop()
# Run the testing with the first fault which is injected at the beginning of the test.
if self.server_managers[0].manager.job.using_control_metadata:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf,
storage_mount=scm_mount, env_str=env_str)

Check failure on line 53 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

E128 continuation line under-indented for visual indent
else:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount,
env_str=env_str)
Comment on lines +51 to +56
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this if is unnecessary. You can provide the nvme_conf regardless. I believe it is None by default. If not you can set it to None at the beginning and set it to a meaningful path if necessary. Regardless, I believe there is no need to have two ways of calling this command.

Suggested change
if self.server_managers[0].manager.job.using_control_metadata:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf,
storage_mount=scm_mount, env_str=env_str)
else:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount,
env_str=env_str)
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf,
storage_mount=scm_mount, env_str=env_str)

result = dlck_cmd.run()
if not result.passed:
errors.append(f"dlck failed on {result.failed_hosts}")
self.log.info("dlck basic test output:\n%s", result)
Comment on lines +59 to +60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason why you decided not to use f-string here in contrast to all other cases where you need pass a variable's contents as a string? e.g. the line above.

Suggested change
errors.append(f"dlck failed on {result.failed_hosts}")
self.log.info("dlck basic test output:\n%s", result)
errors.append(f"dlck failed on {result.failed_hosts}")
self.log.info(f"dlck basic test output:\n{result}")

# Now, run the other fault injection flags without rebooting or creating any new pools.
# Rebooting the servers or creating the new pools will result in injectung fault in

Check warning on line 62 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Pylint check

wrong-spelling-in-comment, Wrong spelling of a word 'injectung' in a comment:
# the wrong test code. Fault injections should done only for the dlck alone.
Comment on lines +61 to +63
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • injectung -> injecting
  • should done -> should be done
Suggested change
# Now, run the other fault injection flags without rebooting or creating any new pools.
# Rebooting the servers or creating the new pools will result in injectung fault in
# the wrong test code. Fault injections should done only for the dlck alone.
# Now, run the other fault injection flags without rebooting or creating any new pools.
# Rebooting the servers or creating the new pools will result in injecting fault in
# the wrong test code. Fault injections should be done only for the dlck alone.

for test_fault in fault_list:
with open(fault_inject_file, 'w') as f:
f.write(f"fault_config:\n")

Check failure on line 66 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

F541 f-string is missing placeholders

Check warning on line 66 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Pylint check

f-string-without-interpolation, Using an f-string that does not have any interpolated variables
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick. It is not actually an f-string. You have no variables in there.

Suggested change
f.write(f"fault_config:\n")
f.write("fault_config:\n")

count = 0
for key, value in faults_dict[test_fault].items():
if count == 0:
f.write(f"- {key}: \'{value}\'\n")
else:
f.write(f" {key}: \'{value}\'\n")

Check warning on line 72 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

W291 trailing whitespace

Check warning on line 72 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Pylint check

trailing-whitespace, Trailing whitespace
count += 1

Check warning on line 73 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

W291 trailing whitespace

Check warning on line 73 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Pylint check

trailing-whitespace, Trailing whitespace
f.close()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK with should take care of closing the file without going any further.

Suggested change
f.close()

self.log.info("Reading the updated fault injection file contents")
with open(fault_inject_file, 'r') as f:
file_data = f.read()
self.log.info("\n%s", file_data)
f.close()
Comment on lines +75 to +79
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you are dumping the contents of the file twice in this procedure I think it makes sense to write it once as a helper function and just call it when necessary. Otherwise, the code is getting messy.

distribute_files(self.log, self.hostlist_servers, fault_inject_file,
fault_inject_file)
if self.server_managers[0].manager.job.using_control_metadata:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf,
storage_mount=scm_mount, env_str=env_str)

Check failure on line 84 in src/tests/ftest/recovery/dlck_basic_fault.py

View workflow job for this annotation

GitHub Actions / Flake8 check

E128 continuation line under-indented for visual indent
else:
dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount,
env_str=env_str)
Comment on lines +82 to +87
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same as above. There is no need to have an if just to provide nvme_conf=None for some cases. Please reduce.

result = dlck_cmd.run()
if not result.passed:
errors.append(f"dlck failed on {result.failed_hosts}")
self.log.info("dlck basic test output:\n%s", result)
Comment on lines +88 to +91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same as with dumping the contents of fault injection file you are processing and printing the command result twice in this code. Please write a helper function.

dmg.system_start()
if not errors:
self.fail("No Errors detected:\n{}".format("\n".join(errors)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a very elaborate way of printing an empty list. 😉

Suggested change
self.fail("No Errors detected:\n{}".format("\n".join(errors)))
self.fail("No Errors detected.")

49 changes: 49 additions & 0 deletions src/tests/ftest/recovery/dlck_basic_fault.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
hosts:
test_servers: 1
test_clients: 1

timeout: 500

server_config:
name: daos_server
engines_per_host: 1
engines:
0:
targets: 4
storage: auto
1:
targets: 4
storage: auto
Comment on lines +14 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the value? It looks like one engine too many,

Suggested change
1:
targets: 4
storage: auto


setup:
start_servers_once: True

pool:
size: 50%

dlck:
scm_mount: /mnt/daos0

faults: !mux
create_log_dir:
fault_list:
- DLCK_FAULT_CREATE_LOG_DIR
Comment on lines +27 to +30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not understand this bit. Can you please explain why you use !mux with a single case? and why you do this for this case but no for the other you decided to keep separate.


dlck_test_additional_faults:
fault_list:
- DLCK_FAULT_CREATE_POOL_DIR
- DLCK_FAULT_ENGINE_START
- DLCK_FAULT_ENGINE_EXEC
- DLCK_FAULT_ENGINE_JOIN
- DLCK_FAULT_ENGINE_STOP
- DAOS_FAULT_POOL_NVME_HEALTH
- DAOS_FAULT_POOL_OPEN_BIO
- DAOS_FAULT_POOL_OPEN_UMEM
- DAOS_FAULT_POOL_OPEN_MAGIC
- DAOS_FAULT_POOL_OPEN_VERSION
- DAOS_FAULT_POOL_OPEN_UUID
- DAOS_FAULT_BTREE_OPEN_INV_CLASS
- DAOS_FAULT_BTREE_OPEN_UNREG_CLASS
- DAOS_FAULT_BTREE_FEATURES
- DAOS_FAULT_POOL_EXT_PADDING
- DAOS_FAULT_POOL_EXT_RESERVED
85 changes: 85 additions & 0 deletions src/tests/ftest/util/dlck_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

Check failure on line 2 in src/tests/ftest/util/dlck_utils.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""

from command_utils_base import CommandWithParameters, FormattedParameter
from run_utils import run_remote


class DlckCommand(CommandWithParameters):
"""Defines the basic structures of dlck command."""

def __init__(self, server_host, path, pool_uuid=None, nvme_conf=None, storage_mount=None,
verbose=True, timeout=None, sudo=True, env_str=None):
"""Constructor that sets the common variables for sub-commands.

Args:
server_host (NodeSet): Server host to run the command.
path (str): path to the dlck command.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is curious. Do you expect dlck not to be on the PATH? From what I saw this is what we tend to do in similar tests but I do not understand why we do it this way.

pool_uuid (str, optional): Pool UUID. Defaults to None.
nvme_conf (str, optional): NVMe config file path. Defaults to None.
storage_mount (str, optional): Storage mount point. Defaults to None.
verbose (bool, optional): Display command output in run.
Defaults to True.
timeout (int, optional): Command timeout (sec) used in run. Defaults to
None.
sudo (bool, optional): Whether to run dlck with sudo. Defaults to True.
env_str (str, optional): Environment variable string to prepend to command.
"""
super().__init__("/run/dlck/*", "dlck", path)
# Pass environment variable string
self.env_str = ""
if env_str is not None:
self.env_str = str(env_str)

Check failure on line 35 in src/tests/ftest/util/dlck_utils.py

View workflow job for this annotation

GitHub Actions / Flake8 check

E111 indentation is not a multiple of 4

Check warning on line 35 in src/tests/ftest/util/dlck_utils.py

View workflow job for this annotation

GitHub Actions / Pylint check

bad-indentation, Bad indentation. Found 11 spaces, expected 12

# We need to run with sudo.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hope this comment to become outdated very soon. 😄
On the other hand it does not mean you will be able to run dlck in our CI as a normal user the next day.

I will keep this comment here for now. We will see where we will go with this.

Ref: https://daosio.atlassian.net/browse/DAOS-18324

self.sudo = sudo

self.host = server_host

# Members needed for run().
self.verbose = verbose
self.timeout = timeout

# Pool UUID. (--file pool_uuid[,target_id])
if pool_uuid:
self.pool_uuid = FormattedParameter("--file={}", pool_uuid)

# NVMe config file path. (--nvme_cont nvme_conf_path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# NVMe config file path. (--nvme_cont nvme_conf_path)
# NVMe config file path. (--nvme nvme_conf_path)

if nvme_conf:
self.nvme_conf = FormattedParameter("--nvme={}", nvme_conf)

# Storage mount point. (--storage storage_mount)
if storage_mount:
self.storage_mount = FormattedParameter("--storage={}", storage_mount)

def __str__(self):
"""Return the command with all of its defined parameters as a string.

Returns:
str: the command with all the defined parameters

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant new line.

Suggested change

"""
value = super().__str__()
if self.sudo:
value = " ".join(["sudo -E -n", value])
return value

def run(self):
"""Run the dlck command.
Args:
host (NodeSet): Host(s) on which to run the command.
command (str): Environment Variable string + dlck sub-command to run.
verbose (bool, optional): Display command output in run.
Defaults to True.
timeout (int, optional): Command timeout (sec) used in run. Defaults to
None.
Comment on lines +72 to +78
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe these are not strictly arguments of this method. You may choose to move bits of this documentation to a comment but IMHO they do not belong to this docstring.

Suggested change
Args:
host (NodeSet): Host(s) on which to run the command.
command (str): Environment Variable string + dlck sub-command to run.
verbose (bool, optional): Display command output in run.
Defaults to True.
timeout (int, optional): Command timeout (sec) used in run. Defaults to
None.


Returns:
CommandResult: groups of command results from the same hosts with the same return status
"""
return run_remote(
self.log, self.host, command=self.env_str + str(self), verbose=self.verbose,
timeout=self.timeout)
Loading
Loading