-
Notifications
You must be signed in to change notification settings - Fork 338
DAOS-17519 test: Automate dlck testing (basic/fault_injection) #17307
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| """ | ||
| (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
|
|
||
| SPDX-License-Identifier: BSD-2-Clause-Patent | ||
| """ | ||
| import os | ||
|
|
||
| from apricot import TestWithServers | ||
| from dlck_utils import DlckCommand | ||
|
|
||
|
|
||
| class DlckBasicTest(TestWithServers): | ||
| """Test class for dlck command line utility. | ||
|
|
||
| :avocado: recursive | ||
| """ | ||
| def test_dlck_basic_test(self): | ||
| """Basic Test: Run 'dlck' command | ||
|
|
||
| :avocado: tags=all,daily_regression | ||
| :avocado: tags=hw,medium | ||
| :avocado: tags=recovery | ||
| :avocado: tags=DlckBasicTest,dlck_cmd,test_dlck_basic | ||
| """ | ||
| errors = [] | ||
| dmg = self.get_dmg_command() | ||
| self.add_pool() | ||
| pool_uuids = dmg.get_pool_list_uuids(no_query=True) | ||
| scm_mount = self.server_managers[0].get_config_value("scm_mount") | ||
| if self.server_managers[0].manager.job.using_control_metadata: | ||
| log_dir = os.path.dirname(self.server_managers[0].get_config_value("log_file")) | ||
| control_metadata_dir = os.path.join(log_dir, "control_metadata") | ||
| nvme_conf=os.path.join(control_metadata_dir, "daos_nvme.conf") | ||
| dmg.system_stop() | ||
| host = self.server_managers[0].hosts[0:1] | ||
| if self.server_managers[0].manager.job.using_control_metadata: | ||
| dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf, | ||
| storage_mount=scm_mount) | ||
| else: | ||
| dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount) | ||
| result = dlck_cmd.run() | ||
| if not result.passed: | ||
| errors.append(f"dlck failed on {result.failed_hosts}") | ||
| self.log.info("dlck basic test output:\n%s", result) | ||
| dmg.system_start() | ||
| if errors: | ||
| self.fail("Errors detected:\n{}".format("\n".join(errors))) | ||
| self.log.info("dlck basic test passed with no errors") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| hosts: | ||
| test_servers: 1 | ||
| test_clients: 1 | ||
|
|
||
| timeout: 500 | ||
|
|
||
| server_config: | ||
| name: daos_server | ||
| engines_per_host: 1 | ||
| engines: | ||
| 0: | ||
| targets: 4 | ||
| storage: auto | ||
| 1: | ||
| targets: 4 | ||
| storage: auto | ||
|
Comment on lines
+14
to
+16
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same here. A single engine should be enough. |
||
|
|
||
| setup: | ||
| start_servers_once: False | ||
|
|
||
| pool: | ||
| size: 50% | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,94 @@ | ||||||||||||||||||
| """ | ||||||||||||||||||
| (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||||||||||||||||||
|
|
||||||||||||||||||
| SPDX-License-Identifier: BSD-2-Clause-Patent | ||||||||||||||||||
| """ | ||||||||||||||||||
| import os | ||||||||||||||||||
|
|
||||||||||||||||||
| from apricot import TestWithServers | ||||||||||||||||||
| from dlck_utils import DlckCommand | ||||||||||||||||||
| from fault_config_utils import FaultInjection | ||||||||||||||||||
| from file_utils import distribute_files | ||||||||||||||||||
| from run_utils import run_remote | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| class DlckBasicFaultTest(TestWithServers): | ||||||||||||||||||
| """Test class for dlck command line utility. | ||||||||||||||||||
|
|
||||||||||||||||||
| :avocado: recursive | ||||||||||||||||||
| """ | ||||||||||||||||||
| def test_dlck_basic_fault_test(self): | ||||||||||||||||||
| """Basic Fault Test: Run 'dlck' injecting basic faults. | ||||||||||||||||||
|
|
||||||||||||||||||
| :avocado: tags=all,full_regression | ||||||||||||||||||
| :avocado: tags=hw,medium | ||||||||||||||||||
| :avocado: tags=recovery | ||||||||||||||||||
| :avocado: tags=DlckBasicFaultTest,dlck_cmd,test_dlck_basic_fault | ||||||||||||||||||
| """ | ||||||||||||||||||
| errors = [] | ||||||||||||||||||
| faults_dict = {} | ||||||||||||||||||
| faults_object = FaultInjection() | ||||||||||||||||||
| faults_dict = faults_object.get_faults_dict() | ||||||||||||||||||
| fault_list = self.params.get("fault_list", '/run/dlck_test_additional_faults/*') | ||||||||||||||||||
| dmg = self.get_dmg_command() | ||||||||||||||||||
| self.add_pool() | ||||||||||||||||||
| pool_uuids = dmg.get_pool_list_uuids(no_query=True) | ||||||||||||||||||
| scm_mount = self.server_managers[0].get_config_value("scm_mount") | ||||||||||||||||||
| if self.server_managers[0].manager.job.using_control_metadata: | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick. A few comments would be nice. Otherwise it is really bad to read.
Suggested change
|
||||||||||||||||||
| log_dir = os.path.dirname(self.server_managers[0].get_config_value("log_file")) | ||||||||||||||||||
| control_metadata_dir = os.path.join(log_dir, "control_metadata") | ||||||||||||||||||
| nvme_conf=os.path.join(control_metadata_dir, "daos_nvme.conf") | ||||||||||||||||||
| fault_inject_file = os.getenv("D_FI_CONFIG", "None set for now") | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| if fault_inject_file == "None set for now": | ||||||||||||||||||
| self.fail("D_FI_CONFIG environment variable not set, cannot run fault injection test") | ||||||||||||||||||
| env_str = "D_FI_CONFIG={} ".format(fault_inject_file) | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||||||||||||||||||
| self.log.info("Fault injection file contents") | ||||||||||||||||||
| cmd = "cat {}".format(fault_inject_file) | ||||||||||||||||||
|
Check warning on line 46 in src/tests/ftest/recovery/dlck_basic_fault.py
|
||||||||||||||||||
| host = self.server_managers[0].hosts[0:1] | ||||||||||||||||||
| run_remote(self.log, self.hostlist_clients[0], cmd, timeout=30) | ||||||||||||||||||
| dmg.system_stop() | ||||||||||||||||||
| # Run the testing with the first fault which is injected at the beginning of the test. | ||||||||||||||||||
| if self.server_managers[0].manager.job.using_control_metadata: | ||||||||||||||||||
| dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf, | ||||||||||||||||||
| storage_mount=scm_mount, env_str=env_str) | ||||||||||||||||||
| else: | ||||||||||||||||||
| dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount, | ||||||||||||||||||
| env_str=env_str) | ||||||||||||||||||
|
Comment on lines
+51
to
+56
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this if is unnecessary. You can provide the
Suggested change
|
||||||||||||||||||
| result = dlck_cmd.run() | ||||||||||||||||||
| if not result.passed: | ||||||||||||||||||
| errors.append(f"dlck failed on {result.failed_hosts}") | ||||||||||||||||||
| self.log.info("dlck basic test output:\n%s", result) | ||||||||||||||||||
|
Comment on lines
+59
to
+60
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason why you decided not to use f-string here in contrast to all other cases where you need pass a variable's contents as a string? e.g. the line above.
Suggested change
|
||||||||||||||||||
| # Now, run the other fault injection flags without rebooting or creating any new pools. | ||||||||||||||||||
| # Rebooting the servers or creating the new pools will result in injectung fault in | ||||||||||||||||||
| # the wrong test code. Fault injections should done only for the dlck alone. | ||||||||||||||||||
|
Comment on lines
+61
to
+63
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| for test_fault in fault_list: | ||||||||||||||||||
| with open(fault_inject_file, 'w') as f: | ||||||||||||||||||
| f.write(f"fault_config:\n") | ||||||||||||||||||
|
Check failure on line 66 in src/tests/ftest/recovery/dlck_basic_fault.py
|
||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick. It is not actually an f-string. You have no variables in there.
Suggested change
|
||||||||||||||||||
| count = 0 | ||||||||||||||||||
| for key, value in faults_dict[test_fault].items(): | ||||||||||||||||||
| if count == 0: | ||||||||||||||||||
| f.write(f"- {key}: \'{value}\'\n") | ||||||||||||||||||
| else: | ||||||||||||||||||
| f.write(f" {key}: \'{value}\'\n") | ||||||||||||||||||
|
Check warning on line 72 in src/tests/ftest/recovery/dlck_basic_fault.py
|
||||||||||||||||||
| count += 1 | ||||||||||||||||||
|
Check warning on line 73 in src/tests/ftest/recovery/dlck_basic_fault.py
|
||||||||||||||||||
| f.close() | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAIK
Suggested change
|
||||||||||||||||||
| self.log.info("Reading the updated fault injection file contents") | ||||||||||||||||||
| with open(fault_inject_file, 'r') as f: | ||||||||||||||||||
| file_data = f.read() | ||||||||||||||||||
| self.log.info("\n%s", file_data) | ||||||||||||||||||
| f.close() | ||||||||||||||||||
|
Comment on lines
+75
to
+79
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you are dumping the contents of the file twice in this procedure I think it makes sense to write it once as a helper function and just call it when necessary. Otherwise, the code is getting messy. |
||||||||||||||||||
| distribute_files(self.log, self.hostlist_servers, fault_inject_file, | ||||||||||||||||||
| fault_inject_file) | ||||||||||||||||||
| if self.server_managers[0].manager.job.using_control_metadata: | ||||||||||||||||||
| dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], nvme_conf=nvme_conf, | ||||||||||||||||||
| storage_mount=scm_mount, env_str=env_str) | ||||||||||||||||||
| else: | ||||||||||||||||||
| dlck_cmd = DlckCommand(host, self.bin, pool_uuids[0], storage_mount=scm_mount, | ||||||||||||||||||
| env_str=env_str) | ||||||||||||||||||
|
Comment on lines
+82
to
+87
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same as above. There is no need to have an |
||||||||||||||||||
| result = dlck_cmd.run() | ||||||||||||||||||
| if not result.passed: | ||||||||||||||||||
| errors.append(f"dlck failed on {result.failed_hosts}") | ||||||||||||||||||
| self.log.info("dlck basic test output:\n%s", result) | ||||||||||||||||||
|
Comment on lines
+88
to
+91
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same as with dumping the contents of fault injection file you are processing and printing the command result twice in this code. Please write a helper function. |
||||||||||||||||||
| dmg.system_start() | ||||||||||||||||||
| if not errors: | ||||||||||||||||||
| self.fail("No Errors detected:\n{}".format("\n".join(errors))) | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a very elaborate way of printing an empty list. 😉
Suggested change
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,49 @@ | ||||||||
| hosts: | ||||||||
| test_servers: 1 | ||||||||
| test_clients: 1 | ||||||||
|
|
||||||||
| timeout: 500 | ||||||||
|
|
||||||||
| server_config: | ||||||||
| name: daos_server | ||||||||
| engines_per_host: 1 | ||||||||
| engines: | ||||||||
| 0: | ||||||||
| targets: 4 | ||||||||
| storage: auto | ||||||||
| 1: | ||||||||
| targets: 4 | ||||||||
| storage: auto | ||||||||
|
Comment on lines
+14
to
+16
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the value? It looks like one engine too many,
Suggested change
|
||||||||
|
|
||||||||
| setup: | ||||||||
| start_servers_once: True | ||||||||
|
|
||||||||
| pool: | ||||||||
| size: 50% | ||||||||
|
|
||||||||
| dlck: | ||||||||
| scm_mount: /mnt/daos0 | ||||||||
|
|
||||||||
| faults: !mux | ||||||||
| create_log_dir: | ||||||||
| fault_list: | ||||||||
| - DLCK_FAULT_CREATE_LOG_DIR | ||||||||
|
Comment on lines
+27
to
+30
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not understand this bit. Can you please explain why you use |
||||||||
|
|
||||||||
| dlck_test_additional_faults: | ||||||||
| fault_list: | ||||||||
| - DLCK_FAULT_CREATE_POOL_DIR | ||||||||
| - DLCK_FAULT_ENGINE_START | ||||||||
| - DLCK_FAULT_ENGINE_EXEC | ||||||||
| - DLCK_FAULT_ENGINE_JOIN | ||||||||
| - DLCK_FAULT_ENGINE_STOP | ||||||||
| - DAOS_FAULT_POOL_NVME_HEALTH | ||||||||
| - DAOS_FAULT_POOL_OPEN_BIO | ||||||||
| - DAOS_FAULT_POOL_OPEN_UMEM | ||||||||
| - DAOS_FAULT_POOL_OPEN_MAGIC | ||||||||
| - DAOS_FAULT_POOL_OPEN_VERSION | ||||||||
| - DAOS_FAULT_POOL_OPEN_UUID | ||||||||
| - DAOS_FAULT_BTREE_OPEN_INV_CLASS | ||||||||
| - DAOS_FAULT_BTREE_OPEN_UNREG_CLASS | ||||||||
| - DAOS_FAULT_BTREE_FEATURES | ||||||||
| - DAOS_FAULT_POOL_EXT_PADDING | ||||||||
| - DAOS_FAULT_POOL_EXT_RESERVED | ||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,85 @@ | ||||||||||||||||
| """ | ||||||||||||||||
| (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||||||||||||||||
|
|
||||||||||||||||
| SPDX-License-Identifier: BSD-2-Clause-Patent | ||||||||||||||||
| """ | ||||||||||||||||
|
|
||||||||||||||||
| from command_utils_base import CommandWithParameters, FormattedParameter | ||||||||||||||||
| from run_utils import run_remote | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| class DlckCommand(CommandWithParameters): | ||||||||||||||||
| """Defines the basic structures of dlck command.""" | ||||||||||||||||
|
|
||||||||||||||||
| def __init__(self, server_host, path, pool_uuid=None, nvme_conf=None, storage_mount=None, | ||||||||||||||||
| verbose=True, timeout=None, sudo=True, env_str=None): | ||||||||||||||||
| """Constructor that sets the common variables for sub-commands. | ||||||||||||||||
|
|
||||||||||||||||
| Args: | ||||||||||||||||
| server_host (NodeSet): Server host to run the command. | ||||||||||||||||
| path (str): path to the dlck command. | ||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one is curious. Do you expect dlck not to be on the PATH? From what I saw this is what we tend to do in similar tests but I do not understand why we do it this way. |
||||||||||||||||
| pool_uuid (str, optional): Pool UUID. Defaults to None. | ||||||||||||||||
| nvme_conf (str, optional): NVMe config file path. Defaults to None. | ||||||||||||||||
| storage_mount (str, optional): Storage mount point. Defaults to None. | ||||||||||||||||
| verbose (bool, optional): Display command output in run. | ||||||||||||||||
| Defaults to True. | ||||||||||||||||
| timeout (int, optional): Command timeout (sec) used in run. Defaults to | ||||||||||||||||
| None. | ||||||||||||||||
| sudo (bool, optional): Whether to run dlck with sudo. Defaults to True. | ||||||||||||||||
| env_str (str, optional): Environment variable string to prepend to command. | ||||||||||||||||
| """ | ||||||||||||||||
| super().__init__("/run/dlck/*", "dlck", path) | ||||||||||||||||
| # Pass environment variable string | ||||||||||||||||
| self.env_str = "" | ||||||||||||||||
| if env_str is not None: | ||||||||||||||||
| self.env_str = str(env_str) | ||||||||||||||||
|
Check failure on line 35 in src/tests/ftest/util/dlck_utils.py
|
||||||||||||||||
|
|
||||||||||||||||
| # We need to run with sudo. | ||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I hope this comment to become outdated very soon. 😄 I will keep this comment here for now. We will see where we will go with this. |
||||||||||||||||
| self.sudo = sudo | ||||||||||||||||
|
|
||||||||||||||||
| self.host = server_host | ||||||||||||||||
|
|
||||||||||||||||
| # Members needed for run(). | ||||||||||||||||
| self.verbose = verbose | ||||||||||||||||
| self.timeout = timeout | ||||||||||||||||
|
|
||||||||||||||||
| # Pool UUID. (--file pool_uuid[,target_id]) | ||||||||||||||||
| if pool_uuid: | ||||||||||||||||
| self.pool_uuid = FormattedParameter("--file={}", pool_uuid) | ||||||||||||||||
|
|
||||||||||||||||
| # NVMe config file path. (--nvme_cont nvme_conf_path) | ||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||
| if nvme_conf: | ||||||||||||||||
| self.nvme_conf = FormattedParameter("--nvme={}", nvme_conf) | ||||||||||||||||
|
|
||||||||||||||||
| # Storage mount point. (--storage storage_mount) | ||||||||||||||||
| if storage_mount: | ||||||||||||||||
| self.storage_mount = FormattedParameter("--storage={}", storage_mount) | ||||||||||||||||
|
|
||||||||||||||||
| def __str__(self): | ||||||||||||||||
| """Return the command with all of its defined parameters as a string. | ||||||||||||||||
|
|
||||||||||||||||
| Returns: | ||||||||||||||||
| str: the command with all the defined parameters | ||||||||||||||||
|
|
||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Redundant new line.
Suggested change
|
||||||||||||||||
| """ | ||||||||||||||||
| value = super().__str__() | ||||||||||||||||
| if self.sudo: | ||||||||||||||||
| value = " ".join(["sudo -E -n", value]) | ||||||||||||||||
| return value | ||||||||||||||||
|
|
||||||||||||||||
| def run(self): | ||||||||||||||||
| """Run the dlck command. | ||||||||||||||||
| Args: | ||||||||||||||||
| host (NodeSet): Host(s) on which to run the command. | ||||||||||||||||
| command (str): Environment Variable string + dlck sub-command to run. | ||||||||||||||||
| verbose (bool, optional): Display command output in run. | ||||||||||||||||
| Defaults to True. | ||||||||||||||||
| timeout (int, optional): Command timeout (sec) used in run. Defaults to | ||||||||||||||||
| None. | ||||||||||||||||
|
Comment on lines
+72
to
+78
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe these are not strictly arguments of this method. You may choose to move bits of this documentation to a comment but IMHO they do not belong to this docstring.
Suggested change
|
||||||||||||||||
|
|
||||||||||||||||
| Returns: | ||||||||||||||||
| CommandResult: groups of command results from the same hosts with the same return status | ||||||||||||||||
| """ | ||||||||||||||||
| return run_remote( | ||||||||||||||||
| self.log, self.host, command=self.env_str + str(self), verbose=self.verbose, | ||||||||||||||||
| timeout=self.timeout) | ||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.