Skip to content

Commit d306ffb

Browse files
sourabgupta3 and rwgk authored
Add logic to skip WSL tests and add cufile.json for async tests (#778)
* Add logic to skip WSL tests and add cufile.json for async tests
* Review Comments
* Pre-commit
* test: improve cuFile test ergonomics and reduce log noise
  - Add @cache to isSupportedFilesystem(), cufileLibraryAvailable(), and cufileVersionLessThan() to avoid redundant checks and repeated INFO log messages
  - Set fixture scope for cufile_env_json to "module" to reduce env var churn

---------

Co-authored-by: Ralf W. Grosse-Kunstleve <rgrossekunst@nvidia.com>
1 parent 173733b commit d306ffb

File tree

2 files changed

+80
-18
lines changed

2 files changed

+80
-18
lines changed

cuda_bindings/tests/cufile.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
// NOTE : Application can override custom configuration via export CUFILE_ENV_PATH_JSON=<filepath>
3+
// e.g : export CUFILE_ENV_PATH_JSON="/home/<xxx>/cufile.json"
4+
5+
6+
"execution" : {
7+
// max number of workitems in the queue;
8+
"max_io_queue_depth": 128,
9+
// max number of host threads per gpu to spawn for parallel IO
10+
"max_io_threads" : 4,
11+
// enable support for parallel IO
12+
"parallel_io" : true,
13+
// minimum IO threshold before splitting the IO
14+
"min_io_threshold_size_kb" : 8192,
15+
// maximum parallelism for a single request
16+
"max_request_parallelism" : 4
17+
}
18+
}

cuda_bindings/tests/test_cufile.py

Lines changed: 62 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,99 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2-
#
32
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
43

54
import ctypes
65
import errno
6+
import logging
77
import os
8+
import pathlib
9+
import platform
810
import tempfile
911
from contextlib import suppress
12+
from functools import cache
1013

1114
import pytest
1215

1316
import cuda.bindings.driver as cuda
1417

18+
# Configure logging to show INFO level and above
19+
logging.basicConfig(
20+
level=logging.INFO,
21+
format="%(levelname)s: %(message)s",
22+
force=True, # Override any existing logging configuration
23+
)
24+
1525
try:
1626
from cuda.bindings import cufile
1727
except ImportError:
1828
cufile = None
1929

2030

31+
def platform_is_wsl():
    """Check if running on Windows Subsystem for Linux (WSL).

    Returns:
        bool: True only when ``platform.system()`` reports "Linux" and the
        text of /proc/version contains "microsoft" (case-insensitive).
        False on any other platform, or when /proc/version cannot be read.
    """
    if platform.system() != "Linux":
        return False
    try:
        kernel_banner = pathlib.Path("/proc/version").read_text()
    except OSError:
        # The result drives a module-level pytest.skip, so an unreadable
        # /proc/version must not surface as an import/collection error.
        return False
    return "microsoft" in kernel_banner.lower()
34+
35+
2136
if cufile is None:
2237
pytest.skip("skipping tests on Windows", allow_module_level=True)
2338

39+
if platform_is_wsl():
40+
pytest.skip("skipping cuFile tests on WSL", allow_module_level=True)
41+
42+
43+
@pytest.fixture(scope="module")
def cufile_env_json():
    """Point CUFILE_ENV_PATH_JSON at a cuFile config while a test module runs.

    /etc/cufile.json is used when it exists; otherwise the cufile.json that
    sits next to this test file is used. After the yield, the variable is put
    back to its previous value, or removed if it was not set beforehand.
    """
    env_var = "CUFILE_ENV_PATH_JSON"
    previous = os.environ.get(env_var)

    system_config = "/etc/cufile.json"
    if os.path.exists(system_config):
        config_path = system_config
    else:
        # Fall back to the config shipped alongside this test module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, "cufile.json")

    logging.info(f"Using cuFile config: {config_path}")
    os.environ[env_var] = config_path
    yield
    # Teardown: leave the environment exactly as it was found.
    if previous is None:
        os.environ.pop(env_var, None)
    else:
        os.environ[env_var] = previous
64+
2465

66+
@cache
def cufileLibraryAvailable():
    """Report whether the cuFile library can be used on this system.

    Availability is probed by requesting the library version; any exception
    raised while doing so is treated as "not available". The result is
    memoized, so the probe and its log message occur once per process.

    Returns:
        bool: True if the version query succeeded, False otherwise.
    """
    try:
        # The version query doubles as the availability probe.
        lib_version = cufile.get_version()
        logging.info(f"cuFile library available, version: {lib_version}")
    except Exception as e:
        logging.warning(f"cuFile library not available: {e}")
        return False
    return True
3577

3678

79+
@cache
def cufileVersionLessThan(target):
    """Tell whether the installed cuFile library is older than ``target``.

    Args:
        target: Value compared against ``cufile.get_version()`` using ``<``.

    Returns:
        bool: True when the library version is below ``target``, and also
        when the version cannot be determined (any error is treated as
        "too old"). Results are memoized per ``target`` value.
    """
    try:
        installed = cufile.get_version()
        logging.info(f"cuFile library version: {installed}")
        too_old = bool(installed < target)
        if too_old:
            logging.warning(f"cuFile library version {installed} is less than required {target}")
        return too_old
    except Exception as e:
        logging.error(f"Error checking cuFile version: {e}")
        # Be conservative: an unknown version is reported as older than target.
        return True
5194

5295

96+
@cache
5397
def isSupportedFilesystem():
5498
"""Check if the current filesystem is supported (ext4 or xfs)."""
5599
try:
@@ -65,14 +109,14 @@ def isSupportedFilesystem():
65109
current_dir = os.path.abspath(".")
66110
if current_dir.startswith(mount_point):
67111
fs_type_lower = fs_type.lower()
68-
print(f"Current filesystem type: {fs_type_lower}")
112+
logging.info(f"Current filesystem type: {fs_type_lower}")
69113
return fs_type_lower in ["ext4", "xfs"]
70114

71115
# If we get here, we couldn't determine the filesystem type
72-
print("Could not determine filesystem type from /proc/mounts")
116+
logging.warning("Could not determine filesystem type from /proc/mounts")
73117
return False
74118
except Exception as e:
75-
print(f"Error checking filesystem type: {e}")
119+
logging.error(f"Error checking filesystem type: {e}")
76120
return False
77121

78122

@@ -730,7 +774,7 @@ def test_cufile_read_write_large():
730774

731775

732776
@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
733-
def test_cufile_write_async():
777+
def test_cufile_write_async(cufile_env_json):
734778
"""Test cuFile asynchronous write operations."""
735779
# Initialize CUDA
736780
(err,) = cuda.cuInit(0)
@@ -823,7 +867,7 @@ def test_cufile_write_async():
823867

824868

825869
@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
826-
def test_cufile_read_async():
870+
def test_cufile_read_async(cufile_env_json):
827871
"""Test cuFile asynchronous read operations."""
828872
# Initialize CUDA
829873
(err,) = cuda.cuInit(0)
@@ -929,7 +973,7 @@ def test_cufile_read_async():
929973

930974

931975
@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
932-
def test_cufile_async_read_write():
976+
def test_cufile_async_read_write(cufile_env_json):
933977
"""Test cuFile asynchronous read and write operations in sequence."""
934978
# Initialize CUDA
935979
(err,) = cuda.cuInit(0)
@@ -1788,13 +1832,13 @@ def test_set_get_parameter_string():
17881832
retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOGGING_LEVEL, 256)
17891833
# Use safe_decode_string to handle null terminators and padding
17901834
retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8"))
1791-
print(f"Logging level test: set {logging_level}, got {retrieved_value}")
1835+
logging.info(f"Logging level test: set {logging_level}, got {retrieved_value}")
17921836
# The retrieved value should be a string, so we can compare directly
17931837
assert retrieved_value == logging_level, (
17941838
f"Logging level mismatch: set {logging_level}, got {retrieved_value}"
17951839
)
17961840
except Exception as e:
1797-
print(f"Logging level test failed: {e}")
1841+
logging.error(f"Logging level test failed: {e}")
17981842
# Re-raise the exception to make the test fail
17991843
raise
18001844

@@ -1810,11 +1854,11 @@ def test_set_get_parameter_string():
18101854
retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.ENV_LOGFILE_PATH, 256)
18111855
# Use safe_decode_string to handle null terminators and padding
18121856
retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8"))
1813-
print(f"Log file path test: set {logfile_path}, got {retrieved_value}")
1857+
logging.info(f"Log file path test: set {logfile_path}, got {retrieved_value}")
18141858
# The retrieved value should be a string, so we can compare directly
18151859
assert retrieved_value == logfile_path, f"Log file path mismatch: set {logfile_path}, got {retrieved_value}"
18161860
except Exception as e:
1817-
print(f"Log file path test failed: {e}")
1861+
logging.error(f"Log file path test failed: {e}")
18181862
# Re-raise the exception to make the test fail
18191863
raise
18201864

@@ -1828,11 +1872,11 @@ def test_set_get_parameter_string():
18281872
retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOG_DIR, 256)
18291873
# Use safe_decode_string to handle null terminators and padding
18301874
retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8"))
1831-
print(f"Log directory test: set {log_dir}, got {retrieved_value}")
1875+
logging.info(f"Log directory test: set {log_dir}, got {retrieved_value}")
18321876
# The retrieved value should be a string, so we can compare directly
18331877
assert retrieved_value == log_dir, f"Log directory mismatch: set {log_dir}, got {retrieved_value}"
18341878
except Exception as e:
1835-
print(f"Log directory test failed: {e}")
1879+
logging.error(f"Log directory test failed: {e}")
18361880
# Re-raise the exception to make the test fail
18371881
raise
18381882

0 commit comments

Comments
 (0)