From cd9d3709659682f626605599d6ea25caeceefa7f Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 2 Mar 2026 15:18:05 +0100 Subject: [PATCH 1/2] #1442 fix dataset filtering when -dp is provided --- README.md | 3 +- core.py | 21 +++++- .../test_Issues/test_CoreIssue1442.py | 68 +++++++++++++------ 3 files changed, 69 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index a087308af..e2ceaf06e 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,8 @@ This will show the list of validation options. "[████████████████████████████--------] 78%"is printed. -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times - -e, --encoding TEXT File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc. + -e, --encoding TEXT File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc. + -ft, --filetype TEXT File extension to filter datasets. Has higher priority then --dataset-path parameter. --help Show this message and exit. ``` diff --git a/core.py b/core.py index d40f91687..9a64b3d0e 100644 --- a/core.py +++ b/core.py @@ -146,9 +146,22 @@ def _validate_data_directory( return dataset_paths, found_formats -def _validate_dataset_paths(dataset_path: tuple[str], logger) -> tuple[list, set]: +def _validate_dataset_paths( + dataset_path: tuple[str], logger, filetype: None +) -> tuple[list, set]: """Validate dataset paths and return dataset paths and found formats.""" - dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path]) + if filetype: + pattern = f"*.{filetype}" + dataset_paths, found_formats = valid_data_file( + [ + str(p) + for p in dataset_path + if Path(p).match(pattern) + if Path(p).is_file() + ] + ) + else: + dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path]) if DataFormatTypes.XLSX.value in found_formats and len(found_formats) > 1: logger.error( @@ -505,7 +518,9 @@ def validate( # noqa if not dataset_paths: ctx.exit(2) elif dataset_path: - dataset_paths, found_formats = _validate_dataset_paths(dataset_path, logger) + dataset_paths, found_formats = _validate_dataset_paths( + dataset_path, logger, filetype + ) if not dataset_paths: ctx.exit(2) else: diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py index 7c71ba7c7..172516786 100644 --- a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py +++ b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py @@ -1,31 +1,61 @@ import os import subprocess -import unittest import openpyxl import pytest from conftest import get_python_executable @pytest.mark.regression -class TestCoreIssue1442(unittest.TestCase): - def test_positive_dataset(self): +class TestCoreIssue1442: + @pytest.mark.parametrize( + "command", + [ + ( + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "usdm", + "-v", + "4-0", + "-dp", + os.path.join( + "tests", "resources", "CoreIssue1442", "test_adam_dataset.xpt" + ), + "-dp", + os.path.join( + "tests", "resources", "CoreIssue1442", "test_dataset.ndjson" + ), + "-dp", + os.path.join( + "tests", "resources", "CoreIssue1442", "CDISC_Pilot_Study.json" + ), + "-ft", + "json", + "-lr", + os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"), + ), + ( + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "usdm", + "-v", + "4-0", + "-d", + os.path.join("tests", "resources", "CoreIssue1442"), + "-ft", + "json", + "-lr", + os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"), + ), + ], + ) + def test_positive_dataset(self, command): # Run the command in the terminal - command = [ - f"{get_python_executable()}", - "-m", - "core", - "validate", - "-s", - "usdm", - "-v", - "4-0", - "-d", - os.path.join("tests", "resources", "CoreIssue1442"), - "-ft", - "json", - "-lr", - os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"), - ] subprocess.run(command, check=True) # Get the latest created Excel file From d8cacd90ec18d1b81c86b533664a75a8de9607e2 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 3 Mar 2026 10:44:53 +0100 Subject: [PATCH 2/2] #1442 fix log message in case when -dp path is valid but didn't match with -ft parameter --- README.md | 2 +- core.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e2ceaf06e..3cb873b04 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ This will show the list of validation options. 78%"is printed. -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times -e, --encoding TEXT File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc. - -ft, --filetype TEXT File extension to filter datasets. Has higher priority then --dataset-path parameter. + -ft, --filetype TEXT File extension to filter datasets. Has higher priority than --dataset-path parameter. --help Show this message and exit. ``` diff --git a/core.py b/core.py index 9a64b3d0e..5db814132 100644 --- a/core.py +++ b/core.py @@ -147,7 +147,7 @@ def _validate_data_directory( def _validate_dataset_paths( - dataset_path: tuple[str], logger, filetype: None + dataset_path: tuple[str], logger, filetype: str ) -> tuple[list, set]: """Validate dataset paths and return dataset paths and found formats.""" if filetype: @@ -180,6 +180,12 @@ def _validate_dataset_paths( f"Please provide either a single XLSX file or use other supported formats: " f"{VALIDATION_FORMATS_MESSAGE}" ) + elif filetype: + logger.error( + f"Provided dataset path does not match the specified file type.\n" + f"Specified format: {filetype}\n" + f"Please ensure the file extension matches the selected format." + ) else: logger.error( f"No valid dataset files provided.\n"