diff --git a/README.md b/README.md index a087308af..3cb873b04 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,8 @@ This will show the list of validation options. "[████████████████████████████--------] 78%"is printed. -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times - -e, --encoding TEXT File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc. + -e, --encoding TEXT File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc. + -ft, --filetype TEXT File extension to filter datasets. Has higher priority than --dataset-path parameter. --help Show this message and exit. ``` diff --git a/core.py b/core.py index d40f91687..5db814132 100644 --- a/core.py +++ b/core.py @@ -146,9 +146,22 @@ def _validate_data_directory( return dataset_paths, found_formats -def _validate_dataset_paths(dataset_path: tuple[str], logger) -> tuple[list, set]: +def _validate_dataset_paths( + dataset_path: tuple[str], logger, filetype: str +) -> tuple[list, set]: """Validate dataset paths and return dataset paths and found formats.""" - dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path]) + if filetype: + pattern = f"*.{filetype}" + dataset_paths, found_formats = valid_data_file( + [ + str(p) + for p in dataset_path + if Path(p).match(pattern) + if Path(p).is_file() + ] + ) + else: + dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path]) if DataFormatTypes.XLSX.value in found_formats and len(found_formats) > 1: logger.error( @@ -167,6 +180,12 @@ def _validate_dataset_paths(dataset_path: tuple[str], logger) -> tuple[list, set f"Please provide either a single XLSX file or use other supported formats: " f"{VALIDATION_FORMATS_MESSAGE}" ) + elif filetype: + logger.error( + f"Provided dataset path does not match the specified file type.\n" + f"Specified format: {filetype}\n" + f"Please ensure the file extension matches the selected format." + ) else: logger.error( f"No valid dataset files provided.\n" @@ -505,7 +524,9 @@ def validate( # noqa if not dataset_paths: ctx.exit(2) elif dataset_path: - dataset_paths, found_formats = _validate_dataset_paths(dataset_path, logger) + dataset_paths, found_formats = _validate_dataset_paths( + dataset_path, logger, filetype + ) if not dataset_paths: ctx.exit(2) else: diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py index 7c71ba7c7..172516786 100644 --- a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py +++ b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py @@ -1,31 +1,61 @@ import os import subprocess -import unittest import openpyxl import pytest from conftest import get_python_executable @pytest.mark.regression -class TestCoreIssue1442(unittest.TestCase): - def test_positive_dataset(self): +class TestCoreIssue1442: + @pytest.mark.parametrize( + "command", + [ + ( + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "usdm", + "-v", + "4-0", + "-dp", + os.path.join( + "tests", "resources", "CoreIssue1442", "test_adam_dataset.xpt" + ), + "-dp", + os.path.join( + "tests", "resources", "CoreIssue1442", "test_dataset.ndjson" + ), + "-dp", + os.path.join( + "tests", "resources", "CoreIssue1442", "CDISC_Pilot_Study.json" + ), + "-ft", + "json", + "-lr", + os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"), + ), + ( + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "usdm", + "-v", + "4-0", + "-d", + os.path.join("tests", "resources", "CoreIssue1442"), + "-ft", + "json", + "-lr", + os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"), + ), + ], + ) + def test_positive_dataset(self, command): # Run the command in the terminal - command = [ - f"{get_python_executable()}", - "-m", - "core", - "validate", - "-s", - "usdm", - "-v", - "4-0", - "-d", - os.path.join("tests", "resources", "CoreIssue1442"), - "-ft", - "json", - "-lr", - os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"), - ] subprocess.run(command, check=True) # Get the latest created Excel file