cdisc-org · SFJohnson24 · Mar 4, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 3, 2026
diff --git a/README.md b/README.md
@@ -241,7 +241,8 @@ This will show the list of validation options.
                                   "[████████████████████████████--------]
                                   78%"is printed.
   -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times
-  -e, --encoding TEXT            File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc.
+  -e, --encoding TEXT             File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc.
+  -ft, --filetype TEXT            File extension used to filter datasets (e.g. json). Takes priority over the --dataset-path parameter: paths whose extension does not match are skipped.
   --help                          Show this message and exit.
 ```
 

diff --git a/core.py b/core.py
@@ -146,9 +146,22 @@ def _validate_data_directory(
     return dataset_paths, found_formats
 
 
-def _validate_dataset_paths(dataset_path: tuple[str], logger) -> tuple[list, set]:
+def _validate_dataset_paths(
+    dataset_path: tuple[str], logger, filetype: str
+) -> tuple[list, set]:
     """Validate dataset paths and return dataset paths and found formats."""
-    dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path])
+    if filetype:
+        pattern = f"*.{filetype}"
+        dataset_paths, found_formats = valid_data_file(
+            [
+                str(p)
+                for p in dataset_path
+                if Path(p).match(pattern)
+                if Path(p).is_file()
+            ]
+        )
+    else:
+        dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path])
 
     if DataFormatTypes.XLSX.value in found_formats and len(found_formats) > 1:
         logger.error(
@@ -167,6 +180,12 @@ def _validate_dataset_paths(dataset_path: tuple[str], logger) -> tuple[list, set
                 f"Please provide either a single XLSX file or use other supported formats: "
                 f"{VALIDATION_FORMATS_MESSAGE}"
             )
+        elif filetype:
+            logger.error(
+                f"Provided dataset path does not match the specified file type.\n"
+                f"Specified format: {filetype}\n"
+                f"Please ensure the file extension matches the selected format."
+            )
         else:
             logger.error(
                 f"No valid dataset files provided.\n"
@@ -505,7 +524,9 @@ def validate(  # noqa
         if not dataset_paths:
             ctx.exit(2)
     elif dataset_path:
-        dataset_paths, found_formats = _validate_dataset_paths(dataset_path, logger)
+        dataset_paths, found_formats = _validate_dataset_paths(
+            dataset_path, logger, filetype
+        )
         if not dataset_paths:
             ctx.exit(2)
     else:

diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py
@@ -1,31 +1,61 @@
 import os
 import subprocess
-import unittest
 import openpyxl
 import pytest
 from conftest import get_python_executable
 
 
 @pytest.mark.regression
-class TestCoreIssue1442(unittest.TestCase):
-    def test_positive_dataset(self):
+class TestCoreIssue1442:
+    @pytest.mark.parametrize(
+        "command",
+        [
+            (
+                f"{get_python_executable()}",
+                "-m",
+                "core",
+                "validate",
+                "-s",
+                "usdm",
+                "-v",
+                "4-0",
+                "-dp",
+                os.path.join(
+                    "tests", "resources", "CoreIssue1442", "test_adam_dataset.xpt"
+                ),
+                "-dp",
+                os.path.join(
+                    "tests", "resources", "CoreIssue1442", "test_dataset.ndjson"
+                ),
+                "-dp",
+                os.path.join(
+                    "tests", "resources", "CoreIssue1442", "CDISC_Pilot_Study.json"
+                ),
+                "-ft",
+                "json",
+                "-lr",
+                os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"),
+            ),
+            (
+                f"{get_python_executable()}",
+                "-m",
+                "core",
+                "validate",
+                "-s",
+                "usdm",
+                "-v",
+                "4-0",
+                "-d",
+                os.path.join("tests", "resources", "CoreIssue1442"),
+                "-ft",
+                "json",
+                "-lr",
+                os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"),
+            ),
+        ],
+    )
+    def test_positive_dataset(self, command):
         # Run the command in the terminal
-        command = [
-            f"{get_python_executable()}",
-            "-m",
-            "core",
-            "validate",
-            "-s",
-            "usdm",
-            "-v",
-            "4-0",
-            "-d",
-            os.path.join("tests", "resources", "CoreIssue1442"),
-            "-ft",
-            "json",
-            "-lr",
-            os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"),
-        ]
         subprocess.run(command, check=True)
 
         # Get the latest created Excel file