From cd9d3709659682f626605599d6ea25caeceefa7f Mon Sep 17 00:00:00 2001
From: alexfurmenkov <alexeyfurmenkov@gmail.com>
Date: Mon, 2 Mar 2026 15:18:05 +0100
Subject: [PATCH 1/2] #1442 fix dataset filtering when -dp is provided

---
 README.md                                     |  3 +-
 core.py                                       | 21 +++++-
 .../test_Issues/test_CoreIssue1442.py         | 68 +++++++++++++------
 3 files changed, 69 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index a087308af..e2ceaf06e 100644
--- a/README.md
+++ b/README.md
@@ -241,7 +241,8 @@ This will show the list of validation options.
                                   "[████████████████████████████--------]
                                   78%"is printed.
   -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times
-  -e, --encoding TEXT            File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc.
+  -e, --encoding TEXT             File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc.
+  -ft, --filetype TEXT            File extension to filter datasets. Has higher priority then --dataset-path parameter.
   --help                          Show this message and exit.
 ```
 
diff --git a/core.py b/core.py
index d40f91687..9a64b3d0e 100644
--- a/core.py
+++ b/core.py
@@ -146,9 +146,22 @@ def _validate_data_directory(
     return dataset_paths, found_formats
 
 
-def _validate_dataset_paths(dataset_path: tuple[str], logger) -> tuple[list, set]:
+def _validate_dataset_paths(
+    dataset_path: tuple[str], logger, filetype: None
+) -> tuple[list, set]:
     """Validate dataset paths and return dataset paths and found formats."""
-    dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path])
+    if filetype:
+        pattern = f"*.{filetype}"
+        dataset_paths, found_formats = valid_data_file(
+            [
+                str(p)
+                for p in dataset_path
+                if Path(p).match(pattern)
+                if Path(p).is_file()
+            ]
+        )
+    else:
+        dataset_paths, found_formats = valid_data_file([dp for dp in dataset_path])
 
     if DataFormatTypes.XLSX.value in found_formats and len(found_formats) > 1:
         logger.error(
@@ -505,7 +518,9 @@ def validate(  # noqa
         if not dataset_paths:
             ctx.exit(2)
     elif dataset_path:
-        dataset_paths, found_formats = _validate_dataset_paths(dataset_path, logger)
+        dataset_paths, found_formats = _validate_dataset_paths(
+            dataset_path, logger, filetype
+        )
         if not dataset_paths:
             ctx.exit(2)
     else:
diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py
index 7c71ba7c7..172516786 100644
--- a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py
+++ b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py
@@ -1,31 +1,61 @@
 import os
 import subprocess
-import unittest
 import openpyxl
 import pytest
 from conftest import get_python_executable
 
 
 @pytest.mark.regression
-class TestCoreIssue1442(unittest.TestCase):
-    def test_positive_dataset(self):
+class TestCoreIssue1442:
+    @pytest.mark.parametrize(
+        "command",
+        [
+            (
+                f"{get_python_executable()}",
+                "-m",
+                "core",
+                "validate",
+                "-s",
+                "usdm",
+                "-v",
+                "4-0",
+                "-dp",
+                os.path.join(
+                    "tests", "resources", "CoreIssue1442", "test_adam_dataset.xpt"
+                ),
+                "-dp",
+                os.path.join(
+                    "tests", "resources", "CoreIssue1442", "test_dataset.ndjson"
+                ),
+                "-dp",
+                os.path.join(
+                    "tests", "resources", "CoreIssue1442", "CDISC_Pilot_Study.json"
+                ),
+                "-ft",
+                "json",
+                "-lr",
+                os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"),
+            ),
+            (
+                f"{get_python_executable()}",
+                "-m",
+                "core",
+                "validate",
+                "-s",
+                "usdm",
+                "-v",
+                "4-0",
+                "-d",
+                os.path.join("tests", "resources", "CoreIssue1442"),
+                "-ft",
+                "json",
+                "-lr",
+                os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"),
+            ),
+        ],
+    )
+    def test_positive_dataset(self, command):
         # Run the command in the terminal
-        command = [
-            f"{get_python_executable()}",
-            "-m",
-            "core",
-            "validate",
-            "-s",
-            "usdm",
-            "-v",
-            "4-0",
-            "-d",
-            os.path.join("tests", "resources", "CoreIssue1442"),
-            "-ft",
-            "json",
-            "-lr",
-            os.path.join("tests", "resources", "CoreIssue1442", "rule.yml"),
-        ]
         subprocess.run(command, check=True)
 
         # Get the latest created Excel file

From d8cacd90ec18d1b81c86b533664a75a8de9607e2 Mon Sep 17 00:00:00 2001
From: alexfurmenkov <alexeyfurmenkov@gmail.com>
Date: Tue, 3 Mar 2026 10:44:53 +0100
Subject: [PATCH 2/2] #1442 fix log message when -dp path is valid but
 does not match the -ft parameter

---
 README.md | 2 +-
 core.py   | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index e2ceaf06e..3cb873b04 100644
--- a/README.md
+++ b/README.md
@@ -242,7 +242,7 @@ This will show the list of validation options.
                                   78%"is printed.
   -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times
   -e, --encoding TEXT             File encoding for reading datasets. If not specified, defaults to utf-8. Supported encodings: utf-8, utf-16, utf-32, cp1252, latin-1, etc.
-  -ft, --filetype TEXT            File extension to filter datasets. Has higher priority then --dataset-path parameter.
+  -ft, --filetype TEXT            File extension to filter datasets. Has higher priority than --dataset-path parameter.
   --help                          Show this message and exit.
 ```
 
diff --git a/core.py b/core.py
index 9a64b3d0e..5db814132 100644
--- a/core.py
+++ b/core.py
@@ -147,7 +147,7 @@ def _validate_data_directory(
 
 
 def _validate_dataset_paths(
-    dataset_path: tuple[str], logger, filetype: None
+    dataset_path: tuple[str], logger, filetype: str
 ) -> tuple[list, set]:
     """Validate dataset paths and return dataset paths and found formats."""
     if filetype:
@@ -180,6 +180,12 @@ def _validate_dataset_paths(
                 f"Please provide either a single XLSX file or use other supported formats: "
                 f"{VALIDATION_FORMATS_MESSAGE}"
             )
+        elif filetype:
+            logger.error(
+                f"Provided dataset path does not match the specified file type.\n"
+                f"Specified format: {filetype}\n"
+                f"Please ensure the file extension matches the selected format."
+            )
         else:
             logger.error(
                 f"No valid dataset files provided.\n"