diff --git a/cdisc_rules_engine/dataset_builders/json_schema_check_dataset_builder.py b/cdisc_rules_engine/dataset_builders/json_schema_check_dataset_builder.py index a2d2b3df4..865d63407 100644 --- a/cdisc_rules_engine/dataset_builders/json_schema_check_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/json_schema_check_dataset_builder.py @@ -55,16 +55,12 @@ def get_dataset(self, **kwargs) -> DatasetInterface: filtered = [ row for row in records if row["dataset"] == self.dataset_metadata.name ] - return tag_source( - ( - self.dataset_implementation.from_records(filtered, **kwargs) - if filtered - else self.dataset_implementation.from_dict( - self.dataset_template, **kwargs - ) - ), - self.dataset_metadata, - ) + if filtered: + result = self.dataset_implementation.from_records(filtered, **kwargs) + else: + empty_row = {key: "" for key in self.dataset_template.keys()} + result = self.dataset_implementation.from_records([empty_row], **kwargs) + return tag_source(result, self.dataset_metadata) def list_errors(self, tree: exceptions.ErrorTree, errlist: dict[str, list]): if tree.errors: diff --git a/tests/unit/test_dataset_builders/test_json_schema_check_dataset_builder.py b/tests/unit/test_dataset_builders/test_json_schema_check_dataset_builder.py index 41da2c423..04dad5d4a 100644 --- a/tests/unit/test_dataset_builders/test_json_schema_check_dataset_builder.py +++ b/tests/unit/test_dataset_builders/test_json_schema_check_dataset_builder.py @@ -1,4 +1,5 @@ from unittest.mock import MagicMock +import pandas as pd from cdisc_rules_engine.dataset_builders.json_schema_check_dataset_builder import ( JsonSchemaCheckDatasetBuilder, @@ -98,7 +99,15 @@ def test_json_schema_check_dataset_builder_valid(): dataset = builder.get_dataset() - assert dataset.empty + # Now expect a single row with all columns as empty strings or NaN, + # except source_row_number + rows = dataset.data.to_dict(orient="records") + assert len(rows) == 1 + for key, value in rows[0].items(): + if key == "source_row_number": + assert value == 1 + else: + assert value == "" or pd.isna(value) def test_json_schema_check_dataset_builder_invalid():