Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions cdisc_rules_engine/exceptions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from .custom_exceptions import (
EngineError,
DatasetNotFoundError,
ReferentialIntegrityError,
MissingDataError,
RuleExecutionError,
RuleFormatError,
InvalidMatchKeyError,
VariableMetadataNotFoundError,
DomainNotFoundError,
DomainNotFoundInDefineXMLError,
InvalidDatasetFormat,
InvalidJSONFormat,
NumberOfAttemptsExceeded,
InvalidDictionaryVariable,
UnsupportedDictionaryType,
FailedSchemaValidation,
SchemaNotFoundError,
InvalidSchemaProvidedError,
PreprocessingError,
OperationError,
DatasetBuilderError,
DateTimeParserError,
UnsupportedXptFormatError,
)

# Public API of the exceptions package: each name listed here is one of the
# classes imported from .custom_exceptions at the top of this module, so
# callers can import them from the package itself.
__all__ = [
"EngineError",
"DatasetNotFoundError",
"ReferentialIntegrityError",
"MissingDataError",
"RuleExecutionError",
"RuleFormatError",
"InvalidMatchKeyError",
"VariableMetadataNotFoundError",
"DomainNotFoundError",
"DomainNotFoundInDefineXMLError",
"InvalidDatasetFormat",
"InvalidJSONFormat",
"NumberOfAttemptsExceeded",
"InvalidDictionaryVariable",
"UnsupportedDictionaryType",
"FailedSchemaValidation",
"SchemaNotFoundError",
"InvalidSchemaProvidedError",
"PreprocessingError",
"OperationError",
"DatasetBuilderError",
"DateTimeParserError",
"UnsupportedXptFormatError",
]
7 changes: 7 additions & 0 deletions cdisc_rules_engine/exceptions/custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,10 @@ class DatasetBuilderError(EngineError):
class DateTimeParserError(EngineError):
    """Engine error for a datetime string that could not be parsed."""

    code = 400
    description = "Failure to parse a datetime string"


class UnsupportedXptFormatError(EngineError):
    """Engine error for XPT input that is not in SAS Transport v5 format."""

    code = 400
    # Implicit concatenation keeps the message text intact while keeping
    # each source line short.
    description = (
        "Unsupported XPT (SAS Transport) format. "
        "Only Transport v5 is supported."
    )
19 changes: 15 additions & 4 deletions cdisc_rules_engine/services/data_readers/xpt_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,33 @@
from cdisc_rules_engine.interfaces import (
DataReaderInterface,
)
from cdisc_rules_engine.exceptions import UnsupportedXptFormatError


class XPTReader(DataReaderInterface):
def _read_sas(self, source, **kwargs):
    """Call ``pd.read_sas`` and translate the non-XPORT header failure.

    Args:
        source: Path or buffer handed straight to ``pd.read_sas``.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_sas``
            (callers in this class pass ``format`` or ``chunksize``).

    Returns:
        Whatever ``pd.read_sas`` returns for the given arguments.

    Raises:
        UnsupportedXptFormatError: If pandas raises a ``ValueError`` whose
            message contains "Header record is not an XPORT file".
        ValueError: Any other ``ValueError`` from pandas, re-raised as is.
    """
    try:
        result = pd.read_sas(source, encoding=self.encoding, **kwargs)
    except ValueError as exc:
        # Only the header-mismatch failure is translated; every other
        # ValueError propagates untouched.
        if "Header record is not an XPORT file" not in str(exc):
            raise
        raise UnsupportedXptFormatError(
            "Unsupported XPT (SAS Transport) format. Only Transport v5 is supported."
        ) from exc
    return result

def read(self, data):
    """Parse in-memory XPT bytes and return the float-formatted result.

    Args:
        data: Raw bytes of an XPT file.

    Returns:
        The output of ``self._format_floats`` applied to the frame parsed
        from ``data``.

    Raises:
        UnsupportedXptFormatError: Raised by ``self._read_sas`` when pandas
            rejects the header as not being an XPORT file.
    """
    # Read exactly once, and only through _read_sas: a direct pd.read_sas
    # call here would parse the data twice and would raise the raw
    # ValueError before the error translation could run.
    df = self._read_sas(BytesIO(data), format="xport")
    df = self._format_floats(df)
    return df

def _read_pandas(self, file_path):
    """Read an XPT file from disk and wrap it in a ``PandasDataset``.

    Args:
        file_path: Location of the XPT file, passed to ``self._read_sas``.

    Returns:
        A ``PandasDataset`` built from the float-formatted frame.

    Raises:
        UnsupportedXptFormatError: Raised by ``self._read_sas`` when pandas
            rejects the header as not being an XPORT file.
    """
    # Read exactly once, and only through _read_sas: a direct pd.read_sas
    # call here would parse the file twice and would raise the raw
    # ValueError before the error translation could run.
    data = self._read_sas(file_path, format="xport")
    return PandasDataset(self._format_floats(data))

def to_parquet(self, file_path: str) -> str:
def to_parquet(self, file_path: str) -> tuple[int, str]:
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".parquet")
dataset = pd.read_sas(file_path, chunksize=20000, encoding=self.encoding)
dataset = self._read_sas(file_path, chunksize=20000)
created = False
num_rows = 0
for chunk in dataset:
Expand Down
Binary file added tests/resources/test_dataset_sas_v8.xpt
Binary file not shown.
34 changes: 34 additions & 0 deletions tests/unit/test_xpt_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os

import pytest

from cdisc_rules_engine.services.data_readers.xpt_reader import XPTReader
from cdisc_rules_engine.exceptions import UnsupportedXptFormatError


def test_read():
Expand All @@ -17,3 +20,34 @@ def test_read():
Verify that the rounding of incredibly small values to 0 is applied.
"""
assert value == 0 or abs(value) > 10**-16


def test_read_xpt_v5_no_error():
    """A Transport v5 fixture must load without error into a non-empty frame."""
    resources_dir = os.path.join(os.path.dirname(__file__), "..", "resources")
    fixture_path = os.path.join(resources_dir, "test_dataset.xpt")
    with open(fixture_path, "rb") as handle:
        raw_bytes = handle.read()

    frame = XPTReader().read(raw_bytes)
    assert not frame.empty


def test_read_xpt_v8_unsupported_error():
    """Reading the v8 fixture must raise UnsupportedXptFormatError."""
    fixture_path = os.path.join(
        os.path.dirname(__file__), "..", "resources", "test_dataset_sas_v8.xpt"
    )
    with open(fixture_path, "rb") as handle:
        payload = handle.read()

    xpt_reader = XPTReader()
    with pytest.raises(UnsupportedXptFormatError) as raised:
        xpt_reader.read(payload)

    # The user-facing message must be carried by the raised exception.
    assert (
        "Unsupported XPT (SAS Transport) format. Only Transport v5 is supported."
        in str(raised.value)
    )
Loading