Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ Run `python core.py validate --help` to see the list of validation options.
-me, --max-errors-per-rule INTEGER BOOLEAN
Imposes a maximum number of errors per rule to enforce.
Usage: -me <limit> <per_dataset_flag>
Example: -me 100 true
Example: -me 100 True (the flag is case-sensitive: capitalize True/False)

<limit>: Maximum number of errors (integer)

Expand All @@ -115,7 +115,7 @@ Run `python core.py validate --help` to see the list of validation options.
the first <limit> issues per dataset are included in the report.

Can be set via MAX_ERRORS_PER_RULE env variable;
if both .env and -me <limit> are specified, the larger value will be used.
if both .env and -me <limit> are specified, the larger value will be used. If either source sets the per_dataset_flag to true, the flag will be enabled.
If limit is set to 0, no maximum will be enforced.
No maximum is the default behavior.
-dv, --define-version TEXT Define-XML version used for validation
Expand Down
5 changes: 4 additions & 1 deletion cdisc_rules_engine/services/reporting/base_report_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cdisc_rules_engine.services.reporting.report_metadata_item import (
ReportMetadataItem,
)
from cdisc_rules_engine.utilities.utils import set_max_errors_per_rule


class BaseReportData(ABC):
Expand All @@ -33,7 +34,9 @@ def __init__(
self._template = template
self._standard = args.standard.upper()
self._version = args.version.replace("-", ".")
self._max_errors_limit, self._errors_per_dataset_flag = args.max_errors_per_rule
self._max_errors_limit, self._errors_per_dataset_flag = set_max_errors_per_rule(
args
)

@staticmethod
def process_values(values: list[str]) -> list[str]:
Expand Down
14 changes: 7 additions & 7 deletions cdisc_rules_engine/services/reporting/sdtm_report_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,17 @@ def get_conformance_details_data(
ReportMetadataItem("CORE Engine Version", 4, __version__)
)
conformance_details.append(
ReportMetadataItem("Issue Limit Per Rule", 5, self._max_errors_limit)
ReportMetadataItem(
"Issue Limit Per Rule",
5,
"None" if self._max_errors_limit is None else self._max_errors_limit,
)
)
conformance_details.append(
ReportMetadataItem(
"Issue Limit Per Dataset",
6,
(
"None"
if self._errors_per_dataset_flag == 0
else str(self._errors_per_dataset_flag)
),
"True" if self._errors_per_dataset_flag else "None",
)
)
conformance_details.append(
Expand Down Expand Up @@ -186,7 +186,7 @@ def get_conformance_details_data(
def get_dataset_details_data(self) -> list[dict]:
return [
{
"filename": dataset.filename,
"filename": dataset.name,
"label": dataset.label,
"path": str(Path(dataset.full_path or "").parent),
"modification_date": dataset.modification_date,
Expand Down
30 changes: 30 additions & 0 deletions cdisc_rules_engine/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import copy
import os
import re
import ast
import pandas as pd
from datetime import datetime
from typing import Callable, Iterable, List, Optional, Union
Expand Down Expand Up @@ -451,3 +452,32 @@ def validate_dataset_files_exist(dataset_path: tuple[str], logger, ctx):
if non_existing_files:
logger.error(f'Files {", ".join(non_existing_files)} are not found')
ctx.exit(2)


def set_max_errors_per_rule(args):
    """Resolve the effective per-rule error limit and per-dataset flag.

    Combines the ``MAX_ERRORS_PER_RULE`` environment variable with the CLI
    ``-me <limit> <per_dataset_flag>`` option (``args.max_errors_per_rule``):

    * If both sources supply a positive limit, the larger one wins.
    * A missing, zero, or negative limit means "no limit" for that source.
    * The per-dataset flag is True if either source enables it.

    The environment variable accepts either a tuple literal such as
    ``(100, True)`` or a bare integer such as ``100`` (limit only), so the
    pre-tuple env format keeps working.

    Args:
        args: Parsed CLI arguments; ``args.max_errors_per_rule`` is a
            ``(limit, per_dataset_flag)`` tuple where ``limit`` may be None.

    Returns:
        tuple: ``(max_errors_per_rule, per_dataset)`` where the first item
        is a positive int or None (unlimited) and the second is a bool.

    Raises:
        ValueError: If MAX_ERRORS_PER_RULE is set but cannot be parsed as
            an int or a two-item tuple.
    """
    env_max_errors = None
    env_per_dataset = None
    env_value = os.getenv("MAX_ERRORS_PER_RULE")
    if env_value:
        try:
            parsed = ast.literal_eval(env_value)
        except (ValueError, SyntaxError) as err:
            raise ValueError(
                f"Invalid MAX_ERRORS_PER_RULE value: {env_value!r}"
            ) from err
        if isinstance(parsed, tuple) and len(parsed) == 2:
            env_max_errors, env_per_dataset = parsed
        elif isinstance(parsed, int):
            # Legacy bare-integer form: limit only, no per-dataset flag.
            env_max_errors = parsed
        else:
            raise ValueError(
                "MAX_ERRORS_PER_RULE must be an int or a (limit, flag) "
                f"tuple, got: {env_value!r}"
            )

    cli_limit, cli_per_dataset = args.max_errors_per_rule

    # Collect the limits that are actually set; None or non-positive values
    # mean "unlimited" for that source. The explicit None guard also avoids
    # a TypeError from `None > 0` when the CLI option is omitted.
    limits = [
        limit
        for limit in (env_max_errors, cli_limit)
        if limit is not None and limit > 0
    ]
    max_errors_per_rule = max(limits) if limits else None

    per_dataset = bool(env_per_dataset or cli_per_dataset)
    return max_errors_per_rule, per_dataset
8 changes: 4 additions & 4 deletions env.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CDISC_LIBRARY_API_KEY = 012345
DATASET_SIZE_THRESHOLD = size_in_bytes to force dask implementation
MAX_REPORT_ROWS = maximum number of issues per excel sheet (plus headers) in result report
MAX_ERRORS_PER_RULE = maximum number of errors to report per rule during a validation run.
CDISC_LIBRARY_API_KEY=your_api_key_here
DATASET_SIZE_THRESHOLD=10485760 # max dataset size in bytes to force dask implementation
MAX_REPORT_ROWS = 10 # integer for maximum number of issues per excel sheet (plus headers) in result report
MAX_ERRORS_PER_RULE = (10, True) # Example value. Tuple: maximum number of errors to report per rule during a validation run, plus a per-dataset flag (the second bool value, described in the README)
Binary file modified resources/templates/report-template.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion scripts/run_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@
get_model_details_cache_key_from_ig,
get_standard_details_cache_key,
get_variable_codelist_map_cache_key,
set_max_errors_per_rule,
)
from scripts.script_utils import (
fill_cache_with_dictionaries,
get_cache_service,
get_library_metadata_from_cache,
get_rules,
get_max_dataset_size,
set_max_errors_per_rule,
)
from cdisc_rules_engine.services.reporting import BaseReport, ReportFactory
from cdisc_rules_engine.utilities.progress_displayers import get_progress_displayer
Expand Down
22 changes: 0 additions & 22 deletions scripts/script_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,25 +530,3 @@ def replace_yml_spaces(data):
return [replace_yml_spaces(item) for item in data]
else:
return data


def set_max_errors_per_rule(args):
    """Resolve the effective per-rule error limit and per-dataset flag.

    Merges the ``MAX_ERRORS_PER_RULE`` environment variable (parsed here as
    a bare integer) with the CLI ``(limit, per_dataset_flag)`` tuple from
    ``args.max_errors_per_rule``. When both sources give a positive limit
    the larger one wins; a final limit of 0 or less is normalized to None,
    meaning no maximum is enforced.

    Returns:
        tuple: ``(max_errors_per_rule, cli_per_dataset)`` — a positive int
        or None, and the CLI per-dataset flag passed through unchanged.
    """
    # Read the env limit as an int; an unset/empty env var yields None.
    env_value = (
        int(os.getenv("MAX_ERRORS_PER_RULE"))
        if os.getenv("MAX_ERRORS_PER_RULE")
        else None
    )
    cli_limit, cli_per_dataset = args.max_errors_per_rule
    # NOTE(review): if cli_limit can be None while the env var is set,
    # `cli_limit > 0` raises TypeError — confirm the CLI option's default.
    if env_value is not None and cli_limit > 0:
        max_errors_per_rule = max(env_value, cli_limit)
    elif env_value is not None:
        max_errors_per_rule = env_value
    elif cli_limit > 0:
        max_errors_per_rule = cli_limit
    else:
        max_errors_per_rule = None

    # Normalize non-positive limits to None (no maximum enforced).
    if max_errors_per_rule is not None and max_errors_per_rule <= 0:
        max_errors_per_rule = None

    return max_errors_per_rule, cli_per_dataset
8 changes: 6 additions & 2 deletions tests/unit/test_services/test_reporting/test_excel_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_get_export(mock_validation_results):
mock_args.meddra = "test"
mock_args.whodrug = "test"
mock_args.max_report_rows = None
mock_args.max_errors_per_rule = (None, False)
mock_args.max_errors_per_rule = (10, True)
mock_args.controlled_terminology_package = ["sdtmct-03-2021"]
mock_args.standard = "sdtmig"
mock_args.substandard = None
Expand All @@ -34,6 +34,7 @@ def test_get_export(mock_validation_results):
datasets = [
SDTMDatasetMetadata(
**{
"name": "test",
"filename": "test.xpt",
"label": "Test Data",
"full_path": str(Path("tests/unit/text.xpt")),
Expand All @@ -50,6 +51,9 @@ def test_get_export(mock_validation_results):
wb = report.get_export()
assert wb["Conformance Details"]["B3"].value == "10.1 seconds"
assert wb["Conformance Details"]["B4"].value == __version__
assert wb["Conformance Details"]["B5"].value == 10
assert wb["Conformance Details"]["B6"].value == "True"
assert wb["Conformance Details"]["B7"].value == "10000"
assert wb["Conformance Details"]["B9"].value == "SDTMIG"
assert wb["Conformance Details"]["B10"].value == "NAP"
assert wb["Conformance Details"]["B11"].value == "V3.4"
Expand All @@ -59,7 +63,7 @@ def test_get_export(mock_validation_results):
assert wb["Conformance Details"]["B13"].value == "2.1"

# Check dataset details tab
assert wb["Dataset Details"]["A2"].value == "test.xpt" # filename
assert wb["Dataset Details"]["A2"].value == "test" # filename
assert wb["Dataset Details"]["B2"].value == "Test Data" # label
assert wb["Dataset Details"]["C2"].value == str(
Path("tests/unit/text.xpt").parent
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_services/test_reporting/test_json_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ def test_get_export(_, mock_validation_results):
assert export["Conformance_Details"]["Version"] == "V3.4"
assert export["Conformance_Details"]["CT_Version"] == "sdtmct-03-2021"
assert export["Conformance_Details"]["Define_XML_Version"] == "2.1"
assert export["Conformance_Details"]["Issue_Limit_Per_Rule"] is None
assert export["Conformance_Details"]["Issue_Limit_Per_Rule"] == "None"
assert export["Conformance_Details"]["Issue_Limit_Per_Dataset"] == "None"
assert export["Conformance_Details"]["Issue_Limit_Per_Sheet"] is None
assert "Dataset_Details" in export
assert isinstance(export["Dataset_Details"], list)

Expand Down
Loading