Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
d8614fe
1372-add-'auto'-precision-to-compare-datetimes-at=common-granularity
RakeshBobba03 Oct 21, 2025
17c311c
Add auto precision detection to date comparison operators via date_co…
RakeshBobba03 Oct 28, 2025
419dd77
minor cleanup
RakeshBobba03 Oct 28, 2025
260a305
Optimization
RakeshBobba03 Oct 28, 2025
2940244
Unit Test Fix\Update
RakeshBobba03 Oct 28, 2025
3f5c005
Merge branch 'main' into 1372-Datetime-Comparison
RamilCDISC Oct 28, 2025
a878ad3
Merge branch 'main' into 1372-Datetime-Comparison
RamilCDISC Oct 29, 2025
5b327ca
Merge branch 'main' into 1372-Datetime-Comparison
RamilCDISC Nov 2, 2025
3988fd7
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 3, 2025
ea629ec
Merge remote-tracking branch 'refs/remotes/origin/1372-Datetime-Compa…
RakeshBobba03 Nov 3, 2025
61dad95
restore comments
RakeshBobba03 Nov 3, 2025
a3d7a1b
restore test_invalid_date format, move precision constants to top, ad…
RakeshBobba03 Nov 3, 2025
ec37d74
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 3, 2025
36e171c
Merge branch 'main' into 1372-Datetime-Comparison
gerrycampion Nov 4, 2025
3400229
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 5, 2025
bf5a1c6
Merge branch '1372-Datetime-Comparison' of https://github.com/cdisc-o…
RakeshBobba03 Nov 5, 2025
ada41a2
Restructure auto precision tests into individual cases, remove duplic…
RakeshBobba03 Nov 5, 2025
c6d8b2b
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 6, 2025
1bc5505
Implement auto precision datetime comparisons
RakeshBobba03 Nov 10, 2025
0a85dbf
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 10, 2025
19f9465
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 11, 2025
56c5d13
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 12, 2025
e8a54ff
Refactor date precision handling: use DatePrecision enum, simplify pa…
RakeshBobba03 Nov 13, 2025
33148a9
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 13, 2025
3b258a2
Cleanup on refactor
RakeshBobba03 Nov 13, 2025
d9e80bf
Remove unused date precision helper functions and DatePrecision metho…
RakeshBobba03 Nov 14, 2025
c99e9a2
more cleanup
RakeshBobba03 Nov 14, 2025
ce24224
missed update from operator.md
RakeshBobba03 Nov 14, 2025
0c746f8
Refactored to remove redundant code and use enums
gerrycampion Nov 18, 2025
5c865cc
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 18, 2025
b1ff922
Merge branch '1372-Datetime-Comparison' of https://github.com/cdisc-o…
RakeshBobba03 Nov 18, 2025
83aa3cf
combine redundant/duplicate functions into one and use enum iteration
RakeshBobba03 Nov 19, 2025
e628106
redundant check removed
RakeshBobba03 Nov 19, 2025
9487126
Merge branch 'main' into 1372-Datetime-Comparison
RakeshBobba03 Nov 19, 2025
d15257a
remove has_time logic
gerrycampion Nov 21, 2025
4eeb473
fixed overwriting var in scope
gerrycampion Nov 21, 2025
62fde67
Merge branch 'main' into 1372-Datetime-Comparison
gerrycampion Nov 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
315 changes: 259 additions & 56 deletions cdisc_rules_engine/check_operators/helpers.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In all instances where you use strings for the date components ("year", "minute", etc.), you should use the DatePrecision enum instead.

Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,56 @@
import pytz
from cdisc_rules_engine.services import logger
import traceback

from functools import lru_cache
from enum import IntEnum
import operator

# Date regex pattern for validation
date_regex = re.compile(
r"^((-?[0-9]{4}|-)(-(1[0-2]|0[1-9]|-)(-(3[01]|0[1-9]|[12][0-9]|-)"
r"(T(2[0-3]|[01][0-9]|-)(:([0-5][0-9]|-)((:([0-5][0-9]|-))?(\.[0-9]+)?"
r"((Z|[+-](:2[0-3]|[01][0-9]):[0-5][0-9]))?)?)?)?)?)?)(\/((-?[0-9]{4}|-)"
r"(-(1[0-2]|0[1-9]|-)(-(3[01]|0[1-9]|[12][0-9]|-)(T(2[0-3]|[01][0-9]|-)"
r"(:([0-5][0-9]|-)((:([0-5][0-9]|-))?(\.[0-9]+)?((Z|[+-](:2[0-3]|[01][0-9])"
r":[0-5][0-9]))?)?)?)?)?)?))?$"
r"^("
r"(?P<year>-?[0-9]{4}|-)(-{1,2}(?P<month>1[0-2]|0[1-9]|-))?"
r"(-{1,2}(?P<day>3[01]|0[1-9]|[12][0-9]|-))?"
r"(T(?P<hour>2[0-3]|[01][0-9]|-)(:((?P<minute>[0-5][0-9]|-))"
r"(:((?P<second>[0-5][0-9]|-))?(\.(?P<microsecond>[0-9]+))?)?)?"
r"(?P<timezone>Z|[+-](2[0-3]|[01][0-9]):[0-5][0-9])?)?"
r"(\/"
r"(?P<interval_year>-?[0-9]{4}|-)(-{1,2}(?P<interval_month>1[0-2]|0[1-9]|-))?"
r"(-{1,2}(?P<interval_day>3[01]|0[1-9]|[12][0-9]|-))?"
r"(T(?P<interval_hour>2[0-3]|[01][0-9]|-)(:((?P<interval_minute>[0-5][0-9]|-))"
r"(:((?P<interval_second>[0-5][0-9]|-))?(\.(?P<interval_microsecond>[0-9]+))?)?)?"
r"(?P<interval_timezone>Z|[+-](2[0-3]|[01][0-9]):[0-5][0-9])?)?"
r")?"
r"|"
r"-{4,8}T(?P<timeonly_hour>2[0-3]|[01][0-9]|-)(:((?P<timeonly_minute>[0-5][0-9]|-))"
r"(:((?P<timeonly_second>[0-5][0-9]|-))?(\.(?P<timeonly_microsecond>[0-9]+))?)?)?"
r"(?P<timeonly_timezone>Z|[+-](2[0-3]|[01][0-9]):[0-5][0-9])?"
r")$"
)


class DatePrecision(IntEnum):
    """Datetime granularity levels, ordered from coarsest (year) to finest.

    Member names match the ``datetime`` attribute / keyword names, so a
    member's ``name`` can be used with ``getattr`` and ``datetime.replace``.
    """

    year = 0
    month = 1
    day = 2
    hour = 3
    minute = 4
    second = 5
    microsecond = 6

    @property
    def default_value(self):
        """Return the filler value used when this component is absent."""
        if self is DatePrecision.year:
            return 1970
        # Calendar fields are 1-based; every time-of-day field starts at 0.
        if self in (DatePrecision.month, DatePrecision.day):
            return 1
        return 0


def is_valid_date(date_string: str) -> bool:
if date_string is None:
return False
Expand Down Expand Up @@ -56,91 +93,160 @@ def is_valid_duration(duration: str, negative) -> bool:
match = re.match(pattern, duration)
if not match:
return False

years, months, days, time_designator, hours, minutes, seconds, weeks = (
match.groups()
)

if time_designator and not any([hours, minutes, seconds]):
return False

components = [
c
for c in [years, months, weeks, days, hours, minutes, seconds]
if c is not None
]

# Check if decimal is only in the smallest unit
decimal_found = False
for i, component in enumerate(components):
if "." in component or "," in component:
if decimal_found or i != len(components) - 1:
return False
decimal_found = True

return True


def get_year(date_string: str):
timestamp = get_date(date_string)
return timestamp.year
def _empty_datetime_components():
    """Return a component map with every DatePrecision level set to None."""
    return dict.fromkeys(DatePrecision)


def get_month(date_string: str):
timestamp = get_date(date_string)
return timestamp.month
def _extract_datetime_components(date_str: str) -> dict:
    """Split a date string into per-precision text components.

    Returns a dict keyed by ``DatePrecision``. A value is the matched text
    for that component, or ``None`` when the component is absent or given
    as the ``-`` placeholder. Empty, non-string, or non-matching input
    yields a dict whose values are all ``None``.
    """
    if not date_str or not isinstance(date_str, str):
        return _empty_datetime_components()
    parsed = date_regex.match(date_str)
    if not parsed:
        return _empty_datetime_components()

    extracted = {}
    for precision in DatePrecision:
        # Each component may come from the main part or the interval part;
        # time-of-day components may also come from the time-only form.
        prefixes = ("", "interval_")
        if precision >= DatePrecision.hour:
            prefixes += ("timeonly_",)
        raw = None
        for prefix in prefixes:
            raw = parsed.group(prefix + precision.name)
            if raw:
                # First non-empty group wins, mirroring an `or` chain.
                break
        extracted[precision] = None if _check_date_component_missing(raw) else raw
    return extracted


@lru_cache(maxsize=1000)
def detect_datetime_precision(date_str: str) -> DatePrecision | None:
    """Return the finest precision present in ``date_str``.

    Returns ``None`` when the string is not a valid date string or when no
    component carries a value. Results are memoised per input string.
    """
    if not _datestring_is_valid(date_str):
        return None
    components = _extract_datetime_components(date_str)
    # Check the extracted values. Iterating the dict itself yields the
    # DatePrecision keys, which are never "missing", so the guard was dead.
    if all(_check_date_component_missing(value) for value in components.values()):
        return None
    return _date_and_time_precision(components)

def get_day(date_string: str):
timestamp = get_date(date_string)
return timestamp.day

def _datestring_is_valid(date_str: str) -> bool:
    """Return True when ``date_str`` is a non-empty str matching ``date_regex``."""
    if not date_str or not isinstance(date_str, str):
        return False
    return date_regex.match(date_str) is not None

def get_hour(date_string: str):
timestamp = get_date(date_string)
return timestamp.hour

def _check_date_component_missing(component) -> bool:
    """Return True when a component is None, the ``-`` placeholder, or empty."""
    if component is None:
        return True
    return component == "-" or component == ""

def get_minute(date_string: str):
timestamp = get_date(date_string)
return timestamp.minute

def _get_precision_before(precision: DatePrecision) -> DatePrecision | None:
    """Return the next-coarser precision, or None when there is none."""
    previous = precision.value - 1
    if previous < 0:
        return None
    return DatePrecision(previous)

def get_second(date_string: str):
timestamp = get_date(date_string)
return timestamp.second

def _date_and_time_precision(
components: dict,
) -> DatePrecision | None:
for precision in DatePrecision:
component = components[precision] if precision in components else None
if _check_date_component_missing(component):
return _get_precision_before(precision)

def get_microsecond(date_string: str):
timestamp = get_date(date_string)
return timestamp.microsecond
return DatePrecision.microsecond


def get_common_precision(dt1: str, dt2: str) -> DatePrecision | None:
    """Return the coarser of the two strings' detected precisions.

    Returns ``None`` when either string has no detectable precision.
    """
    first = detect_datetime_precision(dt1)
    second = detect_datetime_precision(dt2)
    if first is None or second is None:
        return None
    # DatePrecision is an IntEnum, so min() picks the coarser member.
    return min(first, second)


def get_date_component(component: str, date_string: str):
component_func_map = {
"year": get_year,
"month": get_month,
"day": get_day,
"hour": get_hour,
"minute": get_minute,
"microsecond": get_microsecond,
"second": get_second,
}
component_function = component_func_map.get(component)
if component_function:
return component_function(date_string)
else:
return get_date(date_string)
date = get_date(date_string)
try:
return getattr(date, DatePrecision[component].name)
except (KeyError, ValueError):
return date


def _parse_uncertain_date(date_string: str) -> datetime | None:
    """Parse a date string that may have missing components.

    Each absent component is replaced by its ``DatePrecision`` default.

    Returns a naive ``datetime``, or ``None`` when ``date_string`` does not
    match the date pattern or the resulting field values are not a valid
    datetime. Returning ``None`` lets the caller fall back to another parser
    instead of receiving a fabricated 1970-01-01.
    """
    if not _datestring_is_valid(date_string):
        return None

    components = _extract_datetime_components(date_string)
    # NOTE(review): only DatePrecision fields are read here, so any timezone
    # designator matched by the pattern is not applied to the result.
    values = []
    for precision in DatePrecision:
        raw = components.get(precision)
        if raw and precision is DatePrecision.microsecond:
            # The captured text is a decimal fraction of a second (".5" is
            # 500000 microseconds): scale to exactly six digits and drop
            # any digits beyond microsecond resolution.
            raw = raw[:6].ljust(6, "0")
        values.append(int(raw or precision.default_value))

    try:
        return datetime(*values)
    except (ValueError, TypeError):
        return None


def get_date(date_string: str):
"""
Returns a utc timestamp for comparison
"""
date = parse(date_string, default=datetime(1970, 1, 1))
uncertainty_substrings = ["/", "--", "-:"]
has_uncertainty = any([substr in date_string for substr in uncertainty_substrings])

if has_uncertainty:
uncertain_date = _parse_uncertain_date(date_string)
if uncertain_date is not None:
utc = pytz.UTC
return utc.localize(uncertain_date)

date = parse(
date_string,
default=datetime(
*[
precision.default_value
for precision in list(DatePrecision)[
DatePrecision.year : DatePrecision.day + 1
]
]
),
)
utc = pytz.UTC
if date.tzinfo is not None and date.tzinfo.utcoffset(date) is not None:
# timezone aware
return date.astimezone(utc)
else:
return utc.localize(date)
Expand Down Expand Up @@ -185,15 +291,112 @@ def case_insensitive_is_in(value, values):
return str(value).lower() in str(values).lower()


def compare_dates(component, target, comparator, operator):
def truncate_datetime_to_precision(date_string: str, precision: DatePrecision):
    """Parse ``date_string`` and reset every component finer than ``precision``.

    Finer components are set to their ``DatePrecision`` defaults. When
    ``precision`` is ``None`` the parsed datetime is returned unchanged.
    """
    parsed = get_date(date_string)
    if precision is None:
        return parsed
    finer_defaults = {
        level.name: level.default_value
        for level in DatePrecision
        if level.value > precision.value
    }
    return parsed.replace(**finer_defaults)


def _dates_are_comparable(target: str, comparator: str) -> bool:
if not target or not comparator:
# Comparison should return false if either is empty or None
return False
else:
return operator(
get_date_component(component, target),
get_date_component(component, comparator),
)
return is_valid_date(target) and is_valid_date(comparator)


def _has_explicit_component(component) -> bool:
    """Return True unless ``component`` is None or the ``"auto"`` sentinel."""
    if component is None:
        return False
    return component != "auto"


def _compare_with_component(component, target, comparator, operator_func):
    """Apply ``operator_func`` to one named date component of each value."""
    left = get_date_component(component, target)
    right = get_date_component(component, comparator)
    return operator_func(left, right)


def _build_precision_context(target: str, comparator: str) -> dict:
    """Collect each value's detected precision and their common precision."""
    target_precision = detect_datetime_precision(target)
    comparator_precision = detect_datetime_precision(comparator)
    common_precision = get_common_precision(target, comparator)
    return {
        "target_precision": target_precision,
        "comparator_precision": comparator_precision,
        "precision": common_precision,
    }


def _truncate_by_precision(
    target: str, comparator: str, precision: DatePrecision | None
) -> tuple:
    """Return both values as datetimes truncated to ``precision``.

    When ``precision`` is ``None`` both values are parsed without truncation.
    """
    if precision is not None:
        return (
            truncate_datetime_to_precision(target, precision),
            truncate_datetime_to_precision(comparator, precision),
        )
    return get_date(target), get_date(comparator)


def _compare_with_inferred_precision(
    operator_func,
    target: str,
    comparator: str,
    truncated_target,
    truncated_comparator,
    context: dict,
):
    """Compare two dates whose precision was inferred from their text.

    Args:
        operator_func: Binary comparison callable, e.g. ``operator.lt``.
        target: Original target date string.
        comparator: Original comparator date string.
        truncated_target: Target datetime truncated to the common precision.
        truncated_comparator: Comparator datetime truncated likewise.
        context: Dict with ``target_precision`` and ``comparator_precision``.

    Returns:
        The comparison result. Equality requires identical precisions; for
        ordering operators, a tie at the common precision is broken with
        the untruncated values when the target is the finer of the two.
    """
    target_precision = context["target_precision"]
    comparator_precision = context["comparator_precision"]
    same_precision = target_precision == comparator_precision

    if operator_func is operator.eq:
        return same_precision and truncated_target == truncated_comparator

    if operator_func is operator.ne:
        return not same_precision or truncated_target != truncated_comparator

    # Compare against None explicitly: DatePrecision.year has value 0 and is
    # falsy, so a truthiness test would wrongly treat it as "no precision".
    if (
        truncated_target == truncated_comparator
        and target_precision is not None
        and comparator_precision is not None
        and target_precision > comparator_precision
    ):
        return operator_func(get_date(target), get_date(comparator))

    return operator_func(truncated_target, truncated_comparator)


def compare_dates(component, target, comparator, operator_func):
    """Compare two date strings with ``operator_func``.

    Returns False when the values are not comparable or share no detectable
    precision. An explicit ``component`` compares only that component;
    ``"auto"`` compares both values truncated to their common precision;
    ``None`` uses the inferred-precision comparison.
    """
    if not _dates_are_comparable(target, comparator):
        return False

    if _has_explicit_component(component):
        return _compare_with_component(component, target, comparator, operator_func)

    precision_context = _build_precision_context(target, comparator)
    common_precision = precision_context["precision"]
    if common_precision is None:
        return False

    target_dt, comparator_dt = _truncate_by_precision(
        target, comparator, common_precision
    )

    if component != "auto":
        return _compare_with_inferred_precision(
            operator_func,
            target,
            comparator,
            target_dt,
            comparator_dt,
            precision_context,
        )

    return operator_func(target_dt, comparator_dt)


def apply_regex(regex: str, val: str):
Expand Down
3 changes: 2 additions & 1 deletion resources/schema/Operator.json
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,8 @@
"hour",
"minute",
"second",
"microsecond"
"microsecond",
"auto"
],
"type": "string"
},
Expand Down
Loading
Loading