From 05ba4a1379cb51eefb82831476dae5724e51b7b8 Mon Sep 17 00:00:00 2001
From: Erin Drummond
Date: Wed, 30 Jul 2025 22:48:48 +0000
Subject: [PATCH] Fix: Reinstate dateparser==1.2.2 with a workaround for the upstream issue

---
 pyproject.toml      |  2 +-
 sqlmesh/__init__.py |  7 +++++++
 sqlmesh/_hacks.py   | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 sqlmesh/_hacks.py

diff --git a/pyproject.toml b/pyproject.toml
index 91a671b2d4..1915d5b029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ dependencies = [
     "click",
     "croniter",
     "duckdb>=0.10.0,!=0.10.3",
-    "dateparser<=1.2.1",
+    "dateparser>=1.2.2",
     "hyperscript>=0.1.0",
     "importlib-metadata; python_version<'3.12'",
     "ipywidgets",
diff --git a/sqlmesh/__init__.py b/sqlmesh/__init__.py
index 47e9bacce2..b347dfd3d5 100644
--- a/sqlmesh/__init__.py
+++ b/sqlmesh/__init__.py
@@ -5,6 +5,13 @@
 
 from __future__ import annotations
 
+# Work around dateparser upstream issue: https://github.com/scrapinghub/dateparser/issues/1282
+# This needs to happen before sqlmesh triggers an import of dateparser
+# This hack can be removed when the upstream issue is resolved
+from sqlmesh._hacks import fix_dateparser
+
+fix_dateparser()
+
 import glob
 import logging
 import os
diff --git a/sqlmesh/_hacks.py b/sqlmesh/_hacks.py
new file mode 100644
index 0000000000..c084daf39c
--- /dev/null
+++ b/sqlmesh/_hacks.py
@@ -0,0 +1,56 @@
+UPSTREAM_PICKLE_FILE_SIGNATURES = {
+    "a3a8d27b822072fa5c67c0651cb3c934"  # distributed with dateparser==1.2.2
+}
+
+
+def fix_dateparser() -> None:
+    """Remove the buggy tz cache file bundled with dateparser==1.2.2.
+
+    Works around the following upstream issues (which all have the same root cause):
+    - https://github.com/scrapinghub/dateparser/issues/1270
+    - https://github.com/scrapinghub/dateparser/issues/1281
+    - https://github.com/scrapinghub/dateparser/issues/1282
+
+    Filesystem errors on the cache file never propagate: a failed delete is printed
+    as a warning, and a final parse check prints a hint if dateparser is still broken.
+
+    This hack can be removed if this issue is fixed upstream.
+    If you're removing this hack, make sure to update pyproject.toml to blacklist version 1.2.2
+    """
+    # importing the package alone does not guarantee that `importlib.util` is loaded
+    import hashlib
+    import importlib.util
+    from pathlib import Path
+
+    tz_cache = None
+    spec = importlib.util.find_spec("dateparser")
+    if spec and spec.origin:
+        # spec.origin will be something like:
+        # "/path/to/venv/lib/python3.9/site-packages/dateparser/__init__.py"
+        tz_cache = Path(spec.origin).parent / "data" / "dateparser_tz_cache.pkl"
+        try:
+            signature = hashlib.md5(tz_cache.read_bytes()).hexdigest()
+        except OSError:
+            # missing or unreadable cache: nothing to compare, the check below still runs
+            signature = None
+        # if the tz_cache file matches the signature of the buggy upstream one, delete it
+        # deleting it forces it to be correctly re-generated for the local environment when dateparser is imported
+        if signature in UPSTREAM_PICKLE_FILE_SIGNATURES:
+            try:
+                tz_cache.unlink()
+            except OSError as e:
+                print(f"WARNING: Unable to delete upstream dateparser cache: {str(e)}")
+
+    # Test that it actually worked
+    import dateparser
+
+    if dateparser.parse("1 minute ago") is None:
+        hint_filename = (
+            str(tz_cache)
+            if tz_cache is not None
+            else "site-packages/dateparser/data/dateparser_tz_cache.pkl"
+        )
+        print(
+            "WARNING: Buggy dateparser detected; some date expressions may fail to parse.\n"
+            f"Please either delete the file '{hint_filename}' manually or use dateparser<=1.2.1"
+        )