Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies = [
"click",
"croniter",
"duckdb>=0.10.0,!=0.10.3",
"dateparser<=1.2.1",
"dateparser>=1.2.2",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pin forces SQLMesh to use the version of dateparser that contains the performance enhancements (which don't exist in any other version), but also the parsing regression that this PR provides a workaround for.

"hyperscript>=0.1.0",
"importlib-metadata; python_version<'3.12'",
"ipywidgets",
Expand Down
7 changes: 7 additions & 0 deletions sqlmesh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@

from __future__ import annotations

# HACK: work around dateparser upstream issue: https://github.com/scrapinghub/dateparser/issues/1282
# This needs to happen before sqlmesh triggers an import of dateparser, which is why the
# call is placed ahead of the remaining imports of this module.
# This hack can be removed when the upstream issue is resolved.
from sqlmesh._hacks import fix_dateparser

fix_dateparser()

import glob
import logging
import os
Expand Down
46 changes: 46 additions & 0 deletions sqlmesh/_hacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# MD5 hex digests of upstream-distributed dateparser timezone cache files that are
# known to be buggy. fix_dateparser() deletes a local cache file whose digest is
# listed here.
UPSTREAM_PICKLE_FILE_SIGNATURES = {
    "a3a8d27b822072fa5c67c0651cb3c934",  # distributed with dateparser==1.2.2
}


def fix_dateparser() -> None:
    """Delete the buggy timezone cache bundled with dateparser==1.2.2, if present.

    Works around the following upstream issues in dateparser==1.2.2 (which all
    have the same root cause):

    - https://github.com/scrapinghub/dateparser/issues/1270
    - https://github.com/scrapinghub/dateparser/issues/1281
    - https://github.com/scrapinghub/dateparser/issues/1282

    The installed ``dateparser`` package is located, the MD5 digest of its
    ``data/dateparser_tz_cache.pkl`` file is compared against
    ``UPSTREAM_PICKLE_FILE_SIGNATURES`` and, on a match, the file is deleted.
    ``dateparser`` is then imported and a relative date expression is parsed
    as a smoke test; if parsing still fails, a warning is printed.

    File-system errors while reading or deleting the cache are handled here:
    the workaround is best-effort and the smoke test below reports the outcome.

    This hack can be removed if this issue is fixed upstream.
    If you're removing this hack, make sure to update pyproject.toml to
    blacklist version 1.2.2
    """
    # `import importlib` alone does not guarantee that the `importlib.util`
    # submodule is loaded, so import the submodule explicitly.
    import hashlib
    import importlib.util
    from pathlib import Path

    tz_cache = None
    spec = importlib.util.find_spec("dateparser")
    if spec and spec.origin:
        # spec.origin will be something like:
        # "/path/to/venv/lib/python3.9/site-packages/dateparser/__init__.py"
        tz_cache = Path(spec.origin).parent / "data" / "dateparser_tz_cache.pkl"
        try:
            signature = hashlib.md5(tz_cache.read_bytes()).hexdigest()
        except OSError:
            # The cache is missing or unreadable, so there is nothing to compare.
            signature = None

        # if the tz_cache file matches the signature of the buggy upstream one, delete it
        # deleting it forces it to be correctly re-generated for the local environment
        # when dateparser is imported
        if signature in UPSTREAM_PICKLE_FILE_SIGNATURES:
            try:
                tz_cache.unlink()
            except OSError as e:
                print(f"WARNING: Unable to delete upstream dateparser cache: {e}")

    # Test that it actually worked
    import dateparser

    if dateparser.parse("1 minute ago") is None:
        hint_filename = (
            str(tz_cache)
            if tz_cache is not None
            else "site-packages/dateparser/data/dateparser_tz_cache.pkl"
        )
        print(
            "WARNING: Buggy dateparser detected; some date expressions may fail to parse.\n"
            f"Please either delete the file '{hint_filename}' manually or use dateparser<=1.2.1"
        )