Skip to content

Commit 53ff048

Browse files
committed
Feat(dbt_cli): Use sqlmesh.yaml for config instead of trying to inject dbt_project.yml
1 parent 1237e9a commit 53ff048

File tree

5 files changed

+42
-32
lines changed

5 files changed

+42
-32
lines changed

sqlmesh/dbt/manifest.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from dbt.tracking import do_not_track
3333

3434
from sqlmesh.core import constants as c
35+
from sqlmesh.core.config import ModelDefaultsConfig
3536
from sqlmesh.dbt.basemodel import Dependencies
3637
from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS
3738
from sqlmesh.dbt.model import ModelConfig
@@ -78,12 +79,14 @@ def __init__(
7879
target: TargetConfig,
7980
variable_overrides: t.Optional[t.Dict[str, t.Any]] = None,
8081
cache_dir: t.Optional[str] = None,
82+
model_defaults: t.Optional[ModelDefaultsConfig] = None,
8183
):
8284
self.project_path = project_path
8385
self.profiles_path = profiles_path
8486
self.profile_name = profile_name
8587
self.target = target
8688
self.variable_overrides = variable_overrides or {}
89+
self.model_defaults = model_defaults or ModelDefaultsConfig()
8790

8891
self.__manifest: t.Optional[Manifest] = None
8992
self._project_name: str = ""
@@ -380,9 +383,12 @@ def _load_manifest(self) -> Manifest:
380383
profile = self._load_profile()
381384
project = self._load_project(profile)
382385

383-
if not any(k in project.models for k in ("start", "+start")):
386+
if (
387+
not any(k in project.models for k in ("start", "+start"))
388+
and not self.model_defaults.start
389+
):
384390
raise ConfigError(
385-
"SQLMesh's requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates"
391+
"SQLMesh requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates"
386392
)
387393

388394
runtime_config = RuntimeConfig.from_parts(project, profile, args)

sqlmesh/dbt/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ def to_sqlmesh(
565565
query,
566566
dialect=model_dialect,
567567
kind=kind,
568-
start=self.start,
568+
start=self.start or context.sqlmesh_config.model_defaults.start,
569569
audit_definitions=audit_definitions,
570570
path=model_kwargs.pop("path", self.path),
571571
# This ensures that we bypass query rendering that would otherwise be required to extract additional

sqlmesh/dbt/project.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def load(cls, context: DbtContext, variables: t.Optional[t.Dict[str, t.Any]] = N
7676
target=profile.target,
7777
variable_overrides=variable_overrides,
7878
cache_dir=context.sqlmesh_config.cache_dir,
79+
model_defaults=context.sqlmesh_config.model_defaults,
7980
)
8081

8182
extra_fields = profile.target.extra

sqlmesh_dbt/operations.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,21 +65,26 @@ def create(
6565
from sqlmesh.core.console import set_console
6666
from sqlmesh_dbt.console import DbtCliConsole
6767
from sqlmesh.utils.errors import SQLMeshError
68+
from sqlmesh.core.config import ModelDefaultsConfig
6869

6970
configure_logging(force_debug=debug)
7071
set_console(DbtCliConsole())
7172

7273
progress.update(load_task_id, description="Loading project", total=None)
7374

74-
# inject default start date if one is not specified to prevent the user from having to do anything
75-
_inject_default_start_date(project_dir)
75+
cli_config = get_or_create_sqlmesh_config(project_dir)
76+
# todo: we will need to build this out when we start storing more than model_defaults
77+
model_defaults = (
78+
ModelDefaultsConfig.model_validate(cli_config["model_defaults"])
79+
if "model_defaults" in cli_config
80+
else None
81+
)
7682

7783
config = sqlmesh_config(
7884
project_root=project_dir,
79-
# do we want to use a local duckdb for state?
80-
# warehouse state has a bunch of overhead to initialize, is slow for ongoing operations and will create tables that perhaps the user was not expecting
81-
# on the other hand, local state is not portable
85+
# This triggers warehouse state. Users will probably find this very slow
8286
state_connection=None,
87+
model_defaults=model_defaults,
8388
)
8489

8590
sqlmesh_context = Context(
@@ -109,25 +114,27 @@ def create(
109114
return DbtOperations(sqlmesh_context, dbt_project)
110115

111116

112-
def _inject_default_start_date(project_dir: t.Optional[Path] = None) -> None:
117+
def get_or_create_sqlmesh_config(project_dir: t.Optional[Path] = None) -> t.Dict[str, t.Any]:
113118
"""
114-
SQLMesh needs a start date to as the starting point for calculating intervals on incremental models
119+
SQLMesh needs a start date to use as the starting point for calculating intervals on incremental models, amongst other things
115120
116121
Rather than forcing the user to update their config manually or having a default that is not saved between runs,
117-
we can inject it automatically to the dbt_project.yml file
122+
we can store sqlmesh-specific things in a `sqlmesh.yaml` file. This is preferable to trying to inject config into `dbt_project.yml`
123+
because it means we have full control over the file and don't need to worry about accidentally reformatting it or accidentally
124+
clobbering other config
118125
"""
119-
from sqlmesh.dbt.project import PROJECT_FILENAME, load_yaml
120-
from sqlmesh.utils.yaml import dump
126+
import sqlmesh.utils.yaml as yaml
121127
from sqlmesh.utils.date import yesterday_ds
128+
from sqlmesh.core.config import ModelDefaultsConfig
129+
130+
potential_filenames = [
131+
(project_dir or Path.cwd()) / f"sqlmesh.{ext}" for ext in ("yaml", "yml")
132+
]
133+
134+
sqlmesh_yaml_file = next((f for f in potential_filenames if f.exists()), potential_filenames[0])
135+
136+
if not sqlmesh_yaml_file.exists():
137+
with sqlmesh_yaml_file.open("w") as f:
138+
yaml.dump({"model_defaults": ModelDefaultsConfig(start=yesterday_ds()).dict()}, f)
122139

123-
project_yaml_path = (project_dir or Path.cwd()) / PROJECT_FILENAME
124-
if project_yaml_path.exists():
125-
loaded_project_file = load_yaml(project_yaml_path)
126-
start_date_keys = ("start", "+start")
127-
if "models" in loaded_project_file and all(
128-
k not in loaded_project_file["models"] for k in start_date_keys
129-
):
130-
loaded_project_file["models"]["+start"] = yesterday_ds()
131-
# todo: this may format the file differently, is that acceptable?
132-
with project_yaml_path.open("w") as f:
133-
dump(loaded_project_file, f)
140+
return yaml.load(sqlmesh_yaml_file, render_jinja=False)

tests/dbt/cli/test_operations.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
pytestmark = pytest.mark.slow
88

99

10-
def test_create_injects_default_start_date(jaffle_shop_duckdb: Path):
10+
def test_create_sets_and_persists_default_start_date(jaffle_shop_duckdb: Path):
1111
with time_machine.travel("2020-01-02 00:00:00 UTC"):
1212
from sqlmesh.utils.date import yesterday_ds
1313

@@ -38,14 +38,10 @@ def test_create_injects_default_start_date(jaffle_shop_duckdb: Path):
3838

3939

4040
def test_create_uses_configured_start_date_if_supplied(jaffle_shop_duckdb: Path):
41-
dbt_project_yaml = jaffle_shop_duckdb / "dbt_project.yml"
41+
sqlmesh_yaml = jaffle_shop_duckdb / "sqlmesh.yml"
4242

43-
contents = yaml.load(dbt_project_yaml, render_jinja=False)
44-
45-
contents["models"]["+start"] = "2023-12-12"
46-
47-
with dbt_project_yaml.open("w") as f:
48-
yaml.dump(contents, f)
43+
with sqlmesh_yaml.open("w") as f:
44+
yaml.dump({"model_defaults": {"start": "2023-12-12"}}, f)
4945

5046
operations = create()
5147

0 commit comments

Comments
 (0)