From 53ff048abf61229062ec123f152faaaacde40d3c Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Mon, 11 Aug 2025 23:57:16 +0000 Subject: [PATCH] Feat(dbt_cli): Use sqlmesh.yaml for config instead of trying to inject dbt_project.yml --- sqlmesh/dbt/manifest.py | 10 +++++-- sqlmesh/dbt/model.py | 2 +- sqlmesh/dbt/project.py | 1 + sqlmesh_dbt/operations.py | 49 ++++++++++++++++++-------------- tests/dbt/cli/test_operations.py | 12 +++----- 5 files changed, 42 insertions(+), 32 deletions(-) diff --git a/sqlmesh/dbt/manifest.py b/sqlmesh/dbt/manifest.py index 19795a0b9b..4f839b9c9b 100644 --- a/sqlmesh/dbt/manifest.py +++ b/sqlmesh/dbt/manifest.py @@ -32,6 +32,7 @@ from dbt.tracking import do_not_track from sqlmesh.core import constants as c +from sqlmesh.core.config import ModelDefaultsConfig from sqlmesh.dbt.basemodel import Dependencies from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS from sqlmesh.dbt.model import ModelConfig @@ -78,12 +79,14 @@ def __init__( target: TargetConfig, variable_overrides: t.Optional[t.Dict[str, t.Any]] = None, cache_dir: t.Optional[str] = None, + model_defaults: t.Optional[ModelDefaultsConfig] = None, ): self.project_path = project_path self.profiles_path = profiles_path self.profile_name = profile_name self.target = target self.variable_overrides = variable_overrides or {} + self.model_defaults = model_defaults or ModelDefaultsConfig() self.__manifest: t.Optional[Manifest] = None self._project_name: str = "" @@ -380,9 +383,12 @@ def _load_manifest(self) -> Manifest: profile = self._load_profile() project = self._load_project(profile) - if not any(k in project.models for k in ("start", "+start")): + if ( + not any(k in project.models for k in ("start", "+start")) + and not self.model_defaults.start + ): raise ConfigError( - "SQLMesh's requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. 
https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates" + "SQLMesh requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates" ) runtime_config = RuntimeConfig.from_parts(project, profile, args) diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index 4cbca09aee..ee9f159c07 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -565,7 +565,7 @@ def to_sqlmesh( query, dialect=model_dialect, kind=kind, - start=self.start, + start=self.start or context.sqlmesh_config.model_defaults.start, audit_definitions=audit_definitions, path=model_kwargs.pop("path", self.path), # This ensures that we bypass query rendering that would otherwise be required to extract additional diff --git a/sqlmesh/dbt/project.py b/sqlmesh/dbt/project.py index ac36ee4e0a..d37c9cc6c4 100644 --- a/sqlmesh/dbt/project.py +++ b/sqlmesh/dbt/project.py @@ -76,6 +76,7 @@ def load(cls, context: DbtContext, variables: t.Optional[t.Dict[str, t.Any]] = N target=profile.target, variable_overrides=variable_overrides, cache_dir=context.sqlmesh_config.cache_dir, + model_defaults=context.sqlmesh_config.model_defaults, ) extra_fields = profile.target.extra diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py index b826a00e37..6dbf4d160d 100644 --- a/sqlmesh_dbt/operations.py +++ b/sqlmesh_dbt/operations.py @@ -65,21 +65,26 @@ def create( from sqlmesh.core.console import set_console from sqlmesh_dbt.console import DbtCliConsole from sqlmesh.utils.errors import SQLMeshError + from sqlmesh.core.config import ModelDefaultsConfig configure_logging(force_debug=debug) set_console(DbtCliConsole()) progress.update(load_task_id, description="Loading project", total=None) - # inject default start date if one is not specified to prevent the user from having to do anything - 
_inject_default_start_date(project_dir) + cli_config = get_or_create_sqlmesh_config(project_dir) + # todo: we will need to build this out when we start storing more than model_defaults + model_defaults = ( + ModelDefaultsConfig.model_validate(cli_config["model_defaults"]) + if "model_defaults" in cli_config + else None + ) config = sqlmesh_config( project_root=project_dir, - # do we want to use a local duckdb for state? - # warehouse state has a bunch of overhead to initialize, is slow for ongoing operations and will create tables that perhaps the user was not expecting - # on the other hand, local state is not portable + # This triggers warehouse state. Users will probably find this very slow state_connection=None, + model_defaults=model_defaults, ) sqlmesh_context = Context( @@ -109,25 +114,27 @@ def create( return DbtOperations(sqlmesh_context, dbt_project) -def _inject_default_start_date(project_dir: t.Optional[Path] = None) -> None: +def get_or_create_sqlmesh_config(project_dir: t.Optional[Path] = None) -> t.Dict[str, t.Any]: """ - SQLMesh needs a start date to as the starting point for calculating intervals on incremental models + SQLMesh needs a start date to use as the starting point for calculating intervals on incremental models, amongst other things Rather than forcing the user to update their config manually or having a default that is not saved between runs, - we can inject it automatically to the dbt_project.yml file + we can store sqlmesh-specific things in a `sqlmesh.yaml` file. 
This is preferable to trying to inject config into `dbt_project.yml` + because it means we have full control over the file and don't need to worry about accidentally reformatting it or accidentally + clobbering other config """ - from sqlmesh.dbt.project import PROJECT_FILENAME, load_yaml - from sqlmesh.utils.yaml import dump + import sqlmesh.utils.yaml as yaml from sqlmesh.utils.date import yesterday_ds + from sqlmesh.core.config import ModelDefaultsConfig + + potential_filenames = [ + (project_dir or Path.cwd()) / f"sqlmesh.{ext}" for ext in ("yaml", "yml") + ] + + sqlmesh_yaml_file = next((f for f in potential_filenames if f.exists()), potential_filenames[0]) + + if not sqlmesh_yaml_file.exists(): + with sqlmesh_yaml_file.open("w") as f: + yaml.dump({"model_defaults": ModelDefaultsConfig(start=yesterday_ds()).dict()}, f) - project_yaml_path = (project_dir or Path.cwd()) / PROJECT_FILENAME - if project_yaml_path.exists(): - loaded_project_file = load_yaml(project_yaml_path) - start_date_keys = ("start", "+start") - if "models" in loaded_project_file and all( - k not in loaded_project_file["models"] for k in start_date_keys - ): - loaded_project_file["models"]["+start"] = yesterday_ds() - # todo: this may format the file differently, is that acceptable? 
- with project_yaml_path.open("w") as f: - dump(loaded_project_file, f) + return yaml.load(sqlmesh_yaml_file, render_jinja=False) diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py index e384028bbc..fe89df3221 100644 --- a/tests/dbt/cli/test_operations.py +++ b/tests/dbt/cli/test_operations.py @@ -7,7 +7,7 @@ pytestmark = pytest.mark.slow -def test_create_injects_default_start_date(jaffle_shop_duckdb: Path): +def test_create_sets_and_persists_default_start_date(jaffle_shop_duckdb: Path): with time_machine.travel("2020-01-02 00:00:00 UTC"): from sqlmesh.utils.date import yesterday_ds @@ -38,14 +38,10 @@ def test_create_injects_default_start_date(jaffle_shop_duckdb: Path): def test_create_uses_configured_start_date_if_supplied(jaffle_shop_duckdb: Path): - dbt_project_yaml = jaffle_shop_duckdb / "dbt_project.yml" + sqlmesh_yaml = jaffle_shop_duckdb / "sqlmesh.yml" - contents = yaml.load(dbt_project_yaml, render_jinja=False) - - contents["models"]["+start"] = "2023-12-12" - - with dbt_project_yaml.open("w") as f: - yaml.dump(contents, f) + with sqlmesh_yaml.open("w") as f: + yaml.dump({"model_defaults": {"start": "2023-12-12"}}, f) operations = create()