Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions sqlmesh/dbt/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from dbt.tracking import do_not_track

from sqlmesh.core import constants as c
from sqlmesh.core.config import ModelDefaultsConfig
from sqlmesh.dbt.basemodel import Dependencies
from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS
from sqlmesh.dbt.model import ModelConfig
Expand Down Expand Up @@ -78,12 +79,14 @@ def __init__(
target: TargetConfig,
variable_overrides: t.Optional[t.Dict[str, t.Any]] = None,
cache_dir: t.Optional[str] = None,
model_defaults: t.Optional[ModelDefaultsConfig] = None,
):
self.project_path = project_path
self.profiles_path = profiles_path
self.profile_name = profile_name
self.target = target
self.variable_overrides = variable_overrides or {}
self.model_defaults = model_defaults or ModelDefaultsConfig()

self.__manifest: t.Optional[Manifest] = None
self._project_name: str = ""
Expand Down Expand Up @@ -380,9 +383,12 @@ def _load_manifest(self) -> Manifest:
profile = self._load_profile()
project = self._load_project(profile)

if not any(k in project.models for k in ("start", "+start")):
if (
not any(k in project.models for k in ("start", "+start"))
and not self.model_defaults.start
):
raise ConfigError(
"SQLMesh's requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates"
"SQLMesh requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates"
)

runtime_config = RuntimeConfig.from_parts(project, profile, args)
Expand Down
2 changes: 1 addition & 1 deletion sqlmesh/dbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ def to_sqlmesh(
query,
dialect=model_dialect,
kind=kind,
start=self.start,
start=self.start or context.sqlmesh_config.model_defaults.start,
audit_definitions=audit_definitions,
path=model_kwargs.pop("path", self.path),
# This ensures that we bypass query rendering that would otherwise be required to extract additional
Expand Down
1 change: 1 addition & 0 deletions sqlmesh/dbt/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def load(cls, context: DbtContext, variables: t.Optional[t.Dict[str, t.Any]] = N
target=profile.target,
variable_overrides=variable_overrides,
cache_dir=context.sqlmesh_config.cache_dir,
model_defaults=context.sqlmesh_config.model_defaults,
)

extra_fields = profile.target.extra
Expand Down
49 changes: 28 additions & 21 deletions sqlmesh_dbt/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,26 @@ def create(
from sqlmesh.core.console import set_console
from sqlmesh_dbt.console import DbtCliConsole
from sqlmesh.utils.errors import SQLMeshError
from sqlmesh.core.config import ModelDefaultsConfig

configure_logging(force_debug=debug)
set_console(DbtCliConsole())

progress.update(load_task_id, description="Loading project", total=None)

# inject default start date if one is not specified to prevent the user from having to do anything
_inject_default_start_date(project_dir)
cli_config = get_or_create_sqlmesh_config(project_dir)
# todo: we will need to build this out when we start storing more than model_defaults
model_defaults = (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like this alternative config here. It should be normal SQLMesh config.

Please note that, despite having a separate CLI command, I still want other sqlmesh commands to work as expected. The current direction doesn't ensure that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with the normal SQLMesh config is that its fields and validations are designed for normal SQLMesh projects, so much of it is redundant with what a dbt project already specifies in dbt_project.yml or profiles.yml (like the entire connection config).

It's also called config.yaml which is kind of generic next to dbt_project.yml instead of something more obvious like sqlmesh.yaml.

But I'll investigate how it can be reused instead of defining a working subset pared down to just what's required for the dbt-compatible CLI.

> I still want other sqlmesh commands to work as expected

Understood, that helps a lot.

ModelDefaultsConfig.model_validate(cli_config["model_defaults"])
if "model_defaults" in cli_config
else None
)

config = sqlmesh_config(
project_root=project_dir,
# do we want to use a local duckdb for state?
# warehouse state has a bunch of overhead to initialize, is slow for ongoing operations and will create tables that perhaps the user was not expecting
# on the other hand, local state is not portable
# This triggers warehouse state. Users will probably find this very slow
state_connection=None,
model_defaults=model_defaults,
)

sqlmesh_context = Context(
Expand Down Expand Up @@ -109,25 +114,27 @@ def create(
return DbtOperations(sqlmesh_context, dbt_project)


def _inject_default_start_date(project_dir: t.Optional[Path] = None) -> None:
def get_or_create_sqlmesh_config(project_dir: t.Optional[Path] = None) -> t.Dict[str, t.Any]:
"""
SQLMesh needs a start date to as the starting point for calculating intervals on incremental models
SQLMesh needs a start date to as the starting point for calculating intervals on incremental models, amongst other things

Rather than forcing the user to update their config manually or having a default that is not saved between runs,
we can inject it automatically to the dbt_project.yml file
we can store sqlmesh-specific things in a `sqlmesh.yaml` file. This is preferable to trying to inject config into `dbt_project.yml`
because it means we have full control over the file and dont need to worry about accidentally reformatting it or accidentally
clobbering other config
"""
from sqlmesh.dbt.project import PROJECT_FILENAME, load_yaml
from sqlmesh.utils.yaml import dump
import sqlmesh.utils.yaml as yaml
from sqlmesh.utils.date import yesterday_ds
from sqlmesh.core.config import ModelDefaultsConfig

potential_filenames = [
(project_dir or Path.cwd()) / f"sqlmesh.{ext}" for ext in ("yaml", "yml")
]

sqlmesh_yaml_file = next((f for f in potential_filenames if f.exists()), potential_filenames[0])

if not sqlmesh_yaml_file.exists():
with sqlmesh_yaml_file.open("w") as f:
yaml.dump({"model_defaults": ModelDefaultsConfig(start=yesterday_ds()).dict()}, f)

project_yaml_path = (project_dir or Path.cwd()) / PROJECT_FILENAME
if project_yaml_path.exists():
loaded_project_file = load_yaml(project_yaml_path)
start_date_keys = ("start", "+start")
if "models" in loaded_project_file and all(
k not in loaded_project_file["models"] for k in start_date_keys
):
loaded_project_file["models"]["+start"] = yesterday_ds()
# todo: this may format the file differently, is that acceptable?
with project_yaml_path.open("w") as f:
dump(loaded_project_file, f)
return yaml.load(sqlmesh_yaml_file, render_jinja=False)
12 changes: 4 additions & 8 deletions tests/dbt/cli/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
pytestmark = pytest.mark.slow


def test_create_injects_default_start_date(jaffle_shop_duckdb: Path):
def test_create_sets_and_persists_default_start_date(jaffle_shop_duckdb: Path):
with time_machine.travel("2020-01-02 00:00:00 UTC"):
from sqlmesh.utils.date import yesterday_ds

Expand Down Expand Up @@ -38,14 +38,10 @@ def test_create_injects_default_start_date(jaffle_shop_duckdb: Path):


def test_create_uses_configured_start_date_if_supplied(jaffle_shop_duckdb: Path):
dbt_project_yaml = jaffle_shop_duckdb / "dbt_project.yml"
sqlmesh_yaml = jaffle_shop_duckdb / "sqlmesh.yml"

contents = yaml.load(dbt_project_yaml, render_jinja=False)

contents["models"]["+start"] = "2023-12-12"

with dbt_project_yaml.open("w") as f:
yaml.dump(contents, f)
with sqlmesh_yaml.open("w") as f:
yaml.dump({"model_defaults": {"start": "2023-12-12"}}, f)

operations = create()

Expand Down