|
11 | 11 | from sqlmesh.dbt.project import Project |
12 | 12 | from sqlmesh_dbt.console import DbtCliConsole |
13 | 13 | from sqlmesh.core.model import Model |
14 | | - from sqlmesh.core.plan import Plan |
| 14 | + from sqlmesh.core.plan import Plan, PlanBuilder |
15 | 15 |
|
16 | 16 | logger = logging.getLogger(__name__) |
17 | 17 |
|
@@ -42,8 +42,39 @@ def run( |
42 | 42 | full_refresh: bool = False, |
43 | 43 | empty: bool = False, |
44 | 44 | ) -> Plan: |
45 | | - return self.context.plan( |
46 | | - **self._plan_options( |
| 45 | + plan_builder = self._plan_builder( |
| 46 | + environment=environment, |
| 47 | + select=select, |
| 48 | + exclude=exclude, |
| 49 | + full_refresh=full_refresh, |
| 50 | + empty=empty, |
| 51 | + ) |
| 52 | + |
| 53 | + plan = plan_builder.build() |
| 54 | + |
| 55 | + self.console.plan( |
| 56 | + plan_builder, |
| 57 | + default_catalog=self.context.default_catalog, |
| 58 | + # start doing work immediately (since no_diff is set, there isn't really anything for the user to say yes/no to) |
| 59 | + auto_apply=True, |
| 60 | + # don't output a diff of model changes |
| 61 | + no_diff=True, |
| 62 | + # don't throw up any prompts like "set the effective date" - use defaults |
| 63 | + no_prompts=True, |
| 64 | + ) |
| 65 | + |
| 66 | + return plan |
| 67 | + |
| 68 | + def _plan_builder( |
| 69 | + self, |
| 70 | + environment: t.Optional[str] = None, |
| 71 | + select: t.Optional[t.List[str]] = None, |
| 72 | + exclude: t.Optional[t.List[str]] = None, |
| 73 | + full_refresh: bool = False, |
| 74 | + empty: bool = False, |
| 75 | + ) -> PlanBuilder: |
| 76 | + return self.context.plan_builder( |
| 77 | + **self._plan_builder_options( |
47 | 78 | environment=environment, |
48 | 79 | select=select, |
49 | 80 | exclude=exclude, |
@@ -71,13 +102,15 @@ def _selected_models( |
71 | 102 |
|
72 | 103 | return selected_models |
73 | 104 |
|
74 | | - def _plan_options( |
| 105 | + def _plan_builder_options( |
75 | 106 | self, |
76 | | - environment: t.Optional[str] = None, |
| 107 | + # upstream dbt options |
77 | 108 | select: t.Optional[t.List[str]] = None, |
78 | 109 | exclude: t.Optional[t.List[str]] = None, |
79 | 110 | empty: bool = False, |
80 | 111 | full_refresh: bool = False, |
| 112 | + # sqlmesh extra options |
| 113 | + environment: t.Optional[str] = None, |
81 | 114 | ) -> t.Dict[str, t.Any]: |
82 | 115 | import sqlmesh.core.constants as c |
83 | 116 |
|
@@ -130,24 +163,38 @@ def _plan_options( |
130 | 163 | # `dbt --empty` adds LIMIT 0 to the queries, resulting in empty tables. In addition, it happily clobbers existing tables regardless of whether they are populated. |
131 | 164 | # This *partially* lines up with --skip-backfill in SQLMesh, which indicates to not populate tables if they happened to be created/updated as part of this plan. |
132 | 165 | # However, if a table already exists and has data in it, there is no change so SQLMesh will not recreate the table and thus it will not be cleared. |
133 | | - # So in order to fully replicate dbt's --empty, we also need --full-refresh semantics in order to replace existing tables |
| 166 | + # Currently, SQLMesh has no way to say "restate with empty data", because --restate-model coupled with --skip-backfill ends up being a no-op |
134 | 167 | options["skip_backfill"] = True |
135 | | - full_refresh = True |
| 168 | + |
| 169 | + self.console.log_warning( |
| 170 | + "dbt's `--empty` drops the tables for all selected models and replaces them with empty ones.\n" |
| 171 | + "This can easily result in accidental data loss, so SQLMesh limits this to only new or modified models and leaves the tables for existing unmodified models alone.\n\n" |
| 172 | + "If you were creating empty tables to preview model changes, please consider using `--environment` to preview these changes in an isolated Virtual Data Environment instead.\n\n" |
| 173 | + "Otherwise, if you really do want dbt's `--empty` behaviour of clearing every selected table, please file an issue on GitHub so we can better understand the use-case.\n" |
| 174 | + ) |
| 175 | + |
| 176 | + if full_refresh: |
| 177 | + # --full-refresh is implemented in terms of "add every model as a restatement" |
| 178 | + # however, `--empty` sets skip_backfill=True, which causes the BackfillStage of the plan to be skipped. |
| 179 | + # the re-processing of data intervals happens in the BackfillStage, so if it gets skipped, restatements become a no-op |
| 180 | + raise ValueError("`--full-refresh` alongside `--empty` is not currently supported.") |
136 | 181 |
|
137 | 182 | if full_refresh: |
138 | | - # TODO: handling this requires some updates in the engine to enable restatements+changes in the same plan without affecting prod |
139 | | - # if the plan targets dev |
140 | | - pass |
| 183 | + options.update( |
| 184 | + dict( |
| 185 | + # Add every selected model as a restatement to force them to get repopulated from scratch |
| 186 | + restate_models=list(self.context.models) |
| 187 | + if not select_models |
| 188 | + else select_models, |
| 189 | + # by default in SQLMesh, restatements only operate on what has been committed to state. |
| 190 | + # in order to emulate dbt, we need to use the local filesystem instead, so we override this default |
| 191 | + always_include_local_changes=True, |
| 192 | + ) |
| 193 | + ) |
141 | 194 |
|
142 | 195 | return dict( |
143 | 196 | environment=environment, |
144 | 197 | select_models=select_models, |
145 | | - # dont output a diff of model changes |
146 | | - no_diff=True, |
147 | | - # don't throw up any prompts like "set the effective date" - use defaults |
148 | | - no_prompts=True, |
149 | | - # start doing work immediately (since no_diff is set, there isnt really anything for the user to say yes/no to) |
150 | | - auto_apply=True, |
151 | 198 | **options, |
152 | 199 | ) |
153 | 200 |
|
|
0 commit comments