From 85400d9a56d04b97a0b6ec9600d6d7e5e1d313e1 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Wed, 9 Jul 2025 02:02:37 +0000 Subject: [PATCH] Chore!: Update ci/cd bot to match CLI auto_categorize behaviour --- docs/integrations/github.md | 4 +- sqlmesh/core/config/root.py | 12 ++++++ sqlmesh/integrations/github/cicd/config.py | 29 ++++++++++++-- tests/core/analytics/test_collector.py | 2 +- tests/integrations/github/cicd/test_config.py | 40 +++++++++++++++++-- 5 files changed, 77 insertions(+), 10 deletions(-) diff --git a/docs/integrations/github.md b/docs/integrations/github.md index 323aff0565..1f66ef6368 100644 --- a/docs/integrations/github.md +++ b/docs/integrations/github.md @@ -293,13 +293,13 @@ Below is an example of how to define the default config for the bot in either YA | `enable_deploy_command` | Indicates if the `/deploy` command should be enabled in order to allowed synchronized deploys to production. Default: `False` | bool | N | | `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/`. Default: `None` meaning no namespace is used. | string | N | | `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. | dict | N | -| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed. 
| str | N | +| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed. | str | N | | `pr_min_intervals` | Intended for use when `default_pr_start` is set to a relative time, eg `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models if `default_pr_start` was set to `1 week ago`) will be excluded from backfill entirely. | int | N | | `skip_pr_backfill` | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N | | `pr_include_unmodified` | Indicates whether to include unmodified models in the PR environment. Default to the project's config value (which defaults to `False`) | bool | N | | `run_on_deploy_to_prod` | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. Default: `False` | bool | N | | `pr_environment_name` | The name of the PR environment to create for which a PR number will be appended to. Defaults to the repo name if not provided. Note: The name will be normalized to alphanumeric + underscore and lowercase. | str | N | -| `prod_branch_name` | The name of the git branch associated with production. Ex: `prod`. 
Default: `main` or `master` is considered prod | str | N | Example with all properties defined: diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py index 315728aceb..cd92ff8467 100644 --- a/sqlmesh/core/config/root.py +++ b/sqlmesh/core/config/root.py @@ -260,6 +260,18 @@ def _normalize_identifiers(key: str) -> None: return self + @model_validator(mode="after") + def _inherit_project_config_in_cicd_bot(self) -> Self: + if self.cicd_bot: + # inherit the project-level settings into the CICD bot if they have not been explicitly overridden + if self.cicd_bot.auto_categorize_changes_ is None: + self.cicd_bot.auto_categorize_changes_ = self.plan.auto_categorize_changes + + if self.cicd_bot.pr_include_unmodified_ is None: + self.cicd_bot.pr_include_unmodified_ = self.plan.include_unmodified + + return self + def get_default_test_connection( self, default_catalog: t.Optional[str] = None, diff --git a/sqlmesh/integrations/github/cicd/config.py b/sqlmesh/integrations/github/cicd/config.py index 33312c4ad7..8f84db47c8 100644 --- a/sqlmesh/integrations/github/cicd/config.py +++ b/sqlmesh/integrations/github/cicd/config.py @@ -7,6 +7,7 @@ from sqlmesh.core.config.base import BaseConfig from sqlmesh.utils.date import TimeLike from sqlmesh.utils.pydantic import model_validator +from sqlmesh.core.console import get_console class MergeMethod(str, Enum): @@ -22,10 +23,12 @@ class GithubCICDBotConfig(BaseConfig): enable_deploy_command: bool = False merge_method: t.Optional[MergeMethod] = None command_namespace: t.Optional[str] = None - auto_categorize_changes: CategorizerConfig = CategorizerConfig.all_off() + auto_categorize_changes_: t.Optional[CategorizerConfig] = Field( + default=None, alias="auto_categorize_changes" + ) default_pr_start: t.Optional[TimeLike] = None - skip_pr_backfill: bool = True - pr_include_unmodified: t.Optional[bool] = None + skip_pr_backfill_: t.Optional[bool] = Field(default=None, alias="skip_pr_backfill") + pr_include_unmodified_: 
t.Optional[bool] = Field(default=None, alias="pr_include_unmodified") run_on_deploy_to_prod: bool = False pr_environment_name: t.Optional[str] = None pr_min_intervals: t.Optional[int] = None @@ -50,6 +53,26 @@ def prod_branch_names(self) -> t.List[str]: return [self.prod_branch_names_] return ["main", "master"] + @property + def auto_categorize_changes(self) -> CategorizerConfig: + return self.auto_categorize_changes_ or CategorizerConfig.all_off() + + @property + def pr_include_unmodified(self) -> bool: + return self.pr_include_unmodified_ or False + + @property + def skip_pr_backfill(self) -> bool: + if self.skip_pr_backfill_ is None: + get_console().log_warning( + "`skip_pr_backfill` is unset, defaulting it to `true` (no data will be backfilled).\n" + "Future versions of SQLMesh will default to `skip_pr_backfill: false` to align with the CLI default behaviour.\n" + "If you would like to preserve the current behaviour and remove this warning, please explicitly set `skip_pr_backfill: true` in the bot config.\n\n" + "For more information on configuring the bot, see: https://sqlmesh.readthedocs.io/en/stable/integrations/github/" + ) + return True + return self.skip_pr_backfill_ + FIELDS_FOR_ANALYTICS: t.ClassVar[t.Set[str]] = { "invalidate_environment_after_deploy", "enable_deploy_command", diff --git a/tests/core/analytics/test_collector.py b/tests/core/analytics/test_collector.py index 957db3a003..9eaca07ef3 100644 --- a/tests/core/analytics/test_collector.py +++ b/tests/core/analytics/test_collector.py @@ -145,7 +145,7 @@ def test_on_cicd_command(collector: AnalyticsCollector, mocker: MockerFixture): { "seq_num": 1, "event_type": "CICD_COMMAND", - "event": '{"command_name": "test_cicd", "command_args": ["arg_1", "arg_2"], "parent_command_names": ["parent_a", "parent_b"], "cicd_bot_config": {"invalidate_environment_after_deploy": true, "enable_deploy_command": false, "auto_categorize_changes": {"external": "off", "python": "off", "sql": "off", "seed": "off"}, 
"skip_pr_backfill": true, "run_on_deploy_to_prod": false}}', + "event": '{"command_name": "test_cicd", "command_args": ["arg_1", "arg_2"], "parent_command_names": ["parent_a", "parent_b"], "cicd_bot_config": {"invalidate_environment_after_deploy": true, "enable_deploy_command": false, "run_on_deploy_to_prod": false}}', **common_fields, } ), diff --git a/tests/integrations/github/cicd/test_config.py b/tests/integrations/github/cicd/test_config.py index c100a1fa98..e4424cf3ba 100644 --- a/tests/integrations/github/cicd/test_config.py +++ b/tests/integrations/github/cicd/test_config.py @@ -34,11 +34,11 @@ def test_load_yaml_config_default(tmp_path): assert config.cicd_bot.invalidate_environment_after_deploy assert config.cicd_bot.merge_method is None assert config.cicd_bot.command_namespace is None - assert config.cicd_bot.auto_categorize_changes == CategorizerConfig.all_off() + assert config.cicd_bot.auto_categorize_changes == config.plan.auto_categorize_changes assert config.cicd_bot.default_pr_start is None assert not config.cicd_bot.enable_deploy_command assert config.cicd_bot.skip_pr_backfill - assert config.cicd_bot.pr_include_unmodified is None + assert not config.cicd_bot.pr_include_unmodified assert config.cicd_bot.pr_environment_name is None assert config.cicd_bot.prod_branch_names == ["main", "master"] assert not config.cicd_bot.pr_min_intervals @@ -115,11 +115,11 @@ def test_load_python_config_defaults(tmp_path): assert config.cicd_bot.invalidate_environment_after_deploy assert config.cicd_bot.merge_method is None assert config.cicd_bot.command_namespace is None - assert config.cicd_bot.auto_categorize_changes == CategorizerConfig.all_off() + assert config.cicd_bot.auto_categorize_changes == config.plan.auto_categorize_changes assert config.cicd_bot.default_pr_start is None assert not config.cicd_bot.enable_deploy_command assert config.cicd_bot.skip_pr_backfill - assert config.cicd_bot.pr_include_unmodified is None + assert not 
config.cicd_bot.pr_include_unmodified assert config.cicd_bot.pr_environment_name is None assert config.cicd_bot.prod_branch_names == ["main", "master"] assert not config.cicd_bot.pr_min_intervals @@ -258,3 +258,35 @@ def test_ttl_in_past(tmp_path): match="TTL '1 week' is in the past. Please specify a relative time in the future. Ex: `in 1 week` instead of `1 week`.", ): load_config_from_paths(Config, project_paths=[tmp_path / "config.yaml"]) + + +def test_properties_inherit_from_project_config(tmp_path): + (tmp_path / "config.yaml").write_text(""" +plan: + auto_categorize_changes: + external: off + python: full + sql: off + seed: full + include_unmodified: true + +cicd_bot: + type: github + +model_defaults: + dialect: duckdb +""") + + config = load_config_from_paths(Config, [tmp_path / "config.yaml"]) + + assert ( + config.cicd_bot.auto_categorize_changes + == config.plan.auto_categorize_changes + == CategorizerConfig( + external=AutoCategorizationMode.OFF, + python=AutoCategorizationMode.FULL, + sql=AutoCategorizationMode.OFF, + seed=AutoCategorizationMode.FULL, + ) + ) + assert config.cicd_bot.pr_include_unmodified == config.plan.include_unmodified == True