From 7d74e4404f98f71ddc248eac1c0b7546d99af48d Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Mon, 4 Aug 2025 13:50:02 -0700 Subject: [PATCH] Fix: Make gateway names case-insensitive --- docs/concepts/models/external_models.md | 4 +- docs/guides/configuration.md | 6 +- docs/reference/configuration.md | 4 +- sqlmesh/core/config/root.py | 15 ++- sqlmesh/core/context.py | 4 +- sqlmesh/core/loader.py | 1 + tests/core/test_context.py | 135 ++++++++++++++++++++++++ 7 files changed, 158 insertions(+), 11 deletions(-) diff --git a/docs/concepts/models/external_models.md b/docs/concepts/models/external_models.md index 922daac6b0..ef2b39a10c 100644 --- a/docs/concepts/models/external_models.md +++ b/docs/concepts/models/external_models.md @@ -56,6 +56,8 @@ If SQLMesh does not have access to an external table's metadata, the table will In some use-cases such as [isolated systems with multiple gateways](../../guides/isolated_systems.md#multiple-gateways), there are external models that only exist on a certain gateway. +**Gateway names are case-insensitive in external model configurations.** You can specify the gateway name using any case (e.g., `gateway: dev`, `gateway: DEV`, `gateway: Dev`) and SQLMesh will handle the matching correctly. + Consider the following model that queries an external table with a dynamic database based on the current gateway: ``` @@ -100,7 +102,7 @@ This example demonstrates the structure of a `external_models.yaml` file: column_d: float - name: external_db.gateway_specific_external_table description: Another external table that only exists when the gateway is set to "test" - gateway: test + gateway: test # Case-insensitive - could also be "TEST", "Test", etc. 
columns: column_e: int column_f: varchar diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 6e14d1f605..24371f30d0 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -322,7 +322,7 @@ SQLMesh creates schemas, physical tables, and views in the data warehouse/engine The default SQLMesh behavior described in the FAQ is appropriate for most deployments, but you can override *where* SQLMesh creates physical tables and views with the `physical_schema_mapping`, `environment_suffix_target`, and `environment_catalog_mapping` configuration options. -You can also override *what* the physical tables are called by using the `physical_table_naming_convention` option. +You can also override *what* the physical tables are called by using the `physical_table_naming_convention` option. These options are in the [environments](../reference/configuration.md#environments) section of the configuration reference page. @@ -767,7 +767,9 @@ Even though the second change should have been a metadata change (thus not requi The `gateways` configuration defines how SQLMesh should connect to the data warehouse, state backend, and scheduler. These options are in the [gateway](../reference/configuration.md#gateway) section of the configuration reference page. -Each gateway key represents a unique gateway name and configures its connections. For example, this configures the `my_gateway` gateway: +Each gateway key represents a unique gateway name and configures its connections. **Gateway names are case-insensitive** - SQLMesh automatically normalizes gateway names to lowercase during configuration validation. This means you can use any case in your configuration files (e.g., `MyGateway`, `mygateway`, `MYGATEWAY`) and they will all work correctly. 
+ +For example, this configures the `my_gateway` gateway: === "YAML" diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 06aed36b53..df3fcf930d 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -141,7 +141,7 @@ SQLMesh UI settings. The `gateways` dictionary defines how SQLMesh should connect to the data warehouse, state backend, test backend, and scheduler. -It takes one or more named `gateway` configuration keys, each of which can define its own connections. A named gateway does not need to specify all four components and will use defaults if any are omitted - more information is provided about [gateway defaults](#gatewayconnection-defaults) below. +It takes one or more named `gateway` configuration keys, each of which can define its own connections. **Gateway names are case-insensitive** - SQLMesh normalizes all gateway names to lowercase during configuration validation, allowing you to use any case when referencing gateways. A named gateway does not need to specify all four components and will use defaults if any are omitted - more information is provided about [gateway defaults](#gatewayconnection-defaults) below. For example, a project might configure the `gate1` and `gate2` gateways: @@ -247,7 +247,7 @@ If a configuration contains multiple gateways, SQLMesh will use the first one in | Option | Description | Type | Required | | ----------------- | ---------------------------------------------------------------------------------------------------------------------------- | :----: | :------: | -| `default_gateway` | The name of a gateway to use if one is not provided explicitly (Default: the gateway defined first in the `gateways` option) | string | N | +| `default_gateway` | The name of a gateway to use if one is not provided explicitly (Default: the gateway defined first in the `gateways` option). Gateway names are case-insensitive. 
| string | N | ### Default connections/scheduler diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py index 4dd28f97a5..df8e2637da 100644 --- a/sqlmesh/core/config/root.py +++ b/sqlmesh/core/config/root.py @@ -62,6 +62,9 @@ def gateways_ensure_dict(value: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]: GatewayConfig.parse_obj(value) return {"": value} except Exception: + # Normalize all gateway keys to lowercase for case-insensitive matching + if isinstance(value, dict): + return {k.lower(): v for k, v in value.items()} return value @@ -298,19 +301,23 @@ def get_gateway(self, name: t.Optional[str] = None) -> GatewayConfig: if isinstance(self.gateways, dict): if name is None: if self.default_gateway: - if self.default_gateway not in self.gateways: + # Normalize default_gateway name to lowercase for lookup + default_key = self.default_gateway.lower() + if default_key not in self.gateways: raise ConfigError(f"Missing gateway with name '{self.default_gateway}'") - return self.gateways[self.default_gateway] + return self.gateways[default_key] if "" in self.gateways: return self.gateways[""] return first(self.gateways.values()) - if name not in self.gateways: + # Normalize lookup name to lowercase since gateway keys are already lowercase + lookup_key = name.lower() + if lookup_key not in self.gateways: raise ConfigError(f"Missing gateway with name '{name}'.") - return self.gateways[name] + return self.gateways[lookup_key] if name is not None: raise ConfigError("Gateway name is not supported when only one gateway is configured.") return self.gateways diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 5a0531209a..18df3a01fd 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -400,9 +400,9 @@ def __init__( self.environment_ttl = self.config.environment_ttl self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments) self.auto_categorize_changes = self.config.plan.auto_categorize_changes - 
self.selected_gateway = gateway or self.config.default_gateway_name + self.selected_gateway = (gateway or self.config.default_gateway_name).lower() - gw_model_defaults = self.config.gateways[self.selected_gateway].model_defaults + gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults if gw_model_defaults: # Merge global model defaults with the selected gateway's, if it's overriden global_defaults = self.config.model_defaults.model_dump(exclude_unset=True) diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py index 59a118124c..f4c9147d1b 100644 --- a/sqlmesh/core/loader.py +++ b/sqlmesh/core/loader.py @@ -374,6 +374,7 @@ def _load(path: Path) -> t.List[Model]: # however, if there is a gateway defined, gateway-specific models take precedence if gateway: + gateway = gateway.lower() for model in external_models: if model.gateway == gateway: if model.fqn in models and models[model.fqn].gateway == gateway: diff --git a/tests/core/test_context.py b/tests/core/test_context.py index 26fc542632..6b98bf5d25 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -1221,6 +1221,12 @@ def _get_external_model_names(gateway=None): # gateway explicitly set to prod; prod model should now show assert "prod_raw.model1" in _get_external_model_names(gateway="prod") + # test uppercase gateway name should match lowercase external model definition + assert "prod_raw.model1" in _get_external_model_names(gateway="PROD") + + # test mixed case gateway name should also work + assert "prod_raw.model1" in _get_external_model_names(gateway="Prod") + def test_disabled_model(copy_to_temp_path): path = copy_to_temp_path("examples/sushi") @@ -2867,3 +2873,132 @@ def test_model_defaults_statements_with_on_virtual_update(tmp_path: Path): # Default statements should come first assert model.on_virtual_update[0].sql() == "SELECT 'Model-defailt virtual update' AS message" assert model.on_virtual_update[1].sql() == "SELECT 'Model-specific update' AS 
message" + + +def test_uppercase_gateway_external_models(tmp_path): + # Create a temporary SQLMesh project with uppercase gateway name + config_py = tmp_path / "config.py" + config_py.write_text(""" +from sqlmesh.core.config import Config, DuckDBConnectionConfig, GatewayConfig, ModelDefaultsConfig + +config = Config( + gateways={ + "UPPERCASE_GATEWAY": GatewayConfig( + connection=DuckDBConnectionConfig(), + ), + }, + default_gateway="UPPERCASE_GATEWAY", + model_defaults=ModelDefaultsConfig(dialect="duckdb"), +) +""") + + # Create external models file with lowercase gateway name (this should still match uppercase) + external_models_yaml = tmp_path / "external_models.yaml" + external_models_yaml.write_text(""" +- name: test_db.uppercase_gateway_table + description: Test external model with lowercase gateway name that should match uppercase gateway + gateway: uppercase_gateway # lowercase in external model, but config has UPPERCASE_GATEWAY + columns: + id: int + name: text + +- name: test_db.no_gateway_table + description: Test external model without gateway (should be available for all gateways) + columns: + id: int + name: text +""") + + # Create a model that references the external model + models_dir = tmp_path / "models" + models_dir.mkdir() + model_sql = models_dir / "test_model.sql" + model_sql.write_text(""" +MODEL ( + name test.my_model, + kind FULL, +); + +SELECT * FROM test_db.uppercase_gateway_table; +""") + + # Test with uppercase gateway name - this should find both models + context_uppercase = Context(paths=[tmp_path], gateway="UPPERCASE_GATEWAY") + + # Verify external model with lowercase gateway name in YAML is found when using uppercase gateway + gateway_specific_models = [ + model + for model in context_uppercase.models.values() + if model.name == "test_db.uppercase_gateway_table" + ] + assert len(gateway_specific_models) == 1, ( + f"External model with lowercase gateway name should be found with uppercase gateway. 
Found {len(gateway_specific_models)} models" + ) + + # Verify external model without gateway is also found + no_gateway_models = [ + model + for model in context_uppercase.models.values() + if model.name == "test_db.no_gateway_table" + ] + assert len(no_gateway_models) == 1, ( + f"External model without gateway should be found. Found {len(no_gateway_models)} models" + ) + + # Check that the column types are properly loaded (not UNKNOWN) + external_model = gateway_specific_models[0] + column_types = {name: str(dtype) for name, dtype in external_model.columns_to_types.items()} + assert column_types == {"id": "INT", "name": "TEXT"}, ( + f"External model column types should not be UNKNOWN, got: {column_types}" + ) + + # Test that when using a different case for the gateway parameter, we get the same results + context_mixed_case = Context( + paths=[tmp_path], gateway="uppercase_gateway" + ) # lowercase parameter + + gateway_specific_models_mixed = [ + model + for model in context_mixed_case.models.values() + if model.name == "test_db.uppercase_gateway_table" + ] + # This should work but might fail if case sensitivity is not handled correctly + assert len(gateway_specific_models_mixed) == 1, ( + f"External model should be found regardless of gateway parameter case. 
Found {len(gateway_specific_models_mixed)} models" + ) + + # Test a case that should demonstrate the potential issue: + # Create another external model file with uppercase gateway name in the YAML + external_models_yaml_uppercase = tmp_path / "external_models_uppercase.yaml" + external_models_yaml_uppercase.write_text(""" +- name: test_db.uppercase_in_yaml + description: Test external model with uppercase gateway name in YAML + gateway: UPPERCASE_GATEWAY # uppercase in external model yaml + columns: + id: int + status: text +""") + + # Add the new external models file to the project + models_dir = tmp_path / "external_models" + models_dir.mkdir(exist_ok=True) + (models_dir / "uppercase_gateway_models.yaml").write_text(""" +- name: test_db.uppercase_in_yaml + description: Test external model with uppercase gateway name in YAML + gateway: UPPERCASE_GATEWAY # uppercase in external model yaml + columns: + id: int + status: text +""") + + # Reload context to pick up the new external models + context_reloaded = Context(paths=[tmp_path], gateway="UPPERCASE_GATEWAY") + + uppercase_in_yaml_models = [ + model + for model in context_reloaded.models.values() + if model.name == "test_db.uppercase_in_yaml" + ] + assert len(uppercase_in_yaml_models) == 1, ( + f"External model with uppercase gateway in YAML should be found. Found {len(uppercase_in_yaml_models)} models" + )