From f17f73eb0640f75a13d4120bf3534e67cd9a22ee Mon Sep 17 00:00:00 2001 From: Vincent Chan Date: Thu, 28 Aug 2025 11:53:20 -0700 Subject: [PATCH] Chore: Allow duplicate keys in dbt project yaml files --- sqlmesh/dbt/common.py | 4 +++- sqlmesh/utils/yaml.py | 21 +++++++++++++++++++++ tests/utils/test_yaml.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/sqlmesh/dbt/common.py b/sqlmesh/dbt/common.py index ec928576ed..c74fd933da 100644 --- a/sqlmesh/dbt/common.py +++ b/sqlmesh/dbt/common.py @@ -36,7 +36,9 @@ def load_yaml(source: str | Path) -> t.Dict: try: - return load(source, render_jinja=False) + return load( + source, render_jinja=False, allow_duplicate_keys=True, keep_last_duplicate_key=True + ) except DuplicateKeyError as ex: raise ConfigError(f"{source}: {ex}" if isinstance(source, Path) else f"{ex}") diff --git a/sqlmesh/utils/yaml.py b/sqlmesh/utils/yaml.py index 0eb18d8188..d72e9d49e5 100644 --- a/sqlmesh/utils/yaml.py +++ b/sqlmesh/utils/yaml.py @@ -8,6 +8,7 @@ from pathlib import Path from ruamel import yaml +from ruamel.yaml.constructor import SafeConstructor from sqlmesh.core.constants import VAR from sqlmesh.utils.errors import SQLMeshError @@ -32,12 +33,30 @@ def YAML(typ: t.Optional[str] = "safe") -> yaml.YAML: return yaml_obj +class SafeConstructorOverride(SafeConstructor): + def check_mapping_key( + self, + node: t.Any, + key_node: t.Any, + mapping: t.Any, + key: t.Any, + value: t.Any, + ) -> bool: + """This function normally returns True if key is unique. + + It is only used by the construct_mapping function. By always returning True, + keys will always be updated and so the last value will be kept for mappings. + """ + return True + + def load( source: str | Path, raise_if_empty: bool = True, render_jinja: bool = True, allow_duplicate_keys: bool = False, variables: t.Optional[t.Dict[str, t.Any]] = None, + keep_last_duplicate_key: bool = False, ) -> t.Dict: """Loads a YAML object from either a raw string or a file.""" path: t.Optional[Path] = None @@ -56,6 +75,8 @@ def load( ) yaml = YAML() + if allow_duplicate_keys and keep_last_duplicate_key: + yaml.Constructor = SafeConstructorOverride yaml.allow_duplicate_keys = allow_duplicate_keys contents = yaml.load(source) if contents is None: diff --git a/tests/utils/test_yaml.py b/tests/utils/test_yaml.py index f2734576b6..5a2e04e5be 100644 --- a/tests/utils/test_yaml.py +++ b/tests/utils/test_yaml.py @@ -45,3 +45,37 @@ def test_yaml() -> None: decimal_value = Decimal(123.45) assert yaml.load(yaml.dump(decimal_value)) == str(decimal_value) + + +def test_load_keep_last_duplicate_key() -> None: + input_str = """ +name: first_name +name: second_name +name: third_name + +foo: bar + +mapping: + key: first_value + key: second_value + key: third_value + +sequence: + - one + - two +""" + # Default behavior of ruamel is to keep the first key encountered + assert yaml.load(input_str, allow_duplicate_keys=True) == { + "name": "first_name", + "foo": "bar", + "mapping": {"key": "first_value"}, + "sequence": ["one", "two"], + } + + # Test keeping last key + assert yaml.load(input_str, allow_duplicate_keys=True, keep_last_duplicate_key=True) == { + "name": "third_name", + "foo": "bar", + "mapping": {"key": "third_value"}, + "sequence": ["one", "two"], + }