From 8ff679b80439820ac1f8916403d0ffa67cecab6d Mon Sep 17 00:00:00 2001
From: Erin Drummond
Date: Mon, 14 Jul 2025 00:19:28 +0000
Subject: [PATCH] Fix(table_diff): Properly handle null check for array types in data sample

pd.isnull() returns a scalar bool for scalar input but an array of bools for
array input, so using its result directly in an `if` fails on array cells.
Branch on the shape of the result instead. A non-empty array that contains
only nulls is normalized to None; an empty array is left untouched so that
it never compares equal to NULL.

---
NOTE(review): the column padding inside the expected-output string of
test_data_diff_array_struct_query was re-derived from the table border
widths; confirm against the real console output before merging.

 sqlmesh/core/console.py       | 12 +++++-
 tests/core/test_table_diff.py | 63 +++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py
index 86de61c829..b78d403a87 100644
--- a/sqlmesh/core/console.py
+++ b/sqlmesh/core/console.py
@@ -2711,8 +2711,18 @@ def _cells_match(x: t.Any, y: t.Any) -> bool:
     def _normalize(val: t.Any) -> t.Any:
         # Convert Pandas null to Python null for the purposes of comparison to prevent errors like the following on boolean fields:
         # - TypeError: boolean value of NA is ambiguous
-        if pd.isnull(val):
+        # note pd.isnull() returns either a scalar bool or an array of bools depending on
+        # whether the input is a scalar or an array
+        isnull = pd.isnull(val)
+
+        if isinstance(isnull, (bool, np.bool_)):  # scalar
+            if isnull:
+                val = None
+        elif np.size(isnull) > 0 and np.all(isnull):  # array
+            # only a non-empty array made up entirely of nulls is treated as null; the size
+            # check stops an empty array (for which "all" is vacuously true) from matching NULL
             val = None
+
         return list(val) if isinstance(val, (pd.Series, np.ndarray)) else val
 
     return _normalize(x) == _normalize(y)
diff --git a/tests/core/test_table_diff.py b/tests/core/test_table_diff.py
index 9ea0d64771..64096a6637 100644
--- a/tests/core/test_table_diff.py
+++ b/tests/core/test_table_diff.py
@@ -504,6 +504,69 @@ def test_data_diff_array_dict(sushi_context_fixed_date):
     assert stripped_output == stripped_expected
 
 
+def test_data_diff_array_struct_query():
+    engine_adapter = DuckDBConnectionConfig().create_engine_adapter()
+
+    columns_to_types = {"key": exp.DataType.build("int"), "value": exp.DataType.build("int")}
+
+    engine_adapter.create_table("table_diff_source", columns_to_types)
+    engine_adapter.create_table("table_diff_target", columns_to_types)
+
+    engine_adapter.execute(
+        "insert into table_diff_source (key, value) values (1, 1), (1, 2), (1, 3)"
+    )
+    engine_adapter.execute(
+        "insert into table_diff_target (key, value) values (1, 1), (1, 3), (1, 2)"
+    )
+
+    engine_adapter.execute(
+        "create view src_view as select key, array_agg(value) as val_arr, map(['k','v'], [10,11]) as val_map from table_diff_source group by 1"
+    )
+    engine_adapter.execute(
+        "create view target_view as select key, array_agg(value) as val_arr, map(['k','v'],[11,10]) as val_map from table_diff_target group by 1"
+    )
+
+    table_diff = TableDiff(
+        adapter=engine_adapter,
+        source="src_view",
+        target="target_view",
+        source_alias="dev",
+        target_alias="prod",
+        on=["key"],
+    )
+
+    diff = table_diff.row_diff()
+
+    output = capture_console_output("show_row_diff", row_diff=diff)
+
+    assert (
+        strip_ansi_codes(output)
+        == """Row Counts:
+└── PARTIAL MATCH: 1 rows (100.0%)
+
+COMMON ROWS column comparison stats:
+         pct_match
+val_arr        0.0
+val_map        0.0
+
+
+COMMON ROWS sample data differences:
+Column: val_arr
+┏━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
+┃ key ┃ DEV     ┃ PROD    ┃
+┡━━━━━╇━━━━━━━━━╇━━━━━━━━━┩
+│ 1   │ [1 2 3] │ [1 3 2] │
+└─────┴─────────┴─────────┘
+Column: val_map
+┏━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓
+┃ key ┃ DEV                ┃ PROD               ┃
+┡━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩
+│ 1   │ {'k': 10, 'v': 11} │ {'k': 11, 'v': 10} │
+└─────┴────────────────────┴────────────────────┘
+""".strip()
+    )
+
+
 def test_data_diff_nullable_booleans():
     engine_adapter = DuckDBConnectionConfig().create_engine_adapter()
 