-
Notifications
You must be signed in to change notification settings - Fork 359
Fix: Unit test CTE failures not being captured #5081
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -317,6 +317,13 @@ def _to_hashable(x: t.Any) -> t.Any: | |
| # | ||
| # This is a bit of a hack, but it's a way to get the best of both worlds. | ||
| args: t.List[t.Any] = [] | ||
|
|
||
| failed_subtest = "" | ||
|
|
||
| if subtest := getattr(self, "_subtest", None): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if it even makes sense at this point to rely on subtests rather than write our own context manager which sets the current CTE and then resets it.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are there any other benefits that subtests offer us here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I had the same thought as I was working on this; I think its merit is that it triggers the callback. Not really a strong argument for it, but I'm not sure if there's a strong argument against it either. Happy to reconsider if you had a better thought with the custom CM |
||
| if cte := subtest.params.get("cte"): | ||
| failed_subtest = f" (CTE {cte})" | ||
|
Comment on lines
+321
to
+325
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is to differentiate the CTE mismatch from the output one, e.g. if both subtests fail in a single test, that'd be the output: FF
======================================================================
----------------------------------------------------------------------
FAIL: test_example_full_model (/Users/vaggelisd/Desktop/tobiko/test_dir/tests/test.yaml)
----------------------------------------------------------------------
Data mismatch (CTE "filtered_orders_cte")
┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ Row ┃ id: Expected ┃ id: Actual ┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ 0 │ 5 │ 1 │
└──────────┴─────────────────────────┴─────────────────────┘
----------------------------------------------------------------------
FAIL: test_example_full_model (/Users/vaggelisd/Desktop/tobiko/test_dir/tests/test.yaml)
----------------------------------------------------------------------
Data mismatch
┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ Row ┃ id: Expected ┃ id: Actual ┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ 0 │ 2 │ 1 │
└──────────┴─────────────────────────┴─────────────────────┘
----------------------------------------------------------------------
Test Failure Summary
======================================================================
Ran 1 tests against duckdb in 0.14 seconds.
Failed tests (1):
• /Users/vaggelisd/Desktop/tobiko/test_dir/tests/test.yaml::test_example_full_model
====================================================================== |
||
|
|
||
| if expected.shape != actual.shape: | ||
| _raise_if_unexpected_columns(expected.columns, actual.columns) | ||
|
|
||
|
|
@@ -325,13 +332,13 @@ def _to_hashable(x: t.Any) -> t.Any: | |
| missing_rows = _row_difference(expected, actual) | ||
| if not missing_rows.empty: | ||
| args[0] += f"\n\nMissing rows:\n\n{missing_rows}" | ||
| args.append(df_to_table("Missing rows", missing_rows)) | ||
| args.append(df_to_table(f"Missing rows{failed_subtest}", missing_rows)) | ||
|
|
||
| unexpected_rows = _row_difference(actual, expected) | ||
|
|
||
| if not unexpected_rows.empty: | ||
| args[0] += f"\n\nUnexpected rows:\n\n{unexpected_rows}" | ||
| args.append(df_to_table("Unexpected rows", unexpected_rows)) | ||
| args.append(df_to_table(f"Unexpected rows{failed_subtest}", unexpected_rows)) | ||
|
|
||
| else: | ||
| diff = expected.compare(actual).rename(columns={"self": "exp", "other": "act"}) | ||
|
|
@@ -341,7 +348,8 @@ def _to_hashable(x: t.Any) -> t.Any: | |
| diff.rename(columns={"exp": "Expected", "act": "Actual"}, inplace=True) | ||
| if self.verbosity == Verbosity.DEFAULT: | ||
| args.extend( | ||
| df_to_table("Data mismatch", df) for df in _split_df_by_column_pairs(diff) | ||
| df_to_table(f"Data mismatch{failed_subtest}", df) | ||
| for df in _split_df_by_column_pairs(diff) | ||
| ) | ||
| else: | ||
| from pandas import MultiIndex | ||
|
|
@@ -351,7 +359,8 @@ def _to_hashable(x: t.Any) -> t.Any: | |
| col_diff = diff[col] | ||
| if not col_diff.empty: | ||
| table = df_to_table( | ||
| f"[bold red]Column '{col}' mismatch[/bold red]", col_diff | ||
| f"[bold red]Column '{col}' mismatch{failed_subtest}[/bold red]", | ||
| col_diff, | ||
| ) | ||
| args.append(table) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,8 @@ | |
| import typing as t | ||
| import unittest | ||
|
|
||
| from sqlmesh.core.test.definition import ModelTest | ||
|
|
||
| if t.TYPE_CHECKING: | ||
| ErrorType = t.Union[ | ||
| t.Tuple[type[BaseException], BaseException, types.TracebackType], | ||
|
|
@@ -42,7 +44,10 @@ def addSubTest( | |
| exctype, value, tb = err | ||
| err = (exctype, value, None) # type: ignore | ||
|
|
||
| super().addSubTest(test, subtest, err) | ||
| if err[0] and issubclass(err[0], test.failureException): | ||
| self.addFailure(test, err) | ||
| else: | ||
| self.addError(test, err) | ||
|
|
||
| def _print_char(self, char: str) -> None: | ||
| from sqlmesh.core.console import TerminalConsole | ||
|
|
@@ -117,4 +122,14 @@ def merge(self, other: ModelTextTestResult) -> None: | |
| skipped_args = other.skipped[0] | ||
| self.addSkip(skipped_args[0], skipped_args[1]) | ||
|
|
||
| self.testsRun += 1 | ||
| self.testsRun += other.testsRun | ||
|
|
||
| def get_fail_and_error_tests(self) -> t.List[ModelTest]: | ||
| # If tests contain failed subtests (e.g testing CTE outputs) we don't want | ||
| # to report it as different test failures | ||
| test_name_to_test = { | ||
| test.test_name: test | ||
| for test, _ in self.failures + self.errors | ||
| if isinstance(test, ModelTest) | ||
| } | ||
| return list(test_name_to_test.values()) | ||
|
Comment on lines
+127
to
+135
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Simple abstraction to simplify the callers |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ | |
| from pathlib import Path | ||
| import unittest | ||
| from unittest.mock import call, patch | ||
| from shutil import copyfile, rmtree | ||
| from shutil import rmtree | ||
|
|
||
| import pandas as pd # noqa: TID253 | ||
| import pytest | ||
|
|
@@ -87,6 +87,7 @@ def _check_successful_or_raise( | |
| assert result is not None | ||
| if not result.wasSuccessful(): | ||
| error_or_failure_traceback = (result.errors or result.failures)[0][1] | ||
| print(error_or_failure_traceback) | ||
| if expected_msg: | ||
| assert expected_msg in error_or_failure_traceback | ||
| else: | ||
|
|
@@ -2316,6 +2317,13 @@ def test_test_with_resolve_template_macro(tmp_path: Path): | |
|
|
||
| @use_terminal_console | ||
| def test_test_output(tmp_path: Path) -> None: | ||
| def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: | ||
| with open(test_file, "r") as file: | ||
| filedata = file.read() | ||
|
|
||
| with open(new_test_file, "w") as file: | ||
| file.write(filedata.replace("test_example_full_model", f"test_{index}")) | ||
|
|
||
| init_example_project(tmp_path, engine_type="duckdb") | ||
|
|
||
| original_test_file = tmp_path / "tests" / "test_full_model.yaml" | ||
|
|
@@ -2407,8 +2415,8 @@ def test_test_output(tmp_path: Path) -> None: | |
|
|
||
| # Case 3: Assert that concurrent execution is working properly | ||
| for i in range(50): | ||
| copyfile(original_test_file, tmp_path / "tests" / f"test_success_{i}.yaml") | ||
| copyfile(new_test_file, tmp_path / "tests" / f"test_failure_{i}.yaml") | ||
| copy_test_file(original_test_file, tmp_path / "tests" / f"test_success_{i}.yaml", i) | ||
| copy_test_file(new_test_file, tmp_path / "tests" / f"test_failure_{i}.yaml", i) | ||
|
Comment on lines
+2418
to
+2419
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Previously we copied these files directly, but unit tests don't work well if the name is duplicated; I'll follow up with a UX fix |
||
|
|
||
| with capture_output() as captured_output: | ||
| context.test() | ||
|
|
@@ -3327,3 +3335,96 @@ def execute(context: ExecutionContext, **kwargs: t.Any) -> pd.DataFrame: | |
| context=context, | ||
| ) | ||
| _check_successful_or_raise(test_default_vars.run()) | ||
|
|
||
|
|
||
| @use_terminal_console | ||
| def test_cte_failure(tmp_path: Path) -> None: | ||
| models_dir = tmp_path / "models" | ||
| models_dir.mkdir() | ||
| (models_dir / "foo.sql").write_text( | ||
| """ | ||
| MODEL ( | ||
| name test.foo, | ||
| kind full | ||
| ); | ||
|
|
||
| with model_cte as ( | ||
| SELECT 1 AS id | ||
| ) | ||
| SELECT id FROM model_cte | ||
| """ | ||
| ) | ||
|
|
||
| config = Config( | ||
| default_connection=DuckDBConnectionConfig(), | ||
| model_defaults=ModelDefaultsConfig(dialect="duckdb"), | ||
| ) | ||
| context = Context(paths=tmp_path, config=config) | ||
|
|
||
| expected_cte_failure_output = """Data mismatch (CTE "model_cte") | ||
| ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ | ||
| ┃ Row ┃ id: Expected ┃ id: Actual ┃ | ||
| ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ | ||
| │ 0 │ 2 │ 1 │ | ||
| └──────────┴─────────────────────────┴─────────────────────┘""" | ||
|
|
||
| expected_query_failure_output = """Data mismatch | ||
| ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ | ||
| ┃ Row ┃ id: Expected ┃ id: Actual ┃ | ||
| ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ | ||
| │ 0 │ 2 │ 1 │ | ||
| └──────────┴─────────────────────────┴─────────────────────┘""" | ||
|
|
||
| # Case 1: Ensure that a single CTE failure is reported correctly | ||
| tests_dir = tmp_path / "tests" | ||
| tests_dir.mkdir() | ||
| (tests_dir / "test_foo.yaml").write_text( | ||
| """ | ||
| test_foo: | ||
| model: test.foo | ||
| outputs: | ||
| ctes: | ||
| model_cte: | ||
| rows: | ||
| - id: 2 | ||
| query: | ||
| - id: 1 | ||
| """ | ||
| ) | ||
|
|
||
| with capture_output() as captured_output: | ||
| context.test() | ||
|
|
||
| output = captured_output.stdout | ||
|
|
||
| assert expected_cte_failure_output in output | ||
| assert expected_query_failure_output not in output | ||
|
|
||
| assert "Ran 1 tests" in output | ||
| assert "Failed tests (1)" in output | ||
|
|
||
| # Case 2: Ensure that both CTE and query failures are reported correctly | ||
| (tests_dir / "test_foo.yaml").write_text( | ||
| """ | ||
| test_foo: | ||
| model: test.foo | ||
| outputs: | ||
| ctes: | ||
| model_cte: | ||
| rows: | ||
| - id: 2 | ||
| query: | ||
| - id: 2 | ||
| """ | ||
| ) | ||
|
|
||
| with capture_output() as captured_output: | ||
| context.test() | ||
|
|
||
| output = captured_output.stdout | ||
|
|
||
| assert expected_cte_failure_output in output | ||
| assert expected_query_failure_output in output | ||
|
|
||
| assert "Ran 1 tests" in output | ||
| assert "Failed tests (1)" in output | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dead code, `infos` is no longer used