From 3a7511af573a2ddd8bf6de82fc3a145f9b49e33d Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 11:17:07 +0100
Subject: [PATCH 01/10] feat: Expose a user-friendly version of
 `FailureInfo._lf`

---
 dataframely/filter_result.py          | 21 +++++++++++++++--
 docs/guides/features/serialization.md |  8 +++----
 docs/guides/quickstart.md             | 24 +++++++++++++++----
 tests/column_types/test_list.py       |  4 +++-
 tests/column_types/test_struct.py     |  6 ++---
 tests/schema/test_filter.py           | 34 ++++++++++++++++++++++++++-
 6 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/dataframely/filter_result.py b/dataframely/filter_result.py
index be8ff1ad..efdaa64b 100644
--- a/dataframely/filter_result.py
+++ b/dataframely/filter_result.py
@@ -111,8 +111,25 @@ def _df(self) -> pl.DataFrame:
         return self._lf.collect()
 
     def invalid(self) -> pl.DataFrame:
-        """The rows of the original data frame containing the invalid rows."""
-        return self._df.drop(self._rule_columns)
+        """The rows of the original data frame containing the invalid rows.
+
+        For each row, this includes:
+            1. All columns of the original data frame.
+            2. One column for each rule indicating whether the outcome of the rule
+               for that row is `valid`, `invalid`, or `unknown`.
+
+        If a rule column has a value of `unknown` for a given row, that means the rule
+        could not be evaluated reliably.
+        This may happen when calling :meth:`Collection.filter` with collection-level
+        filters in addition to member-level rules, or when calling :meth:`Schema.filter`
+        with `cast=True` and dtype-casting fails for a value.
+        """
+        return self._lf.select(
+            pl.exclude(self._rule_columns),
+            pl.col(*self._rule_columns)
+            .replace_strict({True: "valid", False: "invalid", None: "unknown"})
+            .cast(pl.Enum(["valid", "invalid", "unknown"])),
+        ).collect()
 
     def counts(self) -> dict[str, int]:
         """The number of validation failures for each individual rule.
diff --git a/docs/guides/features/serialization.md b/docs/guides/features/serialization.md
index e62b71f9..7e7296ac 100644
--- a/docs/guides/features/serialization.md
+++ b/docs/guides/features/serialization.md
@@ -139,7 +139,7 @@ class HouseSchema(dy.Schema):
     price = dy.Float64(nullable=False)
 
     @dy.rule()
-    def reasonable_bathroom_to_bedrooom_ratio(cls) -> pl.Expr:
+    def reasonable_bathroom_to_bedroom_ratio(cls) -> pl.Expr:
         ratio = pl.col("num_bathrooms") / pl.col("num_bedrooms")
         return (ratio >= 1 / 3) & (ratio <= 3)
 
@@ -190,9 +190,9 @@ json.loads(HouseSchema.serialize())
                           'primary_key': False,
                           'regex': None}},
  'name': 'HouseSchema',
- 'rules': {'reasonable_bathroom_to_bedrooom_ratio': {'expr': {'__type__': 'expression',
-                                                              'value': 'gapCaW5hcnlFeHByg6RsZWZ0gapCaW5hcnlFeHByg6RsZWZ0gapCaW5hcnlFeHByg6RsZWZ0gaZDb2x1bW6tbnVtX2JhdGhyb29tc6JvcKpUcnVlRGl2aWRlpXJpZ2h0gaZDb2x1bW6sbnVtX2JlZHJvb21zom9wpEd0RXGlcmlnaHSBp0xpdGVyYWyBo0R5boGlRmxvYXTLP9VVVVVVVVWib3CjQW5kpXJpZ2h0gapCaW5hcnlFeHByg6RsZWZ0gapCaW5hcnlFeHByg6RsZWZ0gaZDb2x1bW6tbnVtX2JhdGhyb29tc6JvcKpUcnVlRGl2aWRlpXJpZ2h0gaZDb2x1bW6sbnVtX2JlZHJvb21zom9wpEx0RXGlcmlnaHSBp0xpdGVyYWyBo0R5boGjSW50xBAAAAAAAAAAAAAAAAAAAAAD'},
-                                                     'rule_type': 'Rule'}},
+ 'rules': {'reasonable_bathroom_to_bedroom_ratio': {'expr': {'__type__': 'expression',
+                                                             'value': 'gapCaW5hcnlFeHByg6RsZWZ0gapCaW5hcnlFeHByg6RsZWZ0gapCaW5hcnlFeHByg6RsZWZ0gaZDb2x1bW6tbnVtX2JhdGhyb29tc6JvcKpUcnVlRGl2aWRlpXJpZ2h0gaZDb2x1bW6sbnVtX2JlZHJvb21zom9wpEd0RXGlcmlnaHSBp0xpdGVyYWyBo0R5boGlRmxvYXTLP9VVVVVVVVWib3CjQW5kpXJpZ2h0gapCaW5hcnlFeHByg6RsZWZ0gapCaW5hcnlFeHByg6RsZWZ0gaZDb2x1bW6tbnVtX2JhdGhyb29tc6JvcKpUcnVlRGl2aWRlpXJpZ2h0gaZDb2x1bW6sbnVtX2JlZHJvb21zom9wpEx0RXGlcmlnaHSBp0xpdGVyYWyBo0R5boGjSW50xBAAAAAAAAAAAAAAAAAAAAAD'},
+                                                    'rule_type': 'Rule'}},
  'versions': {'dataframely': '2.0.0', 'format': '1', 'polars': '1.33.1'}}
 ```
 
diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index c6216b91..4f674fb6 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -54,13 +54,13 @@ class HouseSchema(dy.Schema):
     price = dy.Float64(nullable=False)
 
     @dy.rule()
-    def reasonable_bathroom_to_bedrooom_ratio(cls) -> pl.Expr:
+    def reasonable_bathroom_to_bedroom_ratio(cls) -> pl.Expr:
         ratio = pl.col("num_bathrooms") / pl.col("num_bedrooms")
         return (ratio >= 1 / 3) & (ratio <= 3)
 ```
 
 The decorator `@dy.rule()` "registers" the function as a rule using its name (i.e.
-`reasonable_bathroom_to_bedrooom_ratio`).
+`reasonable_bathroom_to_bedroom_ratio`).
 The returned expression provides a boolean value for each row of the data which evaluates to `True` whenever the data
 are valid with respect to this rule.
 
@@ -81,7 +81,7 @@ class HouseSchema(dy.Schema):
     price = dy.Float64(nullable=False)
 
     @dy.rule()
-    def reasonable_bathroom_to_bedrooom_ratio(cls) -> pl.Expr:
+    def reasonable_bathroom_to_bedroom_ratio(cls) -> pl.Expr:
         ratio = pl.col("num_bathrooms") / pl.col("num_bedrooms")
         return (ratio >= 1 / 3) & (ratio <= 3)
 
@@ -189,7 +189,7 @@ Using the `counts` method on the :class:`~dataframely.FailureInfo` object will r
 
 ```python
 {
-    "reasonable_bathroom_to_bedrooom_ratio": 1,
+    "reasonable_bathroom_to_bedroom_ratio": 1,
     "minimum_zip_code_count": 2,
     "zip_code|min_length": 1,
     "num_bedrooms|nullability": 2,
@@ -205,6 +205,19 @@ failed_df = failure.invalid()
 This information tends to be very useful in tracking down issues with the data,
 both in productive systems and analytics environments.
 
+```{comment}
+New in `dataframely` v2.8.0: The `FailureInfo.invalid()` method now returns additional columns indicating which rules were violated for each row.
+```
+
+For the example above, `failed_df` would look as follows (we omitted some columns for readability):
+
+| zip_code | num_bedrooms | num_bathrooms | price  | reasonable_bathroom_to_bedroom... | minimum_zip_code_count | zip_code\|min_length | num_bedrooms\|nullability | ... |
+| -------- | ------------ | ------------- | ------ | --------------------------------- | ---------------------- | -------------------- | ------------------------- | --- |
+| 1        | 1            | 1             | 50000  | valid                             | invalid                | invalid              | valid                     |     |
+| 213      | null         | 1             | 80000  | valid                             | valid                  | valid                | invalid                   |     |
+| 123      | null         | 0             | 60000  | valid                             | invalid                | valid                | invalid                   |     |
+| 213      | 2            | 8             | 160000 | invalid                           | valid                  | valid                | valid                     |     |
+
 ## Type casting
 
 In rare cases, you might already be _absolutely certain_ that a data frame is valid with
@@ -229,7 +242,8 @@ df_concat = HouseSchema.cast(pl.concat([df1, df2]))
 Lastly, `dataframely` schemas can be used to integrate with external tools:
 
 - `HouseSchema.create_empty()` creates an empty `dy.DataFrame[HouseSchema]` that can be used for testing
-- `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be used to
+- `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be
+  used to
   create SQL tables using types and constraints in line with the schema
 - `HouseSchema.to_pyarrow_schema()` provides a [pyarrow](https://arrow.apache.org/docs/python/index.html) schema with
   appropriate column dtypes and nullability information
diff --git a/tests/column_types/test_list.py b/tests/column_types/test_list.py
index a430a6e7..c91f3c09 100644
--- a/tests/column_types/test_list.py
+++ b/tests/column_types/test_list.py
@@ -89,7 +89,9 @@ def test_nested_list_with_rules() -> None:
     df = pl.DataFrame({"a": [[["ab"]], [["a"]], [[None]]]})
     _, failures = schema.filter(df)
     # NOTE: `validation_mask` currently fails for multiply nested lists
-    assert failures.invalid().to_dict(as_series=False) == {"a": [[["a"]], [[None]]]}
+    assert failures.invalid().select("a").to_dict(as_series=False) == {
+        "a": [[["a"]], [[None]]]
+    }
     assert failures.counts() == {
         "a|inner_inner_nullability": 1,
         "a|inner_inner_min_length": 1,
diff --git a/tests/column_types/test_struct.py b/tests/column_types/test_struct.py
index 4d2375a7..3f91382c 100644
--- a/tests/column_types/test_struct.py
+++ b/tests/column_types/test_struct.py
@@ -109,7 +109,7 @@ def test_struct_with_pk() -> None:
         {"s": [{"a": "foo", "b": 1}, {"a": "bar", "b": 1}, {"a": "bar", "b": 1}]}
     )
     _, failures = schema.filter(df)
-    assert failures.invalid().to_dict(as_series=False) == {
+    assert failures.invalid().select("s").to_dict(as_series=False) == {
         "s": [{"a": "bar", "b": 1}, {"a": "bar", "b": 1}]
     }
     assert failures.counts() == {"primary_key": 2}
@@ -121,7 +121,7 @@ def test_struct_with_rules() -> None:
     )
     df = pl.DataFrame({"s": [{"a": "ab"}, {"a": "a"}, {"a": None}]})
     _, failures = schema.filter(df)
-    assert failures.invalid().to_dict(as_series=False) == {
+    assert failures.invalid().select("s").to_dict(as_series=False) == {
         "s": [{"a": "a"}, {"a": None}]
     }
     assert failures.counts() == {"s|inner_a_nullability": 1, "s|inner_a_min_length": 1}
@@ -140,7 +140,7 @@ def test_nested_struct_with_rules() -> None:
         {"s1": [{"s2": {"a": "ab"}}, {"s2": {"a": "a"}}, {"s2": {"a": None}}]}
     )
     _, failures = schema.filter(df)
-    assert failures.invalid().to_dict(as_series=False) == {
+    assert failures.invalid().select("s1").to_dict(as_series=False) == {
         "s1": [{"s2": {"a": "a"}}, {"s2": {"a": None}}]
     }
     assert failures.counts() == {
diff --git a/tests/schema/test_filter.py b/tests/schema/test_filter.py
index 6aa1e3d7..2bca127e 100644
--- a/tests/schema/test_filter.py
+++ b/tests/schema/test_filter.py
@@ -220,7 +220,7 @@ def test_filter_failure_info_original_dtype(eager: bool) -> None:
 
     assert failures.counts() == {"a|dtype": 1}
     assert failures.invalid().get_column("a").to_list() == [300]
-    assert failures.invalid().dtypes == [pl.Int64]
+    assert failures.invalid().select("a").dtypes == [pl.Int64]
 
 
 @pytest.mark.parametrize("eager", [True, False])
@@ -243,3 +243,35 @@ def test_filter_maintain_order(eager: bool) -> None:
     )
     out, _ = _filter_and_collect(schema, df, cast=True, eager=eager)
     assert out.get_column("a").is_sorted()
+
+
+@pytest.mark.parametrize("eager", [True, False])
+def test_filter_invalid_rows(eager: bool) -> None:
+    df = pl.DataFrame(
+        {
+            "a": [2, 2],
+            "b": ["bar", "foobar"],
+        }
+    )
+    _, fails = _filter_and_collect(MySchema, df, cast=True, eager=eager)
+
+    assert fails.invalid().to_dicts() == [
+        {
+            "a": 2,
+            "b": "bar",
+            "a|dtype": "valid",
+            "a|nullability": "valid",
+            "b|dtype": "valid",
+            "b|max_length": "valid",
+            "primary_key": "invalid",
+        },
+        {
+            "a": 2,
+            "b": "foobar",
+            "a|dtype": "valid",
+            "a|nullability": "valid",
+            "b|dtype": "valid",
+            "b|max_length": "invalid",
+            "primary_key": "invalid",
+        },
+    ]

From 702834a702a8a3df43112b197f83c9e78b5af0ee Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 12:22:18 +0100
Subject: [PATCH 02/10] review

---
 dataframely/filter_result.py             |  4 +-
 docs/guides/quickstart.md                |  2 +-
 tests/collection/test_filter_validate.py | 54 ++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/dataframely/filter_result.py b/dataframely/filter_result.py
index efdaa64b..0b673307 100644
--- a/dataframely/filter_result.py
+++ b/dataframely/filter_result.py
@@ -124,12 +124,12 @@ def invalid(self) -> pl.DataFrame:
         filters in addition to member-level rules, or when calling :meth:`Schema.filter`
         with `cast=True` and dtype-casting fails for a value.
         """
-        return self._lf.select(
+        return self._df.select(
             pl.exclude(self._rule_columns),
             pl.col(*self._rule_columns)
             .replace_strict({True: "valid", False: "invalid", None: "unknown"})
             .cast(pl.Enum(["valid", "invalid", "unknown"])),
-        ).collect()
+        )
 
     def counts(self) -> dict[str, int]:
         """The number of validation failures for each individual rule.
diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index 4f674fb6..7cc25af8 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -205,7 +205,7 @@ failed_df = failure.invalid()
 This information tends to be very useful in tracking down issues with the data,
 both in productive systems and analytics environments.
 
-```{comment}
+```{note}
 New in `dataframely` v2.8.0: The `FailureInfo.invalid()` method now returns additional columns indicating which rules were violated for each row.
 ```
 
diff --git a/tests/collection/test_filter_validate.py b/tests/collection/test_filter_validate.py
index fe1afce0..a59ce59a 100644
--- a/tests/collection/test_filter_validate.py
+++ b/tests/collection/test_filter_validate.py
@@ -304,3 +304,57 @@ def test_maintain_order() -> None:
     out = MyShufflingCollection.validate(out.to_dict())
     assert out.first.select("a").collect().to_series().is_sorted()
     assert out.second.select("a").collect().to_series().is_sorted()
+
+
+def test_unknown_rule_outcomes(
+    data_without_filter_with_rule_violation: tuple[pl.DataFrame, pl.DataFrame],
+) -> None:
+    _, fails = MyCollection.filter(
+        {
+            "first": data_without_filter_with_rule_violation[0],
+            "second": data_without_filter_with_rule_violation[1],
+        }
+    )
+    assert fails["first"].invalid().to_dicts() == [
+        {
+            "a": 1,
+            "b": 1,
+            "a|nullability": "valid",
+            "b|nullability": "valid",
+            "equal_primary_key": "unknown",
+            "first_b_greater_second_b": "unknown",
+            "primary_key": "invalid",
+        },
+        {
+            "a": 1,
+            "b": 3,
+            "a|nullability": "valid",
+            "b|nullability": "valid",
+            "equal_primary_key": "unknown",
+            "first_b_greater_second_b": "unknown",
+            "primary_key": "invalid",
+        },
+    ]
+
+    assert fails["second"].invalid().to_dicts() == [
+        {
+            "a": 1,
+            "b": 0,
+            "primary_key": "valid",
+            "a|nullability": "valid",
+            "b|nullability": "valid",
+            "b|min": "invalid",
+            "equal_primary_key": "unknown",
+            "first_b_greater_second_b": "unknown",
+        },
+        {
+            "a": 3,
+            "b": 2,
+            "primary_key": "unknown",
+            "a|nullability": "unknown",
+            "b|nullability": "unknown",
+            "b|min": "unknown",
+            "equal_primary_key": "invalid",
+            "first_b_greater_second_b": "valid",
+        },
+    ]

From e082961aff405cd9f2f49f44764fc2d31311969d Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 17:17:02 +0100
Subject: [PATCH 03/10] trigger docs build


From 07533f289edb6dd70e534abeb9bc3984f9d33cdf Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:17:35 +0100
Subject: [PATCH 04/10] rename

---
 dataframely/filter_result.py             | 7 ++++++-
 tests/collection/test_filter_validate.py | 4 ++--
 tests/schema/test_filter.py              | 4 ++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/dataframely/filter_result.py b/dataframely/filter_result.py
index 0b673307..3227d902 100644
--- a/dataframely/filter_result.py
+++ b/dataframely/filter_result.py
@@ -111,7 +111,12 @@ def _df(self) -> pl.DataFrame:
         return self._lf.collect()
 
     def invalid(self) -> pl.DataFrame:
-        """The rows of the original data frame containing the invalid rows.
+        """The rows of the original data frame containing the invalid rows."""
+        return self._df.drop(self._rule_columns)
+
+    def violation_details(self) -> pl.DataFrame:
+        """Same as :meth:`invalid` but with additional columns indicating the results of
+        each individual rule.
 
         For each row, this includes:
             1. All columns of the original data frame.
diff --git a/tests/collection/test_filter_validate.py b/tests/collection/test_filter_validate.py
index a59ce59a..d18e0747 100644
--- a/tests/collection/test_filter_validate.py
+++ b/tests/collection/test_filter_validate.py
@@ -315,7 +315,7 @@ def test_unknown_rule_outcomes(
             "second": data_without_filter_with_rule_violation[1],
         }
     )
-    assert fails["first"].invalid().to_dicts() == [
+    assert fails["first"].violation_details().to_dicts() == [
         {
             "a": 1,
             "b": 1,
@@ -336,7 +336,7 @@ def test_unknown_rule_outcomes(
         },
     ]
 
-    assert fails["second"].invalid().to_dicts() == [
+    assert fails["second"].violation_details().to_dicts() == [
         {
             "a": 1,
             "b": 0,
diff --git a/tests/schema/test_filter.py b/tests/schema/test_filter.py
index 2bca127e..162df988 100644
--- a/tests/schema/test_filter.py
+++ b/tests/schema/test_filter.py
@@ -246,7 +246,7 @@ def test_filter_maintain_order(eager: bool) -> None:
 
 
 @pytest.mark.parametrize("eager", [True, False])
-def test_filter_invalid_rows(eager: bool) -> None:
+def test_filter_violation_details(eager: bool) -> None:
     df = pl.DataFrame(
         {
             "a": [2, 2],
@@ -255,7 +255,7 @@ def test_filter_invalid_rows(eager: bool) -> None:
     )
     _, fails = _filter_and_collect(MySchema, df, cast=True, eager=eager)
 
-    assert fails.invalid().to_dicts() == [
+    assert fails.violation_details().to_dicts() == [
         {
             "a": 2,
             "b": "bar",

From 45c92f2f37ce5707578bd37dd4ac0a6ddc75e425 Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:20:54 +0100
Subject: [PATCH 05/10] doc

---
 docs/guides/quickstart.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index 7cc25af8..241e6fee 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -206,10 +206,10 @@ This information tends to be very useful in tracking down issues with the data,
 both in productive systems and analytics environments.
 
 ```{note}
-New in `dataframely` v2.8.0: The `FailureInfo.invalid()` method now returns additional columns indicating which rules were violated for each row.
+New in `dataframely` v2.8.0: The `FailureInfo.violation_details()` method now returns additional columns indicating which rules were violated for each row.
 ```
 
-For the example above, `failed_df` would look as follows (we omitted some columns for readability):
+For the example above, `failure.violation_details()` would look as follows (we omitted some columns for readability):
 
 | zip_code | num_bedrooms | num_bathrooms | price  | reasonable_bathroom_to_bedroom... | minimum_zip_code_count | zip_code\|min_length | num_bedrooms\|nullability | ... |
 | -------- | ------------ | ------------- | ------ | --------------------------------- | ---------------------- | -------------------- | ------------------------- | --- |

From 41f66b5115c1addea5df1cdb4d696aaf847cdf8d Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:23:16 +0100
Subject: [PATCH 06/10] revert

---
 tests/column_types/test_list.py   | 4 +---
 tests/column_types/test_struct.py | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/column_types/test_list.py b/tests/column_types/test_list.py
index c91f3c09..a430a6e7 100644
--- a/tests/column_types/test_list.py
+++ b/tests/column_types/test_list.py
@@ -89,9 +89,7 @@ def test_nested_list_with_rules() -> None:
     df = pl.DataFrame({"a": [[["ab"]], [["a"]], [[None]]]})
     _, failures = schema.filter(df)
     # NOTE: `validation_mask` currently fails for multiply nested lists
-    assert failures.invalid().select("a").to_dict(as_series=False) == {
-        "a": [[["a"]], [[None]]]
-    }
+    assert failures.invalid().to_dict(as_series=False) == {"a": [[["a"]], [[None]]]}
     assert failures.counts() == {
         "a|inner_inner_nullability": 1,
         "a|inner_inner_min_length": 1,
diff --git a/tests/column_types/test_struct.py b/tests/column_types/test_struct.py
index 3f91382c..4d2375a7 100644
--- a/tests/column_types/test_struct.py
+++ b/tests/column_types/test_struct.py
@@ -109,7 +109,7 @@ def test_struct_with_pk() -> None:
         {"s": [{"a": "foo", "b": 1}, {"a": "bar", "b": 1}, {"a": "bar", "b": 1}]}
     )
     _, failures = schema.filter(df)
-    assert failures.invalid().select("s").to_dict(as_series=False) == {
+    assert failures.invalid().to_dict(as_series=False) == {
         "s": [{"a": "bar", "b": 1}, {"a": "bar", "b": 1}]
     }
     assert failures.counts() == {"primary_key": 2}
@@ -121,7 +121,7 @@ def test_struct_with_rules() -> None:
     )
     df = pl.DataFrame({"s": [{"a": "ab"}, {"a": "a"}, {"a": None}]})
     _, failures = schema.filter(df)
-    assert failures.invalid().select("s").to_dict(as_series=False) == {
+    assert failures.invalid().to_dict(as_series=False) == {
         "s": [{"a": "a"}, {"a": None}]
     }
     assert failures.counts() == {"s|inner_a_nullability": 1, "s|inner_a_min_length": 1}
@@ -140,7 +140,7 @@ def test_nested_struct_with_rules() -> None:
         {"s1": [{"s2": {"a": "ab"}}, {"s2": {"a": "a"}}, {"s2": {"a": None}}]}
     )
     _, failures = schema.filter(df)
-    assert failures.invalid().select("s1").to_dict(as_series=False) == {
+    assert failures.invalid().to_dict(as_series=False) == {
         "s1": [{"s2": {"a": "a"}}, {"s2": {"a": None}}]
     }
     assert failures.counts() == {

From 290ba94ce385d0830ac9658ba6c5936de4a94ff0 Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:32:57 +0100
Subject: [PATCH 07/10] doc

---
 docs/guides/quickstart.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index 241e6fee..0a2066d1 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -243,8 +243,7 @@ Lastly, `dataframely` schemas can be used to integrate with external tools:
 
 - `HouseSchema.create_empty()` creates an empty `dy.DataFrame[HouseSchema]` that can be used for testing
 - `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be
-  used to
-  create SQL tables using types and constraints in line with the schema
+  used to create SQL tables using types and constraints in line with the schema
 - `HouseSchema.to_pyarrow_schema()` provides a [pyarrow](https://arrow.apache.org/docs/python/index.html) schema with
   appropriate column dtypes and nullability information
 - You can use `dy.DataFrame[HouseSchema]` (or the `LazyFrame` equivalent) as fields in

From 7a37afcabfe822e23e349cb57e5264f72cdcb931 Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:33:17 +0100
Subject: [PATCH 08/10] fix

---
 docs/guides/quickstart.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index 0a2066d1..c11e2230 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -242,8 +242,8 @@ df_concat = HouseSchema.cast(pl.concat([df1, df2]))
 Lastly, `dataframely` schemas can be used to integrate with external tools:
 
 - `HouseSchema.create_empty()` creates an empty `dy.DataFrame[HouseSchema]` that can be used for testing
-- `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be
-  used to create SQL tables using types and constraints in line with the schema
+- `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be used to
+  create SQL tables using types and constraints in line with the schema
 - `HouseSchema.to_pyarrow_schema()` provides a [pyarrow](https://arrow.apache.org/docs/python/index.html) schema with
   appropriate column dtypes and nullability information
 - You can use `dy.DataFrame[HouseSchema]` (or the `LazyFrame` equivalent) as fields in

From 3677d3ba199a4ba4f363b326515bb9af09c70623 Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:34:24 +0100
Subject: [PATCH 09/10] revert

---
 tests/schema/test_filter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/schema/test_filter.py b/tests/schema/test_filter.py
index 162df988..18a37e79 100644
--- a/tests/schema/test_filter.py
+++ b/tests/schema/test_filter.py
@@ -220,7 +220,7 @@ def test_filter_failure_info_original_dtype(eager: bool) -> None:
 
     assert failures.counts() == {"a|dtype": 1}
     assert failures.invalid().get_column("a").to_list() == [300]
-    assert failures.invalid().select("a").dtypes == [pl.Int64]
+    assert failures.invalid().dtypes == [pl.Int64]
 
 
 @pytest.mark.parametrize("eager", [True, False])

From fde12c998061b553e26782170f3a2893b84076f2 Mon Sep 17 00:00:00 2001
From: Andreas Albert <andreas.albert@quantco.com>
Date: Mon, 16 Feb 2026 18:37:05 +0100
Subject: [PATCH 10/10] details

---
 dataframely/filter_result.py             | 9 +++++----
 docs/guides/quickstart.md                | 7 ++++---
 tests/collection/test_filter_validate.py | 4 ++--
 tests/schema/test_filter.py              | 4 ++--
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/dataframely/filter_result.py b/dataframely/filter_result.py
index 3227d902..f2f88b75 100644
--- a/dataframely/filter_result.py
+++ b/dataframely/filter_result.py
@@ -114,7 +114,7 @@ def invalid(self) -> pl.DataFrame:
         """The rows of the original data frame containing the invalid rows."""
         return self._df.drop(self._rule_columns)
 
-    def violation_details(self) -> pl.DataFrame:
+    def details(self) -> pl.DataFrame:
         """Same as :meth:`invalid` but with additional columns indicating the results of
         each individual rule.
 
@@ -131,9 +131,10 @@ def violation_details(self) -> pl.DataFrame:
         """
         return self._df.select(
             pl.exclude(self._rule_columns),
-            pl.col(*self._rule_columns)
-            .replace_strict({True: "valid", False: "invalid", None: "unknown"})
-            .cast(pl.Enum(["valid", "invalid", "unknown"])),
+            pl.col(*self._rule_columns).replace_strict(
+                {True: "valid", False: "invalid", None: "unknown"},
+                return_dtype=pl.Enum(["valid", "invalid", "unknown"]),
+            ),
         )
 
     def counts(self) -> dict[str, int]:
diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index c11e2230..4eb341ae 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -206,10 +206,10 @@ This information tends to be very useful in tracking down issues with the data,
 both in productive systems and analytics environments.
 
 ```{note}
-New in `dataframely` v2.8.0: The `FailureInfo.violation_details()` method now returns additional columns indicating which rules were violated for each row.
+New in `dataframely` v2.8.0: The `FailureInfo.details()` method returns the invalid rows along with additional columns indicating the outcome of each rule for each row.
 ```
 
-For the example above, `failure.violation_details()` would look as follows (we omitted some columns for readability):
+For the example above, `failure.details()` would look as follows (we omitted some columns for readability):
 
 | zip_code | num_bedrooms | num_bathrooms | price  | reasonable_bathroom_to_bedroom... | minimum_zip_code_count | zip_code\|min_length | num_bedrooms\|nullability | ... |
 | -------- | ------------ | ------------- | ------ | --------------------------------- | ---------------------- | -------------------- | ------------------------- | --- |
@@ -242,7 +242,8 @@ df_concat = HouseSchema.cast(pl.concat([df1, df2]))
 Lastly, `dataframely` schemas can be used to integrate with external tools:
 
 - `HouseSchema.create_empty()` creates an empty `dy.DataFrame[HouseSchema]` that can be used for testing
-- `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be used to
+- `HouseSchema.to_sqlalchemy_columns()` provides a list of [sqlalchemy](https://www.sqlalchemy.org) columns that can be
+  used to
   create SQL tables using types and constraints in line with the schema
 - `HouseSchema.to_pyarrow_schema()` provides a [pyarrow](https://arrow.apache.org/docs/python/index.html) schema with
   appropriate column dtypes and nullability information
diff --git a/tests/collection/test_filter_validate.py b/tests/collection/test_filter_validate.py
index d18e0747..926ea3a9 100644
--- a/tests/collection/test_filter_validate.py
+++ b/tests/collection/test_filter_validate.py
@@ -315,7 +315,7 @@ def test_unknown_rule_outcomes(
             "second": data_without_filter_with_rule_violation[1],
         }
     )
-    assert fails["first"].violation_details().to_dicts() == [
+    assert fails["first"].details().to_dicts() == [
         {
             "a": 1,
             "b": 1,
@@ -336,7 +336,7 @@ def test_unknown_rule_outcomes(
         },
     ]
 
-    assert fails["second"].violation_details().to_dicts() == [
+    assert fails["second"].details().to_dicts() == [
         {
             "a": 1,
             "b": 0,
diff --git a/tests/schema/test_filter.py b/tests/schema/test_filter.py
index 18a37e79..99557e79 100644
--- a/tests/schema/test_filter.py
+++ b/tests/schema/test_filter.py
@@ -246,7 +246,7 @@ def test_filter_maintain_order(eager: bool) -> None:
 
 
 @pytest.mark.parametrize("eager", [True, False])
-def test_filter_violation_details(eager: bool) -> None:
+def test_filter_details(eager: bool) -> None:
     df = pl.DataFrame(
         {
             "a": [2, 2],
@@ -255,7 +255,7 @@ def test_filter_violation_details(eager: bool) -> None:
     )
     _, fails = _filter_and_collect(MySchema, df, cast=True, eager=eager)
 
-    assert fails.violation_details().to_dicts() == [
+    assert fails.details().to_dicts() == [
         {
             "a": 2,
             "b": "bar",