Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions pydough/exploration/explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@
PyDoughExpressionQDAG,
PyDoughQDAG,
Reference,
Singular,
SubCollection,
TableCollection,
TopK,
Where,
)
from pydough.qdag.collections.user_collection_qdag import (
PyDoughUserGeneratedCollectionQDag,
)
from pydough.unqualified import (
UnqualifiedNode,
UnqualifiedRoot,
Expand Down Expand Up @@ -282,12 +286,18 @@ def explain_unqualified(
if root is not None:
qualified_node = qualify_node(node, session)
else:
# If the root is None, it means that the node was an expression
# without information about its context.
lines.append(
f"Cannot call pydough.explain on {display_raw(node)}.\n"
"Did you mean to use pydough.explain_term?"
)
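# No root in the tree (e.g. UnqualifiedGeneratedCollection, or a
# bare expression like LOWER(first_name + last_name)). Try to
# qualify anyway, since generated collections can be qualified without
# a root. If qualification still fails, report that pydough.explain
# cannot be called on the node and return that message.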
try:
qualified_node = qualify_node(node, session)
except Exception:
lines.append(
f"Cannot call pydough.explain on {display_raw(node)}.\n"
"Did you mean to use pydough.explain_term?"
)
return "\n".join(lines)
except PyDoughQDAGException as e:
# If the qualification failed, dump an appropriate message indicating
# why pydough_explain did not work on it.
Expand Down Expand Up @@ -347,6 +357,24 @@ def explain_unqualified(
lines.append(
f"This node, specifically, accesses the unpartitioned data of a partitioning (child name: {qualified_node.partition_child_name})."
)
case Singular():
lines.append(
"This node makes the preceding collection singular via a CROSS product.\n"
"Each record from the parent context is paired with all records from the child."
)
lines.append(
f"Child collection: {qualified_node.preceding_context.to_string()}"
)
case PyDoughUserGeneratedCollectionQDag():
collection_name = qualified_node.name
columns = sorted(qualified_node.calc_terms)
lines.append(
f"This node accesses user-generated collection '{collection_name}'.\n"
f"Columns: {', '.join(columns)}"
)
lines.append(
f"Unique columns: {', '.join(qualified_node.unique_terms)}"
)
case ChildOperator():
if len(qualified_node.children):
lines.append(
Expand Down
6 changes: 6 additions & 0 deletions pydough/exploration/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
qualify_node,
qualify_term,
)
from pydough.unqualified.unqualified_node import (
UnqualifiedCross,
UnqualifiedSingular,
)


def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None:
Expand All @@ -57,6 +61,8 @@ def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None:
| UnqualifiedOrderBy()
| UnqualifiedTopK()
| UnqualifiedPartition()
| UnqualifiedCross()
| UnqualifiedSingular()
):
predecessor: UnqualifiedNode = node._parcel[0]
return find_unqualified_root(predecessor)
Expand Down
131 changes: 129 additions & 2 deletions tests/test_exploration.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
contextless_expr_impl,
contextless_func_impl,
customers_without_orders_impl,
dataframe_collection_exploration_impl,
filter_impl,
global_agg_calc_impl,
global_calc_impl,
Expand All @@ -34,10 +35,12 @@
parts_avg_price_child_impl,
parts_avg_price_impl,
parts_with_german_supplier,
range_collection_exploration_impl,
region_n_suppliers_in_red_impl,
region_nations_back_name,
region_nations_suppliers_impl,
region_nations_suppliers_name_impl,
singular_impl,
subcollection_calc_backref_impl,
suppliers_iff_balance_impl,
table_calc_impl,
Expand Down Expand Up @@ -1199,6 +1202,126 @@ def test_graph_structure(
),
id="partition_child",
),
pytest.param(
(
"TPCH",
singular_impl,
"""
PyDough collection representing the following logic:
──┬─ TPCH
└─┬─ TableCollection[nations]
└─┬─ TPCH
└─── TableCollection[regions]

This node, specifically, accesses the collection regions.
Call pydough.explain(graph['regions']) to learn more about this collection.

The following terms will be included in the result if this collection is executed:
comment, key, name

The collection has access to the following expressions:
comment, key, name

The collection has access to the following collections:
nations

Call pydough.explain_term(collection, term) to learn more about any of these
expressions or collections that the collection has access to.
""",
"""
This node, specifically, accesses the collection regions.
Call pydough.explain(graph['regions']) to learn more about this collection.

The collection has access to the following expressions:
comment, key, name

The collection has access to the following collections:
nations

Call pydough.explain_term(collection, term) to learn more about any of these
expressions or collections that the collection has access to.

Call pydough.explain(collection, verbose=True) for more details.
""",
),
id="singular",
),
pytest.param(
(
"TPCH",
range_collection_exploration_impl,
"""
PyDough collection representing the following logic:
──┬─ TPCH
└─── RangeCollection('rng', i=range(1, 5))

This node accesses user-generated collection 'rng'.
Columns: i
Unique columns: i

The following terms will be included in the result if this collection is executed:
i

The collection has access to the following expressions:
i

Call pydough.explain_term(collection, term) to learn more about any of these
expressions or collections that the collection has access to.
""",
"""
This node accesses user-generated collection 'rng'.
Columns: i
Unique columns: i

The collection has access to the following expressions:
i

Call pydough.explain_term(collection, term) to learn more about any of these
expressions or collections that the collection has access to.

Call pydough.explain(collection, verbose=True) for more details.
""",
),
id="range_collection",
),
pytest.param(
(
"TPCH",
dataframe_collection_exploration_impl,
"""
PyDough collection representing the following logic:
──┬─ TPCH
└─── DataframeCollection(name='df_coll', shape=(1, 1), columns=['id'])

This node accesses user-generated collection 'df_coll'.
Columns: id
Unique columns: id

The following terms will be included in the result if this collection is executed:
id

The collection has access to the following expressions:
id

Call pydough.explain_term(collection, term) to learn more about any of these
expressions or collections that the collection has access to.
""",
"""
This node accesses user-generated collection 'df_coll'.
Columns: id
Unique columns: id

The collection has access to the following expressions:
id

Call pydough.explain_term(collection, term) to learn more about any of these
expressions or collections that the collection has access to.

Call pydough.explain(collection, verbose=True) for more details.
""",
),
id="dataframe_collection",
),
pytest.param(
(
"TPCH",
Expand Down Expand Up @@ -1261,11 +1384,15 @@ def test_graph_structure(
"TPCH",
contextless_aggfunc_impl,
"""
Cannot call pydough.explain on COUNT(customers).
If pydough.explain is called on an unqualified PyDough code, it is expected to
be a collection, but instead received the following expression:
COUNT(customers)
Did you mean to use pydough.explain_term?
""",
"""
Cannot call pydough.explain on COUNT(customers).
If pydough.explain is called on an unqualified PyDough code, it is expected to
be a collection, but instead received the following expression:
COUNT(customers)
Did you mean to use pydough.explain_term?
""",
),
Expand Down
18 changes: 18 additions & 0 deletions tests/test_pydough_functions/exploration_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"contextless_expr_impl",
"contextless_func_impl",
"customers_without_orders_impl",
"dataframe_collection_exploration_impl",
"filter_impl",
"global_agg_calc_impl",
"global_calc_impl",
Expand All @@ -23,10 +24,12 @@
"parts_avg_price_child_impl",
"parts_avg_price_impl",
"parts_with_german_supplier",
"range_collection_exploration_impl",
"region_n_suppliers_in_red_impl",
"region_nations_back_name",
"region_nations_suppliers_impl",
"region_nations_suppliers_name_impl",
"singular_impl",
"subcollection_calc_backref_impl",
"suppliers_iff_balance_impl",
"table_calc_impl",
Expand All @@ -35,6 +38,8 @@

from collections.abc import Callable

import pandas as pd

import pydough
from pydough.metadata import GraphMetadata
from pydough.unqualified import UnqualifiedNode
Expand Down Expand Up @@ -112,6 +117,19 @@ def partition_child_impl() -> UnqualifiedNode:
)


def singular_impl() -> UnqualifiedNode:
    """
    Exploration example: the `nations` collection combined with the
    `regions` collection through a CROSS call.
    """
    crossed = nations.CROSS(regions)
    return crossed


def range_collection_exploration_impl() -> UnqualifiedNode:
    """
    Exploration example: a user-generated range collection named "rng"
    with a single column "i", created from the bounds 1 and 5.
    """
    rng_collection = pydough.range_collection("rng", "i", 1, 5)
    return rng_collection


def dataframe_collection_exploration_impl() -> UnqualifiedNode:
    """
    Exploration example: a user-generated collection named "df_coll" backed
    by a one-row pandas DataFrame whose only column is "id".
    """
    frame = pd.DataFrame({"id": [1]})
    # The third argument lists "id"; the expected explain output elsewhere in
    # this change reports it under "Unique columns".
    id_columns = ["id"]
    return pydough.dataframe_collection("df_coll", frame, id_columns)


def nation_expr_impl() -> UnqualifiedNode:
    """
    Exploration example: the `name` term accessed on the `nations`
    collection.
    """
    name_term = nations.name
    return name_term

Expand Down