diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 2fc29c301..eab3b77ef 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -35,11 +35,15 @@ PyDoughExpressionQDAG, PyDoughQDAG, Reference, + Singular, SubCollection, TableCollection, TopK, Where, ) +from pydough.qdag.collections.user_collection_qdag import ( + PyDoughUserGeneratedCollectionQDag, +) from pydough.unqualified import ( UnqualifiedNode, UnqualifiedRoot, @@ -282,12 +286,18 @@ def explain_unqualified( if root is not None: qualified_node = qualify_node(node, session) else: - # If the root is None, it means that the node was an expression - # without information about its context. - lines.append( - f"Cannot call pydough.explain on {display_raw(node)}.\n" - "Did you mean to use pydough.explain_term?" - ) + # No root in the tree (e.g. UnqualifiedGeneratedCollection, or a + # bare expression like LOWER(first_name + last_name)). Try to + # qualify anyway for generated collections. If that still fails, + # append the "cannot explain" message and return the text early. + try: + qualified_node = qualify_node(node, session) + except Exception: + lines.append( + f"Cannot call pydough.explain on {display_raw(node)}.\n" + "Did you mean to use pydough.explain_term?" + ) + return "\n".join(lines) except PyDoughQDAGException as e: # If the qualification failed, dump an appropriate message indicating # why pydough_explain did not work on it. @@ -347,6 +357,24 @@ def explain_unqualified( lines.append( f"This node, specifically, accesses the unpartitioned data of a partitioning (child name: {qualified_node.partition_child_name})." ) + case Singular(): + lines.append( + "This node makes the preceding collection singular via a CROSS product.\n" + "Each record from the parent context is paired with all records from the child." 
+ ) + lines.append( + f"Child collection: {qualified_node.preceding_context.to_string()}" + ) + case PyDoughUserGeneratedCollectionQDag(): + collection_name = qualified_node.name + columns = sorted(qualified_node.calc_terms) + lines.append( + f"This node accesses user-generated collection '{collection_name}'.\n" + f"Columns: {', '.join(columns)}" + ) + lines.append( + f"Unique columns: {', '.join(qualified_node.unique_terms)}" + ) case ChildOperator(): if len(qualified_node.children): lines.append( diff --git a/pydough/exploration/term.py b/pydough/exploration/term.py index 6ab9734bb..650605afc 100644 --- a/pydough/exploration/term.py +++ b/pydough/exploration/term.py @@ -34,6 +34,10 @@ qualify_node, qualify_term, ) +from pydough.unqualified.unqualified_node import ( + UnqualifiedCross, + UnqualifiedSingular, +) def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None: @@ -57,6 +61,8 @@ def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None: | UnqualifiedOrderBy() | UnqualifiedTopK() | UnqualifiedPartition() + | UnqualifiedCross() + | UnqualifiedSingular() ): predecessor: UnqualifiedNode = node._parcel[0] return find_unqualified_root(predecessor) diff --git a/tests/test_exploration.py b/tests/test_exploration.py index 0ad8db374..ca0d37493 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -17,6 +17,7 @@ contextless_expr_impl, contextless_func_impl, customers_without_orders_impl, + dataframe_collection_exploration_impl, filter_impl, global_agg_calc_impl, global_calc_impl, @@ -34,10 +35,12 @@ parts_avg_price_child_impl, parts_avg_price_impl, parts_with_german_supplier, + range_collection_exploration_impl, region_n_suppliers_in_red_impl, region_nations_back_name, region_nations_suppliers_impl, region_nations_suppliers_name_impl, + singular_impl, subcollection_calc_backref_impl, suppliers_iff_balance_impl, table_calc_impl, @@ -1199,6 +1202,126 @@ def test_graph_structure( ), id="partition_child", ), + 
pytest.param( + ( + "TPCH", + singular_impl, + """ +PyDough collection representing the following logic: + ──┬─ TPCH + └─┬─ TableCollection[nations] + └─┬─ TPCH + └─── TableCollection[regions] + +This node, specifically, accesses the collection regions. +Call pydough.explain(graph['regions']) to learn more about this collection. + +The following terms will be included in the result if this collection is executed: + comment, key, name + +The collection has access to the following expressions: + comment, key, name + +The collection has access to the following collections: + nations + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + """, + """ +This node, specifically, accesses the collection regions. +Call pydough.explain(graph['regions']) to learn more about this collection. + +The collection has access to the following expressions: + comment, key, name + +The collection has access to the following collections: + nations + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + +Call pydough.explain(collection, verbose=True) for more details. + """, + ), + id="singular", + ), + pytest.param( + ( + "TPCH", + range_collection_exploration_impl, + """ +PyDough collection representing the following logic: + ──┬─ TPCH + └─── RangeCollection('rng', i=range(1, 5)) + +This node accesses user-generated collection 'rng'. +Columns: i +Unique columns: i + +The following terms will be included in the result if this collection is executed: + i + +The collection has access to the following expressions: + i + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + """, + """ +This node accesses user-generated collection 'rng'. 
+Columns: i +Unique columns: i + +The collection has access to the following expressions: + i + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + +Call pydough.explain(collection, verbose=True) for more details. + """, + ), + id="range_collection", + ), + pytest.param( + ( + "TPCH", + dataframe_collection_exploration_impl, + """ +PyDough collection representing the following logic: + ──┬─ TPCH + └─── DataframeCollection(name='df_coll', shape=(1, 1), columns=['id']) + +This node accesses user-generated collection 'df_coll'. +Columns: id +Unique columns: id + +The following terms will be included in the result if this collection is executed: + id + +The collection has access to the following expressions: + id + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + """, + """ +This node accesses user-generated collection 'df_coll'. +Columns: id +Unique columns: id + +The collection has access to the following expressions: + id + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + +Call pydough.explain(collection, verbose=True) for more details. + """, + ), + id="dataframe_collection", + ), pytest.param( ( "TPCH", @@ -1261,11 +1384,15 @@ def test_graph_structure( "TPCH", contextless_aggfunc_impl, """ -Cannot call pydough.explain on COUNT(customers). +If pydough.explain is called on an unqualified PyDough code, it is expected to +be a collection, but instead received the following expression: + COUNT(customers) Did you mean to use pydough.explain_term? """, """ -Cannot call pydough.explain on COUNT(customers). 
+If pydough.explain is called on an unqualified PyDough code, it is expected to +be a collection, but instead received the following expression: + COUNT(customers) Did you mean to use pydough.explain_term? """, ), diff --git a/tests/test_pydough_functions/exploration_examples.py b/tests/test_pydough_functions/exploration_examples.py index 4eb978303..711d55833 100644 --- a/tests/test_pydough_functions/exploration_examples.py +++ b/tests/test_pydough_functions/exploration_examples.py @@ -8,6 +8,7 @@ "contextless_expr_impl", "contextless_func_impl", "customers_without_orders_impl", + "dataframe_collection_exploration_impl", "filter_impl", "global_agg_calc_impl", "global_calc_impl", @@ -23,10 +24,12 @@ "parts_avg_price_child_impl", "parts_avg_price_impl", "parts_with_german_supplier", + "range_collection_exploration_impl", "region_n_suppliers_in_red_impl", "region_nations_back_name", "region_nations_suppliers_impl", "region_nations_suppliers_name_impl", + "singular_impl", "subcollection_calc_backref_impl", "suppliers_iff_balance_impl", "table_calc_impl", @@ -35,6 +38,8 @@ from collections.abc import Callable +import pandas as pd + import pydough from pydough.metadata import GraphMetadata from pydough.unqualified import UnqualifiedNode @@ -112,6 +117,19 @@ def partition_child_impl() -> UnqualifiedNode: ) +def singular_impl() -> UnqualifiedNode: + return nations.CROSS(regions) + + +def range_collection_exploration_impl() -> UnqualifiedNode: + return pydough.range_collection("rng", "i", 1, 5) + + +def dataframe_collection_exploration_impl() -> UnqualifiedNode: + df = pd.DataFrame({"id": [1]}) + return pydough.dataframe_collection("df_coll", df, ["id"]) + + def nation_expr_impl() -> UnqualifiedNode: return nations.name