From fa72c02b0c985f0e6b2180794c81b79d69ca8734 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Mon, 9 Mar 2026 14:40:55 -0700 Subject: [PATCH 1/4] [run CI] update explain --- pydough/exploration/explain.py | 39 +++++- pydough/exploration/term.py | 6 + tests/test_exploration.py | 123 ++++++++++++++++++ .../exploration_examples.py | 18 +++ 4 files changed, 180 insertions(+), 6 deletions(-) diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 2fc29c301..85214d551 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -35,11 +35,15 @@ PyDoughExpressionQDAG, PyDoughQDAG, Reference, + Singular, SubCollection, TableCollection, TopK, Where, ) +from pydough.qdag.collections.user_collection_qdag import ( + PyDoughUserGeneratedCollectionQDag, +) from pydough.unqualified import ( UnqualifiedNode, UnqualifiedRoot, @@ -282,12 +286,17 @@ def explain_unqualified( if root is not None: qualified_node = qualify_node(node, session) else: - # If the root is None, it means that the node was an expression - # without information about its context. - lines.append( - f"Cannot call pydough.explain on {display_raw(node)}.\n" - "Did you mean to use pydough.explain_term?" - ) + # No root in the tree (e.g. UnqualifiedGeneratedCollection). Try + # to qualify anyway; the qualifier uses session's graph as context. + try: + qualified_node = qualify_node(node, session) + except PyDoughQDAGException: + raise + except Exception: + lines.append( + f"Cannot call pydough.explain on {display_raw(node)}.\n" + "Did you mean to use pydough.explain_term?" + ) except PyDoughQDAGException as e: # If the qualification failed, dump an appropriate message indicating # why pydough_explain did not work on it. @@ -347,6 +356,24 @@ def explain_unqualified( lines.append( f"This node, specifically, accesses the unpartitioned data of a partitioning (child name: {qualified_node.partition_child_name})." 
) + case Singular(): + lines.append( + "This node makes the preceding collection singular via a CROSS product.\n" + "Each record from the parent context is paired with all records from the child." + ) + lines.append( + f"Child collection: {qualified_node.preceding_context.to_string()}" + ) + case PyDoughUserGeneratedCollectionQDag(): + collection_name = qualified_node.name + columns = sorted(qualified_node.calc_terms) + lines.append( + f"This node accesses user-generated collection '{collection_name}'.\n" + f"Columns: {', '.join(columns)}" + ) + lines.append( + f"Unique columns: {', '.join(qualified_node.unique_terms)}" + ) case ChildOperator(): if len(qualified_node.children): lines.append( diff --git a/pydough/exploration/term.py b/pydough/exploration/term.py index 6ab9734bb..650605afc 100644 --- a/pydough/exploration/term.py +++ b/pydough/exploration/term.py @@ -34,6 +34,10 @@ qualify_node, qualify_term, ) +from pydough.unqualified.unqualified_node import ( + UnqualifiedCross, + UnqualifiedSingular, +) def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None: @@ -57,6 +61,8 @@ def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None: | UnqualifiedOrderBy() | UnqualifiedTopK() | UnqualifiedPartition() + | UnqualifiedCross() + | UnqualifiedSingular() ): predecessor: UnqualifiedNode = node._parcel[0] return find_unqualified_root(predecessor) diff --git a/tests/test_exploration.py b/tests/test_exploration.py index 0ad8db374..36e16fa2d 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -17,6 +17,7 @@ contextless_expr_impl, contextless_func_impl, customers_without_orders_impl, + dataframe_collection_exploration_impl, filter_impl, global_agg_calc_impl, global_calc_impl, @@ -34,10 +35,12 @@ parts_avg_price_child_impl, parts_avg_price_impl, parts_with_german_supplier, + range_collection_exploration_impl, region_n_suppliers_in_red_impl, region_nations_back_name, region_nations_suppliers_impl, 
region_nations_suppliers_name_impl, + singular_impl, subcollection_calc_backref_impl, suppliers_iff_balance_impl, table_calc_impl, @@ -1199,6 +1202,126 @@ def test_graph_structure( ), id="partition_child", ), + pytest.param( + ( + "TPCH", + singular_impl, + """ +PyDough collection representing the following logic: + ──┬─ TPCH + └─┬─ TableCollection[nations] + └─┬─ TPCH + └─── TableCollection[regions] + +This node, specifically, accesses the collection regions. +Call pydough.explain(graph['regions']) to learn more about this collection. + +The following terms will be included in the result if this collection is executed: + comment, key, name + +The collection has access to the following expressions: + comment, key, name + +The collection has access to the following collections: + nations + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + """, + """ +This node, specifically, accesses the collection regions. +Call pydough.explain(graph['regions']) to learn more about this collection. + +The collection has access to the following expressions: + comment, key, name + +The collection has access to the following collections: + nations + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + +Call pydough.explain(collection, verbose=True) for more details. + """, + ), + id="singular", + ), + pytest.param( + ( + "TPCH", + range_collection_exploration_impl, + """ +PyDough collection representing the following logic: + ──┬─ TPCH + └─── RangeCollection('rng', i=range(1, 5)) + +This node accesses user-generated collection 'rng'. 
+Columns: i +Unique columns: i + +The following terms will be included in the result if this collection is executed: + i + +The collection has access to the following expressions: + i + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + """, + """ +This node accesses user-generated collection 'rng'. +Columns: i +Unique columns: i + +The collection has access to the following expressions: + i + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + +Call pydough.explain(collection, verbose=True) for more details. + """, + ), + id="range_collection", + ), + pytest.param( + ( + "TPCH", + dataframe_collection_exploration_impl, + """ +PyDough collection representing the following logic: + ──┬─ TPCH + └─── DataframeCollection(name='df_coll', shape=(1, 1), columns=['id']) + +This node accesses user-generated collection 'df_coll'. +Columns: id +Unique columns: id + +The following terms will be included in the result if this collection is executed: + id + +The collection has access to the following expressions: + id + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + """, + """ +This node accesses user-generated collection 'df_coll'. +Columns: id +Unique columns: id + +The collection has access to the following expressions: + id + +Call pydough.explain_term(collection, term) to learn more about any of these +expressions or collections that the collection has access to. + +Call pydough.explain(collection, verbose=True) for more details. 
+ """, + ), + id="dataframe_collection", + ), pytest.param( ( "TPCH", diff --git a/tests/test_pydough_functions/exploration_examples.py b/tests/test_pydough_functions/exploration_examples.py index 4eb978303..711d55833 100644 --- a/tests/test_pydough_functions/exploration_examples.py +++ b/tests/test_pydough_functions/exploration_examples.py @@ -8,6 +8,7 @@ "contextless_expr_impl", "contextless_func_impl", "customers_without_orders_impl", + "dataframe_collection_exploration_impl", "filter_impl", "global_agg_calc_impl", "global_calc_impl", @@ -23,10 +24,12 @@ "parts_avg_price_child_impl", "parts_avg_price_impl", "parts_with_german_supplier", + "range_collection_exploration_impl", "region_n_suppliers_in_red_impl", "region_nations_back_name", "region_nations_suppliers_impl", "region_nations_suppliers_name_impl", + "singular_impl", "subcollection_calc_backref_impl", "suppliers_iff_balance_impl", "table_calc_impl", @@ -35,6 +38,8 @@ from collections.abc import Callable +import pandas as pd + import pydough from pydough.metadata import GraphMetadata from pydough.unqualified import UnqualifiedNode @@ -112,6 +117,19 @@ def partition_child_impl() -> UnqualifiedNode: ) +def singular_impl() -> UnqualifiedNode: + return nations.CROSS(regions) + + +def range_collection_exploration_impl() -> UnqualifiedNode: + return pydough.range_collection("rng", "i", 1, 5) + + +def dataframe_collection_exploration_impl() -> UnqualifiedNode: + df = pd.DataFrame({"id": [1]}) + return pydough.dataframe_collection("df_coll", df, ["id"]) + + def nation_expr_impl() -> UnqualifiedNode: return nations.name From ac3f74bea8cd89efc774645b2426a7fa140d5554 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Mon, 9 Mar 2026 15:16:47 -0700 Subject: [PATCH 2/4] [run CI] fix generic error --- pydough/exploration/explain.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 85214d551..92b7d85a7 100644 --- 
a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -286,17 +286,19 @@ def explain_unqualified( if root is not None: qualified_node = qualify_node(node, session) else: - # No root in the tree (e.g. UnqualifiedGeneratedCollection). Try - # to qualify anyway; the qualifier uses session's graph as context. + # No root in the tree (e.g. UnqualifiedGeneratedCollection, or a + # bare expression like LOWER(first_name + last_name)). Try to + # qualify anyway for generated collections; on any failure show the + # generic "Cannot call explain" message so contextless expressions + # get a consistent response instead of "Unrecognized term". try: qualified_node = qualify_node(node, session) - except PyDoughQDAGException: - raise except Exception: lines.append( f"Cannot call pydough.explain on {display_raw(node)}.\n" "Did you mean to use pydough.explain_term?" ) + return "\n".join(lines) except PyDoughQDAGException as e: # If the qualification failed, dump an appropriate message indicating # why pydough_explain did not work on it. From 535fa12b71522f6c4f4cf8ce79858f6f25e75b9e Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Mon, 9 Mar 2026 15:17:21 -0700 Subject: [PATCH 3/4] [run CI] fix generic error --- pydough/exploration/explain.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 92b7d85a7..eab3b77ef 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -288,9 +288,8 @@ def explain_unqualified( else: # No root in the tree (e.g. UnqualifiedGeneratedCollection, or a # bare expression like LOWER(first_name + last_name)). Try to - # qualify anyway for generated collections; on any failure show the - # generic "Cannot call explain" message so contextless expressions - # get a consistent response instead of "Unrecognized term". + # qualify anyway for generated collections. If it still fails, + # return the generic "Cannot call pydough.explain" message. 
try: qualified_node = qualify_node(node, session) except Exception: From a27ded72b275eac1ade34012e3806af403f49e6a Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Mon, 9 Mar 2026 17:19:07 -0700 Subject: [PATCH 4/4] [run CI] update message --- tests/test_exploration.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_exploration.py b/tests/test_exploration.py index 36e16fa2d..ca0d37493 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1384,11 +1384,15 @@ def test_graph_structure( "TPCH", contextless_aggfunc_impl, """ -Cannot call pydough.explain on COUNT(customers). +If pydough.explain is called on an unqualified PyDough code, it is expected to +be a collection, but instead received the following expression: + COUNT(customers) Did you mean to use pydough.explain_term? """, """ -Cannot call pydough.explain on COUNT(customers). +If pydough.explain is called on an unqualified PyDough code, it is expected to +be a collection, but instead received the following expression: + COUNT(customers) Did you mean to use pydough.explain_term? """, ),