@@ -75,17 +75,22 @@ def _is_metadata_var(
7575 ) -> t .Optional [bool ]:
7676 is_metadata_so_far = used_variables .get (name , True )
7777 if is_metadata_so_far is False :
78+ # We've concluded this variable is definitely not metadata-only
7879 return False
7980
8081 appears_under_metadata_macro_func = expr_under_metadata_macro_func .get (id (expression ))
8182 if is_metadata_so_far and (
8283 appears_in_metadata_expression or appears_under_metadata_macro_func
8384 ):
85+ # The variable appears in a metadata expression, e.g., audits (...),
86+ # or in the AST of metadata-only macro call, e.g., @FOO(@x)
8487 return True
8588
89+ # The variable appears in the AST of a macro call, but we don't know if it's metadata-only
8690 if appears_under_metadata_macro_func is False :
8791 return None
8892
93+ # The variable appears elsewhere, e.g., in the model's query: SELECT @x
8994 return False
9095
9196 def _is_metadata_macro (name : str , appears_in_metadata_expression : bool ) -> bool :
@@ -131,6 +136,14 @@ def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool:
131136 var_name , macro_func_or_var , is_metadata
132137 )
133138 elif id (macro_func_or_var ) not in visited_macro_funcs :
139+ # We only care about the top-level macro function calls to determine the metadata
140+ # status of the variables referenced in their ASTs. For example, in @m1(@m2(@x)),
141+ # if m1 is metadata-only but m2 is not, we can still determine that @x only affects
142+ # the metadata hash, since m2's result feeds into a metadata-only macro function.
143+ #
144+ # Generally, if the top-level call is known to be metadata-only or appear in a
145+ # metadata expression, then we can avoid traversing nested macro function calls.
146+
134147 var_refs , _expr_under_metadata_macro_func , _visited_macro_funcs = (
135148 _extract_macro_func_variable_references (macro_func_or_var , is_metadata )
136149 )
@@ -192,7 +205,7 @@ def _extract_macro_func_variable_references(
192205 macro_func : exp .Expression ,
193206 is_metadata : bool ,
194207) -> t .Tuple [t .Set [str ], t .Dict [int , bool ], t .Set [int ]]:
195- references = set ()
208+ var_references = set ()
196209 visited_macro_funcs = set ()
197210 expr_under_metadata_macro_func = {}
198211
@@ -204,19 +217,19 @@ def _extract_macro_func_variable_references(
204217 args = this .expressions
205218
206219 if this .name .lower () in (c .VAR , c .BLUEPRINT_VAR ) and args and args [0 ].is_string :
207- references .add (args [0 ].this .lower ())
220+ var_references .add (args [0 ].this .lower ())
208221 expr_under_metadata_macro_func [id (n )] = is_metadata
209222 elif isinstance (n , d .MacroVar ):
210- references .add (n .name .lower ())
223+ var_references .add (n .name .lower ())
211224 expr_under_metadata_macro_func [id (n )] = is_metadata
212225 elif isinstance (n , (exp .Identifier , d .MacroStrReplace , d .MacroSQL )) and "@" in n .name :
213- references .update (
226+ var_references .update (
214227 (braced_identifier or identifier ).lower ()
215228 for _ , identifier , braced_identifier , _ in MacroStrTemplate .pattern .findall (n .name )
216229 )
217230 expr_under_metadata_macro_func [id (n )] = is_metadata
218231
219- return (references , expr_under_metadata_macro_func , visited_macro_funcs )
232+ return (var_references , expr_under_metadata_macro_func , visited_macro_funcs )
220233
221234
222235def _add_variables_to_python_env (
@@ -238,16 +251,22 @@ def _add_variables_to_python_env(
238251 for var_name , is_metadata in python_used_variables .items ():
239252 used_variables [var_name ] = is_metadata and used_variables .get (var_name , True )
240253
241- # Variables are treated as metadata when:
242- # - They are only referenced in metadata-only contexts, such as `audits (...)`, virtual statements, etc
243- # - They are only referenced in metadata-only macros, either as their arguments or within their definitions
254+ # Variables are treated as metadata-only when all of their references either:
255+ # - appear in metadata-only expressions, such as `audits (...)`, virtual statements, etc
256+ # - appear in the ASTs or definitions of metadata-only macros
257+ #
258+ # See also: https://github.com/TobikoData/sqlmesh/pull/4936#issuecomment-3136339936,
259+ # specifically the "Terminology" and "Observations" section.
244260 metadata_used_variables = {
245261 var_name for var_name , is_metadata in used_variables .items () if is_metadata
246262 }
247263 for used_var , outermost_macro_func in (outermost_macro_func_ancestor_by_var or {}).items ():
248264 used_var_is_metadata = used_variables .get (used_var )
249265 if used_var_is_metadata is False :
250266 continue
267+
268+ # At this point we can decide whether a variable reference in a macro call's AST is
269+ # metadata-only, because we've annotated the corresponding macro call in the python env.
251270 if outermost_macro_func in python_env and python_env [outermost_macro_func ].is_metadata :
252271 metadata_used_variables .add (used_var )
253272
0 commit comments