@@ -75,17 +75,22 @@ def _is_metadata_var(
7575 ) -> t.Optional[bool]:
7676 is_metadata_so_far = used_variables.get(name, True)
7777 if is_metadata_so_far is False:
78+ # We've concluded this variable is definitely not metadata-only
7879 return False
7980
8081 appears_under_metadata_macro_func = expr_under_metadata_macro_func.get(id(expression))
8182 if is_metadata_so_far and (
8283 appears_in_metadata_expression or appears_under_metadata_macro_func
8384 ):
85+ # The variable appears in a metadata expression, e.g., audits (...),
86+ # or in the AST of a metadata-only macro call, e.g., @FOO(@x)
8487 return True
8588
89+ # The variable appears in the AST of a macro call, but we don't know if it's metadata-only
8690 if appears_under_metadata_macro_func is False:
8791 return None
8892
93+ # The variable appears elsewhere, e.g., in the model's query: SELECT @x
8994 return False
9095
9196 def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool:
@@ -131,6 +136,14 @@ def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool:
131136 var_name, macro_func_or_var, is_metadata
132137 )
133138 elif id(macro_func_or_var) not in visited_macro_funcs:
139+ # We only care about the top-level macro function calls to determine the metadata
140+ # status of the variables referenced in their ASTs. For example, in @m1(@m2(@x)),
141+ # if m1 is metadata-only but m2 is not, we can still determine that @x only affects
142+ # the metadata hash, since m2's result feeds into a metadata-only macro function.
143+ #
144+ # Generally, if the top-level call is known to be metadata-only or appear in a
145+ # metadata expression, then we can avoid traversing nested macro function calls.
146+
134147 var_refs, _expr_under_metadata_macro_func, _visited_macro_funcs = (
135148 _extract_macro_func_variable_references(macro_func_or_var, is_metadata)
136149 )
@@ -192,7 +205,7 @@ def _extract_macro_func_variable_references(
192205 macro_func: exp.Expression,
193206 is_metadata: bool,
194207) -> t.Tuple[t.Set[str], t.Dict[int, bool], t.Set[int]]:
195- references = set()
208+ var_references = set()
196209 visited_macro_funcs = set()
197210 expr_under_metadata_macro_func = {}
198211
@@ -204,19 +217,19 @@ def _extract_macro_func_variable_references(
204217 args = this.expressions
205218
206219 if this.name.lower() in (c.VAR, c.BLUEPRINT_VAR) and args and args[0].is_string:
207- references .add(args[0].this.lower())
220+ var_references .add(args[0].this.lower())
208221 expr_under_metadata_macro_func[id(n)] = is_metadata
209222 elif isinstance(n, d.MacroVar):
210- references .add(n.name.lower())
223+ var_references .add(n.name.lower())
211224 expr_under_metadata_macro_func[id(n)] = is_metadata
212225 elif isinstance(n, (exp.Identifier, d.MacroStrReplace, d.MacroSQL)) and "@" in n.name:
213- references .update(
226+ var_references .update(
214227 (braced_identifier or identifier).lower()
215228 for _, identifier, braced_identifier, _ in MacroStrTemplate.pattern.findall(n.name)
216229 )
217230 expr_under_metadata_macro_func[id(n)] = is_metadata
218231
219- return (references , expr_under_metadata_macro_func, visited_macro_funcs)
232+ return (var_references , expr_under_metadata_macro_func, visited_macro_funcs)
220233
221234
222235def _add_variables_to_python_env(
@@ -238,16 +251,22 @@ def _add_variables_to_python_env(
238251 for var_name, is_metadata in python_used_variables.items():
239252 used_variables[var_name] = is_metadata and used_variables.get(var_name, True)
240253
241- # Variables are treated as metadata when:
242- # - They are only referenced in metadata-only contexts, such as `audits (...)`, virtual statements, etc
243- # - They are only referenced in metadata-only macros, either as their arguments or within their definitions
254+ # Variables are treated as metadata-only when all of their references either:
255+ # - appear in metadata-only expressions, such as `audits (...)`, virtual statements, etc
256+ # - appear in the ASTs or definitions of metadata-only macros
257+ #
258+ # See also: https://github.com/TobikoData/sqlmesh/pull/4936#issuecomment-3136339936,
259+ # specifically the "Terminology" and "Observations" sections.
244260 metadata_used_variables = {
245261 var_name for var_name, is_metadata in used_variables.items() if is_metadata
246262 }
247263 for used_var, outermost_macro_func in (outermost_macro_func_ancestor_by_var or {}).items():
248264 used_var_is_metadata = used_variables.get(used_var)
249265 if used_var_is_metadata is False:
250266 continue
267+
268+ # At this point we can decide whether a variable reference in a macro call's AST is
269+ # metadata-only, because we've annotated the corresponding macro call in the python env.
251270 if outermost_macro_func in python_env and python_env[outermost_macro_func].is_metadata:
252271 metadata_used_variables.add(used_var)
253272
0 commit comments